diff --git a/CMakeLists.txt b/CMakeLists.txt index fb796103350ac4403d4151cf08eb4315bcde68fd..b1554fba5e1fa48b5cbdfe2e5b9f317a4f7fefb3 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,8 +63,29 @@ if(WIN32) set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endif() + endforeach(flag_var) endif() - + + # windows build turn off warnings. + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) + string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}") + endforeach(flag_var) + foreach(flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS) + set(${flag_var} "${${flag_var}} /w") + endforeach(flag_var) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838 /MP") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838 /MP") message(STATUS "Using parallel compiling (/MP)") diff --git a/cmake/external/cryptopp.cmake b/cmake/external/cryptopp.cmake index af5dd0e2c9b2d19929f58363d08e7ff40d43b013..351ef1c7c7aebb698a5d41689352a913d0b950e8 100644 --- a/cmake/external/cryptopp.cmake +++ b/cmake/external/cryptopp.cmake @@ -22,23 +22,8 @@ SET(CRYPTOPP_TAG CRYPTOPP_8_2_0) IF(WIN32) SET(CRYPTOPP_LIBRARIES "${CRYPTOPP_INSTALL_DIR}/lib/cryptopp-static.lib" CACHE FILEPATH 
"cryptopp library." FORCE) - SET(CRYPTOPP_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd") - set(CompilerFlags - CMAKE_CXX_FLAGS - CMAKE_CXX_FLAGS_DEBUG - CMAKE_CXX_FLAGS_RELEASE - CMAKE_C_FLAGS - CMAKE_C_FLAGS_DEBUG - CMAKE_C_FLAGS_RELEASE - ) - foreach(CompilerFlag ${CompilerFlags}) - string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}") - endforeach() ELSE(WIN32) SET(CRYPTOPP_LIBRARIES "${CRYPTOPP_INSTALL_DIR}/lib/libcryptopp.a" CACHE FILEPATH "cryptopp library." FORCE) - SET(CRYPTOPP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) ENDIF(WIN32) set(CRYPTOPP_CMAKE_ARGS ${COMMON_CMAKE_ARGS} @@ -48,7 +33,7 @@ set(CRYPTOPP_CMAKE_ARGS ${COMMON_CMAKE_ARGS} -DCMAKE_INSTALL_LIBDIR=${CRYPTOPP_INSTALL_DIR}/lib -DCMAKE_INSTALL_PREFIX=${CRYPTOPP_INSTALL_DIR} -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - -DCMAKE_CXX_FLAGS=${CRYPTOPP_CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 415e07c75425345f5f1ad29a8544e02a5bfb12e4..ed0bf8396b3faa22350811cf1711f5d1e5b89998 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -90,20 +90,6 @@ macro(safe_set_nvflag flag_name) endif() endmacro() -macro(safe_set_static_flag) # set c_flags and cxx_flags to static or shared - if (BUILD_SHARED_LIBS) - return() # if build shared libs, the flags keep same with '/MD' - endif(BUILD_SHARED_LIBS) - foreach(flag_var - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO - CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE - CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) - if(${flag_var} MATCHES "/MD") - string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") - endif(${flag_var} MATCHES "/MD") - 
endforeach(flag_var) -endmacro() CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS) if(NOT UINT64_MAX_EXISTS) @@ -229,20 +215,3 @@ endforeach() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${SAFE_GPU_COMMON_FLAGS}") - -if(WIN32) - # windows build turn off warnings. - if(MSVC_STATIC_CRT) - safe_set_static_flag() - endif() - foreach(flag_var - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO - CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE - CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) - string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}") - endforeach(flag_var) - foreach(flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS) - set(${flag_var} "${${flag_var}} /w") - endforeach(flag_var) -endif() diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index 20f27715e00457a8fe43f5c620e2a005387d7988..f19f0eb43d34bd0f3748d7beb1fcf403fa1c9037 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -13,18 +13,18 @@ # limitations under the License. # make package for paddle fluid shared and static library -set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING - "A path setting fluid shared and static libraries") +set(PADDLE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_install_dir" CACHE STRING + "A path setting paddle shared and static libraries") -set(FLUID_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_inference_install_dir" CACHE STRING - "A path setting fluid inference shared and static libraries") +set(PADDLE_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_install_dir" CACHE STRING + "A path setting paddle inference shared and static libraries") # TODO(zhaolong) # At present, the size of static lib in Windows exceeds the system limit, # so the generation of static lib is temporarily turned off. 
if(WIN32) #todo: remove the option - option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." OFF) + option(WITH_STATIC_LIB "Compile demo with static/shared library, default use dynamic." OFF) if(NOT PYTHON_EXECUTABLE) FIND_PACKAGE(PythonInterp REQUIRED) endif() @@ -142,14 +142,14 @@ set(inference_lib_deps third_party paddle_fluid paddle_fluid_c paddle_fluid_shar add_custom_target(inference_lib_dist DEPENDS ${inference_lib_deps}) -set(dst_dir "${FLUID_INFERENCE_INSTALL_DIR}/third_party/threadpool") +set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/threadpool") copy(inference_lib_dist SRCS ${THREADPOOL_INCLUDE_DIR}/ThreadPool.h DSTS ${dst_dir}) # Only GPU need cudaErrorMessage.pb IF(WITH_GPU) - set(dst_dir "${FLUID_INFERENCE_INSTALL_DIR}/third_party/cudaerror/data") + set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/cudaerror/data") copy(inference_lib_dist SRCS ${cudaerror_INCLUDE_DIR} DSTS ${dst_dir}) @@ -158,65 +158,62 @@ ENDIF() # CMakeCache Info copy(inference_lib_dist SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt - DSTS ${FLUID_INFERENCE_INSTALL_DIR}) + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}) -copy_part_of_thrid_party(inference_lib_dist ${FLUID_INFERENCE_INSTALL_DIR}) +copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_INSTALL_DIR}) set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") if(WIN32) if(WITH_STATIC_LIB) - set(paddle_fluid_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/libpaddle_fluid.lib) + set(paddle_fluid_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/libpaddle_fluid.lib + ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/paddle_fluid.*) else() set(paddle_fluid_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/paddle_fluid.dll - ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/paddle_fluid.lib) + ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/paddle_fluid.lib) endif() + 
copy(inference_lib_dist + SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_fluid_lib} + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) else(WIN32) set(paddle_fluid_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*) -endif(WIN32) - -if(WIN32 AND NOT WITH_STATIC_LIB) - copy(inference_lib_dist - SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_fluid_lib} - DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib - ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib) -else() - copy(inference_lib_dist + copy(inference_lib_dist SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_fluid_lib} - DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib) -endif() + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) +endif(WIN32) copy(inference_lib_dist SRCS ${CMAKE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h - DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include/internal) + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/internal) copy(inference_lib_dist SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/framework/io/crypto/cipher.h - DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include/crypto/) + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/crypto/) include_directories(${CMAKE_BINARY_DIR}/../paddle/fluid/framework/io) # CAPI inference library for only inference -set(FLUID_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_inference_c_install_dir" CACHE STRING -"A path setting CAPI fluid inference shared") -copy_part_of_thrid_party(inference_lib_dist ${FLUID_INFERENCE_C_INSTALL_DIR}) +set(PADDLE_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_c_install_dir" CACHE STRING +"A path setting CAPI paddle inference shared") +copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_C_INSTALL_DIR}) set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") 
set(paddle_fluid_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/libpaddle_fluid_c.*) copy(inference_lib_dist SRCS ${src_dir}/inference/capi/paddle_c_api.h ${paddle_fluid_c_lib} - DSTS ${FLUID_INFERENCE_C_INSTALL_DIR}/paddle/include ${FLUID_INFERENCE_C_INSTALL_DIR}/paddle/lib) + DSTS ${PADDLE_INFERENCE_C_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_C_INSTALL_DIR}/paddle/lib) # fluid library for both train and inference set(fluid_lib_deps inference_lib_dist) add_custom_target(fluid_lib_dist ALL DEPENDS ${fluid_lib_deps}) -set(dst_dir "${FLUID_INSTALL_DIR}/paddle/fluid") +set(dst_dir "${PADDLE_INSTALL_DIR}/paddle/fluid") set(module "inference") -if(WIN32 AND NOT WITH_STATIC_LIB) +if(WIN32) copy(fluid_lib_dist SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/api/paddle_*.h ${paddle_fluid_lib} DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ) -else() + else() copy(fluid_lib_dist SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/api/paddle_*.h ${paddle_fluid_lib} DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} @@ -273,22 +270,22 @@ copy(fluid_lib_dist DSTS ${dst_dir}/${module} ) -set(dst_dir "${FLUID_INSTALL_DIR}/third_party/eigen3") +set(dst_dir "${PADDLE_INSTALL_DIR}/third_party/eigen3") copy(inference_lib_dist SRCS ${EIGEN_INCLUDE_DIR}/Eigen/Core ${EIGEN_INCLUDE_DIR}/Eigen/src ${EIGEN_INCLUDE_DIR}/unsupported/Eigen DSTS ${dst_dir}/Eigen ${dst_dir}/Eigen ${dst_dir}/unsupported) -set(dst_dir "${FLUID_INSTALL_DIR}/third_party/boost") +set(dst_dir "${PADDLE_INSTALL_DIR}/third_party/boost") copy(inference_lib_dist SRCS ${BOOST_INCLUDE_DIR}/boost DSTS ${dst_dir}) -set(dst_dir "${FLUID_INSTALL_DIR}/third_party/dlpack") +set(dst_dir "${PADDLE_INSTALL_DIR}/third_party/dlpack") copy(inference_lib_dist SRCS ${DLPACK_INCLUDE_DIR}/dlpack DSTS ${dst_dir}) -set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/zlib") +set(dst_dir "${PADDLE_INSTALL_DIR}/third_party/install/zlib") copy(inference_lib_dist SRCS 
${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES} DSTS ${dst_dir} ${dst_dir}/lib) @@ -296,8 +293,8 @@ copy(inference_lib_dist # CMakeCache Info copy(fluid_lib_dist - SRCS ${FLUID_INFERENCE_INSTALL_DIR}/third_party ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt - DSTS ${FLUID_INSTALL_DIR} ${FLUID_INSTALL_DIR} + SRCS ${PADDLE_INFERENCE_INSTALL_DIR}/third_party ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt + DSTS ${PADDLE_INSTALL_DIR} ${PADDLE_INSTALL_DIR} ) # paddle fluid version @@ -323,6 +320,6 @@ function(version version_file) endif() endfunction() -version(${FLUID_INSTALL_DIR}/version.txt) -version(${FLUID_INFERENCE_INSTALL_DIR}/version.txt) -version(${FLUID_INFERENCE_C_INSTALL_DIR}/version.txt) +version(${PADDLE_INSTALL_DIR}/version.txt) +version(${PADDLE_INFERENCE_INSTALL_DIR}/version.txt) +version(${PADDLE_INFERENCE_C_INSTALL_DIR}/version.txt) diff --git a/cmake/operators.cmake b/cmake/operators.cmake index aea972ab3db2af862f5230ea6c1eabeed8b611c5..21080fbe8fd2e14cf7fd805e01948f2f28535c22 100644 --- a/cmake/operators.cmake +++ b/cmake/operators.cmake @@ -127,7 +127,8 @@ function(op_library TARGET) "tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op" "fusion_transpose_flatten_concat_op" "fusion_conv_inception_op" "sync_batch_norm_op" "dgc_op" "fused_fc_elementwise_layernorm_op" -"multihead_matmul_op" "fusion_group_op" "fused_bn_activation_op" "fused_embedding_eltwise_layernorm_op" "fusion_gru_op") +"multihead_matmul_op" "fusion_group_op" "fused_bn_activation_op" "fused_embedding_eltwise_layernorm_op" "fusion_gru_op" +"fused_bn_add_activation_op") if ("${TARGET}" STREQUAL "${manual_pybind_op}") set(pybind_flag 1) endif() diff --git a/go/README_cn.md b/go/README_cn.md index 57af05ce0af59360f02b919b376d1e8a8843a531..8ffc31adf85a638c4f4a4aa0bee6d3b7f09ef7fb 100644 --- a/go/README_cn.md +++ b/go/README_cn.md @@ -1,7 +1,7 @@ # Paddle 预测golang API ## 安装 -首先cmake编译时打开`-DON_INFER=ON`,在编译目录下得到``fluid_inference_c_install_dir``,将该目录移动到当前目录中并重命名为`paddle_c` 
+首先cmake编译时打开`-DON_INFER=ON`,在编译目录下得到``paddle_inference_c_install_dir``,将该目录移动到当前目录中并重命名为`paddle_c` ## 在Go中使用Paddle预测 首先创建预测配置 diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index a3cc4d1721e20a72817606bd773129230a8154ce..8281ec2143890aa2bb886347ccc0eff8145c67f3 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -74,6 +74,7 @@ set(SSA_GRAPH_EXECUTOR_DEPS graph framework_proto eager_deletion_pass buffer_shared_inplace_op_pass buffer_shared_cross_op_memory_reuse_pass + inplace_addto_op_pass set_reader_device_info_utils add_reader_dependency_pass) cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ${SSA_GRAPH_EXECUTOR_DEPS}) diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc index 7fc08f3e0f20f243425b351b43c124d4519753f6..939a2fc8fc9c73472ff5c25633610fa70c7cec6d 100644 --- a/paddle/fluid/framework/details/all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc @@ -12,7 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#include "paddle/fluid/framework/details/all_reduce_op_handle.h" + #include + #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/framework/details/variable_visitor.h" @@ -34,14 +36,24 @@ AllReduceOpHandle::AllReduceOpHandle(ir::Node *node, const std::vector &places, const platform::NCCLCommunicator *ctxs) : NCCLOpHandleBase(node, places, ctxs), local_scopes_(local_scopes) { - PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size()); + PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size(), + platform::errors::InvalidArgument( + "The number of places and the number of local scopes " + "should be equal, but got number of places is %d and " + "number of local scopes is %d.", + places_.size(), local_scopes_.size())); } #else AllReduceOpHandle::AllReduceOpHandle(ir::Node *node, const std::vector &local_scopes, const std::vector &places) : OpHandleBase(node), local_scopes_(local_scopes), places_(places) { - PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size()); + PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size(), + platform::errors::InvalidArgument( + "The number of places and the number of local scopes " + "should be equal, but got number of places is %d and " + "number of local scopes is %d.", + places_.size(), local_scopes_.size())); } #endif @@ -60,13 +72,25 @@ void AllReduceOpHandle::AllReduceImpl( const std::vector &in_var_handles, const std::vector &out_var_handles) { size_t num_places = places_.size(); - PADDLE_ENFORCE_EQ( - in_var_handles.size(), num_places, - "The NoDummyInputSize should be equal to the number of places."); + PADDLE_ENFORCE_EQ(in_var_handles.size(), num_places, + platform::errors::InvalidArgument( + "The NoDummyInputSize should be equal " + "to the number of places, but got NoDummyInputSize is " + "%d and the number of place is %d.", + in_var_handles.size(), num_places)); PADDLE_ENFORCE_EQ( in_var_handles.size(), out_var_handles.size(), - "The 
NoDummyInputSize and NoDummyOutputSize should be equal."); - PADDLE_ENFORCE_EQ(local_exec_scopes_.size(), num_places); + platform::errors::InvalidArgument( + "The NoDummyInputSize and NoDummyOutputSize should be " + "equal, but got NoDummyInputSize is %d and NoDummyOutputSize is %d.", + in_var_handles.size(), out_var_handles.size())); + PADDLE_ENFORCE_EQ( + local_exec_scopes_.size(), num_places, + platform::errors::InvalidArgument( + "The number of local scopes should be equal " + "to the number of places, but got the number of local scopes is " + "%d and the number of place is %d.", + local_exec_scopes_.size(), num_places)); std::vector lod_tensor_data; std::vector places; @@ -78,23 +102,36 @@ void AllReduceOpHandle::AllReduceImpl( for (size_t i = 0; i < local_exec_scopes_.size(); ++i) { auto &local_scope = local_exec_scopes_[i]; auto var = local_scope->FindVar(in_var_handles[i]->name()); - PADDLE_ENFORCE_NOT_NULL(var, "%s is not found int scope.", - in_var_handles[i]->name()); + PADDLE_ENFORCE_NOT_NULL(var, platform::errors::NotFound( + "Variable %s is not found in local scope.", + in_var_handles[i]->name())); auto &lod_tensor = var->Get(); if (i == 0) { numel = static_cast(lod_tensor.numel()); // only enforce place0, we will enforce other palce numel == place0 numel PADDLE_ENFORCE_GT( - numel, 0, platform::errors::InvalidArgument( - "The numel of tensos=[%s] must > 0. 
But now numel=[%d]", - in_var_handles[i]->name(), numel)); + numel, 0, + platform::errors::PreconditionNotMet( + "The numel of tensor %s should be > 0, but got numel is %d.", + in_var_handles[i]->name(), numel)); dtype = lod_tensor.type(); is_gpu_place = platform::is_gpu_place(lod_tensor.place()); } - PADDLE_ENFORCE_EQ(numel, static_cast(lod_tensor.numel())); - PADDLE_ENFORCE_EQ(dtype, lod_tensor.type()); - PADDLE_ENFORCE_EQ(is_gpu_place, platform::is_gpu_place(lod_tensor.place())); + PADDLE_ENFORCE_EQ( + numel, static_cast(lod_tensor.numel()), + platform::errors::PreconditionNotMet( + "The size of tensors of the same variable in different local " + "scopes should be equal.")); + PADDLE_ENFORCE_EQ( + dtype, lod_tensor.type(), + platform::errors::PreconditionNotMet( + "The dtype of tensors of the same variable in different local " + "scopes should be equal.")); + PADDLE_ENFORCE_EQ(is_gpu_place, platform::is_gpu_place(lod_tensor.place()), + platform::errors::PreconditionNotMet( + "The place type of tensors of the same variable " + "in different local scopes should be equal.")); lod_tensor_data.emplace_back(lod_tensor.data()); places.emplace_back(lod_tensor.place()); @@ -102,8 +139,12 @@ void AllReduceOpHandle::AllReduceImpl( VLOG(10) << "place:" << i << ", input_name:" << in_var_handles[i]->name() << ", out_name:" << out_var_handles[i]->name(); - PADDLE_ENFORCE_EQ(in_var_handles[i]->name(), out_var_handles[i]->name(), - "The name of input and output should be equal."); + PADDLE_ENFORCE_EQ( + in_var_handles[i]->name(), out_var_handles[i]->name(), + platform::errors::InvalidArgument( + "The name of input and output of all_reduce op should be equal, " + "but got input is %s and output is %s.", + in_var_handles[i]->name(), out_var_handles[i]->name())); } std::vector grad_var_names; @@ -122,7 +163,9 @@ void AllReduceOpHandle::AllReduceFunc( const std::vector &out_var_names) { if (is_gpu_place(places[0])) { #if defined(PADDLE_WITH_NCCL) - 
PADDLE_ENFORCE_NOT_NULL(nccl_ctxs_, "nccl_ctxs should not be nullptr."); + PADDLE_ENFORCE_NOT_NULL(nccl_ctxs_, + platform::errors::InvalidArgument( + "The nccl context should not be NULL.")); ncclDataType_t nccl_dtype = platform::ToNCCLDataType(dtype); std::vector> all_reduce_calls; for (size_t i = 0; i < local_exec_scopes_.size(); ++i) { @@ -134,7 +177,8 @@ void AllReduceOpHandle::AllReduceFunc( } NCCLAllReduceFunc(all_reduce_calls); #else - PADDLE_THROW("Not compiled with CUDA."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with CUDA.")); #endif } else { // Special handle CPU only Operator's gradient. Like CRF auto &trg = *local_exec_scopes_[0] diff --git a/paddle/fluid/framework/details/async_ssa_graph_executor.cc b/paddle/fluid/framework/details/async_ssa_graph_executor.cc index d42bd0b16d7a84987517326af9567809fd29da4d..12c0d6749029c657a829e8d2b04a2113fbe8946a 100644 --- a/paddle/fluid/framework/details/async_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/async_ssa_graph_executor.cc @@ -89,8 +89,19 @@ AsyncSSAGraphExecutor::AsyncSSAGraphExecutor( places_(std::move(places)), graphs_(std::move(graphs)) { VLOG(3) << "build AsyncSSAGraphExecutor"; - PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size()); - PADDLE_ENFORCE_EQ(local_scopes_.size(), local_exec_scopes_.size()); + PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size(), + platform::errors::InvalidArgument( + "The number of places and the number of local scopes " + "should be equal, but got number of places is %d and " + "number of local scopes is %d.", + places_.size(), local_scopes_.size())); + PADDLE_ENFORCE_EQ( + local_scopes_.size(), local_exec_scopes_.size(), + platform::errors::InvalidArgument( + "The number of local scopes and the number of local execution scopes " + "should be equal, but got number of local scopes is %d and " + "number of local execution scopes is %d.", + local_scopes_.size(), local_exec_scopes_.size())); // set the correct size of thread pool 
to each device. strategy_.num_threads_ = strategy_.num_threads_ < places_.size() diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h index 5388df6bc504203abb57237f2d23a324367ce087..01d496d4ea7f7f0d0347b552e13d988fdc68e0c7 100644 --- a/paddle/fluid/framework/details/build_strategy.h +++ b/paddle/fluid/framework/details/build_strategy.h @@ -19,6 +19,7 @@ #include #include #include + #include "boost/optional.hpp" #include "paddle/fluid/framework/ir/pass_builder.h" #include "paddle/fluid/framework/program_desc.h" @@ -119,6 +120,9 @@ struct BuildStrategy { // Turn on inplace by default. bool enable_inplace_{true}; + // Turn off inplace addto by default. + bool enable_addto_{false}; + // FIXME(zcd): is_distribution_ is a temporary field, because in pserver mode, // num_trainers is 1, so the current fields of build_strategy doesn't tell if // it's distributed model. diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc index e440dff2af6b5649d34f47c3b696edeb8a1ba0a2..7f1d3c9b340c9ee92c45c038bf42cf409d535158 100644 --- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h" + #include #include #include #include #include #include + #include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/fetch_async_op_handle.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" @@ -48,7 +50,9 @@ FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor( bootstrap_ops_.emplace_back(op); } } - PADDLE_ENFORCE_GT(op_deps_.size(), 0, "The graph doesn't have operators."); + PADDLE_ENFORCE_GT(op_deps_.size(), 0, + platform::errors::PreconditionNotMet( + "The graph doesn't have operators.")); PrepareAtomicOpDeps(); } diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index ae69960ef78c3e35143c66226133bd0dceac8b79..aedb8db46a5d9c90f176588d1dfd206e0abaf616 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -13,9 +13,11 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/fetch_op_handle.h" + #include #include #include + #include "paddle/fluid/platform/profiler.h" namespace paddle { @@ -138,8 +140,10 @@ void FetchOpHandle::RunImpl() { auto *var_handle = static_cast(inputs_[i]); auto &scope = scopes.at(var_handle->scope_idx()); auto *var = scope->FindVar(var_handle->name()); - PADDLE_ENFORCE_NOT_NULL(var, "Cannot find variable %s in execution scope", - var_handle->name()); + PADDLE_ENFORCE_NOT_NULL( + var, + platform::errors::NotFound( + "Cannot find variable %s in execution scope.", var_handle->name())); if (var->IsType()) { auto &t = var->Get(); diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc index 35fe5d631fbaad61ce64ccf70d58d176aa3d3a20..459bcff5c0b740be0d495a6ad648da7424bd1a42 100644 --- a/paddle/fluid/framework/details/op_handle_base.cc +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#include "paddle/fluid/framework/details/op_handle_base.h" + #include #include @@ -88,6 +89,12 @@ void OpHandleBase::Run(bool use_cuda) { PADDLE_ENFORCE(!use_cuda); #endif + // skip running current op, used with inplace_addto_op_pass + if (skip_running_) { + VLOG(4) << "skip running: " << Name(); + return; + } + RunImpl(); } diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h index c5aa1295aad695175e53b17d729006ffc67ce3ab..097f54d5d5891390fdd479d3e6f62ae0e97cd0d4 100644 --- a/paddle/fluid/framework/details/op_handle_base.h +++ b/paddle/fluid/framework/details/op_handle_base.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/details/var_handle.h" #include "paddle/fluid/framework/ir/node.h" #include "paddle/fluid/platform/device_context.h" @@ -52,6 +53,10 @@ class OpHandleBase { virtual Priority GetPriority() const { return kNormal; } + virtual bool GetSkipRunning() const { return skip_running_; } + + virtual void SetSkipRunning(bool skip_runing) { skip_running_ = skip_runing; } + virtual std::string Name() const = 0; void Run(bool use_cuda); @@ -131,6 +136,7 @@ class OpHandleBase { std::map dev_ctxes_; std::vector local_exec_scopes_; + bool skip_running_ = false; #ifdef PADDLE_WITH_CUDA std::unordered_map events_; diff --git a/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc b/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc index e7d466c4af0711219c5a10a4c739ae3eb998e27d..35834fe5d7480819311a15ec54ab9412fc0a7cee 100644 --- a/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc @@ -13,9 +13,11 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/parallel_ssa_graph_executor.h" + #include #include #include + #include "paddle/fluid/framework/ir/graph_helper.h" namespace paddle { @@ -104,7 +106,12 @@ ParallelSSAGraphExecutor::ParallelSSAGraphExecutor( places_(places), graphs_(std::move(graphs)), feed_status_(places.size(), FeedStatus::kNone) { - PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size()); + PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size(), + platform::errors::InvalidArgument( + "The number of places and the number of local scopes " + "should be equal, but got number of places is %d and " + "number of local scopes is %d.", + places_.size(), local_scopes_.size())); PADDLE_ENFORCE_EQ(places_.size(), graphs_.size(), platform::errors::InvalidArgument( diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc index fe86d002ca8b33695839be3c5d2ff5fd20672952..7cc1f54131416ed454846c75c8c8a6849ec20e6c 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc @@ -13,10 +13,12 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h" + #include #include #include #include + #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/variable_helper.h" @@ -37,7 +39,13 @@ ScopeBufferedSSAGraphExecutor::ScopeBufferedSSAGraphExecutor( var_infos_(std::move(var_infos)), places_(std::move(places)), scope_monitor_(places_, local_exec_scopes_) { - PADDLE_ENFORCE_EQ(local_scopes_.size(), local_exec_scopes_.size()); + PADDLE_ENFORCE_EQ( + local_scopes_.size(), local_exec_scopes_.size(), + platform::errors::InvalidArgument( + "The number of local scopes and the number of local execution scopes " + "should be equal, but got number of local scopes is %d and " + "number of local execution scopes is %d.", + local_scopes_.size(), local_exec_scopes_.size())); PrepareLocalExeScopes(); } diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc index 6fdec553f3d65debdf8f6d95eeeb8ebe30b4a36a..5fbaf3cbfe028638ad9219d9e1286480ae16ee6b 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc +++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc @@ -13,9 +13,11 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/share_tensor_buffer_functor.h" + #include #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/platform/enforce.h" @@ -29,7 +31,8 @@ static inline const Tensor &GetTensorFromVar(const Variable *var) { if (var->IsType()) { return var->Get(); } else { - PADDLE_THROW("Variable must be type of LoDTensor"); + PADDLE_THROW(platform::errors::InvalidArgument( + "Variable must be type of LoDTensor.")); } } @@ -37,20 +40,27 @@ static inline Tensor *GetMutableTensorFromVar(Variable *var) { if (var->IsType()) { return var->GetMutable(); } else { - PADDLE_THROW("Variable must be type of LoDTensor"); + PADDLE_THROW(platform::errors::InvalidArgument( + "Variable must be type of LoDTensor.")); } } ShareTensorBufferFunctor::ShareTensorBufferFunctor( Scope *scope, size_t scope_idx, const std::string &op_type, const std::vector &in_var_infos, - const std::vector &out_var_names) + const std::vector &out_var_names, bool share_dims) : scope_(scope), scope_idx_(scope_idx), op_type_(op_type), in_var_infos_(in_var_infos), - out_var_names_(out_var_names) { - PADDLE_ENFORCE_EQ(in_var_infos_.size(), out_var_names_.size()); + out_var_names_(out_var_names), + share_dims_(share_dims) { + PADDLE_ENFORCE_EQ(in_var_infos_.size(), out_var_names_.size(), + platform::errors::PreconditionNotMet( + "The number of input variables and output variables " + "should be equal, but got number of input variables is " + "%d and number of output variables is %d.", + in_var_infos_.size(), out_var_names_.size())); for (size_t i = 0; i < in_var_infos_.size(); ++i) { AddReuseVarPair(in_var_infos_[i], out_var_names_[i]); } @@ -67,32 +77,59 @@ ShareTensorBufferFunctor::ReusedVars() const { void ShareTensorBufferFunctor::AddReuseVarPair( const ir::MemOptVarInfo *in_var_info, const std::string &out_var_name) { - PADDLE_ENFORCE_NOT_NULL(in_var_info, "in_var_info cannot be nullptr"); + 
PADDLE_ENFORCE_NOT_NULL( + in_var_info, + platform::errors::InvalidArgument( + "The input variables to be inplaced should not be NULL.")); PADDLE_ENFORCE_NE(in_var_info->Name(), out_var_name, - "in/out cannot have same name: %s", out_var_name); + platform::errors::InvalidArgument( + "The input variable and output variable to be inplaced " + "cannot have the same name: %s.", + out_var_name)); in_var_infos_.emplace_back(in_var_info); out_var_names_.emplace_back(out_var_name); } void ShareTensorBufferFunctor::CallOnce() { - PADDLE_ENFORCE(in_out_vars_.empty(), "in_out_vars_ must be initialized here"); + PADDLE_ENFORCE(in_out_vars_.empty(), + platform::errors::InvalidArgument( + "The input-output variable pairs to be " + "inplaced should be initialized here.")); for (size_t i = 0; i < in_var_infos_.size(); ++i) { auto *in_var = exec_scope_->FindVar(in_var_infos_[i]->Name()); auto *out_var = exec_scope_->FindVar(out_var_names_[i]); - PADDLE_ENFORCE_NOT_NULL(in_var); - PADDLE_ENFORCE_NOT_NULL(out_var); - PADDLE_ENFORCE_NE(in_var, out_var); + PADDLE_ENFORCE_NOT_NULL( + in_var, platform::errors::NotFound( + "The input variable(%s)to be inplaced should not be NULL.", + in_var_infos_[i]->Name())); + PADDLE_ENFORCE_NOT_NULL( + out_var, + platform::errors::NotFound( + "The output variable(%s) to be inplaced should not be NULL.", + out_var_names_[i])); + PADDLE_ENFORCE_NE( + in_var, out_var, + platform::errors::PreconditionNotMet( + "The input variable and output variable to be inplaced " + "cannot be the same variable(%s).", + out_var_names_[i])); in_out_vars_.emplace_back(in_var, out_var); } } void ShareTensorBufferFunctor::operator()(Scope *exec_scope) { if (!exec_scope_) { - PADDLE_ENFORCE_NOT_NULL(exec_scope); + PADDLE_ENFORCE_NOT_NULL(exec_scope, + platform::errors::InvalidArgument( + "The given execution scope should not be NULL " + "if the cached scope is NULL.")); exec_scope_ = exec_scope; CallOnce(); } else { - PADDLE_ENFORCE(exec_scope_ == exec_scope, "Scope must be 
the same"); + PADDLE_ENFORCE_EQ(exec_scope_, exec_scope, + platform::errors::InvalidArgument( + "The given execution scope and the cached execution " + "scope should be the same.")); } for (size_t i = 0; i < in_var_infos_.size(); ++i) { @@ -115,6 +152,13 @@ void ShareTensorBufferFunctor::operator()(Scope *exec_scope) { } else { out_tensor->ShareBufferWith(in_tensor); + // NOTE(zhiqiu): In the case of inplace addto, if the operator of + // the in_out_vars is skipped during running, we should set the dims of + // output as the same as input. + if (share_dims_) { + out_tensor->Resize(in_tensor.dims()); + } + VLOG(2) << "Share tensor buffer when running " << op_type_ << " : " << in_var_info->Name() << " -> " << out_var_names_[i]; } diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.h b/paddle/fluid/framework/details/share_tensor_buffer_functor.h index 774dcd056e59bc8f090a5ceb916e73843c8c9df6..be49d1c432b2ab2b9741d873ba005b400e9f0829 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_functor.h +++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.h @@ -19,6 +19,7 @@ #include #include #include + #include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h" #include "paddle/fluid/framework/scope.h" @@ -40,11 +41,13 @@ class ShareTensorBufferFunctor { ShareTensorBufferFunctor( Scope *scope, size_t scope_idx, const std::string &op_type, const std::vector &in_var_infos, - const std::vector &out_var_names); + const std::vector &out_var_names, bool share_dims = false); void AddReuseVarPair(const ir::MemOptVarInfo *in_var_info, const std::string &out_var_name); + void SetShareDims(bool share_dims) { share_dims_ = share_dims; } + void operator()(Scope *exec_scope); std::unordered_map ReusedVars() const; @@ -66,6 +69,11 @@ class ShareTensorBufferFunctor { std::vector out_var_names_; std::vector> in_out_vars_; + + // NOTE(zhiqiu): In the case of 
inplace addto, if the operator of + // the in_out_vars is skipped during running, we should set the dims of output + // as the same as input. + bool share_dims_{false}; }; } // namespace details diff --git a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc index f06507257f1e9fc8b1783201adb533ec7b032c09..be3f5515a971900258ab5914b579deffe5d5b7d6 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc +++ b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/framework/details/share_tensor_buffer_op_handle.h" + #include #include + #include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" @@ -32,26 +34,35 @@ ComputationOpHandle *GetUniquePendingComputationOpHandle( for (ir::Node *pending_op : out_var->outputs) { auto &op = pending_op->Wrapper(); auto *compute_op = dynamic_cast(&op); - PADDLE_ENFORCE_NOT_NULL(compute_op); + PADDLE_ENFORCE_NOT_NULL( + compute_op, + platform::errors::PreconditionNotMet( + "The pending OpHandle should be ComputationOpHandle.")); if (result_op == nullptr) { result_op = compute_op; } else { - PADDLE_ENFORCE_EQ(result_op, compute_op); + PADDLE_ENFORCE_EQ( + result_op, compute_op, + platform::errors::PreconditionNotMet( + "The pending OpHandle should be the unique one.")); } } } - PADDLE_ENFORCE_NOT_NULL(result_op); + PADDLE_ENFORCE_NOT_NULL(result_op, + platform::errors::PreconditionNotMet( + "The pending OpHandle should not be NULL.")); return result_op; } ShareTensorBufferOpHandle::ShareTensorBufferOpHandle( ir::Node *node, Scope *scope, size_t scope_idx, const std::string &op_type, const std::vector &in_var_infos, - const std::vector &out_var_names) + const std::vector &out_var_names, bool share_dims) : OpHandleBase(node), - 
functor_(scope, scope_idx, op_type, in_var_infos, out_var_names) {} + functor_(scope, scope_idx, op_type, in_var_infos, out_var_names, + share_dims) {} std::unordered_map ShareTensorBufferOpHandle::ReusedVars() const { @@ -63,6 +74,10 @@ void ShareTensorBufferOpHandle::AddReuseVarPair( functor_.AddReuseVarPair(in_var_info, out_var_name); } +void ShareTensorBufferOpHandle::SetShareDims(bool share_dims) { + functor_.SetShareDims(share_dims); +} + void ShareTensorBufferOpHandle::InitCUDA() { #ifdef PADDLE_WITH_CUDA int dev_id = diff --git a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h index b22f5621fe44d887d70d82ce4dc9e26596d23f4e..a02c346485eca813f0d0f0b432b8b647e2fe4414 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h +++ b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/details/share_tensor_buffer_functor.h" @@ -31,7 +32,7 @@ class ShareTensorBufferOpHandle : public OpHandleBase { ir::Node *node, Scope *scope, size_t scope_idx, const std::string &op_type, const std::vector &in_vars_infos, - const std::vector &out_var_names); + const std::vector &out_var_names, bool share_dims = false); std::unordered_map ReusedVars() const; @@ -42,6 +43,8 @@ class ShareTensorBufferOpHandle : public OpHandleBase { void AddReuseVarPair(const ir::MemOptVarInfo *in_var_info, const std::string &out_var_name); + void SetShareDims(bool share_dims); + const ShareTensorBufferFunctor &Functor() const { return functor_; } protected: diff --git a/paddle/fluid/framework/details/ssa_graph_executor.cc b/paddle/fluid/framework/details/ssa_graph_executor.cc index 71123f708e3ca149d9fd634f55652cede5a57b50..2723a46dcfae3582a9286bcacba8d2e0a4990ac5 100644 --- 
a/paddle/fluid/framework/details/ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/ssa_graph_executor.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/details/ssa_graph_executor.h" + #include "paddle/fluid/framework/details/fetch_async_op_handle.h" namespace paddle { @@ -27,8 +28,9 @@ void ClearFetchOp(ir::Graph* graph, std::vector* fetch_ops) { PADDLE_ENFORCE_EQ(dynamic_cast(op) != nullptr || dynamic_cast(op) != nullptr, true, - "The input ops of ClearFetchOp function should be " - "FetchOpHandle or FetchAsyncOpHandle."); + platform::errors::PreconditionNotMet( + "The input ops of ClearFetchOp function should be " + "FetchOpHandle or FetchAsyncOpHandle.")); for (auto& out_var : op->Node()->outputs) { graph->RemoveNode(out_var); } diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 92c3a0cd6b9c01497199fece0a9bdafc89f64678..2ed52b3bd94733e329ccf8270054b23b1ad29d87 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h" + #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/platform/profiler.h" @@ -138,7 +139,10 @@ inline FetchResultType ThreadedSSAGraphExecutor::RunImpl( } } } - PADDLE_ENFORCE(ready_ops.empty()); + PADDLE_ENFORCE_EQ( + ready_ops.empty(), true, + platform::errors::Fatal("After the execution of computation graph, " + "there are unexecuted operators left.")); } // Wait FetchOps. 
@@ -165,9 +169,8 @@ void ThreadedSSAGraphExecutor::InsertFetchOps( FetchResultType *fetch_data, bool return_merged) { std::unordered_map> fetched_vars; std::unordered_set local_ready_vars; - std::unordered_set fetch_tensor_set(fetch_tensors.begin(), - fetch_tensors.end()); - for (auto &fetch_var_name : fetch_tensor_set) { + + for (auto &fetch_var_name : fetch_tensors) { for (auto &var_map : graph_->Get(details::kGraphVars)) { auto it = var_map.find(fetch_var_name); if (it != var_map.end()) { @@ -231,7 +234,11 @@ void ThreadedSSAGraphExecutor::InsertFetchOps( ready_ops->insert(static_cast(op)); } } - PADDLE_ENFORCE_EQ(local_ready_vars.size(), 0); + PADDLE_ENFORCE_EQ( + local_ready_vars.size(), 0, + platform::errors::Fatal( + "The number of ready variables should be 0, but got %d.", + local_ready_vars.size())); } void ThreadedSSAGraphExecutor::InsertPendingOp( @@ -277,7 +284,9 @@ void ThreadedSSAGraphExecutor::PrepareOpDeps() { } } op_deps_->num_ops_ = ready_ops.size() + pending_ops.size(); - PADDLE_ENFORCE_GT(op_deps_->num_ops_, 0, "The graph doesn't have operators."); + PADDLE_ENFORCE_GT( + op_deps_->num_ops_, 0, + platform::errors::InvalidArgument("The graph doesn't have operators.")); for (auto ready_var : ready_vars) { pending_vars.erase(ready_var); diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index b8b584f27200bd3f89efcc20be2c6a3435274a56..45fa3adbf14080317fe004a7113b58d34145447d 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -14,6 +14,8 @@ #pragma once +#include // ThreadPool in thrird party + #include #include #include @@ -24,8 +26,6 @@ #include #include -#include // ThreadPool in thrird party - #include "paddle/fluid/framework/blocking_queue.h" #include "paddle/fluid/framework/details/exception_holder.h" #include "paddle/fluid/framework/details/execution_strategy.h" diff 
--git a/paddle/fluid/framework/details/var_handle.h b/paddle/fluid/framework/details/var_handle.h index 86428f8b7613760f59a1166189c61f3217d8017d..bb38424d3ae2d74f6f0a48e11df95b60dbf432f3 100644 --- a/paddle/fluid/framework/details/var_handle.h +++ b/paddle/fluid/framework/details/var_handle.h @@ -54,8 +54,10 @@ struct VarHandleBase { void AddOutput(OpHandleBase* out, ir::Node* node) { if (pending_ops_.find(out) == pending_ops_.end()) { - PADDLE_ENFORCE(out != nullptr, "The output of %s should not be nullptr", - this->Node()->Name()); + PADDLE_ENFORCE_NOT_NULL(out, + platform::errors::InvalidArgument( + "The output added to VarHandle %s is NULL.", + this->Node()->Name())); pending_ops_.insert(out); node_->outputs.push_back(node); } @@ -120,7 +122,10 @@ struct VarHandle : public VarHandleBase { bool HasEvent() { return has_event_; } const cudaEvent_t& GetEvent() { - PADDLE_ENFORCE(HasEvent(), "The event is not set."); + PADDLE_ENFORCE_EQ( + HasEvent(), true, + platform::errors::PreconditionNotMet( + "The cuda event is not set, maybe InitCUDA() is not called.")); return event_; } diff --git a/paddle/fluid/framework/details/variable_visitor.cc b/paddle/fluid/framework/details/variable_visitor.cc index 134f759081a0778194c20785e215420d6e2bb622..fba0c1bf463ee0b9a434c350474af4be0c589e30 100644 --- a/paddle/fluid/framework/details/variable_visitor.cc +++ b/paddle/fluid/framework/details/variable_visitor.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/variable_visitor.h" + #include "paddle/fluid/framework/selected_rows.h" namespace paddle { namespace framework { @@ -24,7 +25,9 @@ static void VisitVariable(Variable* var, Func* func) { } else if (var->IsType()) { (*func)(var->GetMutable()); } else { - PADDLE_THROW("Not supported type %s", ToTypeName(var->Type())); + PADDLE_THROW(platform::errors::Unimplemented( + "VisitVariable is not supported for type %s.", + ToTypeName(var->Type()))); } } @@ -35,7 +38,8 @@ static void VisitVariable(const Variable& var, Func* func) { } else if (var.IsType()) { (*func)(var.Get()); } else { - PADDLE_THROW("Not supported type %s", ToTypeName(var.Type())); + PADDLE_THROW(platform::errors::Unimplemented( + "VisitVariable is not supported for type %s.", ToTypeName(var.Type()))); } } @@ -50,7 +54,8 @@ struct TensorVisitor { template void operator()() { - PADDLE_THROW("Not Support to get LoDTensor from %s", typeid(T).name()); + PADDLE_THROW(platform::errors::Unimplemented( + "Getting tensor from type %s is not supported.", typeid(T).name())); } }; @@ -78,8 +83,8 @@ struct ShareDimsAndLoDVisitor { template void operator()(const T&) { - PADDLE_ENFORCE("ShareDimsAndLoD is not supported by type %s", - typeid(T).name()); + PADDLE_THROW(platform::errors::Unimplemented( + "ShareDimsAndLoD is not supported for type %s.", typeid(T).name())); } }; @@ -89,42 +94,54 @@ void VariableVisitor::ShareDimsAndLoD(const Variable& src, Variable* trg) { } struct EnforceShapeAndDTypeEQVisitor { - const Variable* trg_; + const Variable* dst_; void operator()(const LoDTensor& src) { - auto& tensor = trg_->Get(); - PADDLE_ENFORCE_EQ( - src.place().which(), tensor.place().which(), - "The Places of the two Variable must be all on CPU or all on GPU."); + auto& tensor = dst_->Get(); + PADDLE_ENFORCE_EQ(src.place().which(), tensor.place().which(), + platform::errors::PreconditionNotMet( + "The place type of the two variables is not equal.")); PADDLE_ENFORCE_EQ(src.type(), 
tensor.type(), - "The dtype of the two Variable is not equal."); - PADDLE_ENFORCE_EQ(src.dims(), tensor.dims(), - "The dims of the two Variable is not equal."); + platform::errors::PreconditionNotMet( + "The dtype of the two variables is not equal.")); + PADDLE_ENFORCE_EQ( + src.dims(), tensor.dims(), + platform::errors::PreconditionNotMet( + "The layout of the two variables' tensors is not equal.")); PADDLE_ENFORCE_EQ(src.lod(), tensor.lod(), - "The lod of the two Variable is not equal."); - PADDLE_ENFORCE_EQ(src.layout(), tensor.layout(), - "The layout of the two Variable's tensor is not equal."); + platform::errors::PreconditionNotMet( + "The lod of the two variable is not equal.")); + PADDLE_ENFORCE_EQ( + src.layout(), tensor.layout(), + platform::errors::PreconditionNotMet( + "The layout of the two variables' tensors tensor is not equal.")); } void operator()(const SelectedRows& src) { - auto& selected_rows = trg_->Get(); - PADDLE_ENFORCE_EQ( - src.place().which(), selected_rows.place().which(), - "The Places of the two Variable must be all on CPU or all on GPU."); + auto& selected_rows = dst_->Get(); + PADDLE_ENFORCE_EQ(src.place().which(), selected_rows.place().which(), + platform::errors::PreconditionNotMet( + "The place type of the two variables is not equal.")); PADDLE_ENFORCE_EQ(src.value().type(), selected_rows.value().type(), - "The dtype of the two Variable is not equal."); - PADDLE_ENFORCE_EQ(src.value().layout(), selected_rows.value().layout(), - "The layout of the two Variable's tensor is not equal."); + platform::errors::PreconditionNotMet( + "The dtype of the two variables is not equal.")); + PADDLE_ENFORCE_EQ( + src.value().layout(), selected_rows.value().layout(), + platform::errors::PreconditionNotMet( + "The layout of the two variables' tensors is not equal.")); PADDLE_ENFORCE_EQ(src.height(), selected_rows.height(), - "The height of the two Variable is not equal."); + platform::errors::PreconditionNotMet( + "The height of the two variables 
is not equal.")); PADDLE_ENFORCE_EQ(src.GetCompleteDims(), selected_rows.GetCompleteDims(), - "The dims of the two Variable is not equal."); + platform::errors::PreconditionNotMet( + "The dims of the two variables is not equal.")); } template void operator()(const T&) { - PADDLE_ENFORCE("EnforceShapeAndDTypeEQ is not supported by type %s", - typeid(T).name()); + PADDLE_THROW(platform::errors::Unimplemented( + "EnforceShapeAndDTypeEQ is not supported for type %s.", + typeid(T).name())); } }; diff --git a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc index c50b7476c6a9616a784646b3ef6a43140ac2d401..02e3e2542f6e8dea47c53fd298c7ae7512a72c36 100644 --- a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc @@ -23,6 +23,8 @@ #include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/platform/cpu_info.h" +#include "paddle/fluid/framework/op_version_registry.h" + namespace paddle { namespace framework { namespace ir { @@ -34,7 +36,7 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, // Build pattern PDNode* x = pattern->NewNode(patterns::PDNodeName(name_scope, "x")) - ->assert_is_op_input("lookup_table") + ->assert_is_op_input("lookup_table_v2") ->assert_var_not_persistable(); patterns::Embedding embedding_pattern(pattern, name_scope); // TODO(jczaja): Intermediate can only be for val that are not used anywhere @@ -256,3 +258,11 @@ void EmbeddingFCLSTMFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(embedding_fc_lstm_fuse_pass, paddle::framework::ir::EmbeddingFCLSTMFusePass); +REGISTER_PASS_CAPABILITY(embedding_fc_lstm_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("lookup_table_v2", 0) + .EQ("mul", 0) + .EQ("elementwise_add", 0) + .EQ("lstm", 0) + .EQ("fused_embedding_fc_lstm", 0)); diff --git a/paddle/fluid/framework/ir/fc_fuse_pass.cc 
b/paddle/fluid/framework/ir/fc_fuse_pass.cc index 066a8fb975740ad5e45b4840a7404160d086b6f0..d60510a4074997a028cd914ca7a0e76335801c80 100644 --- a/paddle/fluid/framework/ir/fc_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_fuse_pass.cc @@ -18,6 +18,7 @@ #include #include #include "paddle/fluid/framework/ir/graph_helper.h" +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -182,3 +183,10 @@ int FCFusePass::ApplyFCPattern(Graph* graph, bool with_relu) const { REGISTER_PASS(fc_fuse_pass, paddle::framework::ir::FCFusePass) .RequirePassAttr("use_gpu"); +REGISTER_PASS_CAPABILITY(fc_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("mul", 0) + .EQ("elementwise_add", 0) + .EQ("relu", 0) + .EQ("fc", 0)); diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc index a2185cdc5593cc36ed6ceda839fb13c28b45600c..f5fea90ac2fcee8e9c48ca21203b3b60cd7f7166 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc @@ -16,6 +16,7 @@ #include #include #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace framework { @@ -125,7 +126,6 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, auto* x_n = subgraph.at(x); GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern); - GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(Weight, Weight, gru_pattern); GET_IR_NODE_FROM_SUBGRAPH(gru, gru, gru_pattern); GET_IR_NODE_FROM_SUBGRAPH(Bias, Bias, gru_pattern); @@ -136,10 +136,17 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, gru_pattern); GET_IR_NODE_FROM_SUBGRAPH(BatchHidden, BatchHidden, gru_pattern); + // TODO(wilber): Support origin_mode=True. 
+ if (gru->Op()->GetAttrIfExists("origin_mode") == true) { + LOG(INFO) << "fc_gru_fuse_pass not supported when origin_mode=True."; + return; + } + if (with_fc_bias) { GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern); + GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern); gru_creater(gru, x_n, w, Weight, Bias, Hidden, fc_bias); // Remove unneeded nodes. @@ -188,3 +195,16 @@ void FCGRUFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(mul_gru_fuse_pass, paddle::framework::ir::MulGRUFusePass); REGISTER_PASS(fc_gru_fuse_pass, paddle::framework::ir::FCGRUFusePass); +REGISTER_PASS_CAPABILITY(mul_gru_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("mul", 0) + .EQ("gru", 0) + .EQ("fusion_gru", 0)); +REGISTER_PASS_CAPABILITY(fc_gru_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("mul", 0) + .EQ("elementwise_add", 0) + .EQ("gru", 0) + .EQ("fusion_gru", 0)); diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc index 12c7fc051e23a946ec9049e061499056f009bfa3..a3c57e14e1aedbed1e4cf462d4883cd83bf2fa10 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc @@ -16,6 +16,7 @@ #include #include #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace framework { @@ -196,3 +197,17 @@ void FCLstmFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(mul_lstm_fuse_pass, paddle::framework::ir::MulLstmFusePass); REGISTER_PASS(fc_lstm_fuse_pass, paddle::framework::ir::FCLstmFusePass); + +REGISTER_PASS_CAPABILITY(fc_lstm_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("mul", 0) + 
.EQ("elementwise_add", 0) + .EQ("lstm", 0) + .EQ("fusion_lstm", 0)); +REGISTER_PASS_CAPABILITY(mul_lstm_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("mul", 0) + .EQ("lstm", 0) + .EQ("fusion_lstm", 0)); diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt index 726a2d90fcf03c3e2023485e983ea64f93231f73..a8c0973cac488ceb96249a898e819af7565c6c7a 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt +++ b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt @@ -13,4 +13,6 @@ cc_library(memory_reuse_pass SRCS memory_reuse_pass.cc DEPS computation_op_handl cc_library(buffer_shared_inplace_op_pass SRCS buffer_shared_inplace_op_pass.cc DEPS memory_reuse_pass) cc_library(buffer_shared_cross_op_memory_reuse_pass SRCS buffer_shared_cross_op_memory_reuse_pass.cc DEPS memory_reuse_pass) +cc_library(inplace_addto_op_pass SRCS inplace_addto_op_pass.cc DEPS memory_reuse_pass) + cc_test(test_reference_count_pass_last_lived_ops SRCS test_reference_count_pass_last_lived_ops.cc DEPS parallel_executor elementwise_mul_op elementwise_add_op scale_op) diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc index 0b42f2ebd5555a5c73527d9819ff254411a399d4..ce7f27d27559c70cf164f6bb641fa0ee6f02a2a0 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc @@ -16,6 +16,7 @@ #include #include #include + #include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/details/share_tensor_buffer_op_handle.h" @@ -141,11 +142,12 @@ void BufferSharedInplaceOpPass::Run(Graph *graph) const { VLOG(4) 
<< "Inplace performed in op " << op_type << ": " << in_var_handle_ptr->Name() << " -> " << out_var_handle_ptr->Name() - << ". Debug String is: " << op->GetOp()->DebugString(); + << ". Debug String is: " << op->GetOp()->DebugString() + << ". ReuseType: " << ReuseType(); } else { VLOG(3) << "Inplace failed in op " << op_type << ": " << in_var_handle_ptr->Name() << " -> " - << out_var_handle_ptr->Name(); + << out_var_handle_ptr->Name() << ". ReuseType: " << ReuseType(); } } } diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/inplace_addto_op_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/inplace_addto_op_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..81c63f46bda453ec8705cf4bc93dd9e3acf844ec --- /dev/null +++ b/paddle/fluid/framework/ir/memory_optimize_pass/inplace_addto_op_pass.cc @@ -0,0 +1,221 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include + +#include "paddle/fluid/framework/details/computation_op_handle.h" +#include "paddle/fluid/framework/details/multi_devices_helper.h" +#include "paddle/fluid/framework/details/share_tensor_buffer_op_handle.h" +#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h" +#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h" +#include "paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h" +#include "paddle/fluid/framework/ir/pass.h" + +namespace paddle { +namespace framework { +namespace ir { + +class InplaceAddToOpPass : public MemoryReusePass { + protected: + std::string ReuseType() const override { return "inplace_addto"; } + + void Run(Graph *graph) const override; + + private: + // 1. Add last living op of in_var, add any last living op of out_var + // 2. Set reference count of in_var to be 2 + void UpdateLastLiveOpOfVar(details::ComputationOpHandle *op, + details::VarHandle *in_var, + details::VarHandle *out_var) const override { + size_t scope_idx = op->GetScopeIdx(); + auto *last_live_ops_of_vars_ = + &Get>(kLastLiveOpsOfVars); + auto *var_infos_ = &(Get(kMemOptVarInfoMapList)); + auto out_var_op_iter = + (*last_live_ops_of_vars_)[scope_idx].find(out_var->Name()); + + // In Reduce mode, some output variable(gradient of parameter) does not have + // last live ops + details::ComputationOpHandle *last_live_op_of_in_var = nullptr; + if (out_var_op_iter == (*last_live_ops_of_vars_)[scope_idx].end()) { + last_live_op_of_in_var = op; + } else { + PADDLE_ENFORCE_EQ( + out_var_op_iter->second.ops().empty(), false, + platform::errors::InvalidArgument( + "Var(%s)'s last live op should not empty.", out_var->Name())); + last_live_op_of_in_var = *(out_var_op_iter->second.ops().begin()); + } + + auto *last_live_ops_of_in_var = + (*last_live_ops_of_vars_)[scope_idx][in_var->Name()].mutable_ops(); + // last_live_ops_of_in_var->clear(); + 
last_live_ops_of_in_var->insert(last_live_op_of_in_var); + + auto in_var_info_iter = (*var_infos_)[scope_idx].find(in_var->Name()); + PADDLE_ENFORCE_NE( + in_var_info_iter, (*var_infos_)[scope_idx].end(), + platform::errors::NotFound("Cannot find variable %s.", in_var->Name())); + + in_var_info_iter->second->SetRefCnt(2); // before inplace, it is 1 + } +}; + +void InplaceAddToOpPass::Run(Graph *graph) const { + const auto &last_live_ops = + Get>(kLastLiveOpsOfVars); + + bool use_cuda = Get(kUseCuda); + + // Currently, only perform InplaceAddToOpPass on cuda place + if (!use_cuda) { + return; + } + + // Step 1: Build a reverse map of last_live_ops + // i.e.: op -> vars + std::unordered_map> + candidate_ops; + for (auto &each_scope_ops : last_live_ops) { + for (auto &pair : each_scope_ops) { + // If variable has more than 1 last lived ops, this variable cannot + // be inplaced. + if (pair.second.ops().size() != 1) { + continue; + } + + auto *op = *(pair.second.ops().begin()); + const std::string &op_type = op->GetOp()->Type(); + const framework::OpDesc *op_desc = op->Node()->Op(); + PADDLE_ENFORCE_NOT_NULL( + op_desc, platform::errors::NotFound("Op(%s) can not find opdesc.", + op->Name())); + + // only grad op should be processed. + if (op_type != "grad_add") { + continue; + } + + const std::string &var_name = pair.first; + auto in_nodes = this->FindNodesByName(var_name, op->Node()->inputs); + if (in_nodes.size() == 1) { + candidate_ops[op][var_name] = *in_nodes.begin(); + } + VLOG(4) << "Find op " << op_type << " with input(" << var_name + << ") that can do inplace add to"; + } + } + + // Step 2: Check which vars can be inplaced indeed + for (auto &op_vars_pair : candidate_ops) { + auto *op = op_vars_pair.first; + + // The original gradient accumulation is g = sum(g_0, g_1,..., g_n), and it + // could be changed as follws if inplace addto is enabled: + // g_sum_0 = g_0 + // g_sum_1 = grad_add(g_sum_0, g_1) + // g_sum_2 = grad_add(g_sum_1, g_2) + // ... 
+ // g_sum_n = grad_add(g_sum_n-1, g_n) + + // here we will add inplace for each grad_add, for example, for the first + // grad_add, g_sum_0 -> g1, g_sum_1 -> g1, and set grad_add as skipped. + + const std::string &op_type = op->GetOp()->Type(); + + PADDLE_ENFORCE_EQ(op->Node()->inputs.size(), 2, + platform::errors::InvalidArgument( + "The size of inputs of %s should be 2, but got %d", + op_type, op->Node()->inputs.size())); + + PADDLE_ENFORCE_EQ(op->Node()->outputs.size(), 1, + platform::errors::InvalidArgument( + "The size of outputs of %s should be 1, but got %d", + op_type, op->Node()->outputs.size())); + + auto *left_var_ptr = dynamic_cast( + &(op->Node()->inputs[0]->Wrapper())); + auto *right_var_ptr = dynamic_cast( + &(op->Node()->inputs[1]->Wrapper())); + auto *out_var_ptr = dynamic_cast( + &(op->Node()->outputs[0]->Wrapper())); + + if (left_var_ptr == nullptr || right_var_ptr == nullptr || + out_var_ptr == nullptr) { + continue; + } + + // auto *left_generated_op = dynamic_cast( + // left_var_ptr->GeneratedOp()); + + auto *right_generated_op = dynamic_cast( + right_var_ptr->GeneratedOp()); + + auto *out_generated_op = dynamic_cast( + out_var_ptr->GeneratedOp()); + + // NOTE(zhiqiu): currently, only conv2d_grad supports addto strategy + if (right_generated_op->Name() != "conv2d_grad") { + continue; + } + + // NOTE(zhiqiu): Normally, if we inplace a->b, we should let a generated + // before b. However, in the situation of inplace addto, we do not care + // the order, since a+b is equal to b+a. Is there any exception for that? + + // AddDependencyVar(right_generated_op, left_generated_op); + // no need, as discussed above. 
+ + // step (a): inplace right_var->left_var of grad_add + + this->AddReuseVar(right_generated_op, left_var_ptr, right_var_ptr); + UpdateLastLiveOpOfVar(right_generated_op, left_var_ptr, right_var_ptr); + VLOG(4) << "Inplace performed in op " << right_generated_op->GetOp()->Type() + << ": " << left_var_ptr->Name() << " -> " << right_var_ptr->Name() + << ". Debug String is: " + << right_generated_op->GetOp()->DebugString() + << ". ReuseType: " << ReuseType(); + + // step (b): inplace out -> right_var of grad_add + + this->AddReuseVar(out_generated_op, right_var_ptr, out_var_ptr, true); + + VLOG(4) << "Inplace performed in op " << op_type << ": " + << left_var_ptr->Name() << " -> " << out_var_ptr->Name() + << ". Debug String is: " << op->GetOp()->DebugString() + << ". ReuseType: " << ReuseType(); + + // step (c): make right_var cannot inplace afterwards. canbe done + // aotomatically since CollectReusedVars is called before any reuse. + + // step (d): make right_var's generated op use addto + right_generated_op->GetOp()->SetAttr("use_addto", true); + + // step (e): make grad_add skip running + op->SetSkipRunning(true); + } +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(inplace_addto_op_pass, paddle::framework::ir::InplaceAddToOpPass) + .RequirePassAttr(paddle::framework::ir::kMemOptVarInfoMapList) + .RequirePassAttr(paddle::framework::ir::kLastLiveOpsOfVars) + .RequirePassAttr(paddle::framework::ir::kUseCuda); diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc index 221b0a76e7ef5b01d87c63fb466a9b980f1e69b4..3e3b9864a7b408267ac73de053c1692628e9a14c 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h" + #include #include #include @@ -73,6 +74,7 @@ bool MemoryReusePass::TryReuseVar(details::VarHandle *in_var, out_var->Name())); if (IsVarPairReusable(*in_var, *out_var)) { AddReuseVar(op, in_var, out_var); + UpdateLastLiveOpOfVar(op, in_var, out_var); return true; } else { return false; @@ -324,7 +326,8 @@ bool MemoryReusePass::IsVarPairReusable( void MemoryReusePass::AddReuseVar(details::ComputationOpHandle *op, details::VarHandle *in_var, - details::VarHandle *out_var) const { + details::VarHandle *out_var, + bool share_dims) const { PADDLE_ENFORCE_GT( (*var_infos_)[op->GetScopeIdx()].count(in_var->Name()), 0, platform::errors::NotFound("Var(%s) does not in mem opt var infos.", @@ -344,13 +347,15 @@ void MemoryReusePass::AddReuseVar(details::ComputationOpHandle *op, share_buffer_op->AddInput(in_var); } + if (share_dims) { + share_buffer_op->SetShareDims(true); + } + share_buffer_op->AddReuseVarPair( (*var_infos_)[op->GetScopeIdx()].at(in_var->Name()).get(), out_var->Name()); reused_in_var_names_[op->GetScopeIdx()].insert(in_var->Name()); reused_out_var_names_[op->GetScopeIdx()].insert(out_var->Name()); - - UpdateLastLiveOpOfVar(op, in_var, out_var); } // 1. 
Set last living op of in_var to be any last living op of out_var diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h index 822744191847586dc429b6896ff6f490381c5901..1c0c6ae60205b14f97bd15bceeb126d0eb54f654 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h +++ b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/details/share_tensor_buffer_op_handle.h" @@ -92,6 +93,12 @@ class MemoryReusePass : public Pass { int64_t GetMemorySize(const details::VarHandle &var) const; + void AddReuseVar(details::ComputationOpHandle *op, details::VarHandle *in_var, + details::VarHandle *out_var, bool share_dims = false) const; + virtual void UpdateLastLiveOpOfVar(details::ComputationOpHandle *op, + details::VarHandle *in_var, + details::VarHandle *out_var) const; + private: VarDesc *GetVarDesc(const details::VarHandle &var) const; @@ -109,13 +116,6 @@ class MemoryReusePass : public Pass { void CollectReusedVars() const; - void AddReuseVar(details::ComputationOpHandle *op, details::VarHandle *in_var, - details::VarHandle *out_var) const; - - void UpdateLastLiveOpOfVar(details::ComputationOpHandle *op, - details::VarHandle *in_var, - details::VarHandle *out_var) const; - private: mutable Graph *graph_; mutable bool use_cuda_; diff --git a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc index 23f794c11c239225b31cea8a7e7f11f576c87081..9f6032ffa5b87daece107ad6bd3d5f9444719e44 100644 --- a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc @@ -176,7 +176,8 @@ void BuildRepeatedFCReluPattern(PDPattern* pattern, 
return false; } if (x->IsVar() && x->Var() && x->Var()->GetShape().size() > 2) { - LOG(WARNING) << "repeated fc relu only supports input dims = 2"; + VLOG(3) << "repeated fc relu only supports input dims = 2, so it " + "is not applied."; return false; } int fc_idx = FindFCIdx(x); diff --git a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc index 1485a84d001acef8542a9dda5436cfeb57518d69..75ab04f1b9130dccd42cea39dc0e074e2e2838eb 100644 --- a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc @@ -16,6 +16,7 @@ #include #include #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace framework { @@ -98,3 +99,9 @@ void SeqConvEltAddReluFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(seqconv_eltadd_relu_fuse_pass, paddle::framework::ir::SeqConvEltAddReluFusePass); +REGISTER_PASS_CAPABILITY(seqconv_eltadd_relu_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("sequence_conv", 0) + .EQ("elementwise_add", 0) + .EQ("relu", 0)); diff --git a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc index 74ba0093a17beb5d30cd0234faf948d8a7dd620d..8bdf3940928c768fc7b0a9c7fa3d084d95f60859 100644 --- a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc +++ b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc @@ -35,8 +35,6 @@ void ShuffleChannelDetectPass::ApplyImpl(ir::Graph* graph) const { const std::string pattern_name = "shufflechannel_pattern"; FusePassBase::Init(pattern_name, graph); - LOG(WARNING) << "There is fluid.layers.shuffle_channel API already, you can " - "use it instead of (reshape + transpose +reshape)"; GraphPatternDetector gpd; auto* x = gpd.mutable_pattern() ->NewNode("x") @@ -85,6 +83,9 @@ void 
ShuffleChannelDetectPass::ApplyImpl(ir::Graph* graph) const { // Delete the unneeded nodes. GraphSafeRemoveNodes(graph, {reshape1_op, reshape1_out, transpose_op, transpose_out, reshape2_op}); + LOG_FIRST_N(WARNING, 1) + << "There is fluid.layers.shuffle_channel API already, maybe you can " + "use it instead of (reshape + transpose + reshape)"; }; gpd(graph, handler); diff --git a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc index 035b198bdcc51800be62acce58a538145413e92f..d74843611cdd238f1fb78153e6b946ae8a1c8473 100644 --- a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc +++ b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc @@ -17,6 +17,7 @@ #include #include #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace framework { @@ -77,7 +78,8 @@ PDNode* BuildSquaredMatSubPattern(PDPattern* pattern, }; auto is_fusion_input_var = [=](Node* x, const std::string& arg_name) { - bool basic = var_is_op_input(x, "matmul", arg_name) && + bool basic = (var_is_op_input(x, "matmul_v2", arg_name) || + var_is_op_input(x, "matmul", arg_name)) && var_is_op_input(x, "square", "X"); if (!basic) { return false; @@ -88,7 +90,8 @@ PDNode* BuildSquaredMatSubPattern(PDPattern* pattern, } auto* squared_x = squared_x_op->outputs[0]; bool next_is_matmul_from_arg = - var_is_op_input(squared_x, "matmul", arg_name) && + (var_is_op_input(squared_x, "matmul_v2", arg_name) || + var_is_op_input(squared_x, "matmul", arg_name)) && squared_x->outputs.size() == 1 && squared_x->outputs[0]->outputs.size() == 1; if (!next_is_matmul_from_arg) { @@ -103,7 +106,8 @@ PDNode* BuildSquaredMatSubPattern(PDPattern* pattern, auto is_fusion_first_mul_out = [=](Node* x) -> bool { bool input_is_matmul_op = x && x->inputs.size() == 1 && x->inputs[0]->IsOp() && - x->inputs[0]->Op()->Type() == "matmul"; + (x->inputs[0]->Op()->Type() == "matmul_v2" || + 
x->inputs[0]->Op()->Type() == "matmul"); if (!input_is_matmul_op) { return false; } @@ -167,7 +171,8 @@ PDNode* BuildSquaredMatSubPattern(PDPattern* pattern, auto* matmul_xy_op = pattern->NewNode( [=](Node* x) { - return x && x->IsOp() && x->Op()->Type() == "matmul" && + return x && x->IsOp() && (x->Op()->Type() == "matmul_v2" || + x->Op()->Type() == "matmul") && is_fusion_first_mul_out(x->outputs[0]); }, name_scope + "/matmul_xy_op"); @@ -189,7 +194,9 @@ PDNode* BuildSquaredMatSubPattern(PDPattern* pattern, auto is_fusion_mat_squared_x_y_op_out = [=](Node* x) -> bool { bool basic = x && x->IsVar() && x->inputs.size() == 1 && - x->inputs[0]->IsOp() && x->inputs[0]->Op()->Type() == "matmul"; + x->inputs[0]->IsOp() && + (x->inputs[0]->Op()->Type() == "matmul_v2" || + x->inputs[0]->Op()->Type() == "matmul"); if (!basic) { return false; } @@ -206,7 +213,8 @@ PDNode* BuildSquaredMatSubPattern(PDPattern* pattern, auto* matmul_squared_x_y_op = pattern->NewNode( [=](Node* x) { - return x && x->IsOp() && x->Op()->Type() == "matmul" && + return x && x->IsOp() && (x->Op()->Type() == "matmul_v2" || + x->Op()->Type() == "matmul") && is_fusion_mat_squared_x_y_op_out(x->outputs[0]); }, name_scope + "/matmul_squared_x_y_op"); @@ -378,3 +386,13 @@ void SquaredMatSubFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(squared_mat_sub_fuse_pass, paddle::framework::ir::SquaredMatSubFusePass); +REGISTER_PASS_CAPABILITY(squared_mat_sub_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("matmul", 0) + .EQ("matmul_v2", 0) + .EQ("square", 0) + .EQ("elementwise_mul", 0) + .EQ("elementwise_sub", 0) + .EQ("fill_constant", 0) + .EQ("fusion_squared_mat_sub", 0)); diff --git a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h index b6165a512acdb9b6e3bdbf49196692ef83edb58f..56b7ec9b84314bd3634c406c31e20dd421f7fa92 100644 --- a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h 
+++ b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h @@ -24,7 +24,7 @@ namespace framework { namespace ir { /** - * Fuse ( (A.^2 * B.^2) - (A * B).^2 ) .* scalar + * Fuse ( (A * B).^2 - (A.^2 * B.^2) ) .* scalar */ class SquaredMatSubFusePass : public FusePassBase { public: diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index ebecbf0498c384a55627e2b5cb31304d098a444c..bd52d7ffef5040f596bfb5ca9521a6e1062bb5aa 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -157,6 +157,14 @@ class OperatorBase { platform::errors::NotFound("(%s) is not found in AttributeMap.", name)); return BOOST_GET_CONST(T, attrs_.at(name)); } + void SetAttr(const std::string& name, const Attribute& v) { + PADDLE_ENFORCE_EQ( + HasAttr(name), true, + platform::errors::NotFound( + "The attribute %s is not found in operator %s", name, Type())); + + attrs_[name] = v; + } const AttributeMap& Attrs() const { return attrs_; } const VariableNameMap& Inputs() const { return inputs_; } diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 12e0f97f1262ca0f6bf8fc70ab5b482fb0bdd305..535ec9cd7d950588fd7877d0913e3e851f8fe8dc 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -13,12 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/parallel_executor.h" + #include #include #include #include #include #include + #include "paddle/fluid/framework/details/async_ssa_graph_executor.h" #include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" @@ -108,6 +110,11 @@ class ParallelExecutorPrivate { * them. 
*/ inline void SetSkipMemoryReuse(size_t scope_idx, const std::string &name) { + if (mem_opt_var_infos_.size() == 0) { + VLOG(4) << "The mem_opt_var_infos_ is empty, maybe no memory " + "optimization strategy is enabled"; + return; + } auto iter = mem_opt_var_infos_[scope_idx].find(name); if (iter != mem_opt_var_infos_[scope_idx].end()) { iter->second->SetSkipMemoryReuse(true); @@ -308,6 +315,7 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) { } bool need_mem_opt = build_strategy_.enable_inplace_ || + build_strategy_.enable_addto_ || build_strategy_.memory_optimize_.get() || is_gc_enabled; if (!need_mem_opt) return graph; @@ -320,6 +328,16 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) { graph = ref_cnt_pass->Apply(graph); VLOG(10) << "ReferenceCountPass Applied"; + if (build_strategy_.enable_addto_) { + auto addto_pass = ir::PassRegistry::Instance().Get("inplace_addto_op_pass"); + addto_pass->SetNotOwned(ir::kMemOptVarInfoMapList, &mem_opt_var_infos_); + addto_pass->SetNotOwned(ir::kLastLiveOpsOfVars, &last_live_ops_of_vars); + addto_pass->SetNotOwned(ir::kUseCuda, &use_cuda_); + VLOG(10) << "Start to apply inplace_addto_op_pass"; + graph = addto_pass->Apply(graph); + VLOG(10) << "inplace_addto_op_pass Applied"; + } + if (build_strategy_.enable_inplace_) { auto inplace_pass = ir::PassRegistry::Instance().Get("buffer_shared_inplace_pass"); @@ -1068,3 +1086,4 @@ USE_PASS(reference_count_pass); USE_PASS(eager_deletion_pass); USE_PASS(buffer_shared_inplace_pass); USE_PASS(buffer_shared_cross_op_memory_reuse_pass); +USE_PASS(inplace_addto_op_pass); diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 9dc96fdfe8622e3e78673664637ab50970fe93c6..cf6fcb7b64365b382c648dd83639e0c44670014d 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -44,10 +44,11 @@ add_subdirectory(api) set(STATIC_INFERENCE_API 
paddle_inference_api analysis_predictor zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder activation_functions ${mkldnn_quantizer_cfg}) -if(WIN32) +# TODO(xingzhaolong, jiweibo): remove this and create_static_lib(paddle_fluid) on windows GPU +if(WIN32 AND WITH_GPU) cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_API}) else() - create_static_lib(paddle_fluid ${fluid_modules} ${STATIC_INFERENCE_API}) + create_static_lib(paddle_fluid ${fluid_modules} ${STATIC_INFERENCE_API}) endif() if(NOT APPLE AND NOT WIN32) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index ac914700643af2e7e8eca5dcf0bdf8de88e320d6..42e62011f84c18b875a3fa48b95a05f152fb5791 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1048,6 +1048,7 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) { template <> std::unique_ptr CreatePaddlePredictor( const AnalysisConfig &config) { + LOG(WARNING) << "Deprecated. Please use CreatePredictor instead."; return CreatePaddlePredictor( config); } diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index ca0a5148f0622a8c848cb18afb94f600a547bbfe..c78cdf24dec561f5fd5643cb50ee243a58b3ab6a 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -373,6 +373,7 @@ std::unique_ptr CreatePaddlePredictor< template <> std::unique_ptr CreatePaddlePredictor( const NativeConfig &config) { + LOG(WARNING) << "Deprecated. 
Please use CreatePredictor instead."; return CreatePaddlePredictor(config); } diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt index 08a1a5428193c2d506f511112e4a26d73c382ff1..6a3760e1f749b2b4875df00b01def57c979b3c93 100644 --- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt +++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt @@ -51,8 +51,8 @@ if (WIN32) set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") + safe_set_static_flag() if (WITH_STATIC_LIB) - safe_set_static_flag() add_definitions(-DSTATIC_LIB) endif() endif() @@ -136,7 +136,7 @@ else() set(DEPS ${DEPS} ${MATH_LIB} ${MKLDNN_LIB} glog gflags_static libprotobuf xxhash ${EXTERNAL_LIB}) - set(DEPS ${DEPS} libcmt shlwapi.lib) + set(DEPS ${DEPS} shlwapi.lib) endif(NOT WIN32) if(WITH_GPU) diff --git a/paddle/fluid/inference/api/demo_ci/run.sh b/paddle/fluid/inference/api/demo_ci/run.sh index d8d9e2187815dcad78ad4ea6be10ad677940bf39..a3e7bec398af7e193a75395ad40175336f5f7503 100755 --- a/paddle/fluid/inference/api/demo_ci/run.sh +++ b/paddle/fluid/inference/api/demo_ci/run.sh @@ -6,8 +6,8 @@ TEST_GPU_CPU=$3 # test both GPU/CPU mode or only CPU mode DATA_DIR=$4 # dataset TENSORRT_INCLUDE_DIR=$5 # TensorRT header file dir, default to /usr/local/TensorRT/include TENSORRT_LIB_DIR=$6 # TensorRT lib file dir, default to /usr/local/TensorRT/lib - -inference_install_dir=${PADDLE_ROOT}/build/fluid_inference_install_dir +MSVC_STATIC_CRT=$7 +inference_install_dir=${PADDLE_ROOT}/build/paddle_inference_install_dir cd `dirname $0` current_dir=`pwd` @@ -66,43 +66,54 @@ mkdir -p build cd build rm -rf * -if [ $(echo `uname` | grep "Win") != "" ]; then - # -----simple_on_word2vec on windows----- - cmake .. 
-G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \ - -DWITH_MKL=$TURN_ON_MKL \ - -DDEMO_NAME=simple_on_word2vec \ - -DWITH_GPU=$TEST_GPU_CPU \ - -DWITH_STATIC_LIB=OFF - msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln - Release/simple_on_word2vec.exe \ - --dirname=$DATA_DIR/word2vec/word2vec.inference.model \ - --use_gpu=False - if [ $? -ne 0 ]; then - echo "simple_on_word2vec demo runs fail." - exit 1 - fi - - # -----vis_demo on windows----- - rm -rf * - cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \ - -DWITH_MKL=$TURN_ON_MKL \ - -DDEMO_NAME=vis_demo \ - -DWITH_GPU=$TEST_GPU_CPU \ - -DWITH_STATIC_LIB=OFF - msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln - for vis_demo_name in $vis_demo_list; do - Release/vis_demo.exe \ - --modeldir=$DATA_DIR/$vis_demo_name/model \ - --data=$DATA_DIR/$vis_demo_name/data.txt \ - --refer=$DATA_DIR/$vis_demo_name/result.txt \ - --use_gpu=False - if [ $? -ne 0 ]; then - echo "vis demo $vis_demo_name runs fail." - exit 1 +for WITH_STATIC_LIB in ON OFF; do + if [ $(echo `uname` | grep "Win") != "" ]; then + # TODO(xingzhaolong, jiweibo): remove this if windows GPU library is ready. + if [ $TEST_GPU_CPU == ON] && [ $WITH_STATIC_LIB ==ON ]; then + return 0 fi - done -else - for WITH_STATIC_LIB in ON OFF; do + + # -----simple_on_word2vec on windows----- + cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \ + -DWITH_MKL=$TURN_ON_MKL \ + -DDEMO_NAME=simple_on_word2vec \ + -DWITH_GPU=$TEST_GPU_CPU \ + -DWITH_STATIC_LIB=$WITH_STATIC_LIB \ + -DMSVC_STATIC_CRT=$MSVC_STATIC_CRT + msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln + for use_gpu in $use_gpu_list; do + Release/simple_on_word2vec.exe \ + --dirname=$DATA_DIR/word2vec/word2vec.inference.model \ + --use_gpu=$use_gpu + if [ $? -ne 0 ]; then + echo "simple_on_word2vec demo runs fail." 
+ exit 1 + fi + done + + # -----vis_demo on windows----- + rm -rf * + cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \ + -DWITH_MKL=$TURN_ON_MKL \ + -DDEMO_NAME=vis_demo \ + -DWITH_GPU=$TEST_GPU_CPU \ + -DWITH_STATIC_LIB=$WITH_STATIC_LIB \ + -DMSVC_STATIC_CRT=$MSVC_STATIC_CRT + msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln + for use_gpu in $use_gpu_list; do + for vis_demo_name in $vis_demo_list; do + Release/vis_demo.exe \ + --modeldir=$DATA_DIR/$vis_demo_name/model \ + --data=$DATA_DIR/$vis_demo_name/data.txt \ + --refer=$DATA_DIR/$vis_demo_name/result.txt \ + --use_gpu=$use_gpu + if [ $? -ne 0 ]; then + echo "vis demo $vis_demo_name runs fail." + exit 1 + fi + done + done + else # -----simple_on_word2vec on linux/mac----- rm -rf * cmake .. -DPADDLE_LIB=${inference_install_dir} \ @@ -123,7 +134,6 @@ else fi done fi - # ---------vis_demo on linux/mac--------- rm -rf * cmake .. -DPADDLE_LIB=${inference_install_dir} \ @@ -145,7 +155,6 @@ else fi done done - # --------tensorrt mobilenet on linux/mac------ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then rm -rf * @@ -167,6 +176,6 @@ else exit 1 fi fi - done -fi + fi +done set +x diff --git a/paddle/fluid/inference/api/demo_ci/run_windows_demo.bat b/paddle/fluid/inference/api/demo_ci/run_windows_demo.bat index 5199b83413af87eacba6f26f4fc0a9acb6a39808..523dafa6649b9faa019edc1c1926b5fa408e03d5 100644 --- a/paddle/fluid/inference/api/demo_ci/run_windows_demo.bat +++ b/paddle/fluid/inference/api/demo_ci/run_windows_demo.bat @@ -21,7 +21,7 @@ if /i "%use_mkl%"=="N" ( ) :set_paddle_infernece_lib -SET /P paddle_infernece_lib="Please input the path of paddle inference library, such as D:\fluid_inference_install_dir =======>" +SET /P paddle_infernece_lib="Please input the path of paddle inference library, such as D:\paddle_inference_install_dir =======>" set tmp_var=!paddle_infernece_lib! call:remove_space set paddle_infernece_lib=!tmp_var! 
diff --git a/paddle/fluid/inference/api/paddle_infer_declare.h b/paddle/fluid/inference/api/paddle_infer_declare.h index 39c9653f16cefb71a9f2a0ddcc08723d189d411c..e8525f440fe7f2d54d045eedb79aed228513e550 100644 --- a/paddle/fluid/inference/api/paddle_infer_declare.h +++ b/paddle/fluid/inference/api/paddle_infer_declare.h @@ -17,11 +17,7 @@ #if defined(_WIN32) #ifndef PD_INFER_DECL #ifdef PADDLE_DLL_INFERENCE -#ifndef PADDLE_ON_INFERENCE -#define PD_INFER_DECL -#else #define PD_INFER_DECL __declspec(dllexport) -#endif // PADDLE_ON_INFERENCE #else #define PD_INFER_DECL __declspec(dllimport) #endif // PADDLE_DLL_INFERENCE diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index c19e77d2714bcfc18c2cf2a98511d31a97295daa..19f52422b441faf45204f47adbcf4e6aae30f6f1 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -156,7 +156,8 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) { // "seqpool_concat_fuse_pass", // "seqpool_cvm_concat_fuse_pass", // // "embedding_fc_lstm_fuse_pass", // - "fc_lstm_fuse_pass", // + // TODO(wilber): fix correctness problem. + // "fc_lstm_fuse_pass", // "mul_lstm_fuse_pass", // "fc_gru_fuse_pass", // "mul_gru_fuse_pass", // diff --git a/paddle/fluid/inference/capi/pd_predictor.cc b/paddle/fluid/inference/capi/pd_predictor.cc index 0509a6190211c25b6461c1d683daa6b33110b4e0..c1bf4c974fac8c80c3e8e31fbd247332a325e2aa 100644 --- a/paddle/fluid/inference/capi/pd_predictor.cc +++ b/paddle/fluid/inference/capi/pd_predictor.cc @@ -130,7 +130,10 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, VLOG(3) << "The inputs' size is " << input_names.size(); PADDLE_ENFORCE_EQ( input_names.size(), in_size, - "The number of input and the number of model's input must match. "); + paddle::platform::errors::InvalidArgument( + "The number of input and the number of model's input must match. 
The " + "number of input is %d, the number of model's input is %d.", + input_names.size(), in_size)); for (int i = 0; i < in_size; ++i) { auto input_t = predictor->GetInputTensor(inputs[i].name); std::vector tensor_shape; diff --git a/paddle/fluid/inference/io.cc b/paddle/fluid/inference/io.cc index c497ab384b5fac74b5241d61517485fd8f2b40c4..84e011c6505a8fe974effbecf54101e0e51d29fa 100644 --- a/paddle/fluid/inference/io.cc +++ b/paddle/fluid/inference/io.cc @@ -47,7 +47,9 @@ void Init(const std::vector argv) { void ReadBinaryFile(const std::string& filename, std::string* contents) { std::ifstream fin(filename, std::ios::in | std::ios::binary); - PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", filename); + PADDLE_ENFORCE_EQ( + fin.is_open(), true, + platform::errors::Unavailable("Failed to open file %s.", filename)); fin.seekg(0, std::ios::end); contents->clear(); contents->resize(fin.tellg()); @@ -133,9 +135,10 @@ std::unique_ptr Load(framework::Executor* executor, std::unique_ptr main_program( new framework::ProgramDesc(program_desc_str)); - PADDLE_ENFORCE(framework::IsProgramVersionSupported(main_program->Version()), - "model version %ld is not supported.", - main_program->Version()); + PADDLE_ENFORCE_EQ( + framework::IsProgramVersionSupported(main_program->Version()), true, + platform::errors::Unavailable("Model version %ld is not supported.", + main_program->Version())); // model_from_memory is false in separate parameters. 
LoadPersistables(executor, scope, *main_program, dirname, "", @@ -151,9 +154,10 @@ std::unique_ptr Load( std::unique_ptr main_program( new framework::ProgramDesc(program_desc_str)); - PADDLE_ENFORCE(framework::IsProgramVersionSupported(main_program->Version()), - "model version %ld is not supported.", - main_program->Version()); + PADDLE_ENFORCE_EQ( + framework::IsProgramVersionSupported(main_program->Version()), true, + platform::errors::Unavailable("Model version %ld is not supported.", + main_program->Version())); LoadPersistables(executor, scope, *main_program, "", param_filename, false /* model_from_memory */); @@ -165,9 +169,10 @@ std::unique_ptr LoadFromMemory( const std::string& prog_buffer, const std::string& param_buffer) { std::unique_ptr main_program( new framework::ProgramDesc(prog_buffer)); - PADDLE_ENFORCE(framework::IsProgramVersionSupported(main_program->Version()), - "model version %ld is not supported.", - main_program->Version()); + PADDLE_ENFORCE_EQ( + framework::IsProgramVersionSupported(main_program->Version()), true, + platform::errors::Unavailable("Model version %ld is not supported.", + main_program->Version())); LoadPersistables(executor, scope, *main_program, "", param_buffer, true /* model_filename */); diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc index 3c20b6d1e725273dbfdc20c01fb01deea4e8d88e..0bf8a1691e2192b278fcd209162135027ed24e71 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc +++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc @@ -25,8 +25,10 @@ PluginTensorRT* PluginFactoryTensorRT::createPlugin(const char* layer_name, const char* plugin_type; DeserializeValue(&serial_data, &serial_length, &plugin_type); - PADDLE_ENFORCE(Has(plugin_type), - "trt plugin type %s does not exists, check it.", plugin_type); + PADDLE_ENFORCE_EQ( + Has(plugin_type), true, + platform::errors::NotFound("TensorRT plugin type 
`%s` does not exists.", + plugin_type)); auto plugin = plugin_registry_[plugin_type](serial_data, serial_length); owned_plugins_.emplace_back(plugin); diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h index 18037179c7b98952b6088361954e869ecedfb2c7..16751c764bd03af9bbb7cbd77dd9287c17150dd5 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h +++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h @@ -103,7 +103,11 @@ struct Serializer, DeserializeValue(buffer, buffer_size, &size); value->resize(size); size_t nbyte = value->size() * sizeof(T); - PADDLE_ENFORCE_GE(*buffer_size, nbyte); + PADDLE_ENFORCE_GE(*buffer_size, nbyte, + platform::errors::InvalidArgument( + "Insufficient data in buffer, expect contains %d " + "byte, but actually only contains %d byte.", + *buffer_size, nbyte)); std::memcpy(value->data(), *buffer, nbyte); reinterpret_cast(*buffer) += nbyte; *buffer_size -= nbyte; diff --git a/paddle/fluid/inference/utils/singleton.h b/paddle/fluid/inference/utils/singleton.h index 990bef359499834c3a7cb025c3fb1d94ceea958e..6828924c300fdfec6640e7b19a2c06b0826aa455 100644 --- a/paddle/fluid/inference/utils/singleton.h +++ b/paddle/fluid/inference/utils/singleton.h @@ -46,7 +46,9 @@ struct Registry { template void Register(const std::string& name) { - PADDLE_ENFORCE_EQ(items_.count(name), 0); + PADDLE_ENFORCE_EQ(items_.count(name), 0, + platform::errors::AlreadyExists( + "Item `%s` has beed registered.", name)); items_[name] = new ItemChild; } diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index f0a04d850dff01e0776e96bbe518cde2ce8bb88b..53e6f4aa6e41bb8c02c01b4897e35c103260e167 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -92,7 +92,7 @@ cc_library(common_infer_shape_functions SRCS common_infer_shape_functions.cc DEP set(COMMON_OP_DEPS ${COMMON_OP_DEPS} 
selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project -sequence_pooling executor device_memory_aligment generator) +sequence_pooling segment_pooling executor device_memory_aligment generator) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler sample_prob tree2col) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search fc matrix_inverse) diff --git a/paddle/fluid/operators/add_position_encoding_op.cc b/paddle/fluid/operators/add_position_encoding_op.cc index 629fedba6e3db474869ebddc02470c2ff007e658..e5fcd270eb8b8fa58175e11e955161ebfbb2846c 100644 --- a/paddle/fluid/operators/add_position_encoding_op.cc +++ b/paddle/fluid/operators/add_position_encoding_op.cc @@ -69,12 +69,18 @@ class AddPositionEncodingOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("alpha", "The scale of Original Embedding.") .SetDefault(1.0f) .AddCustomChecker([](const float& alpha) { - PADDLE_ENFORCE(alpha >= 0.0f, "'alpha' must be above 0.0."); + PADDLE_ENFORCE_GE( + alpha, 0.0f, + platform::errors::InvalidArgument( + "Attribute 'alpha' must be greater than or equal to 0.0.")); }); AddAttr("beta", "The scale of Position Embedding.") .SetDefault(1.0f) .AddCustomChecker([](const float& beta) { - PADDLE_ENFORCE(beta >= 0.0f, "'beta' must be between 0.0."); + PADDLE_ENFORCE_GE( + beta, 0.0f, + platform::errors::InvalidArgument( + "Attribute 'beta' must be greater than or equal to 0.0.")); }); AddComment(R"DOC( Add Position Encoding Operator. 
diff --git a/paddle/fluid/operators/argsort_op.cu b/paddle/fluid/operators/argsort_op.cu index cbd7e33bc6b7238eacb29ebab1306802d974a90b..7fc2a92b7d9129b3ab0724832d2e5f72adafb0e3 100644 --- a/paddle/fluid/operators/argsort_op.cu +++ b/paddle/fluid/operators/argsort_op.cu @@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include +#include #include #include "cub/cub.cuh" #include "paddle/fluid/framework/op_registry.h" @@ -58,6 +60,16 @@ static __global__ void FillIndex(T* indices, T num_rows, T num_cols) { } } +template +static __global__ void FillFlattenGrad(const T* dO, const IndType* indices, + int64_t size, T* dX) { + int index = threadIdx.x + blockIdx.x * blockDim.x; + int stride = blockDim.x * gridDim.x; + for (int i = index; i < size; i += stride) { + dX[indices[i]] = dO[i]; + } +} + template static __global__ void FillGrad(const T* dO, const IndType* indices, T* dX, IndType num_rows, IndType num_cols) { @@ -193,6 +205,23 @@ void ArgFullAssign(const platform::CUDADeviceContext& ctx, const Tensor* dO, } template +void ArgFlattenAssign(const platform::CUDADeviceContext& ctx, const Tensor* dO, + const Tensor* indices, int64_t size, Tensor* dX) { + auto cu_stream = ctx.stream(); + + const int64_t block_size = + std::min(size, static_cast(ctx.GetMaxThreadsPerBlock())); + int64_t max_threads = ctx.GetMaxPhysicalThreadCount(); + const int64_t max_blocks = + std::max(((max_threads - 1) / block_size + 1), static_cast(1)); + const int64_t grid_size = + std::min(max_blocks, (size + block_size - 1) / block_size); + + FillFlattenGrad<<>>( + dO->data(), indices->data(), size, dX->data()); +} + +template class ArgsortOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -205,8 +234,25 @@ class ArgsortOpCUDAKernel : public framework::OpKernel { auto 
in_dims = input->dims(); axis = (axis < 0) ? (in_dims.size() + axis) : axis; - int64_t numel = input->numel(); - int64_t groups = numel / in_dims[axis]; + const T* in_data = input->data(); + auto size = input->numel(); + T* out_data = output->mutable_data(ctx.GetPlace()); + int64_t* ids_data = indices->mutable_data(ctx.GetPlace()); + + // Use thrust for parallel acceleration when the input size is equal to the + // length of the ‘axis’ dimension. + // Compared to the following 'Special case for full sort', ascending sort is + // 34 times faster and descending sort is 31 times faster. + if (size == in_dims[axis]) { + thrust::sequence(thrust::device, ids_data, ids_data + size); + thrust::copy(thrust::device, in_data, in_data + size, out_data); + thrust::sort_by_key(thrust::device, out_data, out_data + size, ids_data); + if (descending) { + thrust::reverse(thrust::device, out_data, out_data + size); + thrust::reverse(thrust::device, ids_data, ids_data + size); + } + return; + } // Special case for full sort, speedup ~190x. if (axis == -1 || axis + 1 == in_dims.size()) { @@ -276,23 +322,28 @@ class ArgsortGradOpCUDAKernel : public framework::OpKernel { int axis = ctx.Attr("axis"); dX->mutable_data(ctx.GetPlace()); - auto dxt = framework::EigenVector::Flatten(*dX); - auto& place = *ctx.template device_context() - .eigen_device(); - dxt.device(place) = dxt.constant(static_cast(0)); if (dO->numel() == 0) return; - auto in_dims = indices->dims(); + auto in_dims = dX->dims(); axis = (axis < 0) ? (in_dims.size() + axis) : axis; - int64_t numel = indices->numel(); + int64_t size = dX->numel(); + const auto& dev_ctx = ctx.cuda_device_context(); + + // Parallel acceleration when the input size is equal to the length of the + // ‘axis’ dimension. + // Compared to 'special case for full sort' below, the gradient calculation + // is 10 times faster. 
+ if (size == in_dims[axis]) { + ArgFlattenAssign(dev_ctx, dO, indices, size, dX); + return; + } // Special case for full sort, speedup ~190x. if (axis == -1 || axis + 1 == in_dims.size()) { const int64_t input_height = framework::product( framework::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t input_width = in_dims[in_dims.size() - 1]; - const auto& dev_ctx = ctx.cuda_device_context(); ArgFullAssign(dev_ctx, dO, indices, dX, input_height, input_width); } else { @@ -316,7 +367,6 @@ class ArgsortGradOpCUDAKernel : public framework::OpKernel { Tensor trans_ind; trans_ind.mutable_data(trans_dims, ctx.GetPlace()); int ndims = trans.size(); - const auto& dev_ctx = ctx.cuda_device_context(); // Do transpose TransCompute(ndims, dev_ctx, *dO, &trans_dO, trans); @@ -345,11 +395,17 @@ class ArgsortGradOpCUDAKernel : public framework::OpKernel { } // namespace paddle REGISTER_OP_CUDA_KERNEL( - argsort, paddle::operators::ArgsortOpCUDAKernel, - paddle::operators::ArgsortOpCUDAKernel, - paddle::operators::ArgsortOpCUDAKernel, - paddle::operators::ArgsortOpCUDAKernel, - paddle::operators::ArgsortOpCUDAKernel); + argsort, + paddle::operators::ArgsortOpCUDAKernel, + paddle::operators::ArgsortOpCUDAKernel, + paddle::operators::ArgsortOpCUDAKernel, + paddle::operators::ArgsortOpCUDAKernel, + paddle::operators::ArgsortOpCUDAKernel); REGISTER_OP_CUDA_KERNEL( argsort_grad, paddle::operators::ArgsortGradOpCUDAKernel, paddle::operators::ArgsortGradOpCUDAKernel, diff --git a/paddle/fluid/operators/assign_value_op.h b/paddle/fluid/operators/assign_value_op.h index b462c43d23a534c3520a2a852252fe0333222d77..1418d96b67b75ea3a2d4b3d95d3e4bdfb17618ee 100644 --- a/paddle/fluid/operators/assign_value_op.h +++ b/paddle/fluid/operators/assign_value_op.h @@ -76,7 +76,10 @@ class AssignValueKernel : public framework::OpKernel { value_name = "int64_values"; break; default: - PADDLE_THROW("Unsupported dtype for assign_value_op: %d", dtype); + PADDLE_THROW(platform::errors::Unimplemented( 
+ "Unsupported data type(code %d) for AssignValue operator, only " + "supports bool, int32, float32 and int64.", + dtype)); break; } CopyVecotorToTensor(value_name, out, ctx); diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index c92f72e653dbe843d76ec65954d17f3264ed1cc0..dcfe8bb1bb48a505f5526f6471e8ce9ba848b5b3 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -831,6 +831,401 @@ void BatchNormGradMaker::Apply(GradOpPtr op) const { op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias")); } +template +void BatchNormDoubleGradMaker::Apply(GradOpPtr op) const { + op->SetType("batch_norm_grad_grad"); + op->SetInput("X", this->Input("X")); + op->SetInput("Scale", this->Input("Scale")); + op->SetInput("SavedMean", this->Input("SavedMean")); + op->SetInput("SavedVariance", this->Input("SavedVariance")); + if (BOOST_GET_CONST(bool, this->GetAttr("use_global_stats"))) { + op->SetInput("Variance", this->Input("Variance")); + } + op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X"))); + op->SetInput("DDScale", this->OutputGrad(framework::GradVarName("Scale"))); + op->SetInput("DDBias", this->OutputGrad(framework::GradVarName("Bias"))); + op->SetInput("DY", this->Input(framework::GradVarName("Y"))); + + op->SetAttrMap(this->Attrs()); + op->SetOutput("DX", this->InputGrad("X")); + op->SetOutput("DScale", this->InputGrad("Scale")); + op->SetOutput("DDY", this->InputGrad(framework::GradVarName("Y"))); +} + +void BatchNormDoubleGradOp::InferShape( + framework::InferShapeContext *ctx) const { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "BatchNormDoubleGrad"); + OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", + "BatchNormDoubleGrad"); + OP_INOUT_CHECK(ctx->HasInput("SavedMean"), "Input", "SavedMean", + "BatchNormDoubleGrad"); + OP_INOUT_CHECK(ctx->HasInput("SavedVariance"), "Input", "SavedVariance", + "BatchNormDoubleGrad"); + + const bool 
use_global_stats = ctx->Attrs().Get("use_global_stats"); + if (use_global_stats) { + OP_INOUT_CHECK(ctx->HasInput("Variance"), "Input", "VarianceOut", + "BatchNormDoubleGrad"); + } + + OP_INOUT_CHECK(ctx->HasInput("DDX"), "Input", "DDX", "BatchNormDoubleGrad"); + OP_INOUT_CHECK(ctx->HasInput("DY"), "Input", "DY", "BatchNormDoubleGrad"); + + // check output + OP_INOUT_CHECK(ctx->HasOutput("DX"), "Output", "DX", "BatchNormDoubleGrad"); + + const auto x_dims = ctx->GetInputDim("X"); + const int C = x_dims[1]; + if (ctx->HasOutput("DX")) { + ctx->SetOutputDim("DX", x_dims); + } + if (ctx->HasOutput("DScale")) { + ctx->SetOutputDim("DScale", {C}); + } + if (ctx->HasOutput("DDY")) { + ctx->ShareDim("X", "DDY"); + } +} + +framework::OpKernelType BatchNormDoubleGradOp::GetExpectedKernelType( + const framework::ExecutionContext &ctx) const { + const auto *var = ctx.InputVar("DY"); + if (var == nullptr) { + PADDLE_THROW( + platform::errors::NotFound("cannot find gradient variable of Y")); + } + const Tensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); + } + if (t == nullptr) { + PADDLE_THROW( + platform::errors::InvalidArgument("gradient variable of Y is empty")); + } + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); +} + +template +class BatchNormDoubleGradKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + const auto *X = ctx.Input("X"); + const auto *Scale = ctx.Input("Scale"); + const auto *dY = ctx.Input("DY"); + const auto *Saved_mean = ctx.Input("SavedMean"); + const auto *Saved_variance = ctx.Input("SavedVariance"); + const float epsilon = ctx.Attr("epsilon"); + const bool use_global_stats = ctx.Attr("use_global_stats"); + const bool is_test = ctx.Attr("is_test"); + + PADDLE_ENFORCE_EQ( + is_test, false, + platform::errors::InvalidArgument( + "`is_test = True` CANNOT be used in train 
program. If " + "you want to use global status in pre_train model, " + "please set `use_global_stats = True`")); + + const std::string data_layout_str = ctx.Attr("data_layout"); + const DataLayout data_layout = + framework::StringToDataLayout(data_layout_str); + + const auto *ddX = ctx.Input("DDX"); + const auto *ddScale = ctx.Input("DDScale"); + const auto *ddBias = ctx.Input("DDBias"); + + auto *dX = ctx.Output("DX"); + auto *dScale = ctx.Output("DScale"); + auto *ddY = ctx.Output("DDY"); + dX->mutable_data(ctx.GetPlace()); + ddY->mutable_data(ctx.GetPlace()); + + auto &dev_ctx = ctx.template device_context(); + + const auto &x_dims = X->dims(); + const int C = + (data_layout == DataLayout::kNCHW ? x_dims[1] + : x_dims[x_dims.size() - 1]); + const int sample_size = X->numel() / C; + math::SetConstant set_constant; + + const T *mean_data = Saved_mean->data(); + const T *inv_var_data = Saved_variance->data(); + + Tensor inv_var_tensor; + if (use_global_stats) { + const auto *running_variance = ctx.Input("Variance"); + inv_var_tensor.Resize({C}); + + T *running_inv_var_data = inv_var_tensor.mutable_data(ctx.GetPlace()); + EigenVectorArrayMap inv_var_tmp(running_inv_var_data, C); + ConstEigenVectorArrayMap var_arr(running_variance->data(), C); + + inv_var_tmp = (var_arr + epsilon).sqrt().inverse(); + inv_var_data = running_inv_var_data; + } + + // transpose NCHW -> NHWC for easy calculate + Tensor transformed_x(X->type()); + Tensor transformed_dy(dY->type()); + Tensor transformed_ddx(ddX->type()); + + Tensor transformed_dx(dX->type()); + Tensor transformed_ddy(ddY->type()); + if (data_layout == DataLayout::kNCHW && x_dims.size() > 2) { + VLOG(3) << "Transform batchnorm output from NCHW to NHWC"; + // Input Tensor + ResizeToChannelLast(ctx, X, + &transformed_x); + TransToChannelLast(ctx, X, &transformed_x); + ResizeToChannelLast(ctx, dY, + &transformed_dy); + TransToChannelLast(ctx, dY, + &transformed_dy); + ResizeToChannelLast(ctx, ddX, + &transformed_ddx); + 
TransToChannelLast(ctx, ddX, + &transformed_ddx); + // Output Tensor + ResizeToChannelLast(ctx, dX, + &transformed_dx); + ResizeToChannelLast(ctx, ddY, + &transformed_ddy); + } else { + transformed_x.ShareDataWith(*X); + transformed_dy.ShareDataWith(*dY); + transformed_ddx.ShareDataWith(*ddX); + + transformed_dx.ShareDataWith(*dX); + transformed_ddy.ShareDataWith(*ddY); + } + + ConstEigenArrayMap x_arr(transformed_x.data(), C, sample_size); + ConstEigenVectorArrayMap mean_arr(mean_data, C); + ConstEigenVectorArrayMap inv_var_arr(inv_var_data, C); + + Tensor mean_tile; + mean_tile.Resize({C, sample_size}); + mean_tile.mutable_data(ctx.GetPlace()); + EigenArrayMap mean_tile_data(mean_tile.mutable_data(ctx.GetPlace()), + C, sample_size); + + Tensor inv_var_tile; + inv_var_tile.Resize({C, sample_size}); + inv_var_tile.mutable_data(ctx.GetPlace()); + EigenArrayMap inv_var_tile_data( + inv_var_tile.mutable_data(ctx.GetPlace()), C, sample_size); + + mean_tile_data = mean_arr.replicate(1, sample_size); + inv_var_tile_data = inv_var_arr.replicate(1, sample_size); + + Tensor Scale_data; + if (!Scale) { + Scale_data.mutable_data({C}, ctx.GetPlace()); + set_constant(dev_ctx, &Scale_data, static_cast(1)); + } + ConstEigenVectorArrayMap scale_arr( + Scale ? 
Scale->data() : Scale_data.data(), C); + + Tensor scale_tile; + scale_tile.Resize({C, sample_size}); + scale_tile.mutable_data(ctx.GetPlace()); + EigenArrayMap scale_tile_data(scale_tile.mutable_data(ctx.GetPlace()), + C, sample_size); + scale_tile_data = scale_arr.replicate(1, sample_size); + + ConstEigenArrayMap dy_arr(transformed_dy.data(), C, sample_size); + ConstEigenArrayMap ddx_arr(transformed_ddx.data(), C, sample_size); + + Tensor x_sub_mean_mul_invstd; + x_sub_mean_mul_invstd.Resize({C, sample_size}); + x_sub_mean_mul_invstd.mutable_data(ctx.GetPlace()); + EigenArrayMap x_sub_mean_mul_invstd_arr( + x_sub_mean_mul_invstd.mutable_data(ctx.GetPlace()), C, sample_size); + x_sub_mean_mul_invstd_arr = (x_arr - mean_tile_data) * inv_var_tile_data; + + if (dX) { + dX->mutable_data(ctx.GetPlace()); + EigenArrayMap dx_arr(transformed_dx.mutable_data(ctx.GetPlace()), C, + sample_size); + dx_arr.setZero(); + if (use_global_stats) { + // math: dx = (ddscale * dy) * inv_var + if (ddScale) { + ConstEigenVectorArrayMap ddscale_arr(ddScale->data(), C); + Tensor ddscale_tile; + ddscale_tile.Resize({C, sample_size}); + EigenArrayMap ddscale_tile_data( + ddscale_tile.mutable_data(ctx.GetPlace()), C, sample_size); + ddscale_tile_data = ddscale_arr.replicate(1, sample_size); + + dx_arr = dy_arr * ddscale_tile_data * inv_var_tile_data; + } + } else { + // math: dx = scale * ((x - mean) * inv_var / NxHxW * (np.mean(ddx, + // axis=(n,h,w)) * + // np.sum(dy, axis=(n,h,w)) - + // np.sum(dy * ddx, axis=(n,h,w)) + 3 * np.mean(dy * (x - + // mean), + // axis=(n,h,w)) * inv_var.pow(2) * + // np.sum(ddx * (x - mean), axis=(n,h,w))) + inv_var.pow(3) / + // NxHxW * + // np.sum(ddx * (x - mean)) * + // (np.mean(dy, axis=(n,h,w)) - dy) + inv_var.pow(3) / NxHxW * + // np.sum(dy, + // axis=(n,h,w)) * (x - mean) * + // (np.mean(ddx, axis=(n,h,w)) - ddx) + ddr * (dy * inv_var - + // inv_var + // * + // np.mean(dy, axis=(n,h,w)) - + // inv_var.pow(3) * (x - mean) * np.mean(dy * (x - mean), + // 
axis=(n,h,w)))) + + if (ddX) { + dx_arr += + (x_sub_mean_mul_invstd_arr * inv_var_tile_data * + inv_var_tile_data / sample_size) + .colwise() * + (ddx_arr.rowwise().sum() * dy_arr.rowwise().sum() / sample_size - + (dy_arr * ddx_arr).rowwise().sum() + + 3. * (dy_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() * + (ddx_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() / + sample_size); + + dx_arr += (inv_var_tile_data * inv_var_tile_data).colwise() * + (ddx_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() / + sample_size * + (dy_arr.rowwise().sum() / sample_size - dy_arr); + + dx_arr += (inv_var_tile_data * inv_var_tile_data).colwise() * + (dy_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() / + sample_size * + (ddx_arr.rowwise().sum() / sample_size - ddx_arr); + + dx_arr = scale_tile_data * dx_arr; + } + if (ddScale) { + ConstEigenVectorArrayMap ddscale_arr(ddScale->data(), C); + Tensor ddscale_tile; + ddscale_tile.Resize({C, sample_size}); + EigenArrayMap ddscale_tile_data( + ddscale_tile.mutable_data(ctx.GetPlace()), C, sample_size); + ddscale_tile_data = ddscale_arr.replicate(1, sample_size); + + dx_arr += (dy_arr * inv_var_tile_data - + (dy_arr.rowwise().sum().replicate(1, sample_size) / + sample_size) * + inv_var_tile_data - + x_sub_mean_mul_invstd_arr * inv_var_tile_data * + (dy_arr * x_sub_mean_mul_invstd_arr) + .rowwise() + .sum() + .replicate(1, sample_size) / + sample_size) * + ddscale_tile_data; + } + } + if (data_layout == DataLayout::kNCHW) { + VLOG(3) << "Transform batchnorm output from NHWC to NCHW"; + TransToChannelFirst( + ctx, &transformed_dx, dX); + } + } + if (dScale) { + dScale->mutable_data(ctx.GetPlace()); + EigenVectorArrayMap dscale_arr(dScale->mutable_data(ctx.GetPlace()), + C); + dscale_arr.setZero(); + if (use_global_stats) { + // math: dscale = np.sum(ddx * dy, axis=(n,h,w)) * inv_var + if (ddX) { + dscale_arr = (ddx_arr * dy_arr * inv_var_tile_data).rowwise().sum(); + } + } else { + // math: dscale = inv_var * (dy - np.mean(dy, 
axis=(n,h,w) - (x-mean) * + // inv_var.pow(2) * np.mean(dy * (x-mean), axis=(n,h,w)))) * + // ddx + if (ddX) { + Tensor first_grad; + first_grad.Resize({C, sample_size}); + EigenArrayMap first_grad_arr( + first_grad.mutable_data(ctx.GetPlace()), C, sample_size); + first_grad_arr.setZero(); + + first_grad_arr += + inv_var_tile_data * + (dy_arr - + dy_arr.rowwise().sum().replicate(1, sample_size) / sample_size - + x_sub_mean_mul_invstd_arr * + (dy_arr * x_sub_mean_mul_invstd_arr) + .rowwise() + .sum() + .replicate(1, sample_size) / + sample_size); + dscale_arr = (first_grad_arr * ddx_arr).rowwise().sum(); + } + } + } + + if (ddY) { + ddY->mutable_data(ctx.GetPlace()); + EigenArrayMap ddy_arr(transformed_ddy.mutable_data(ctx.GetPlace()), + C, sample_size); + ddy_arr.setZero(); + if (use_global_stats) { + // math: ddy = r * ddx * inv_var + if (ddX) { + ddy_arr = scale_tile_data * ddx_arr * inv_var_tile_data; + } + } else { + // math: ddy = (x - mean) * inv_var * ddscale + ddbias + + // scale * inv_var * (ddx - (x - mean) * inv_var.pow(2) * + // np.mean(ddx * (x - mean), axis=(n,h,w))) + if (ddX) { + ddy_arr += + scale_tile_data * inv_var_tile_data * + (ddx_arr - + ddx_arr.rowwise().sum().replicate(1, sample_size) / sample_size - + x_sub_mean_mul_invstd_arr * + (ddx_arr * x_sub_mean_mul_invstd_arr) + .rowwise() + .sum() + .replicate(1, sample_size) / + sample_size); + } + if (ddScale && ddBias) { + ConstEigenVectorArrayMap ddscale_arr(ddScale->data(), C); + Tensor ddscale_tile; + ddscale_tile.Resize({C, sample_size}); + EigenArrayMap ddscale_tile_data( + ddscale_tile.mutable_data(ctx.GetPlace()), C, sample_size); + ddscale_tile_data = ddscale_arr.replicate(1, sample_size); + + ConstEigenVectorArrayMap ddbias_arr(ddBias->data(), C); + Tensor ddbias_tile; + ddbias_tile.Resize({C, sample_size}); + EigenArrayMap ddbias_tile_data( + ddbias_tile.mutable_data(ctx.GetPlace()), C, sample_size); + ddbias_tile_data = ddbias_arr.replicate(1, sample_size); + + ddy_arr += 
x_sub_mean_mul_invstd_arr * ddscale_tile_data; + ddy_arr += ddbias_tile_data; + } + } + if (data_layout == DataLayout::kNCHW) { + VLOG(3) << "Transform batchnorm output from NHWC to NCHW"; + TransToChannelFirst( + ctx, &transformed_ddy, ddY); + } + } + } +}; + +DECLARE_INPLACE_OP_INFERER(BatchNormDoubleGradOpInplaceInferer, {"DY", "DDY"}); + } // namespace operators } // namespace paddle @@ -839,7 +1234,11 @@ REGISTER_OPERATOR(batch_norm, ops::BatchNormOp, ops::BatchNormOpMaker, ops::BatchNormOpInferVarType, ops::BatchNormGradMaker, ops::BatchNormGradMaker); -REGISTER_OPERATOR(batch_norm_grad, ops::BatchNormGradOp); +REGISTER_OPERATOR(batch_norm_grad, ops::BatchNormGradOp, + ops::BatchNormDoubleGradMaker, + ops::BatchNormDoubleGradMaker); +REGISTER_OPERATOR(batch_norm_grad_grad, ops::BatchNormDoubleGradOp, + ops::BatchNormDoubleGradOpInplaceInferer); REGISTER_OP_CPU_KERNEL( batch_norm, ops::BatchNormKernel, @@ -848,3 +1247,7 @@ REGISTER_OP_CPU_KERNEL( batch_norm_grad, ops::BatchNormGradKernel, ops::BatchNormGradKernel); +REGISTER_OP_CPU_KERNEL( + batch_norm_grad_grad, + ops::BatchNormDoubleGradKernel, + ops::BatchNormDoubleGradKernel); diff --git a/paddle/fluid/operators/batch_norm_op.cu b/paddle/fluid/operators/batch_norm_op.cu index be834772679acb1717ae77e3729822dbdb609db8..2d5b395ac6807dade59d473c9fcffb925e4abe3a 100644 --- a/paddle/fluid/operators/batch_norm_op.cu +++ b/paddle/fluid/operators/batch_norm_op.cu @@ -20,6 +20,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/operators/batch_norm_op.h" #include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/operators/norm_utils.cu.h" #include "paddle/fluid/platform/cudnn_helper.h" #include "paddle/fluid/platform/float16.h" @@ -840,6 +841,45 @@ class BatchNormGradKernel } }; +template +class BatchNormDoubleGradKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + const auto *X = ctx.Input("X"); + const auto *Scale = ctx.Input("Scale"); + const auto *dY = ctx.Input("DY"); + const auto *Saved_mean = ctx.Input("SavedMean"); + const auto *Saved_variance = ctx.Input("SavedVariance"); + const double epsilon = static_cast(ctx.Attr("epsilon")); + const bool use_global_stats = ctx.Attr("use_global_stats"); + const bool is_test = ctx.Attr("is_test"); + + PADDLE_ENFORCE_EQ( + is_test, false, + platform::errors::InvalidArgument( + "`is_test = True` CANNOT be used in train program. 
If " + "you want to use global status in pre_train model, " + "please set `use_global_stats = True`")); + + const std::string data_layout_str = ctx.Attr("data_layout"); + const DataLayout data_layout = + framework::StringToDataLayout(data_layout_str); + + const auto *ddX = ctx.Input("DDX"); + const auto *ddScale = ctx.Input("DDScale"); + const auto *ddBias = ctx.Input("DDBias"); + + auto *dX = ctx.Output("DX"); + auto *dScale = ctx.Output("DScale"); + auto *ddY = ctx.Output("DDY"); + + NormDoubleGradFunctor( + ctx, data_layout, X, Scale, dY, Saved_mean, Saved_variance, epsilon, + use_global_stats, ddX, ddScale, ddBias, dX, dScale, ddY); + } +}; + } // namespace operators } // namespace paddle @@ -853,3 +893,7 @@ REGISTER_OP_CUDA_KERNEL( batch_norm_grad, ops::BatchNormGradKernel, ops::BatchNormGradKernel, ops::BatchNormGradKernel); +REGISTER_OP_CUDA_KERNEL( + batch_norm_grad_grad, + ops::BatchNormDoubleGradKernel, + ops::BatchNormDoubleGradKernel); diff --git a/paddle/fluid/operators/batch_norm_op.h b/paddle/fluid/operators/batch_norm_op.h index 9f844b7c078bb7397d98dad57d9fad475283f397..1440b74290ce43a9e30d59ff5ad94e00eb13f9f1 100644 --- a/paddle/fluid/operators/batch_norm_op.h +++ b/paddle/fluid/operators/batch_norm_op.h @@ -103,6 +103,42 @@ inline void TransToChannelFirst(const framework::ExecutionContext& context, } } +template +inline void ResizeToChannelLast(const framework::ExecutionContext& context, + const Tensor* input, + Tensor* transformed_input) { + int dim = input->dims().size() - 2; + if (dim == 3) { + transformed_input->Resize(input->dims()); + + auto in_dims_vec = framework::vectorize(input->dims()); + in_dims_vec[1] = input->dims()[2]; + in_dims_vec[2] = input->dims()[3]; + in_dims_vec[3] = input->dims()[4]; + in_dims_vec[4] = input->dims()[1]; + transformed_input->Resize(framework::make_ddim(in_dims_vec)); + transformed_input->mutable_data(context.GetPlace()); + + } else if (dim == 2) { + transformed_input->Resize(input->dims()); + + auto 
in_dims_vec = framework::vectorize(input->dims()); + in_dims_vec[1] = input->dims()[2]; + in_dims_vec[2] = input->dims()[3]; + in_dims_vec[3] = input->dims()[1]; + transformed_input->Resize(framework::make_ddim(in_dims_vec)); + transformed_input->mutable_data(context.GetPlace()); + } else if (dim == 1) { + transformed_input->Resize(input->dims()); + + auto in_dims_vec = framework::vectorize(input->dims()); + in_dims_vec[1] = input->dims()[2]; + in_dims_vec[2] = input->dims()[1]; + transformed_input->Resize(framework::make_ddim(in_dims_vec)); + transformed_input->mutable_data(context.GetPlace()); + } +} + template inline void TransToChannelLast(const framework::ExecutionContext& context, const Tensor* input, Tensor* transformed_input) { @@ -154,6 +190,16 @@ class BatchNormGradOp : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const override; }; +class BatchNormDoubleGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; +}; + class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override; @@ -168,6 +214,15 @@ class BatchNormGradMaker : public framework::SingleGradOpMaker { void Apply(GradOpPtr op) const override; }; +template +class BatchNormDoubleGradMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override; +}; + class BatchNormOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput { protected: @@ -190,5 +245,11 @@ class BatchNormGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override; }; +template +class BatchNormDoubleGradKernel : public framework::OpKernel { + 
public: + void Compute(const framework::ExecutionContext& ctx) const override; +}; + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/coalesce_tensor_op.cc b/paddle/fluid/operators/coalesce_tensor_op.cc index 5b7bcde21a99f23b653cc8b822aa3e22539e9d82..d67d90c348e6f1db9fff604b3eff7b6a79141d07 100644 --- a/paddle/fluid/operators/coalesce_tensor_op.cc +++ b/paddle/fluid/operators/coalesce_tensor_op.cc @@ -33,29 +33,37 @@ class CoalesceTensorOpKernel : public framework::OpKernel { auto out_vars = context.MultiOutputVar("Output"); PADDLE_ENFORCE_GT(in_var_names.size(), static_cast(0), - "The CoalesceTensorOp has no input."); - PADDLE_ENFORCE_EQ( - in_var_names.size(), out_var_names.size(), - "The number of CoalesceTensorOp's input and output is not match."); + platform::errors::InvalidArgument( + "The CoalesceTensor operator has no input.")); + PADDLE_ENFORCE_EQ(in_var_names.size(), out_var_names.size(), + platform::errors::InvalidArgument( + "The number of CoalesceTensor operator's input and " + "output is not match, " + "input number is %u, output number is %u.", + in_var_names.size(), out_var_names.size())); // Input & Output check: only support LoDTensor for (size_t i = 0; i < in_var_names.size(); ++i) { PADDLE_ENFORCE_NOT_NULL( in_vars[i], - "The input variable %s of CoalesceTensorOp does not exist.", - in_var_names[i]); + platform::errors::NotFound("The input variable %s of CoalesceTensor " + "operator does not exist.", + in_var_names[i])); PADDLE_ENFORCE_NOT_NULL( out_vars[i], - "The output variable %s of CoalesceTensorOp does not exist.", - out_var_names[i]); - PADDLE_ENFORCE_EQ( - in_vars[i]->IsType(), true, - "The input variable %s of CoalesceTensorOp is not LoDTensor.", - in_var_names[i]); - PADDLE_ENFORCE_EQ( - out_vars[i]->IsType(), true, - "The output variable %s of CoalesceTensorOp is not LoDTensor.", - in_var_names[i]); + platform::errors::NotFound("The output variable %s of CoalesceTensor " + "operator does not exist.", 
+ out_var_names[i])); + PADDLE_ENFORCE_EQ(in_vars[i]->IsType(), true, + platform::errors::InvalidArgument( + "The input variable %s of CoalesceTensor operator " + "is not LoDTensor.", + in_var_names[i])); + PADDLE_ENFORCE_EQ(out_vars[i]->IsType(), true, + platform::errors::InvalidArgument( + "The output variable %s of CoalesceTensor operator " + "is not LoDTensor.", + in_var_names[i])); } auto in_tensors = context.MultiInput("Input"); @@ -64,7 +72,10 @@ class CoalesceTensorOpKernel : public framework::OpKernel { for (size_t i = 0; i < in_var_names.size(); ++i) { PADDLE_ENFORCE_EQ( in_var_names[i], out_var_names[i], - "The input and output variable of CoalesceTensorOp is different."); + platform::errors::InvalidArgument( + "The input and output variable of CoalesceTensor operator is " + "different, %dth input is %s, %dth output is %s.", + i, in_var_names[i], i, out_var_names[i])); } } else { // Init the output as input @@ -134,16 +145,25 @@ class CoalesceTensorOpKernel : public framework::OpKernel { const std::vector &lod_tensors, const std::vector var_names, size_t *numel, const size_t &size_of_dtype, const platform::Place &place) const { - PADDLE_ENFORCE_EQ(lod_tensors.size(), var_names.size()); + PADDLE_ENFORCE_EQ( + lod_tensors.size(), var_names.size(), + platform::errors::InvalidArgument( + "The number of input tensor and variable does not match, the " + "number of input tensor is %u, the number of input variable is %u.", + lod_tensors.size(), var_names.size())); *numel = 0; std::stringstream ss; ss << "alloc_space_for_vars: "; for (size_t i = 0; i < var_names.size(); ++i) { PADDLE_ENFORCE_EQ(lod_tensors[i]->IsInitialized(), true, - "%s is not initialized.", var_names[i]); + platform::errors::InvalidArgument( + "Tensor `%s` is not initialized.", var_names[i])); auto size = lod_tensors[i]->numel(); - PADDLE_ENFORCE_GT(size, 0); + PADDLE_ENFORCE_GT( + size, 0, + platform::errors::InvalidArgument( + "The number of tensor `%s`'s elements is 0.", var_names[i])); ss 
<< "input(" << var_names[i] << ") dim:(" << lod_tensors[i]->dims() << ") " << " addres:" << lod_tensors[i]->data() << ", "; diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc index 4f337c03599a548ac3d95ddd06c726be30d7c13f..7937e432d22faa3ffd93e46a39b7b1cc5500dbf8 100644 --- a/paddle/fluid/operators/concat_op.cc +++ b/paddle/fluid/operators/concat_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/concat_op.h" + #include #include #include @@ -78,7 +79,8 @@ class ConcatOp : public framework::OperatorWithKernel { } } if (flag == 0) { - PADDLE_THROW("All Inputs of Concat OP are Empty!"); + PADDLE_THROW(platform::errors::InvalidArgument( + "All Inputs of Concat OP are Empty!")); } #ifdef PADDLE_WITH_MKLDNN if (platform::CanMKLDNNBeUsed(ctx)) { diff --git a/paddle/fluid/operators/conv_cudnn_op.cu b/paddle/fluid/operators/conv_cudnn_op.cu index 7f705755915924de4ca6ab4c698e46a437bb649c..00af724ac7fce64b9a210bf43a150acf20f34dce 100644 --- a/paddle/fluid/operators/conv_cudnn_op.cu +++ b/paddle/fluid/operators/conv_cudnn_op.cu @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" @@ -287,7 +288,9 @@ class CUDNNConvOpKernel : public framework::OpKernel { #endif // ------------------- cudnn conv forward --------------------- - ScalingParamType alpha = 1.0f, beta = 0.0f; + ScalingParamType alpha = 1.0f; + ScalingParamType beta = ctx.Attr("use_addto") ? 
1.0f : 0.0f; + VLOG(4) << "Conv: use_addto = " << ctx.Attr("use_addto"); for (int i = 0; i < groups; i++) { workspace_handle.RunFunc( [&](void* workspace_ptr) { @@ -609,9 +612,13 @@ class CUDNNConvGradOpKernel : public framework::OpKernel { } // ------------------- cudnn conv backward data --------------------- - ScalingParamType alpha = 1.0f, beta = 0.0f; + ScalingParamType alpha = 1.0f; + ScalingParamType beta = ctx.Attr("use_addto") ? 1.0f : 0.0f; + VLOG(4) << "Conv_grad: use_addto = " << ctx.Attr("use_addto"); + if (input_grad) { - // Because beta is zero, it is unnecessary to reset input_grad. + // When beta is 0, it is unnecessary to reset input_grad. + // When beta is 1, the output cannot be reset since addt strategy used. for (int i = 0; i < groups; i++) { workspace_handle.RunFunc( [&](void* cudnn_workspace_ptr) { @@ -653,6 +660,9 @@ class CUDNNConvGradOpKernel : public framework::OpKernel { ctx, &transformed_input_grad_channel, input_grad); } } + + // filter_grad do not use inplace addto. + ScalingParamType beta_filter = 0.0f; // ------------------- cudnn conv backward filter --------------------- if (filter_grad) { // Because beta is zero, it is unnecessary to reset filter_grad. @@ -665,7 +675,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel { input_data + i * group_offset_in, args2.odesc.desc(), output_grad_data + i * group_offset_out, args2.cdesc.desc(), filter_algo, cudnn_workspace_ptr, - workspace_size, &beta, args2.wdesc.desc(), + workspace_size, &beta_filter, args2.wdesc.desc(), filter_grad_data + i * group_offset_filter)); }, workspace_size); @@ -1017,7 +1027,14 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel { int group_offset_out = o_c / groups * o_h * o_w * o_d; int group_offset_filter = W->numel() / groups; - ScalingParamType alpha = 1.0f, beta = 0.0f; + ScalingParamType alpha = 1.0f; + ScalingParamType beta = 0.0f; + + // NOTE(zhiqiu): inplace addto is not supportted in double grad yet. 
+ // ScalingParamType beta = ctx.Attr("use_addto") ? 1.0f : + // 0.0f; + // VLOG(4) << "Conv_grad_grad: use_addto = " << ctx.Attr("use_addto"); + auto wkspace_handle = dev_ctx.cudnn_workspace_handle(); if (ddO) { diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index 9ed169fe3502e0c34b9f37d6520edc1a3fbfa91c..bf97b9d03c455182a8d95b6987896b9a580c84fe 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -305,6 +305,11 @@ void Conv2DOpMaker::Make() { .SetDefault(0.0f); AddAttr("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel") .SetDefault(0.0f); + AddAttr( + "use_addto", + "(bool, default false) If use addto strategy or not, only used in " + "cudnn kernel") + .SetDefault(false); AddAttr("fuse_residual_connection", "(bool, default false) Only used in mkldnn kernel. Used " "whenever convolution output is as an input to residual " @@ -460,6 +465,11 @@ void Conv3DOpMaker::Make() { .SetDefault(0.0f); AddAttr("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel") .SetDefault(0.0f); + AddAttr( + "use_addto", + "(bool, default false) If use addto strategy or not, only used in " + "cudnn kernel") + .SetDefault(false); AddAttr("fuse_residual_connection", "(bool, default false) Only used in mkldnn kernel. 
Used " "whenever convolution output is as an input to residual " diff --git a/paddle/fluid/operators/dequantize_abs_max_op.cc b/paddle/fluid/operators/dequantize_abs_max_op.cc index 48743f2e48c8a7686497adff52f23f31346aeda7..0d4d68d9f622fef9df4819d6092411a4d7db65f7 100644 --- a/paddle/fluid/operators/dequantize_abs_max_op.cc +++ b/paddle/fluid/operators/dequantize_abs_max_op.cc @@ -45,10 +45,8 @@ class DequantizeMaxAbsOp : public framework::OperatorWithKernel { : OperatorWithKernel(type, inputs, outputs, attrs) {} void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - "Input(X) of DequantizeMaxAbsOp should not be null."); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - "Output(Out) of DequantizeMaxAbsOp should not be null."); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "DequantizeMaxAbs"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "DequantizeMaxAbs"); ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out"); diff --git a/paddle/fluid/operators/detection/gpc.cc b/paddle/fluid/operators/detection/gpc.cc index b46d231d0ff7774c64745b3b77953cf2ed8d82f7..6b1b0cd8b3578a344978afae642b66759589ffde 100644 --- a/paddle/fluid/operators/detection/gpc.cc +++ b/paddle/fluid/operators/detection/gpc.cc @@ -532,7 +532,8 @@ static int count_contours(polygon_node *polygon) { } static void add_left(polygon_node *p, double x, double y) { - PADDLE_ENFORCE_NOT_NULL(p); + PADDLE_ENFORCE_NOT_NULL(p, paddle::platform::errors::InvalidArgument( + "Input polygon node is nullptr.")); vertex_node *nv = NULL; /* Create a new vertex node and set its fields */ @@ -588,7 +589,8 @@ static void add_right(polygon_node *p, double x, double y) { } static void merge_right(polygon_node *p, polygon_node *q, polygon_node *list) { - PADDLE_ENFORCE_NOT_NULL(p); + PADDLE_ENFORCE_NOT_NULL(p, paddle::platform::errors::InvalidArgument( + "Input polygon node is nullptr.")); polygon_node *target = NULL; /* Label contour as 
external */ @@ -664,7 +666,8 @@ void add_vertex(vertex_node **t, double x, double y) { } void gpc_vertex_create(edge_node *e, int p, int s, double x, double y) { - PADDLE_ENFORCE_NOT_NULL(e); + PADDLE_ENFORCE_NOT_NULL(e, paddle::platform::errors::InvalidArgument( + "Input edge node is nullptr.")); add_vertex(&(e->outp[p]->v[s]), x, y); e->outp[p]->active++; } @@ -693,7 +696,8 @@ static bbox *create_contour_bboxes(gpc_polygon *p) { gpc_malloc(box, p->num_contours * sizeof(bbox), const_cast("Bounding box creation")); - PADDLE_ENFORCE_NOT_NULL(box); + PADDLE_ENFORCE_NOT_NULL(box, paddle::platform::errors::ResourceExhausted( + "Failed to malloc box memory.")); /* Construct contour bounding boxes */ for (c = 0; c < p->num_contours; c++) { @@ -857,7 +861,9 @@ void gpc_add_contour(gpc_polygon *p, gpc_vertex_list *new_contour, int hole) { /* Create an extended hole array */ gpc_malloc(extended_hole, (p->num_contours + 1) * sizeof(int), const_cast("contour hole addition")); - PADDLE_ENFORCE_NOT_NULL(extended_hole); + PADDLE_ENFORCE_NOT_NULL(extended_hole, + paddle::platform::errors::ResourceExhausted( + "Failed to malloc extended hole memory.")); /* Create an extended contour array */ gpc_malloc(extended_contour, @@ -975,7 +981,9 @@ void gpc_polygon_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, /* Build scanbeam table from scanbeam tree */ gpc_malloc(sbt, sbt_entries * sizeof(double), const_cast("sbt creation")); - PADDLE_ENFORCE_NOT_NULL(sbt); + PADDLE_ENFORCE_NOT_NULL(sbt, paddle::platform::errors::ResourceExhausted( + "Failed to malloc scanbeam table memory.")); + build_sbt(&scanbeam, sbt, sbtree); scanbeam = 0; free_sbtree(&sbtree); @@ -1017,7 +1025,9 @@ void gpc_polygon_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, e0 = aet; e1 = aet; /* Set up bundle fields of first edge */ - PADDLE_ENFORCE_NOT_NULL(aet); + PADDLE_ENFORCE_NOT_NULL(aet, paddle::platform::errors::InvalidArgument( + "Edge node AET is nullptr.")); + aet->bundle[ABOVE][aet->type] = 
(aet->top.y != yb); aet->bundle[ABOVE][!aet->type] = 0; aet->bstate[ABOVE] = UNBUNDLED; @@ -1612,7 +1622,8 @@ void gpc_tristrip_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, /* Build scanbeam table from scanbeam tree */ gpc_malloc(sbt, sbt_entries * sizeof(double), const_cast("sbt creation")); - PADDLE_ENFORCE_NOT_NULL(sbt); + PADDLE_ENFORCE_NOT_NULL(sbt, paddle::platform::errors::ResourceExhausted( + "Failed to malloc scanbeam table memory.")); build_sbt(&scanbeam, sbt, sbtree); scanbeam = 0; free_sbtree(&sbtree); @@ -1650,7 +1661,8 @@ void gpc_tristrip_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, e1 = aet; /* Set up bundle fields of first edge */ - PADDLE_ENFORCE_NOT_NULL(aet); + PADDLE_ENFORCE_NOT_NULL(aet, paddle::platform::errors::InvalidArgument( + "Edge node AET is nullptr.")); aet->bundle[ABOVE][aet->type] = (aet->top.y != yb); aet->bundle[ABOVE][!aet->type] = 0; aet->bstate[ABOVE] = UNBUNDLED; diff --git a/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc b/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc index b064265917b2a36b2261c6c43d355f9891aa9811..c9f9daf3b3c0442e379cd7a22fcf48dbe3acbb5d 100644 --- a/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc +++ b/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc @@ -48,7 +48,9 @@ class FetchBarrierOp : public framework::OperatorBase { } for (size_t i = 0; i < rets.size(); i++) { - PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, "internal error in RPCClient"); + PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, + platform::errors::Unavailable( + "Internal error occurred in RPCClient.")); } } }; diff --git a/paddle/fluid/operators/distributed_ops/send_recv_util.h b/paddle/fluid/operators/distributed_ops/send_recv_util.h index c05a1ff1da8803c1ef3161d0e9d8604f9f1e5f3b..7dc0596ac31e2506ae02de11b33bd0532f02cc7a 100644 --- a/paddle/fluid/operators/distributed_ops/send_recv_util.h +++ b/paddle/fluid/operators/distributed_ops/send_recv_util.h @@ -34,16 +34,16 @@ inline bool 
NeedSend(const framework::Scope& scope, std::string::npos) return false; auto* var = scope.FindVar(varname); - PADDLE_ENFORCE_NOT_NULL(var, "Can not find variable '%s' in the send side.", - varname); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound( + "Can not find variable '%s' in the send side.", varname)); if (var->IsType()) { return var->Get().IsInitialized(); } else if (var->IsType()) { return var->Get().rows().size() > 0UL; } else { - PADDLE_THROW( - "Variable type in send side should be in " - "[LodTensor, SelectedRows]"); + PADDLE_THROW(platform::errors::Unimplemented( + "Variable type in send side should be LodTensor or SelectedRows.")); } return false; } diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cc b/paddle/fluid/operators/elementwise/elementwise_add_op.cc index 534a19bd94a231f0522dd15d2510917be8c71a4b..97624944ca109f27322f151f0742c72447fd5c39 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cc @@ -13,8 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_add_op.h" + #include #include + +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/elementwise/elementwise_op.h" namespace paddle { @@ -129,3 +132,18 @@ REGISTER_OP_CPU_KERNEL( int>, ops::ElementwiseAddDoubleGradKernel); + +// A specialization elementwise_add operator, used in gradient accumulation with +// inplace addto. 
+REGISTER_OPERATOR( + grad_add, paddle::operators::ElementwiseOp, + paddle::operators::ElementwiseAddOpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); + +REGISTER_OP_CPU_KERNEL( + grad_add, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel); diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cu b/paddle/fluid/operators/elementwise/elementwise_add_op.cu index 71019872802eaca964373fd58a7ccc6445d9c489..a4cbd14388b4dd5ceab6417db79fafeeff41ccb7 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cu @@ -111,3 +111,10 @@ REGISTER_OP_CUDA_KERNEL( ops::ElementwiseAddDoubleGradKernel, ops::ElementwiseAddDoubleGradKernel); + +REGISTER_OP_CUDA_KERNEL( + grad_add, ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel); diff --git a/paddle/fluid/operators/fake_quantize_op.cc b/paddle/fluid/operators/fake_quantize_op.cc index 04ac4a35208a54361a4f434e68095e9519ee12e9..e9b4c7dacf8b4493fcfa0504ecf7421bd50de90c 100644 --- a/paddle/fluid/operators/fake_quantize_op.cc +++ b/paddle/fluid/operators/fake_quantize_op.cc @@ -174,7 +174,64 @@ struct ChannelClipAndFakeQuantFunctor { template struct ChannelClipAndFakeQuantFunctor; +template +struct ChannelClipFakeQuantDequantFunctor { + void operator()(const platform::CPUDeviceContext& ctx, + const framework::Tensor& in, const framework::Tensor& scale, + const int bin_cnt, const int quant_axis, + framework::Tensor* out) { + PADDLE_ENFORCE_EQ( + quant_axis == 0 || quant_axis == 1, true, + platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); + auto* scale_data = scale.data(); + auto* in_data = in.data(); + auto* out_data = out->mutable_data(ctx.GetPlace()); + auto in_dims = in.dims(); + const int64_t channel 
= in_dims[quant_axis]; + platform::Transform trans; + if (quant_axis == 0) { + const int64_t channel_size = in.numel() / channel; + for (int i = 0; i < channel; i++) { + T s = scale_data[i]; + auto* start = in_data + i * channel_size; + auto* end = in_data + (i + 1) * channel_size; + trans(ctx, start, end, out_data + i * channel_size, + ClipFunctor(-s, s)); + } + for (int i = 0; i < channel; i++) { + T s = scale_data[i]; + T inv_s = inverse(s); + framework::Tensor one_channel_out = out->Slice(i, i + 1); + auto out_e = framework::EigenVector::Flatten(one_channel_out); + out_e.device(*ctx.eigen_device()) = + (bin_cnt * inv_s * out_e).round() * s / static_cast(bin_cnt); + } + } else if (quant_axis == 1) { + const int64_t step_i = in.numel() / in_dims[0]; + const int64_t step_j = in.numel() / (in_dims[0] * in_dims[1]); + for (int i = 0; i < in_dims[0]; i++) { + for (int j = 0; j < in_dims[1]; j++) { + T s = scale_data[j]; + T inv_s = inverse(s); + auto* start = in_data + i * step_i + j * step_j; + auto* end = in_data + i * step_i + (j + 1) * step_j; + auto* cur_out_data = out_data + i * step_i + j * step_j; + trans(ctx, start, end, cur_out_data, ClipFunctor(-s, s)); + for (int k = 0; k < step_j; k++) { + cur_out_data[k] = std::round(bin_cnt * inv_s * cur_out_data[k]) * + s / static_cast(bin_cnt); + } + } + } + } + } +}; + +template struct ChannelClipFakeQuantDequantFunctor; template struct FindRangeAbsMaxFunctor { void operator()(const platform::CPUDeviceContext& ctx, @@ -360,6 +417,75 @@ $$0 \leq c \lt \ the\ channel\ number\ of\ X$$ } }; +class FakeChannelWiseQuantizeDequantizeAbsMaxOp + : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", + "FakeChannelWiseQuantizeDequantizeAbsMax"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", + "FakeChannelWiseQuantizeDequantizeAbsMax"); + 
OP_INOUT_CHECK(ctx->HasOutput("OutScale"), "Output", "OutScale", + "FakeChannelWiseQuantizeDequantizeAbsMax"); + int quant_axis = ctx->Attrs().Get("quant_axis"); + ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + ctx->SetOutputDim("OutScale", {ctx->GetInputDim("X")[quant_axis]}); + ctx->ShareLoD("X", /*->*/ "Out"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); + } +}; + +class FakeChannelWiseQuantizeDequantizeAbsMaxOpMaker + : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "(Tensor) Input is float data type."); + AddOutput("Out", + "(Tensor) Output of quantized and dequantized low level tensor, " + "saved as float data type."); + AddOutput("OutScale", "(Tensor) Current channel wise scale"); + AddAttr("quant_axis", + "(int, default 0) The axis for quantization. " + "For conv2d, depthwise_conv2d, conv2d_transpose " + "and mul, the quant_axis is equal to the cout axis.") + .SetDefault(0) + .AddCustomChecker([](const int& quant_axis) { + PADDLE_ENFORCE_EQ(quant_axis == 0 || quant_axis == 1, true, + platform::errors::InvalidArgument( + "'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); + }); + AddAttr("bit_length", "(int, default 8)") + .SetDefault(8) + .AddCustomChecker([](const int& bit_length) { + PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, + platform::errors::InvalidArgument( + "'bit_length' should be between 1 and 16, but " + "the received is %d", + bit_length)); + }); + AddComment(R"DOC( +The scale of FakeChannelWiseQuantize operator is a vector. +In detail, each channel of the input X has a scale value. 
+ +$$scale_c = max(abs(X_c))$$ +$$range = 2^{bit\_length - 1} - 1$$ +$$Out_c = round(\frac{X_c * range} {scale_c}) * \frac{scale_c} {range}$$ +In above three formulas, the range value of c is as follow: +$$0 \leq c \lt \ the\ channel\ number\ of\ X$$ +)DOC"); + } +}; + class FakeQuantizeRangeAbsMaxOp : public framework::OperatorWithKernel { public: FakeQuantizeRangeAbsMaxOp(const std::string& type, @@ -666,3 +792,12 @@ REGISTER_OP_CPU_KERNEL(moving_average_abs_max_scale, REGISTER_OPERATOR(fake_quantize_dequantize_grad, ops::FakeQuantDequantGradOp); REGISTER_OP_CPU_KERNEL(fake_quantize_dequantize_grad, ops::FakeQuantDequantGradKernel); + +REGISTER_OPERATOR(fake_channel_wise_quantize_dequantize_abs_max, + ops::FakeChannelWiseQuantizeDequantizeAbsMaxOp, + ops::FakeChannelWiseQuantizeDequantizeAbsMaxOpMaker, + ops::FakeQuantDequantGradMaker, + ops::FakeQuantDequantGradMaker); +REGISTER_OP_CPU_KERNEL( + fake_channel_wise_quantize_dequantize_abs_max, + ops::FakeChannelWiseQuantizeDequantizeAbsMaxKernel); diff --git a/paddle/fluid/operators/fake_quantize_op.cu b/paddle/fluid/operators/fake_quantize_op.cu index 6ff3c7ec632f236fe4ae6c6504537df3b8a46b7a..8bc14dde8636822354bbaeaf659880ee754dc5b9 100644 --- a/paddle/fluid/operators/fake_quantize_op.cu +++ b/paddle/fluid/operators/fake_quantize_op.cu @@ -417,8 +417,90 @@ struct FindMovingAverageAbsMaxFunctor { } }; -template struct FindMovingAverageAbsMaxFunctor; +// ChannelClipAndQuantDequantKernel for quant_axis is 0 +template +__global__ void ChannelClipAndQuantDequantKernelQuantAxis0( + const T* in, const T* scale, const int bin_cnt, const int n, const int c, + T* out) { + int tid = threadIdx.x; + + int channel_size = n / c; + const T* in_c = in + blockIdx.x * channel_size; + T* out_c = out + blockIdx.x * channel_size; + + T s = scale[blockIdx.x]; + T inv_s = inverse(s); + + for (int i = tid; i < channel_size; i += blockDim.x) { + T x = in_c[i]; + T v = x > s ? s : x; + v = v < -s ? 
-s : v; + v = bin_cnt * inv_s * v; + out_c[i] = round(v) * s / bin_cnt; + } +} + +// ChannelClipAndQuantDequantKernel for quant_axis is 1 +template +__global__ void ChannelClipAndQuantDequantKernelQuantAxis1( + const T* in, const T* scale, const int bin_cnt, const int n, const int cin, + const int cout, T* out) { + T s = scale[blockIdx.x % cout]; + T inv_s = inverse(s); + + int wh_size = n / (cin * cout); + const T* in_c = in + blockIdx.x * wh_size; + T* out_c = out + blockIdx.x * wh_size; + + for (int i = threadIdx.x; i < wh_size; i += blockDim.x) { + T x = in_c[i]; + T v = x > s ? s : x; + v = v < -s ? -s : v; + v = bin_cnt * inv_s * v; + out_c[i] = round(v) * s / bin_cnt; + } +} + +template +struct ChannelClipFakeQuantDequantFunctor { + void operator()(const platform::CUDADeviceContext& ctx, + const framework::Tensor& in, const framework::Tensor& scale, + const int bin_cnt, const int quant_axis, + framework::Tensor* out) { + // At present, channelwise quantization supports conv2d, depthwise_conv2d + // conv2d_transpose and mul + PADDLE_ENFORCE_EQ( + quant_axis == 0 || quant_axis == 1, true, + platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); + + int num = in.numel(); + auto in_dims = in.dims(); + + const T* in_data = in.data(); + const T* scale_data = scale.data(); + T* out_data = out->mutable_data(ctx.GetPlace()); + + if (quant_axis == 0) { + int grid = in_dims[0]; + int block = 1024; + ChannelClipAndQuantDequantKernelQuantAxis0< + T><<>>(in_data, scale_data, bin_cnt, + num, in_dims[0], out_data); + } else if (quant_axis == 1) { + int grid = in_dims[0] * in_dims[1]; + int block = 1024; + + ChannelClipAndQuantDequantKernelQuantAxis1< + T><<>>( + in_data, scale_data, bin_cnt, num, in_dims[0], in_dims[1], out_data); + } + } +}; + +template struct ChannelClipFakeQuantDequantFunctor; } // namespace operators } // namespace paddle @@ -443,3 +525,6 @@ REGISTER_OP_CUDA_KERNEL( 
ops::FakeQuantizeDequantizeMovingAverageAbsMaxKernel); REGISTER_OP_CUDA_KERNEL(fake_quantize_dequantize_grad, ops::FakeQuantDequantGradKernel); +REGISTER_OP_CUDA_KERNEL( + fake_channel_wise_quantize_dequantize_abs_max, + ops::FakeChannelWiseQuantizeDequantizeAbsMaxKernel); diff --git a/paddle/fluid/operators/fake_quantize_op.h b/paddle/fluid/operators/fake_quantize_op.h index 5c6e0b1f6e26d84462a18da910b412f03b93285d..2f5afbe0eedf98ac7219772a6705d502069f0385 100644 --- a/paddle/fluid/operators/fake_quantize_op.h +++ b/paddle/fluid/operators/fake_quantize_op.h @@ -72,6 +72,13 @@ struct ChannelClipAndFakeQuantFunctor { const int quant_axis, framework::Tensor* out); }; +template +struct ChannelClipFakeQuantDequantFunctor { + void operator()(const DeviceContext& ctx, const framework::Tensor& in, + const framework::Tensor& scale, const int bin_cnt, + const int quant_axis, framework::Tensor* out); +}; + template struct FindMovingAverageAbsMaxFunctor { void operator()(const DeviceContext& ctx, const framework::Tensor& in_accum, @@ -154,6 +161,30 @@ class FakeChannelWiseQuantizeAbsMaxKernel : public framework::OpKernel { } }; +template +class FakeChannelWiseQuantizeDequantizeAbsMaxKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* in = context.Input("X"); + auto* out = context.Output("Out"); + auto* out_scale = context.Output("OutScale"); + T* out_scale_data = out_scale->mutable_data(context.GetPlace()); + auto& dev_ctx = context.template device_context(); + out->mutable_data(dev_ctx.GetPlace()); + + int bit_length = context.Attr("bit_length"); + int bin_cnt = std::pow(2, bit_length - 1) - 1; + int quant_axis = context.Attr("quant_axis"); + + FindChannelAbsMaxFunctor()(dev_ctx, *in, quant_axis, + out_scale_data); + + ChannelClipFakeQuantDequantFunctor()( + dev_ctx, *in, *out_scale, bin_cnt, quant_axis, out); + } +}; + template class FakeQuantizeRangeAbsMaxKernel : public framework::OpKernel { 
public: diff --git a/paddle/fluid/operators/fused/CMakeLists.txt b/paddle/fluid/operators/fused/CMakeLists.txt index 3fc5f3bfc6b1633ffe835606bbac6118e6b32ca6..477a9162fe3f779d4006deb2e20b3a16f70cdf47 100644 --- a/paddle/fluid/operators/fused/CMakeLists.txt +++ b/paddle/fluid/operators/fused/CMakeLists.txt @@ -8,7 +8,8 @@ register_operators(EXCLUDES multihead_matmul_op fused_embedding_eltwise_layernorm_op fusion_group_op - fusion_gru_op) + fusion_gru_op + fused_bn_add_activation_op) # fusion_gru_op does not have CUDA kernel op_library(fusion_gru_op) @@ -47,4 +48,9 @@ if (WITH_GPU) file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fusion_group);\n") cc_test(test_fusion_group_op SRCS fusion_group_op_test.cc DEPS fusion_group_op) endif() + # fused_bn_add_activation: built only when CUDNN_VERSION >= 7401; the expansion is quoted so an empty/unset value cannot break if() + if (NOT "${CUDNN_VERSION}" VERSION_LESS 7401) + op_library(fused_bn_add_activation_op) + file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fused_bn_add_activation);\n") + endif() endif() diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..5b3ed03bb6419cd3c36f6ee2e856f1816d314c75 --- /dev/null +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc @@ -0,0 +1,255 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/fused/fused_bn_add_activation_op.h" +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using LoDTensor = framework::LoDTensor; + +void FusedBatchNormAddActOp::InferShape( + framework::InferShapeContext *ctx) const { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasInput("Z"), "Input", "Z", "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", + "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasInput("Bias"), "Input", "Bias", + "FusedBatchNormAddActOp"); + + // check output + OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Y", "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasOutput("MeanOut"), "Output", "MeanOut", + "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasOutput("VarianceOut"), "Output", "VarianceOut", + "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasOutput("SavedMean"), "Output", "SavedMean", + "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasOutput("SavedVariance"), "Output", "SavedVariance", + "FusedBatchNormAddActOp"); + + const auto x_dims = ctx->GetInputDim("X"); + const auto z_dims = ctx->GetInputDim("Z"); + PADDLE_ENFORCE_EQ(x_dims, z_dims, + platform::errors::InvalidArgument( + "ShapeError: the shapes of input " + "must be equal. But received: the shape " + "of input X = [%s], and the shape of " + "input Z = [%s]", + x_dims, z_dims)); + PADDLE_ENFORCE_GE(x_dims.size(), 2, platform::errors::InvalidArgument( + "ShapeError: the dimensions of input " + "must greater than or equal to 2. " + "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x_dims, x_dims.size())); + PADDLE_ENFORCE_LE(x_dims.size(), 5, platform::errors::InvalidArgument( + "ShapeError: the dimensions of input " + "must smaller than or equal to 5. " 
+ "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x_dims, x_dims.size())); + + const int64_t C = x_dims[x_dims.size() - 1]; + + auto scale_dim = ctx->GetInputDim("Scale"); + auto bias_dim = ctx->GetInputDim("Bias"); + + PADDLE_ENFORCE_EQ( + scale_dim.size(), 1UL, + platform::errors::InvalidArgument( + "ShapeError: the dimension of scale must equal to 1. " + "But received: the shape of scale is [%s], the dimension " + "of scale is [%d]", + scale_dim, scale_dim.size())); + PADDLE_ENFORCE_EQ(bias_dim.size(), 1UL, + platform::errors::InvalidArgument( + "ShapeError: the dimension of bias must equal to 1. " + "But received: the shape of bias is [%s], the dimension " + "of bias is [%d]", + bias_dim, bias_dim.size())); + + bool check = true; + if ((!ctx->IsRuntime()) && (framework::product(scale_dim) <= 0 || + framework::product(bias_dim) <= 0)) { + check = false; + } + + if (check) { + PADDLE_ENFORCE_EQ(scale_dim[0], C, + platform::errors::InvalidArgument( + "ShapeError: the shape of scale must equal to [%d]. " + "But received: the shape of scale is [%d]", + C, scale_dim[0])); + PADDLE_ENFORCE_EQ(bias_dim[0], C, + platform::errors::InvalidArgument( + "ShapeError: the shape of bias must equal to [%d]. " + "But received: the shape of bias is [%d]", + C, bias_dim[0])); + } + ctx->SetOutputDim("Y", x_dims); + ctx->SetOutputDim("MeanOut", {C}); + ctx->SetOutputDim("VarianceOut", {C}); + ctx->SetOutputDim("SavedMean", {C}); + ctx->SetOutputDim("SavedVariance", {C}); + ctx->ShareLoD("X", "Y"); +} + +framework::OpKernelType FusedBatchNormAddActOp::GetExpectedKernelType( + const framework::ExecutionContext &ctx) const { + auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); + // By default, the type of the scale, bias, mean, + // and var tensors should be float when input tensor's dtype is float16. 
+ auto bn_param_type = framework::proto::VarType::FP32; + + PADDLE_ENFORCE_EQ( + bn_param_type, ctx.Input("Scale")->type(), + platform::errors::InvalidArgument("Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, ctx.Input("Bias")->type(), + platform::errors::InvalidArgument("Bias input should be of float type")); + + framework::LibraryType library = framework::LibraryType::kPlain; + framework::DataLayout layout = framework::DataLayout::kAnyLayout; + + return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout, + library); +} + +void FusedBatchNormAddActOpMaker::Make() { + AddInput("X", "The input tensor"); + AddInput("Z", "The input tensor"); + AddInput("Scale", + "Scale is a 1-dimensional tensor of size C " + "that is applied to the output"); + AddInput("Bias", + "Bias is a 1-dimensional tensor of size C " + "that is applied to the output"); + AddOutput("Y", "result after normalization"); + AddOutput("MeanOut", + "Share memory with Mean. " + "Store the global mean when training"); + AddOutput("VarianceOut", + "Share memory with Variance. " + "Store the global Variance when training"); + AddOutput("SavedMean", + "Mean of the current mini batch, " + "will apply to output when training") + .AsIntermediate(); + AddOutput("SavedVariance", + "Variance of the current mini batch, " + "will apply to output when training") + .AsIntermediate(); + AddOutput("ReserveSpace", + "Reserve GPU space for triggering the new semi-persistent " + "NHWC kernel"); + AddAttr("momentum", "").SetDefault(0.9); + AddAttr("epsilon", "") + .SetDefault(1e-5) + .AddCustomChecker([](const float &epsilon) { + PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f, true, + platform::errors::InvalidArgument( + "'epsilon' should be between 0.0 and 0.001.")); + }); + AddAttr("act_type", "The activation type to be fused.") + .SetDefault("relu"); + AddComment(R"DOC( +Fused Batch Normalization with activation. 
+ +Batch Norm has been implemented as discussed in the paper: +https://arxiv.org/pdf/1502.03167.pdf +Batch Norm can be used as a normalizer function for conv2d and fully_connected operations. +Now, the required data format for FusedBatchNormAddActOp is NHWC `[batch, in_height, in_width, in_channels]`. + +)DOC"); +} + +void FusedBatchNormAddActGradOp::InferShape( + framework::InferShapeContext *ctx) const { + // check input + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", + "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasInput("Z"), "Input", "Z", + "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", + "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasInput("SavedMean"), "Input", "SavedMean", + "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasInput("SavedVariance"), "Input", "SavedVariance", + "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Y")), "Input", + framework::GradVarName("Y"), "FusedBatchNormAddActGradOp"); + + // check output + OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), "Output", + framework::GradVarName("X"), "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Z")), "Output", + framework::GradVarName("Z"), "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Scale")), "Output", + framework::GradVarName("Scale"), "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Bias")), "Output", + framework::GradVarName("Bias"), "FusedBatchNormAddActGradOp"); + + const auto in_dims = ctx->GetInputDim("X"); + const int C = in_dims[in_dims.size() - 1]; + + ctx->SetOutputDim(framework::GradVarName("X"), in_dims); + ctx->SetOutputDim(framework::GradVarName("Z"), in_dims); + ctx->SetOutputDim(framework::GradVarName("Scale"), {C}); + ctx->SetOutputDim(framework::GradVarName("Bias"), {C}); +} + +framework::OpKernelType 
FusedBatchNormAddActGradOp::GetExpectedKernelType( + const framework::ExecutionContext &ctx) const { + const auto *var = ctx.InputVar(framework::GradVarName("Y")); + if (var == nullptr) { + PADDLE_THROW(platform::errors::NotFound( + "Can not find Y@GRAD in the execution context.")); + } + const Tensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); + } + if (t == nullptr) { + PADDLE_THROW( + platform::errors::NotFound("Can not get the tensor value of Y@GRAD.")); + } + + framework::LibraryType library = framework::LibraryType::kPlain; + framework::DataLayout layout = framework::DataLayout::kAnyLayout; + + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace(), layout, + library); +} + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR( + fused_bn_add_activation, ops::FusedBatchNormAddActOp, + ops::FusedBatchNormAddActOpMaker, ops::FusedBatchNormAddActOpInferVarType, + ops::FusedBatchNormAddActGradOpMaker, + ops::FusedBatchNormAddActGradOpMaker); +REGISTER_OPERATOR(fused_bn_add_activation_grad, + ops::FusedBatchNormAddActGradOp); diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..7f1d297cda3fae54cdde089f25ccdf6715142c5f --- /dev/null +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu @@ -0,0 +1,338 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include "paddle/fluid/framework/data_layout.h" +#include "paddle/fluid/operators/activation_op.h" +#include "paddle/fluid/operators/fused/fused_bn_add_activation_op.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/operators/norm_utils.h" +#include "paddle/fluid/platform/cudnn_helper.h" +#include "paddle/fluid/platform/float16.h" + +DECLARE_bool(cudnn_batchnorm_spatial_persistent); + +namespace paddle { +namespace operators { +using Tensor = framework::Tensor; +template +using CudnnDataType = platform::CudnnDataType; +template +using BatchNormParamType = typename CudnnDataType::BatchNormParamType; + +template +class FusedBatchNormAddActKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), true, + platform::errors::PreconditionNotMet("It must use CUDAPlace.")); + double epsilon = static_cast(ctx.Attr("epsilon")); + float momentum = ctx.Attr("momentum"); + std::string act_type = ctx.Attr("act_type"); + + if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) { + LOG(ERROR) << "Provided epsilon is smaller than " + << "CUDNN_BN_MIN_EPSILON. Setting it to " + << "CUDNN_BN_MIN_EPSILON instead."; + } + epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON); + + // Get the size for each dimension. 
+ // NHWC [batch_size, in_height, in_width, in_channels] + const auto *x = ctx.Input("X"); + const auto *z = ctx.Input("Z"); + const auto &in_dims = x->dims(); + + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); + + auto *mean_out = ctx.Output("MeanOut"); + auto *variance_out = ctx.Output("VarianceOut"); + mean_out->mutable_data>(ctx.GetPlace()); + variance_out->mutable_data>(ctx.GetPlace()); + + auto *saved_mean = ctx.Output("SavedMean"); + auto *saved_variance = ctx.Output("SavedVariance"); + saved_mean->mutable_data>(ctx.GetPlace()); + saved_variance->mutable_data>(ctx.GetPlace()); + + auto *y = ctx.Output("Y"); + y->mutable_data(ctx.GetPlace()); + + int N, C, H, W, D; + const DataLayout data_layout = DataLayout::kNHWC; + ExtractNCWHD(in_dims, data_layout, &N, &C, &H, &W, &D); + + auto &dev_ctx = ctx.template device_context(); + + // ------------------- cudnn descriptors --------------------- + auto handle = dev_ctx.cudnn_handle(); + cudnnTensorDescriptor_t data_desc_; + cudnnTensorDescriptor_t bn_param_desc_; + cudnnBatchNormMode_t mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; + + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnCreateTensorDescriptor(&data_desc_)); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_)); + + std::vector dims = {N, C, H, W, D}; + std::vector strides = {H * W * D * C, 1, W * D * C, D * C, C}; + + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( + data_desc_, CudnnDataType::type, + in_dims.size() > 3 ? in_dims.size() : 4, dims.data(), strides.data())); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDeriveBNTensorDescriptor(bn_param_desc_, + data_desc_, mode_)); + + double this_factor = 1. 
- momentum; + cudnnBatchNormOps_t bnOps_ = CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION; + platform::ScopedActivationDescriptor scope_act_desc; + cudnnActivationDescriptor_t activation_desc_ = + scope_act_desc.descriptor(act_type); + size_t workspace_size = 0; + size_t reserve_space_size = 0; + void *reserve_space_ptr = nullptr; + void *workspace_ptr = nullptr; + Tensor workspace_tensor; + // Create reserve space and workspace for batch norm. + // Create tensor for each batchnorm op, it will be used in the + // backward. Thus this tensor shouldn't be temp. + auto *reserve_space = ctx.Output("ReserveSpace"); + PADDLE_ENFORCE_NOT_NULL( + reserve_space, + platform::errors::NotFound( + "The argument ReserveSpace of batch_norm op is not found.")); + + // --------------- cudnn batchnorm workspace --------------- + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload:: + cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( + /*handle=*/handle, + /*mode=*/mode_, + /*bnOps=*/bnOps_, + /*xDesc=*/data_desc_, + /*zDesc=*/data_desc_, + /*yDesc=*/data_desc_, + /*bnScaleBiasMeanVarDesc=*/bn_param_desc_, + /*activationDesc=*/activation_desc_, + /*sizeInBytes=*/&workspace_size)); + + // -------------- cudnn batchnorm reserve space -------------- + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnGetBatchNormalizationTrainingExReserveSpaceSize( + /*handle=*/handle, + /*mode=*/mode_, + /*bnOps=*/bnOps_, + /*activationDesc=*/activation_desc_, + /*xDesc=*/data_desc_, + /*sizeInBytes=*/&reserve_space_size)); + + reserve_space_ptr = reserve_space->mutable_data(ctx.GetPlace(), x->type(), + reserve_space_size); + workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->type(), + workspace_size); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnBatchNormalizationForwardTrainingEx( + handle, mode_, bnOps_, CudnnDataType::kOne(), + CudnnDataType::kZero(), data_desc_, x->template data(), + data_desc_, z->template data(), data_desc_, + y->template data(), bn_param_desc_, + 
scale->template data>(), + bias->template data>(), this_factor, + mean_out->template mutable_data>( + ctx.GetPlace()), + variance_out->template mutable_data>( + ctx.GetPlace()), + epsilon, saved_mean->template mutable_data>( + ctx.GetPlace()), + saved_variance->template mutable_data>( + ctx.GetPlace()), + activation_desc_, workspace_ptr, workspace_size, reserve_space_ptr, + reserve_space_size)); + + // clean when exit. + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDestroyTensorDescriptor(data_desc_)); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_)); + } +}; + +template +class FusedBatchNormAddActGradKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), true, + platform::errors::PreconditionNotMet("It must use CUDAPlace.")); + double epsilon = static_cast(ctx.Attr("epsilon")); + std::string act_type = ctx.Attr("act_type"); + + const auto *x = ctx.Input("X"); + const auto *z = ctx.Input("Z"); + const auto *y = ctx.Input("Y"); + const auto *d_y = ctx.Input(framework::GradVarName("Y")); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); + const auto *reserve_space = ctx.Input("ReserveSpace"); + + const auto &in_dims = x->dims(); + + int N, C, H, W, D; + const DataLayout data_layout = DataLayout::kNHWC; + ExtractNCWHD(in_dims, data_layout, &N, &C, &H, &W, &D); + + // init output + auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_z = ctx.Output(framework::GradVarName("Z")); + auto *d_scale = ctx.Output(framework::GradVarName("Scale")); + auto *d_bias = ctx.Output(framework::GradVarName("Bias")); + + d_x->mutable_data(ctx.GetPlace()); + d_z->mutable_data(ctx.GetPlace()); + PADDLE_ENFORCE_EQ( + d_scale && d_bias, true, + platform::errors::PreconditionNotMet( + "Both the scale grad and the bias grad must not be null.")); + 
d_scale->mutable_data>(ctx.GetPlace()); + d_bias->mutable_data>(ctx.GetPlace()); + PADDLE_ENFORCE_EQ(scale->dims().size(), 1UL, + platform::errors::PreconditionNotMet( + "The scale only has one dimension.")); + PADDLE_ENFORCE_EQ( + scale->dims()[0], C, + platform::errors::PreconditionNotMet( + "The size of scale is equal to the channel of Input(X).")); + + auto &dev_ctx = ctx.template device_context(); + + std::vector dims = {N, C, H, W, D}; + std::vector strides = {H * W * C * D, 1, W * D * C, D * C, C}; + // ------------------- cudnn descriptors --------------------- + cudnnTensorDescriptor_t data_desc_; + cudnnTensorDescriptor_t bn_param_desc_; + cudnnBatchNormMode_t mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; + + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnCreateTensorDescriptor(&data_desc_)); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_)); + if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) { + LOG(ERROR) << "Provided epsilon is smaller than " + << "CUDNN_BN_MIN_EPSILON. Setting it to " + << "CUDNN_BN_MIN_EPSILON instead."; + } + epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON); + + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( + data_desc_, CudnnDataType::type, + in_dims.size() > 3 ? 
in_dims.size() : 4, dims.data(), strides.data())); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDeriveBNTensorDescriptor(bn_param_desc_, + data_desc_, mode_)); + + const auto *saved_mean = ctx.Input("SavedMean"); + const auto *saved_var = ctx.Input("SavedVariance"); + const auto *saved_mean_data = + saved_mean->template data>(); + const auto *saved_var_data = + saved_var->template data>(); + + size_t workspace_size = 0; + void *workspace_ptr = nullptr; + Tensor workspace_tensor; + auto reserve_space_size = reserve_space->memory_size(); + cudnnBatchNormOps_t bnOps_ = CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION; + platform::ScopedActivationDescriptor scope_act_desc; + cudnnActivationDescriptor_t activation_desc_ = + scope_act_desc.descriptor(act_type); + // --------------- cudnn batchnorm workspace --------------- + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnGetBatchNormalizationBackwardExWorkspaceSize( + /*handle=*/dev_ctx.cudnn_handle(), + /*mode=*/mode_, + /*bnOps=*/bnOps_, + /*xDesc=*/data_desc_, + /*yDesc=*/data_desc_, + /*dyDesc=*/data_desc_, + /*dzDesc=*/data_desc_, + /*dxDesc=*/data_desc_, + /*bnScaleBiasMeanVarDesc=*/bn_param_desc_, + /*activationDesc=*/activation_desc_, + /*sizeInBytes=*/&workspace_size)); + + workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->type(), + workspace_size); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnBatchNormalizationBackwardEx( + /*handle=*/dev_ctx.cudnn_handle(), + /*mode=*/mode_, + /*bnOps=*/bnOps_, + /*alphaDataDiff=*/CudnnDataType::kOne(), + /*betaDataDiff=*/CudnnDataType::kZero(), + /*alphaParamDiff=*/CudnnDataType::kOne(), + /*betaParamDiff=*/CudnnDataType::kZero(), + /*xDesc=*/data_desc_, + /*xData=*/x->template data(), + /*yDesc=*/data_desc_, + /*yData=*/y->template data(), + /*dyDesc=*/data_desc_, + /*dyData=*/d_y->template data(), + /*dzDesc=*/data_desc_, + /*dzData=*/d_z->template data(), + /*dxDesc=*/data_desc_, + /*dxData=*/d_x->template data(), + 
/*dBnScaleBiasDesc=*/bn_param_desc_, + /*bnScaleData=*/scale->template data>(), + /*bnBiasData=*/bias->template data>(), + /*dBnScaleData=*/d_scale->template data>(), + /*dBnBiasData=*/d_bias->template data>(), + /*epsilon=*/epsilon, + /*savedMean=*/saved_mean_data, + /*savedInvVariance=*/saved_var_data, + /*activationDesmc=*/activation_desc_, + /*workspace=*/workspace_ptr, + /*workSpaceSizeInBytes=*/workspace_size, + /*reserveSpace=*/const_cast(reserve_space->template data()), + /*reserveSpaceSizeInBytes=*/reserve_space_size)); + + // clean when exit. + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDestroyTensorDescriptor(data_desc_)); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_)); + } +}; + +} // namespace operators +} // namespace paddle + +#if CUDNN_VERSION >= 7401 +namespace ops = paddle::operators; +namespace plat = paddle::platform; +REGISTER_OP_CUDA_KERNEL( + fused_bn_add_activation, + ops::FusedBatchNormAddActKernel); +REGISTER_OP_CUDA_KERNEL(fused_bn_add_activation_grad, + ops::FusedBatchNormAddActGradKernel< + plat::CUDADeviceContext, plat::float16>); +#endif diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.h b/paddle/fluid/operators/fused/fused_bn_add_activation_op.h new file mode 100644 index 0000000000000000000000000000000000000000..5c7df96e60dd89b74058ead837bb75555f3674ad --- /dev/null +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.h @@ -0,0 +1,106 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include "paddle/fluid/framework/grad_op_desc_maker.h" +#include "paddle/fluid/framework/op_proto_maker.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/framework/var_type_inference.h" + +namespace paddle { +namespace operators { +using Tensor = framework::Tensor; + +class FusedBatchNormAddActOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; +}; + +class FusedBatchNormAddActGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; +}; + +class FusedBatchNormAddActOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override; +}; + +template +class FusedBatchNormAddActGradOpMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType(this->ForwardOpType() + "_grad"); + op->SetInput("X", this->Input("X")); + op->SetInput("Z", this->Input("Z")); + op->SetInput("Y", this->Output("Y")); + op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y")); + + op->SetInput("Scale", this->Input("Scale")); + op->SetInput("Bias", this->Input("Bias")); + op->SetInput("SavedMean", this->Output("SavedMean")); + op->SetInput("SavedVariance", this->Output("SavedVariance")); + op->SetInput("ReserveSpace", 
this->Output("ReserveSpace")); + + op->SetAttrMap(this->Attrs()); + + op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); + op->SetOutput(framework::GradVarName("Z"), this->InputGrad("Z")); + op->SetOutput(framework::GradVarName("Scale"), this->InputGrad("Scale")); + op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias")); + } +}; + +class FusedBatchNormAddActOpInferVarType + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map& GetInputOutputWithSameType() + const override { + static std::unordered_map m{{"X", /*->*/ "Y"}}; + return m; + } +}; + +template +class FusedBatchNormAddActKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override; +}; + +template +class FusedBatchNormAddActGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override; +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/fused/fusion_gru_op.cc b/paddle/fluid/operators/fused/fusion_gru_op.cc index 4013906609603e31b798e333d55ecccba197506a..e3776a80b316089891282136022a4e6656360c6e 100644 --- a/paddle/fluid/operators/fused/fusion_gru_op.cc +++ b/paddle/fluid/operators/fused/fusion_gru_op.cc @@ -15,6 +15,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/fused/fusion_gru_op.h" #include // for memcpy #include +#include #include "paddle/fluid/operators/jit/kernels.h" #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/fc.h" diff --git a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc index a6c9a137b5438d840ae283b72fc9e85903c83775..c5a291f10b2eaa32aa4b98d73004008bae89a5c9 100644 --- a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc @@ -192,6 +192,9 @@ class FusionSeqConvEltAddReluKernel : public framework::OpKernel { copy_size += src_mat_w_sz; } // fill data + if (context_start > 0) { + src_data += context_start * src_mat_w; + } for (int j = 0; j < seq_len - up_pad - down_pad; ++j) { std::memcpy(dst_data, src_data, copy_size); dst_data += col_mat_w; @@ -201,18 +204,15 @@ class FusionSeqConvEltAddReluKernel : public framework::OpKernel { std::memset(dst_data, 0, down_pad * col_mat_w_sz); copy_size -= src_mat_w_sz; for (int j = 0; j < down_pad; ++j) { + if (copy_size < 0) { + copy_size = 0; + } std::memcpy(dst_data, src_data, copy_size); dst_data += col_mat_w; src_data += src_mat_w; copy_size -= src_mat_w_sz; } } else { - PADDLE_ENFORCE_GE(context_length, up_pad + down_pad + 1, - platform::errors::InvalidArgument( - "context length must be bigger or equal than " - "up_pad + down_pad + 1, but received context " - "length is: %d, up_pad is: %d, down_pad is: %d.", - context_length, up_pad, down_pad)); std::memset(dst_data, 0, seq_len * col_mat_w_sz); dst_data = dst_data + up_pad * src_mat_w; int zero_sz = up_pad * src_mat_w_sz; @@ -226,9 +226,15 @@ class FusionSeqConvEltAddReluKernel : public framework::OpKernel { // from bottom dst_data = col_data + ed * col_mat_w; src_data = x_data + st * src_mat_w; + if (context_start > 0) { + src_data += context_start * src_mat_w; + } zero_sz = down_pad * 
src_mat_w_sz; for (int j = 1; j <= std::min(down_pad, seq_len); ++j) { int copy_size = std::min(cur_src_sz, col_mat_w_sz - zero_sz); + if (copy_size < 0) { + copy_size = 0; + } std::memcpy(dst_data - (zero_sz + copy_size) / sizeof(T), src_data + std::max(seq_len - j - up_pad, 0) * src_mat_w, copy_size); diff --git a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc index f64e4f134d62f125e3e781ebf43163a566587d58..ecb7db46a9d8159b8da124e941cc69522f64cd57 100644 --- a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc @@ -24,20 +24,27 @@ void FusionSeqPoolCVMConcatOp::InferShape( framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_GE( ctx->Inputs("X").size(), 1UL, - "Inputs(X) of FusionSeqPoolCVMConcatOp should not be empty."); - PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Output(Out) of FusionSeqPoolCVMConcatOp should not be null."); + paddle::platform::errors::InvalidArgument( + "Inputs(X) of FusionSeqPoolCVMConcatOp should not be empty.")); + PADDLE_ENFORCE( + ctx->HasOutput("Out"), + paddle::platform::errors::InvalidArgument( + "Output(Out) of FusionSeqPoolCVMConcatOp should not be null.")); int axis = ctx->Attrs().Get("axis"); PADDLE_ENFORCE_EQ( - axis, 1, "FusionSeqPoolCVMConcatOp only supports concat axis=1 yet."); + axis, 1, + paddle::platform::errors::InvalidArgument( + "FusionSeqPoolCVMConcatOp only supports concat axis=1 yet.")); bool use_cvm = ctx->Attrs().Get("use_cvm"); PADDLE_ENFORCE_EQ( use_cvm, true, - "FusionSeqPoolCVMConcatOp only supports use_cvm is true yet."); + paddle::platform::errors::InvalidArgument( + "FusionSeqPoolCVMConcatOp only supports use_cvm is true yet.")); auto ins_dims = ctx->GetInputsDim("X"); const size_t n = ins_dims.size(); - PADDLE_ENFORCE_GT(n, 0UL, "Input tensors count should > 0."); + PADDLE_ENFORCE_GT(n, 0UL, paddle::platform::errors::InvalidArgument( + "Input tensors 
count should > 0.")); if (n == 1) { LOG(WARNING) << "Only have one input, may waste memory"; } @@ -45,7 +52,8 @@ void FusionSeqPoolCVMConcatOp::InferShape( // The output height should be confirmed in Compute, // since input lod is not accessible here. PADDLE_ENFORCE_EQ(ins_dims[0].size(), 2, - "The dims size of first input should be 2."); + paddle::platform::errors::InvalidArgument( + "The dims size of first input should be 2.")); ctx->SetOutputDim("Out", {-1, ins_dims[0][axis] * static_cast(n)}); } @@ -99,7 +107,8 @@ class FusionSeqPoolCVMConcatKernel : public framework::OpKernel { int w = ins[0]->numel() / x0_dims[0]; PADDLE_ENFORCE_EQ(y_dims[1] % w, 0, - "The output of dims[1] should be dividable of w"); + paddle::platform::errors::InvalidArgument( + "The output of dims[1] should be dividable of w")); jit::seq_pool_attr_t attr(w, jit::SeqPoolType::kSum); if (pooltype == "AVERAGE") { attr.type = jit::SeqPoolType::kAvg; @@ -117,9 +126,11 @@ class FusionSeqPoolCVMConcatKernel : public framework::OpKernel { const T* src = ins[i]->data(); T* dst = y_data + i * w; PADDLE_ENFORCE_EQ(static_cast(ins[i]->numel() / x_dims[0]), w, - "Width of all inputs should be equal."); + paddle::platform::errors::InvalidArgument( + "Width of all inputs should be equal.")); PADDLE_ENFORCE_EQ(x_lod.size(), bs + 1, - "Batchsize of all inputs should be equal."); + paddle::platform::errors::InvalidArgument( + "Batchsize of all inputs should be equal.")); for (size_t j = 0; j < bs; ++j) { attr.h = static_cast(x_lod[j + 1] - x_lod[j]); seqpool(src, dst, &attr); diff --git a/paddle/fluid/operators/gru_unit_op.h b/paddle/fluid/operators/gru_unit_op.h index 712ef05d8631ac74b92795321202cb5590286e82..4865a02c5292ffb9d079d0711f0bf7d6e927c441 100644 --- a/paddle/fluid/operators/gru_unit_op.h +++ b/paddle/fluid/operators/gru_unit_op.h @@ -47,7 +47,9 @@ class GRUUnitKernel : public framework::OpKernel { else if (act_type == relu) ReluFunctor()(d, x, y); else - PADDLE_THROW("unsupported activation 
type"); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported activation type, only supports identity, sigmoid, tanh " + "and relu.")); } void Compute(const framework::ExecutionContext& context) const override { @@ -137,7 +139,9 @@ class GRUUnitGradKernel : public framework::OpKernel { else if (act_type == relu) ReluGradFunctor()(d, x, y, dy, dx); else - PADDLE_THROW("unsupported activation type"); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported activation type, only supports identity, sigmoid, tanh " + "and relu.")); } void Compute(const framework::ExecutionContext& context) const override { diff --git a/paddle/fluid/operators/instance_norm_op.cc b/paddle/fluid/operators/instance_norm_op.cc index f72f7e8b85b873d9be57c8ff348e6adb2251d65d..a5b270c1dfef14bc92697c29bfeafa0fe08211d7 100644 --- a/paddle/fluid/operators/instance_norm_op.cc +++ b/paddle/fluid/operators/instance_norm_op.cc @@ -595,9 +595,13 @@ class InstanceNormDoubleGradKernel first_grad_arr += inv_var_tile_data * - (dy_arr - dy_arr.colwise().sum() / sample_size - + (dy_arr - + dy_arr.colwise().sum().replicate(sample_size, 1) / sample_size - x_sub_mean_mul_invstd_arr * - (dy_arr * x_sub_mean_mul_invstd_arr).colwise().sum() / + (dy_arr * x_sub_mean_mul_invstd_arr) + .colwise() + .sum() + .replicate(sample_size, 1) / sample_size); first_grad_arr = first_grad_arr * ddx_arr; for (int nc = 0; nc < NxC; ++nc) { diff --git a/paddle/fluid/operators/interpolate_op.cc b/paddle/fluid/operators/interpolate_op.cc index 1e99e22e12b2a23685dad742f175fd2b0684d334..e8a9ed878e9bd502b9bd7e7d82f574fb5740bb5d 100644 --- a/paddle/fluid/operators/interpolate_op.cc +++ b/paddle/fluid/operators/interpolate_op.cc @@ -104,12 +104,13 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { auto dim_x = ctx->GetInputDim("X"); auto interp_method = ctx->Attrs().Get("interp_method"); - PADDLE_ENFORCE( - "bilinear" == interp_method || "nearest" == interp_method || - "bicubic" == 
interp_method, - "Interpolation method can only be \"bilinear\" or \"nearest\" when " - "Input(X) dimension is 4, but got method = %s .", - interp_method); + PADDLE_ENFORCE_EQ("bilinear" == interp_method || "nearest" == interp_method || + "bicubic" == interp_method, + true, platform::errors::InvalidArgument( + "Interpolation method can only be \"bilinear\" " + "or \"nearest\" or \"bicubic\" when " + "Input(X) dimension is 4, but got method is %s.", + interp_method)); const DataLayout data_layout = framework::StringToDataLayout( ctx->Attrs().Get("data_layout")); @@ -169,13 +170,13 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { auto out_size_dim = ctx->GetInputDim("OutSize"); PADDLE_ENFORCE_EQ( out_size_dim.size(), 1, - platform::errors::InvalidArgument( - "OutSize's dimension size must be 1, but got dimension = %d .", - out_size_dim.size())); + platform::errors::InvalidArgument("OutSize's dimension size must be 1, " + "but got dimension size is %d .", + out_size_dim.size())); PADDLE_ENFORCE_EQ( out_size_dim[0], 2, platform::errors::InvalidArgument( - "OutSize's dim[0] must be 2, but got dimention = %d .", + "OutSize's dimension[0] must be 2, but got dimension[0] is %d .", out_size_dim[0])); ctx->ShareLoD("X", "Out"); return; @@ -264,12 +265,15 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { if (ctx->HasInput("OutSize") && ctx->IsRuntime()) { auto out_size_dim = ctx->GetInputDim("OutSize"); - PADDLE_ENFORCE_EQ(out_size_dim.size(), 1, - "OutSize's dimension size must be 1, but got size =%d .", - out_size_dim.size()); + PADDLE_ENFORCE_EQ( + out_size_dim.size(), 1, + platform::errors::InvalidArgument( + "OutSize's dimension size must be 1, but got size is %d.", + out_size_dim.size())); PADDLE_ENFORCE_EQ(out_size_dim[0], 3, - "OutSize's dim[0] must be 3, but got size = %d .", - out_size_dim[0]); + platform::errors::InvalidArgument( + "OutSize's dim[0] must be 3, but got size is %d.", + out_size_dim[0])); 
ctx->ShareLoD("X", "Out"); return; } @@ -289,10 +293,8 @@ class InterpolateOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), - "Input(X) of InterpolateOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Output(Out) of InterpolationOp should not be null."); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Interpolate"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Interpolate"); auto dim_x = ctx->GetInputDim("X"); // NCHW format PADDLE_ENFORCE( @@ -534,9 +536,10 @@ class InterpolateOpGrad : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), - "Input(Out@GRAD) should not be null"); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "InterpolateGrad"); + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), "Input", + "Out@GRAD", "InterpolateGrad"); + auto dim_x = ctx->GetInputDim("X"); if (ctx->HasOutput(framework::GradVarName("X"))) { ctx->SetOutputDim(framework::GradVarName("X"), dim_x); diff --git a/paddle/fluid/operators/linspace_op.cc b/paddle/fluid/operators/linspace_op.cc index 667c6e892956e29478f1401c3cb2622713433037..7cc07383bfa5f67a2404b220cb481d9017b40fd8 100644 --- a/paddle/fluid/operators/linspace_op.cc +++ b/paddle/fluid/operators/linspace_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/linspace_op.h" +#include namespace paddle { namespace operators { @@ -21,7 +22,7 @@ class LinspaceOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { + void InferShape(framework::InferShapeContext *ctx) const override { OP_INOUT_CHECK(ctx->HasInput("Start"), "Input", "Start", "linspace"); OP_INOUT_CHECK(ctx->HasInput("Stop"), "Input", "Stop", "linspace"); OP_INOUT_CHECK(ctx->HasInput("Num"), "Input", "Num", "linspace"); @@ -50,11 +51,17 @@ class LinspaceOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { + const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::proto::VarType::Type(ctx.Attr("dtype")), ctx.GetPlace()); } + + framework::OpKernelType GetKernelTypeForVar( + const std::string &var_name, const framework::Tensor &tensor, + const framework::OpKernelType &expected_kernel_type) const override { + return expected_kernel_type; + } }; class LinspaceOpMaker : public framework::OpProtoAndCheckerMaker { diff --git a/paddle/fluid/operators/linspace_op.cu b/paddle/fluid/operators/linspace_op.cu index c51e8785263b5de7a897f3865ed2dabdf93adfaa..a4f0693323297c286d24b169f1120e4017992a9b 100644 --- a/paddle/fluid/operators/linspace_op.cu +++ b/paddle/fluid/operators/linspace_op.cu @@ -23,9 +23,16 @@ namespace operators { using Tensor = framework::Tensor; template -__global__ void LinspaceKernel(T start, double step, int64_t size, T* out) { - CUDA_KERNEL_LOOP(index, size) { - out[index] = static_cast(start + step * index); +__global__ void LinspaceKernel(T start, T stop, double step, int64_t size, + T* out) { + int64_t index = blockIdx.x * blockDim.x + threadIdx.x; + + for (; index < size; index += blockDim.x * gridDim.x) { + if (index < size / 2) { + out[index] 
= static_cast(start + step * index); + } else { + out[index] = static_cast(stop - step * (size - index - 1)); + } } } @@ -55,13 +62,15 @@ class CUDALinspaceKernel : public framework::OpKernel { framework::TransDataType(start_dtype, out_dtype, *pre_start, &start_t); framework::TransDataType(stop_dtype, out_dtype, *pre_stop, &stop_t); - framework::Tensor n; - framework::TensorCopy(start_t, platform::CPUPlace(), &n); - T start = n.data()[0]; - framework::TensorCopy(stop_t, platform::CPUPlace(), &n); - T stop = n.data()[0]; - framework::TensorCopy(*num_t, platform::CPUPlace(), &n); - int32_t num = n.data()[0]; + framework::Tensor n_start; + framework::Tensor n_stop; + framework::Tensor n_num; + framework::TensorCopy(start_t, platform::CPUPlace(), &n_start); + T start = n_start.data()[0]; + framework::TensorCopy(stop_t, platform::CPUPlace(), &n_stop); + T stop = n_stop.data()[0]; + framework::TensorCopy(*num_t, platform::CPUPlace(), &n_num); + int64_t num = static_cast(n_num.data()[0]); PADDLE_ENFORCE_GT(num, 0, platform::errors::InvalidArgument( "The num of linspace op should be larger " @@ -72,14 +81,16 @@ class CUDALinspaceKernel : public framework::OpKernel { T* out_data = out->mutable_data(context.GetPlace()); double step = 0; - if (num != 1) { - step = (static_cast(stop - start)) / (num - 1); - } - auto stream = context.cuda_device_context().stream(); int block = 512; int grid = (num + block - 1) / block; - LinspaceKernel<<>>(start, step, num, out_data); + if (num != 1) { + step = (static_cast(stop - start)) / (num - 1); + LinspaceKernel<<>>(start, stop, step, num, + out_data); + } else { + LinspaceSpecialKernel<<>>(start, out_data); + } } }; diff --git a/paddle/fluid/operators/linspace_op.h b/paddle/fluid/operators/linspace_op.h index 2c30a66ef8e937127fb69a459a901164934b5b13..d8e0fefe175869171cac9c8d3798880e844dbe35 100644 --- a/paddle/fluid/operators/linspace_op.h +++ b/paddle/fluid/operators/linspace_op.h @@ -56,9 +56,15 @@ class CPULinspaceKernel : public 
framework::OpKernel { T* out_data = out->mutable_data(context.GetPlace()); if (num > 1) { + // step should be of double type for all types double step = (static_cast(stop - start)) / (num - 1); + int half_num = num / 2; for (int i = 0; i < num; ++i) { - out_data[i] = static_cast(start + step * i); + if (i < half_num) { + out_data[i] = static_cast(start + step * i); + } else { + out_data[i] = static_cast(stop - step * (num - i - 1)); + } } } else { out_data[0] = static_cast(start); diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index 10d335b828b516fe08871f314ba4667c06f04714..24ed4fcf6684980b217aad35dc124acef653c9b9 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -76,6 +76,7 @@ math_library(prelu) math_library(bert_encoder_functor) math_library(tree2col DEPS math_function) math_library(matrix_inverse) +math_library(segment_pooling) cc_test(math_function_test SRCS math_function_test.cc DEPS math_function) cc_test(selected_rows_functor_test SRCS selected_rows_functor_test.cc DEPS selected_rows_functor) diff --git a/paddle/fluid/operators/math/segment_pooling.cc b/paddle/fluid/operators/math/segment_pooling.cc new file mode 100644 index 0000000000000000000000000000000000000000..3c77d3d4cf88324caded3d7863b25b90b1232db6 --- /dev/null +++ b/paddle/fluid/operators/math/segment_pooling.cc @@ -0,0 +1,148 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/math/segment_pooling.h" +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class SegmentPoolFunctor { + public: + void operator()(const platform::CPUDeviceContext& context, + const framework::Tensor& input, + const framework::Tensor& segments, framework::Tensor* output, + framework::Tensor* index, + const std::string pooltype = "SUM") { + const IndexT* segment_ids = segments.data(); + auto curent_id = segment_ids[0]; + int64_t last_idx = 0; + int64_t w = input.numel() / input.dims()[0]; + auto& place = *context.eigen_device(); + for (int64_t idx = 1; idx <= segments.numel(); ++idx) { + if (idx < segments.numel()) { + if (segment_ids[idx] == curent_id) continue; + PADDLE_ENFORCE_GE(segment_ids[idx], curent_id, + platform::errors::InvalidArgument( + "The segment ids should be sorted, but got " + "segment_ids[%d]:%d > segment_ids[%d]:%d.", + idx - 1, curent_id, idx, segment_ids[idx])); + } + + Tensor out_t = output->Slice(curent_id, curent_id + 1); + Tensor in_t = input.Slice(last_idx, idx); + + int64_t h = idx - last_idx; + auto in_e = + framework::EigenMatrix::From(in_t, framework::make_ddim({h, w})); + auto out_e = framework::EigenVector::Flatten(out_t); + + auto reduce_dim = Eigen::array({{0}}); + if (pooltype == "MEAN") { + out_e.device(place) = in_e.mean(reduce_dim); + } else if (pooltype == "SUM") { + out_e.device(place) = in_e.sum(reduce_dim); + } else if (pooltype == "MAX") { + out_e.device(place) = in_e.maximum(reduce_dim); + } else if (pooltype == "MIN") { + out_e.device(place) = in_e.minimum(reduce_dim); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Unsupported segment pooling type, only MEAN, SUM, MAX, 
MIN " + "available, but got %s.", + pooltype)); + } + + last_idx = idx; + if (idx < segments.numel()) curent_id = segment_ids[idx]; + } + } +}; + +template +class SegmentPoolGradFunctor { + public: + void operator()(const platform::CPUDeviceContext& context, + const framework::Tensor& input, + const framework::Tensor& output, + const framework::Tensor& out_grad, + const framework::Tensor& segments, framework::Tensor* in_grad, + const framework::Tensor* index = nullptr, + const std::string pooltype = "SUM") { + const IndexT* segment_ids = segments.data(); + auto& place = *context.eigen_device(); + auto curent_id = segment_ids[0]; + int64_t last_idx = 0; + int64_t w = in_grad->numel() / in_grad->dims()[0]; + for (int64_t idx = 1; idx <= segments.numel(); ++idx) { + if (idx < segments.numel()) { + if (segment_ids[idx] == curent_id) continue; + PADDLE_ENFORCE_GE(segment_ids[idx], curent_id, + platform::errors::InvalidArgument( + "The segment ids should be sorted, but got " + "segment_ids[%d]:%d > segment_ids[%d]:%d.", + idx - 1, curent_id, idx, segment_ids[idx])); + } + + Tensor out_g_t = out_grad.Slice(curent_id, curent_id + 1); + Tensor in_g_t = in_grad->Slice(last_idx, idx); + + int64_t h = idx - last_idx; + auto in_g_e = framework::EigenMatrix::From(in_g_t, {h, w}); + auto out_g_e = framework::EigenMatrix::From(out_g_t, {1, w}); + Eigen::DSizes bcast(h, 1); + + if (pooltype == "MEAN") { + in_g_e.device(place) = (out_g_e / static_cast(h)).broadcast(bcast); + } else if (pooltype == "SUM") { + in_g_e.device(place) = out_g_e.broadcast(bcast); + } else if (pooltype == "MAX" || pooltype == "MIN") { + Tensor out_t = output.Slice(curent_id, curent_id + 1); + Tensor in_t = input.Slice(last_idx, idx); + auto in_e = framework::EigenMatrix::From(in_t, {h, w}); + auto out_e = framework::EigenMatrix::From(out_t, {1, w}); + in_g_e.device(place) = + (in_e == out_e.broadcast(bcast)).template cast() * + out_g_e.broadcast(bcast); + } else { + 
PADDLE_THROW(platform::errors::InvalidArgument( + "Unsupported segment pooling type, only MEAN, SUM, MAX, MIN " + "available, but got %s.", + pooltype)); + } + + last_idx = idx; + if (idx < segments.numel()) curent_id = segment_ids[idx]; + } + } +}; + +using CPU = platform::CPUDeviceContext; +template class SegmentPoolFunctor; +template class SegmentPoolFunctor; +template class SegmentPoolFunctor; +template class SegmentPoolFunctor; +template class SegmentPoolGradFunctor; +template class SegmentPoolGradFunctor; +template class SegmentPoolGradFunctor; +template class SegmentPoolGradFunctor; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/math/segment_pooling.h b/paddle/fluid/operators/math/segment_pooling.h new file mode 100644 index 0000000000000000000000000000000000000000..561fad6921fe7b9e61f6ea4bc33d820a6af25262 --- /dev/null +++ b/paddle/fluid/operators/math/segment_pooling.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/platform/device_context.h" + +namespace paddle { +namespace operators { + +template +class SegmentPoolFunctor { + public: + /* mean pool has summed_ids output */ + void operator()(const DeviceContext& context, const framework::Tensor& input, + const framework::Tensor& segments, framework::Tensor* output, + framework::Tensor* summed_ids = nullptr, + const std::string pooltype = "SUM"); +}; + +template +class SegmentPoolGradFunctor { + public: + /* mean pool has summed_ids output */ + void operator()(const DeviceContext& context, const framework::Tensor& input, + const framework::Tensor& output, + const framework::Tensor& out_grad, + const framework::Tensor& segments, framework::Tensor* in_grad, + const framework::Tensor* summed_ids = nullptr, + const std::string pooltype = "SUM"); +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/merge_lod_tensor_op.cc b/paddle/fluid/operators/merge_lod_tensor_op.cc index c9b852cfc05127a4bbf00ea23a751c59dc2d109d..87d914aa79753fbdc9d859c43bbf749b3ddf95cf 100644 --- a/paddle/fluid/operators/merge_lod_tensor_op.cc +++ b/paddle/fluid/operators/merge_lod_tensor_op.cc @@ -44,8 +44,10 @@ class MergeLoDTensorOp : public framework::OperatorBase { scope.FindVar(Output("Out"))->GetMutable(); auto level = static_cast(Attr("level")); - PADDLE_ENFORCE(in_true.numel() || in_false.numel(), - "Input(InTrue) or Input(InFalse) should be initialized."); + PADDLE_ENFORCE_EQ( + in_true.numel() || in_false.numel(), true, + platform::errors::InvalidArgument( + "Input(InTrue) or Input(InFalse) should be initialized.")); auto &mask_dim = mask.dims(); std::unique_ptr cpu_mask{new framework::LoDTensor()}; @@ -56,7 +58,9 @@ class MergeLoDTensorOp : public framework::OperatorBase { framework::TensorCopy(mask, platform::CPUPlace(), dev_ctx, cpu_mask.get()); #else - PADDLE_THROW("Not supported GPU, Please compile WITH_GPU 
option"); + PADDLE_THROW(platform::errors::PreconditionNotMet( + "Not supported GPU, Please recompile or reinstall paddle with CUDA " + "support.")); #endif } auto *mask_data = cpu_mask->data(); @@ -109,7 +113,11 @@ class MergeLoDTensorOp : public framework::OperatorBase { size_t start_offset = lod_and_offset.second.first; size_t end_offset = lod_and_offset.second.second; - PADDLE_ENFORCE_GE(end_offset, start_offset); + PADDLE_ENFORCE_GE(end_offset, start_offset, + platform::errors::InvalidArgument( + "The end offset less than start offset, end offset " + "is %d, start offset is %d.", + end_offset, start_offset)); size_t len = end_offset - start_offset; if (len == 0) { continue; @@ -189,22 +197,24 @@ class MergeLoDTensorInferShape : public framework::InferShapeBase { "merge_lod_tensor"); auto mask_dim = context->GetInputDim("Mask"); PADDLE_ENFORCE_EQ(mask_dim.size(), 2, - "If you are using IfElse OP:" - "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " - "ie.true_block():\n out_1 = ie.input(x)\n\n" - "Please ensure that the cond should be a 2-D tensor and " - "the second dim size of cond should be 1. " - "But now the cond's shape is [", - *mask_dim.Get(), "].\n"); + platform::errors::InvalidArgument( + "If you are using IfElse OP:" + "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " + "ie.true_block():\n out_1 = ie.input(x)\n\n" + "Please ensure that the cond is a 2-D tensor and " + "the second dim size of cond is 1. " + "But now the cond's shape is [%s].\n", + mask_dim)); if (context->IsRuntime() || mask_dim[1] > 0) { PADDLE_ENFORCE_EQ(mask_dim[1], 1, - "If you are using IfElse OP:" - "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " - "ie.true_block():\n out_1 = ie.input(x)\n\n" - "Please ensure that the cond should be a 2-D tensor " - "and the second dim size of cond should be 1. 
" - "But now the cond's shape is [", - *mask_dim.Get(), "].\n"); + platform::errors::InvalidArgument( + "If you are using IfElse OP:" + "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " + "ie.true_block():\n out_1 = ie.input(x)\n\n" + "Please ensure that the cond is a 2-D tensor " + "and the second dim size of cond is 1. " + "But now the cond's shape is [%s].\n", + mask_dim)); } context->SetOutputDim("Out", context->GetInputDim("InTrue")); diff --git a/paddle/fluid/operators/mv_op.cc b/paddle/fluid/operators/mv_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..1339982adaab162056bdefd3eecb405e95188a0d --- /dev/null +++ b/paddle/fluid/operators/mv_op.cc @@ -0,0 +1,125 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/mv_op.h" +namespace paddle { +namespace operators { + +class MVOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "The matrix input of mv op"); + AddInput("Vec", "The vector input of mv op"); + AddOutput("Out", "The output of mv op"); + AddComment(R"DOC( +MV Operator. + +This operator is used to perform matrix vector multiplication +of the input tensors `X` and `Vec`. 
+)DOC"); + } +}; + +class MVOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext *context) const override { + OP_INOUT_CHECK(context->HasInput("X"), "Input", "X", "mv"); + OP_INOUT_CHECK(context->HasInput("Vec"), "Input", "Vec", "mv"); + OP_INOUT_CHECK(context->HasOutput("Out"), "Output", "Out", "mv"); + + auto dim_x = context->GetInputDim("X"); + auto dim_y = context->GetInputDim("Vec"); + PADDLE_ENFORCE_EQ( + dim_x.size(), 2, + platform::errors::InvalidArgument( + "The rank of input X should be 2, but is %d", dim_x.size())); + PADDLE_ENFORCE_EQ( + dim_y.size(), 1, + platform::errors::InvalidArgument( + "The rank of input Vec should be 1, but is %d", dim_y.size())); + PADDLE_ENFORCE_EQ(dim_x[1] == dim_y[0], true, + platform::errors::InvalidArgument( + "The length of input X' second dim should equal the " + "length of input Vec," + " but X[%d, %d], Vec[%d]", + dim_x[0], dim_x[1], dim_y[0])); + + framework::DDim dim_out = framework::make_ddim({dim_x[0]}); + + context->SetOutputDim("Out", dim_out); + context->ShareLoD("X", /*->*/ "Out"); + } +}; + +template +class MVOpGradMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr retv) const override { + retv->SetType("mv_grad"); + retv->SetInput("X", this->Input("X")); + retv->SetInput("Vec", this->Input("Vec")); + retv->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); + retv->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); + retv->SetOutput(framework::GradVarName("Vec"), this->InputGrad("Vec")); + } +}; + +class MVOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext *context) const override { + OP_INOUT_CHECK(context->HasInput("X"), "Input", "X", 
"mv"); + OP_INOUT_CHECK(context->HasInput("Vec"), "Input", "Vec", "mv"); + OP_INOUT_CHECK(context->HasInput(framework::GradVarName("Out")), "Input", + "Out@GRAD", "mv"); + auto x_dims = context->GetInputDim("X"); + auto vec_dims = context->GetInputDim("Vec"); + + auto x_grad_name = framework::GradVarName("X"); + auto vec_grad_name = framework::GradVarName("Vec"); + + if (context->HasOutput(x_grad_name)) { + context->SetOutputDim(x_grad_name, x_dims); + } + if (context->HasOutput(vec_grad_name)) { + context->SetOutputDim(vec_grad_name, vec_dims); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OPERATOR(mv, ops::MVOp, ops::MVOpMaker, + ops::MVOpGradMaker, + ops::MVOpGradMaker); +REGISTER_OPERATOR(mv_grad, ops::MVOpGrad); + +REGISTER_OP_CPU_KERNEL( + mv, ops::MVKernel, + ops::MVKernel); +REGISTER_OP_CPU_KERNEL( + mv_grad, ops::MVGradKernel, + ops::MVGradKernel); diff --git a/paddle/fluid/operators/mv_op.cu b/paddle/fluid/operators/mv_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..9a16fe025cd71457faade38f92f56e56c26b3b32 --- /dev/null +++ b/paddle/fluid/operators/mv_op.cu @@ -0,0 +1,95 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/mv_op.h" +#include "paddle/fluid/platform/gpu_launch_param_config.h" + +namespace paddle { +namespace operators { + +template +__global__ void MVGradCUDAKernel(const int m, const int n, const T *dout, + const T *vec, T *dx) { + int idx = blockDim.x * blockIdx.x + threadIdx.x; + for (; idx < m * n; idx += blockDim.x * gridDim.x) { + int i = idx / n; + int j = idx % n; + dx[idx] = dout[i] * vec[j]; + } +} + +// Using dimensional constraints on matrix multiplication, it is +// straight-forward to check the following table for when X and Y +// are both matrices. +// +// dX = | dOut Vec^T +// dVec = | X^T dOut +template +class MVGradKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + auto *x = context.Input("X"); + auto *vec = context.Input("Vec"); + auto *dout = + context.Input(framework::GradVarName("Out")); + auto *dx = context.Output(framework::GradVarName("X")); + auto *dvec = + context.Output(framework::GradVarName("Vec")); + + auto dim_x = x->dims(); + int m = dim_x[0]; + int n = dim_x[1]; + + dx->Resize(framework::make_ddim({m * n})); + + // get data ptr + const T *x_data = x->data(); + const T *vec_data = vec->data(); + const T *dout_data = dout->data(); + + T *dx_data = dx->mutable_data(context.GetPlace()); + T *dvec_data = dvec->mutable_data(context.GetPlace()); + + auto &dev_ctx = + context.template device_context(); + auto blas = math::GetBlas(dev_ctx); + + // calculate dx + auto stream = context.cuda_device_context().stream(); + auto config = GetGpuLaunchConfig1D(dev_ctx, m * n); + MVGradCUDAKernel< + T><<>>( + m, n, dout_data, vec_data, dx_data); + + dx->Resize(framework::make_ddim({m, n})); + + // calculate dvec + blas.GEMV(true, dim_x[0], dim_x[1], static_cast(1), x_data, dout_data, + static_cast(0), dvec_data); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = 
paddle::platform; + +REGISTER_OP_CUDA_KERNEL( + mv, ops::MVKernel, + ops::MVKernel); +REGISTER_OP_CUDA_KERNEL( + mv_grad, ops::MVGradKernel, + ops::MVGradKernel); diff --git a/paddle/fluid/operators/mv_op.h b/paddle/fluid/operators/mv_op.h new file mode 100644 index 0000000000000000000000000000000000000000..3c63f3640ff46f5592a244a930a191a23959baf7 --- /dev/null +++ b/paddle/fluid/operators/mv_op.h @@ -0,0 +1,105 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/blas.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class MVKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + auto *x = context.Input("X"); + auto *vec = context.Input("Vec"); + + auto *out = context.Output("Out"); + + auto dim_x = x->dims(); + + // get data ptr + const T *x_data = x->data(); + const T *vec_data = vec->data(); + T *out_data = out->mutable_data(context.GetPlace()); + + auto &dev_ctx = context.template device_context(); + auto blas = math::GetBlas(dev_ctx); + + blas.GEMV(false, dim_x[0], dim_x[1], static_cast(1), x_data, vec_data, + static_cast(0), out_data); + } +}; + +// Using dimensional constraints on matrix multiplication, it is +// straight-forward to check the following table for when X and Y +// are both matrices. 
+// +// dX = | dOut vec^T +// dVec = | X^T dOut +template +class MVGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + auto *x = context.Input("X"); + auto *vec = context.Input("Vec"); + auto *dout = + context.Input(framework::GradVarName("Out")); + auto *dx = context.Output(framework::GradVarName("X")); + auto *dvec = + context.Output(framework::GradVarName("Vec")); + + auto dim_x = x->dims(); + int m = dim_x[0]; + int n = dim_x[1]; + + dx->Resize(framework::make_ddim({m * n})); + + // get data ptr + const T *x_data = x->data(); + const T *vec_data = vec->data(); + const T *dout_data = dout->data(); + + T *dx_data = dx->mutable_data(context.GetPlace()); + T *dvec_data = dvec->mutable_data(context.GetPlace()); + + auto &dev_ctx = context.template device_context(); + auto blas = math::GetBlas(dev_ctx); + + // calculate dx + for (int i = 0; i < m; ++i) { + for (int j = 0; j < n; ++j) + dx_data[i * n + j] = dout_data[i] * vec_data[j]; + } + + dx->Resize(framework::make_ddim({m, n})); + + // calculate dvec + blas.GEMV(true, dim_x[0], dim_x[1], static_cast(1), x_data, dout_data, + static_cast(0), dvec_data); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/norm_utils.cu.h b/paddle/fluid/operators/norm_utils.cu.h new file mode 100644 index 0000000000000000000000000000000000000000..07333f1ae11c3889b543ca6d327e480607a4bcea --- /dev/null +++ b/paddle/fluid/operators/norm_utils.cu.h @@ -0,0 +1,486 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include +#include +#include +#include "cub/cub.cuh" +#include "paddle/fluid/framework/data_layout.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/platform/cudnn_helper.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using DataLayout = framework::DataLayout; + +// math: dx = scale * ((x - mean) * inv_var / NxHxW * (np.mean(ddx, +// axis=(n,h,w)) * +// np.sum(dy, axis=(n,h,w)) - +// np.sum(dy * ddx, axis=(n,h,w)) + 3 * np.mean(dy * (x - +// mean), +// axis=(n,h,w)) * inv_var.pow(2) * +// np.sum(ddx * (x - mean), axis=(n,h,w))) + inv_var.pow(3) / +// NxHxW * +// np.sum(ddx * (x - mean)) * +// (np.mean(dy, axis=(n,h,w)) - dy) + inv_var.pow(3) / NxHxW * +// np.sum(dy, +// axis=(n,h,w)) * (x - mean) * +// (np.mean(ddx, axis=(n,h,w)) - ddx) + ddr * (dy * inv_var - +// inv_var +// * +// np.mean(dy, axis=(n,h,w)) - +// inv_var.pow(3) * (x - mean) * np.mean(dy * (x - mean), +// axis=(n,h,w)))) + +template +__global__ void DoubleGradComputeDX(const T *x, const T *mean, + const T *variance, const T *ddx, + const T *dy, const T *scale, + const T *ddscale, const int N, const int C, + const int sample_size, const double epsilon, + T *dx) { + const int outer_size = C; + const int inner_size = N * sample_size; + + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage dy_storage; + __shared__ typename BlockReduce::TempStorage ddx_storage; + __shared__ typename BlockReduce::TempStorage dy_mul_ddx_storage; + __shared__ typename BlockReduce::TempStorage 
dy_mul_x_sub_mean_storage; + __shared__ typename BlockReduce::TempStorage ddx_mul_x_sub_mean_storage; + __shared__ T dy_sum_val; + __shared__ T ddx_sum_val; + __shared__ T dy_mul_ddx_sum_val; + __shared__ T dy_mul_x_sub_mean_sum_val; + __shared__ T ddx_mul_x_sub_mean_sum_val; + + for (int i = blockIdx.x; i < outer_size; i += gridDim.x) { + T mean_val = mean[i]; + T var_val = variance[i]; + T dy_sum = 0; + T ddx_sum = 0; + T dy_mul_ddx_sum = 0; + T dy_mul_x_sub_mean_sum = 0; + T ddx_mul_x_sub_mean_sum = 0; + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? (j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + T ddx_i = ddx[index]; + T dy_i = dy[index]; + T tmp = x[index] - mean_val; + + dy_sum += dy_i; + ddx_sum += ddx_i; + dy_mul_ddx_sum += (ddx_i * dy_i); + + dy_mul_x_sub_mean_sum += (dy_i * tmp); + ddx_mul_x_sub_mean_sum += (ddx_i * tmp); + } + + dy_sum = BlockReduce(dy_storage).Reduce(dy_sum, cub::Sum()); + ddx_sum = BlockReduce(ddx_storage).Reduce(ddx_sum, cub::Sum()); + dy_mul_ddx_sum = + BlockReduce(dy_mul_ddx_storage).Reduce(dy_mul_ddx_sum, cub::Sum()); + dy_mul_x_sub_mean_sum = BlockReduce(dy_mul_x_sub_mean_storage) + .Reduce(dy_mul_x_sub_mean_sum, cub::Sum()); + ddx_mul_x_sub_mean_sum = BlockReduce(ddx_mul_x_sub_mean_storage) + .Reduce(ddx_mul_x_sub_mean_sum, cub::Sum()); + + if (threadIdx.x == 0) { + dy_sum_val = dy_sum; + ddx_sum_val = ddx_sum; + dy_mul_ddx_sum_val = dy_mul_ddx_sum; + dy_mul_x_sub_mean_sum_val = dy_mul_x_sub_mean_sum; + ddx_mul_x_sub_mean_sum_val = ddx_mul_x_sub_mean_sum; + } + __syncthreads(); + + if (ddx != nullptr) { + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? 
(j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + dx[index] += + ((x[index] - mean_val) * var_val * var_val * var_val / inner_size * + (ddx_sum_val * dy_sum_val / inner_size - dy_mul_ddx_sum_val + + 3. * dy_mul_x_sub_mean_sum_val * var_val * + ddx_mul_x_sub_mean_sum_val * var_val / inner_size) + + ddx_mul_x_sub_mean_sum_val * var_val / inner_size * var_val * + var_val * (dy_sum_val / inner_size - dy[index]) + + dy_mul_x_sub_mean_sum_val * var_val / inner_size * var_val * + var_val * (ddx_sum_val / inner_size - ddx[index])) * + scale[i]; + } + } + __syncthreads(); + if (ddscale != nullptr) { + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? (j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + dx[index] += (dy[index] * var_val - dy_sum_val / inner_size * var_val - + (x[index] - mean_val) * var_val * + dy_mul_x_sub_mean_sum_val * var_val / inner_size) * + ddscale[i]; + } + } + } +} + +// math: ddy = (x - mean) * inv_var * ddscale + ddbias + +// scale * inv_var * (ddx - (x - mean) * inv_var.pow(2) * +// np.mean(ddx * (x - mean), axis=(n,h,w))) +template +__global__ void DoubleGradComputeDDY(const T *x, const T *mean, + const T *variance, const T *ddscale, + const T *ddbias, const T *ddx, + const T *scale, const int N, const int C, + const int sample_size, + const double epsilon, T *ddy) { + const int outer_size = C; + const int inner_size = N * sample_size; + + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage ddx_storage; + __shared__ typename BlockReduce::TempStorage ddx_mul_x_sub_mean_storage; + __shared__ T ddx_sum_val; + __shared__ T ddx_mul_x_sub_mean_sum_val; + + for (int i = blockIdx.x; i < outer_size; i += gridDim.x) { + T mean_val = mean[i]; + T var_val = variance[i]; + T ddx_sum = 0; + T ddx_mul_x_sub_mean_sum = 0; + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + 
const int index = + layout == framework::DataLayout::kNCHW + ? (j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + T ddx_i = ddx[index]; + ddx_sum += ddx_i; + ddx_mul_x_sub_mean_sum += (ddx_i * (x[index] - mean_val)); + } + ddx_sum = BlockReduce(ddx_storage).Reduce(ddx_sum, cub::Sum()); + ddx_mul_x_sub_mean_sum = BlockReduce(ddx_mul_x_sub_mean_storage) + .Reduce(ddx_mul_x_sub_mean_sum, cub::Sum()); + + if (threadIdx.x == 0) { + ddx_sum_val = ddx_sum; + ddx_mul_x_sub_mean_sum_val = ddx_mul_x_sub_mean_sum; + } + __syncthreads(); + + if (ddx != nullptr) { + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? (j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + ddy[index] += scale[i] * var_val * + (ddx[index] - ddx_sum_val / inner_size - + (x[index] - mean_val) * var_val * + ddx_mul_x_sub_mean_sum_val * var_val / inner_size); + } + } + __syncthreads(); + if (ddscale != nullptr) { + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? (j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + ddy[index] += (x[index] - mean_val) * var_val * ddscale[i]; + } + } + __syncthreads(); + if (ddbias != nullptr) { + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? 
(j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + ddy[index] += ddbias[i]; + } + } + } +} + +// math: dscale = inv_var * (dy - np.mean(dy, axis=(n,h,w) - (x-mean) * +// inv_var.pow(2) * np.mean(dy * (x-mean), axis=(n,h,w)))) * +// ddx +template +__global__ void DoubleGradComputeDScale(const T *x, const T *mean, + const T *variance, const T *ddx, + const T *dy, const int N, const int C, + const int sample_size, + const double epsilon, T *dscale) { + const int outer_size = C; + const int inner_size = N * sample_size; + + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage dy_storage; + __shared__ typename BlockReduce::TempStorage dy_mul_x_sub_mean_storage; + __shared__ typename BlockReduce::TempStorage dscale_tmp_storage; + __shared__ T dy_sum_val; + __shared__ T dy_mul_x_sub_mean_sum_val; + + for (int i = blockIdx.x; i < outer_size; i += gridDim.x) { + T dy_sum = 0; + T dy_mul_x_sub_mean_sum = 0; + T mean_val = mean[i]; + T var_val = variance[i]; + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? (j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + T dy_i = dy[index]; + dy_sum += dy_i; + dy_mul_x_sub_mean_sum += (dy_i * (x[index] - mean_val)); + } + dy_sum = BlockReduce(dy_storage).Reduce(dy_sum, cub::Sum()); + dy_mul_x_sub_mean_sum = BlockReduce(dy_mul_x_sub_mean_storage) + .Reduce(dy_mul_x_sub_mean_sum, cub::Sum()); + + if (threadIdx.x == 0) { + dy_sum_val = dy_sum; + dy_mul_x_sub_mean_sum_val = dy_mul_x_sub_mean_sum; + } + __syncthreads(); + + if (ddx != nullptr) { + T dscale_tmp = 0; + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? 
(j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + dscale_tmp += ddx[index] * var_val * + (dy[index] - dy_sum_val / inner_size - + dy_mul_x_sub_mean_sum_val * (x[index] - mean_val) * + var_val * var_val / inner_size); + } + dscale_tmp = + BlockReduce(dscale_tmp_storage).Reduce(dscale_tmp, cub::Sum()); + + if (threadIdx.x == 0) { + dscale[i] += dscale_tmp; + } + __syncthreads(); + } + } +} + +// math: dscale = np.sum(ddx * dy, axis=(n,h,w)) * inv_var +template +__global__ void DoubleGradComputeDScaleWithGlobal( + const T *ddx, const T *variance, const T *dy, const double epsilon, + const int N, const int C, const int sample_size, T *dscale) { + int outer_size = C; + int inner_size = N * sample_size; + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage ddx_mul_dy_storage; + __shared__ T ddx_mul_dy_sum_val; + for (int i = blockIdx.x; i < outer_size; i += gridDim.x) { + T inv_var_i = 1.0 / sqrt(variance[i] + epsilon); + T ddx_mul_dy_sum = 0; + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? 
(j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + T ddx_i = ddx[index]; + T dy_i = dy[index]; + ddx_mul_dy_sum += (ddx_i * dy_i); + } + ddx_mul_dy_sum = + BlockReduce(ddx_mul_dy_storage).Reduce(ddx_mul_dy_sum, cub::Sum()); + if (threadIdx.x == 0) { + ddx_mul_dy_sum_val = ddx_mul_dy_sum; + } + __syncthreads(); + + if (ddx != nullptr) { + dscale[i] = inv_var_i * ddx_mul_dy_sum_val; + } + } +} + +// math: dx = ddscale * dy * inv_var +// math: ddy = scale * ddx * inv_var +template +__global__ void DoubleGradComputeDataWithGlobal( + const T *dy, const T *scale, const T *variance, const double epsilon, + const int C, const int sample_size, const int num, T *dx) { + int gid = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + if (scale != nullptr) { + for (int i = gid; i < num; i += stride) { + const int c = + layout == framework::DataLayout::kNCHW ? i / sample_size % C : i % C; + T inv_var = 1.0 / sqrt(variance[c] + epsilon); + dx[i] = dy[i] * scale[c] * inv_var; + } + } +} + +template +void NormDoubleGradFunctor(const framework::ExecutionContext &ctx, + const DataLayout data_layout, const Tensor *X, + const Tensor *Scale, const Tensor *dY, + const Tensor *Saved_mean, + const Tensor *Saved_variance, const double epsilon, + const bool use_global_stats, const Tensor *ddX, + const Tensor *ddScale, const Tensor *ddBias, + Tensor *dX, Tensor *dScale, Tensor *ddY) { + const T *x_data = X->data(); + const T *dy_data = dY->data(); + const T *ddx_data = (ddX == nullptr ? nullptr : ddX->data()); + + const T *ddscale_data = (ddScale == nullptr ? nullptr : ddScale->data()); + const T *ddbias_data = (ddBias == nullptr ? nullptr : ddBias->data()); + + auto &dev_ctx = ctx.template device_context(); + math::SetConstant set_constant; + + auto &x_dims = X->dims(); + const int C = (data_layout == DataLayout::kNCHW ? 
x_dims[1] + : x_dims[x_dims.size() - 1]); + const int N = x_dims[0]; + const int num = X->numel(); + const int sample_size = num / N / C; + Tensor scale_tmp; + if (!Scale) { + scale_tmp.mutable_data({C}, ctx.GetPlace()); + set_constant(dev_ctx, &scale_tmp, static_cast(1)); + } + const T *scale_data = Scale ? Scale->data() : scale_tmp.data(); + + const int block = 512; + int max_threads = dev_ctx.GetMaxPhysicalThreadCount(); + const int max_blocks = std::max(max_threads / block, 1); + int grid = std::min(C, max_blocks); + int grid1 = (num + block - 1) / block; + + const T *mean_data, *variance_data; + if (use_global_stats) { + const auto *running_var = ctx.Input("Variance"); + const auto *running_var_data = running_var->template data(); + variance_data = running_var_data; + } else { + const T *smean_data = Saved_mean->data(); + const T *svariance_data = Saved_variance->data(); + mean_data = smean_data; + variance_data = svariance_data; + } + + if (dX) { + T *dx_data = dX->mutable_data(ctx.GetPlace()); + set_constant(dev_ctx, dX, static_cast(0)); + if (use_global_stats) { + if (data_layout == DataLayout::kNHWC) { + DoubleGradComputeDataWithGlobal< + T, DataLayout::kNHWC><<>>( + dy_data, ddscale_data, variance_data, epsilon, C, sample_size, num, + dx_data); + } else { + DoubleGradComputeDataWithGlobal< + T, DataLayout::kNCHW><<>>( + dy_data, ddscale_data, variance_data, epsilon, C, sample_size, num, + dx_data); + } + } else { + if (data_layout == DataLayout::kNHWC) { + DoubleGradComputeDX< + T, block, DataLayout::kNHWC><<>>( + x_data, mean_data, variance_data, ddx_data, dy_data, scale_data, + ddscale_data, N, C, sample_size, epsilon, dx_data); + } else { + DoubleGradComputeDX< + T, block, DataLayout::kNCHW><<>>( + x_data, mean_data, variance_data, ddx_data, dy_data, scale_data, + ddscale_data, N, C, sample_size, epsilon, dx_data); + } + } + } + if (dScale) { + T *dscale_data = dScale->mutable_data(ctx.GetPlace()); + set_constant(dev_ctx, dScale, static_cast(0)); + if 
(use_global_stats) { + if (data_layout == DataLayout::kNHWC) { + DoubleGradComputeDScaleWithGlobal< + T, block, DataLayout::kNHWC><<>>( + ddx_data, variance_data, dy_data, epsilon, N, C, sample_size, + dscale_data); + } else { + DoubleGradComputeDScaleWithGlobal< + T, block, DataLayout::kNCHW><<>>( + ddx_data, variance_data, dy_data, epsilon, N, C, sample_size, + dscale_data); + } + } else { + if (data_layout == DataLayout::kNHWC) { + DoubleGradComputeDScale< + T, block, DataLayout::kNHWC><<>>( + x_data, mean_data, variance_data, ddx_data, dy_data, N, C, + sample_size, epsilon, dscale_data); + } else { + DoubleGradComputeDScale< + T, block, DataLayout::kNCHW><<>>( + x_data, mean_data, variance_data, ddx_data, dy_data, N, C, + sample_size, epsilon, dscale_data); + } + } + } + if (ddY) { + T *ddy_data = ddY->mutable_data(ctx.GetPlace()); + set_constant(dev_ctx, ddY, static_cast(0)); + if (use_global_stats) { + if (data_layout == DataLayout::kNHWC) { + DoubleGradComputeDataWithGlobal< + T, DataLayout::kNHWC><<>>( + ddx_data, scale_data, variance_data, epsilon, C, sample_size, num, + ddy_data); + } else { + DoubleGradComputeDataWithGlobal< + T, DataLayout::kNCHW><<>>( + ddx_data, scale_data, variance_data, epsilon, C, sample_size, num, + ddy_data); + } + } else { + if (data_layout == DataLayout::kNHWC) { + DoubleGradComputeDDY< + T, block, DataLayout::kNHWC><<>>( + x_data, mean_data, variance_data, ddscale_data, ddbias_data, + ddx_data, scale_data, N, C, sample_size, epsilon, ddy_data); + } else { + DoubleGradComputeDDY< + T, block, DataLayout::kNCHW><<>>( + x_data, mean_data, variance_data, ddscale_data, ddbias_data, + ddx_data, scale_data, N, C, sample_size, epsilon, ddy_data); + } + } + } +} + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc index 5c6c38da92808f05c90e7dad2482e7c7364a1f80..eb41d21e09218b203f887d8fd812d46dc8367c71 100644 --- 
a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc @@ -23,46 +23,54 @@ class DecayedAdagradOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Param"), - "Input(Param) of DecayedAdagradOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Grad"), - "Input(Grad) of DecayedAdagradOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Moment"), - "Input(Moment) of DecayedAdagradOp should not be null."); - PADDLE_ENFORCE( - ctx->HasInput("LearningRate"), - "Input(LearningRate) of DecayedAdagradOp should not be null."); - PADDLE_ENFORCE( - ctx->GetInputsVarType("Param").front() == - framework::proto::VarType::LOD_TENSOR, - "The input var's type should be LoDTensor, but the received is %s", - ctx->Inputs("Param").front(), ctx->GetInputsVarType("Param").front()); - PADDLE_ENFORCE( - ctx->GetInputsVarType("Grad").front() == - framework::proto::VarType::LOD_TENSOR, - "The input var's type should be LoDTensor, but the received is %s", - ctx->Inputs("Grad").front(), ctx->GetInputsVarType("Grad").front()); - - PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), - "Output(ParamOut) of DecayedAdagradOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("MomentOut"), - "Output(MomentOut) of DecayedAdagradOp should not be null."); + OP_INOUT_CHECK(ctx->HasInput("Param"), "Input", "Param", + "DecayedAdagradOp"); + OP_INOUT_CHECK(ctx->HasInput("Grad"), "Input", "Grad", "DecayedAdagradOp"); + OP_INOUT_CHECK(ctx->HasInput("Moment"), "Input", "Moment", + "DecayedAdagradOp"); + OP_INOUT_CHECK(ctx->HasInput("LearningRate"), "Input", "LearningRate", + "DecayedAdagradOp"); + PADDLE_ENFORCE_EQ( + ctx->GetInputsVarType("Param").front(), + framework::proto::VarType::LOD_TENSOR, + platform::errors::InvalidArgument( + "The input var's type should be LoDTensor, but the received 
is %s", + ctx->Inputs("Param").front(), + ctx->GetInputsVarType("Param").front())); + PADDLE_ENFORCE_EQ( + ctx->GetInputsVarType("Grad").front(), + framework::proto::VarType::LOD_TENSOR, + platform::errors::InvalidArgument( + "The input var's type should be LoDTensor, but the received is %s", + ctx->Inputs("Grad").front(), + ctx->GetInputsVarType("Grad").front())); + + OP_INOUT_CHECK(ctx->HasOutput("ParamOut"), "Output", "ParamOut", + "DecayedAdagradOp"); + OP_INOUT_CHECK(ctx->HasOutput("MomentOut"), "Output", "MomentOut", + "DecayedAdagradOp"); auto lr_dims = ctx->GetInputDim("LearningRate"); PADDLE_ENFORCE_NE(framework::product(lr_dims), 0, - "Maybe the Input variable LearningRate has not " - "been initialized. You may need to confirm " - "if you put exe.run(startup_program) " - "after optimizer.minimize function."); + platform::errors::InvalidArgument( + "Maybe the Input variable LearningRate has not " + "been initialized. You may need to confirm " + "if you put exe.run(startup_program) " + "after optimizer.minimize function.")); PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, - "LearningRate should have one element"); + platform::errors::InvalidArgument( + "LearningRate should have one element")); auto param_dims = ctx->GetInputDim("Param"); - PADDLE_ENFORCE_EQ(param_dims, ctx->GetInputDim("Grad"), - "Param and Grad input of DecayedAdagradOp should have " - "the same dimension."); - PADDLE_ENFORCE_EQ(param_dims, ctx->GetInputDim("Moment"), - "Param and Moment input of DecayedAdagradOp should have " - "the same dimension."); + PADDLE_ENFORCE_EQ( + param_dims, ctx->GetInputDim("Grad"), + platform::errors::InvalidArgument( + "Param and Grad input of DecayedAdagradOp should have " + "the same dimension.")); + PADDLE_ENFORCE_EQ( + param_dims, ctx->GetInputDim("Moment"), + platform::errors::InvalidArgument( + "Param and Moment input of DecayedAdagradOp should have " + "the same dimension.")); ctx->SetOutputDim("ParamOut", param_dims); ctx->SetOutputDim("MomentOut", 
param_dims); diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.h b/paddle/fluid/operators/optimizers/decayed_adagrad_op.h index 279edfb015c26848d4078975a40bdca650bdc6a0..f264ebf8a32636a1e2076f8721b3c95d65f5382b 100644 --- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.h +++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.h @@ -24,17 +24,19 @@ class DecayedAdagradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { const auto* param_var = ctx.InputVar("Param"); - PADDLE_ENFORCE(param_var->IsType(), - "The Var(%s)'s type should be LoDTensor, " - "but the received is %s", - ctx.InputNames("Param").front(), - framework::ToTypeName(param_var->Type())); + PADDLE_ENFORCE_EQ(param_var->IsType(), true, + platform::errors::InvalidArgument( + "The Var(%s)'s type should be LoDTensor, " + "but the received is %s", + ctx.InputNames("Param").front(), + framework::ToTypeName(param_var->Type()))); const auto* grad_var = ctx.InputVar("Grad"); - PADDLE_ENFORCE(grad_var->IsType(), - "The Var(%s)'s type should be LoDTensor, " - "but the received is %s", - ctx.InputNames("Grad").front(), - framework::ToTypeName(grad_var->Type())); + PADDLE_ENFORCE_EQ(grad_var->IsType(), true, + platform::errors::InvalidArgument( + "The Var(%s)'s type should be LoDTensor, " + "but the received is %s", + ctx.InputNames("Grad").front(), + framework::ToTypeName(grad_var->Type()))); auto param_out_tensor = ctx.Output("ParamOut"); auto moment_out_tensor = ctx.Output("MomentOut"); diff --git a/paddle/fluid/operators/optimizers/lars_momentum_op.h b/paddle/fluid/operators/optimizers/lars_momentum_op.h index b579b5143ddbe6221738f9864f13fb7bea4ac509..55775bc08fb5ebc31cd231b8088a9798561fabfc 100755 --- a/paddle/fluid/operators/optimizers/lars_momentum_op.h +++ b/paddle/fluid/operators/optimizers/lars_momentum_op.h @@ -30,7 +30,12 @@ class LarsMomentumOpKernel : public framework::OpKernel { auto learning_rate = 
ctx.Input("LearningRate"); auto* grad_var = ctx.InputVar("Grad"); // only support dense for now. - PADDLE_ENFORCE_EQ(grad_var->IsType(), true); + PADDLE_ENFORCE_EQ(grad_var->IsType(), true, + platform::errors::InvalidArgument( + "The Var(%s)'s type should be LoDTensor, " + "but the received is %s", + ctx.InputNames("Grad").front(), + framework::ToTypeName(grad_var->Type()))); auto grad = ctx.Input("Grad"); param_out->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/optimizers/rmsprop_op.cc b/paddle/fluid/operators/optimizers/rmsprop_op.cc index 99d1156ee6d5fc88161e25bfa581a265707e6f92..eeee008cdc53c457146074060d526d8d0e8b43aa 100644 --- a/paddle/fluid/operators/optimizers/rmsprop_op.cc +++ b/paddle/fluid/operators/optimizers/rmsprop_op.cc @@ -143,4 +143,5 @@ http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(rmsprop, ops::RmspropOp, ops::RmspropOpMaker); REGISTER_OP_CPU_KERNEL( - rmsprop, ops::RmspropOpKernel); + rmsprop, ops::RmspropOpKernel, + ops::RmspropOpKernel); diff --git a/paddle/fluid/operators/optimizers/rmsprop_op.cu b/paddle/fluid/operators/optimizers/rmsprop_op.cu index 8b17d6a0204045a9b20adb79dbad72dff5ba267e..bf11ee686757c6c5e54e05f055eaa19f6553f915 100644 --- a/paddle/fluid/operators/optimizers/rmsprop_op.cu +++ b/paddle/fluid/operators/optimizers/rmsprop_op.cu @@ -15,4 +15,5 @@ limitations under the License. */ namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL( - rmsprop, ops::RmspropOpKernel); + rmsprop, ops::RmspropOpKernel, + ops::RmspropOpKernel); diff --git a/paddle/fluid/operators/segment_pool_op.cc b/paddle/fluid/operators/segment_pool_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..322cd97f01c3ad97ba74f049696fdec592ee524e --- /dev/null +++ b/paddle/fluid/operators/segment_pool_op.cc @@ -0,0 +1,166 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/segment_pool_op.h" +#include +#include + +namespace paddle { +namespace operators { + +class SegmentPoolOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SegmentPool"); + OP_INOUT_CHECK(ctx->HasInput("SegmentIds"), "Input", "SegmentIds", + "SegmentPool"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "SegmentPool"); + auto dims = ctx->GetInputDim("X"); + dims[0] = -1; + ctx->SetOutputDim("Out", dims); + + if (ctx->Attrs().Get("pooltype") == "MEAN") { + OP_INOUT_CHECK(ctx->HasOutput("SummedIds"), "Output", "SummedIds", + "SegmentPool"); + ctx->SetOutputDim("SummedIds", {-1, 1}); + } + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), + ctx.device_context()); + } +}; + +class SegmentPoolOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "(Tensor) The input data of SegmentPoolOp"); + AddInput("SegmentIds", + "(Tensor) 1-D tensor which have the same size with the fist " + "dimension of input X."); + AddOutput("Out", "(Tensor) The output of SegmentPoolOp."); + AddOutput("SummedIds", + "(Tensor) This 
tensor is used to counts of segment ids for the " + "backward of the mean pool.") + .AsIntermediate(); + AddAttr( + "pooltype", + "(string, default 'SUM') the pooling type of SegmentPoolOp.") + .SetDefault("SUM") + .InEnum({"SUM", "MEAN", "MIN", "MAX"}); + AddComment(R"DOC( +Segment Pool Operator. + +This operator will pool the elements of input `X` which with the same index +in `SegmentIds`. + +For SUM operation, it computes a tensor such that $Out_i = \sum_{j} X_{j}$ +where sum is over j such that `SegmentIds[j] == i`. + +For MEAN operation, it computes a tensor such that +$Out_i = \frac{1}{n_i} \sum_{j} X_{j}$ where sum is over j such that +`SegmentIds[j] == i` and $n_i$ is the number of all index `SegmentIds[j] == i`. + +For MIN operation, it computes a tensor such that $Out_i = \min_{j} X_{j}$ +where min is over j such that `SegmentIds[j] == i`. + +For MAX operation, it computes a tensor such that $Out_i = \max_{j} X_{j}$ +where max is over j such that `SegmentIds[j] == i`. + )DOC"); + } +}; + +class SegmentPoolGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), "Input", + framework::GradVarName("Out"), "SegmentPoolGrad"); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SegmentPoolGrad"); + auto og_dims = ctx->GetInputDim(framework::GradVarName("Out")); + auto x_dims = ctx->GetInputDim("X"); + PADDLE_ENFORCE_EQ(og_dims.size(), x_dims.size(), + platform::errors::InvalidArgument( + "The rank of output grad must equal to Input(X). But " + "received: input rank %u, input shape [%s].", + og_dims.size(), og_dims)); + for (int64_t i = 1; i < og_dims.size(); ++i) { + PADDLE_ENFORCE_EQ( + og_dims[i], x_dims[i], + platform::errors::InvalidArgument( + "The dimension mismatch between Input(OUT@GRAD) and " + "Input(X). 
Received Input(OUT@GRAD): input rank %u, " + "input shape [%s]; received Input(X): input rank %u, " + "input shape [%s].", + og_dims.size(), og_dims, x_dims.size(), x_dims)); + } + + ctx->ShareDim("X", /*->*/ framework::GradVarName("X")); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Out")), + ctx.device_context()); + } +}; + +template +class SegmentPoolGradOpMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op_desc_ptr) const override { + op_desc_ptr->SetType("segment_pool_grad"); + op_desc_ptr->SetInput("X", this->Input("X")); + op_desc_ptr->SetInput("SegmentIds", this->Input("SegmentIds")); + op_desc_ptr->SetInput("Out", this->Output("Out")); + if (BOOST_GET_CONST(std::string, this->GetAttr("pooltype")) == "MEAN") { + op_desc_ptr->SetInput("SummedIds", this->Output("SummedIds")); + } + op_desc_ptr->SetInput(framework::GradVarName("Out"), + this->OutputGrad("Out")); + op_desc_ptr->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); + op_desc_ptr->SetAttrMap(this->Attrs()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(segment_pool, ops::SegmentPoolOp, ops::SegmentPoolOpMaker, + ops::SegmentPoolGradOpMaker, + ops::SegmentPoolGradOpMaker); +REGISTER_OPERATOR(segment_pool_grad, ops::SegmentPoolGradOp); + +REGISTER_OP_CPU_KERNEL( + segment_pool, + ops::SegmentPoolKernel, + ops::SegmentPoolKernel); + +REGISTER_OP_CPU_KERNEL( + segment_pool_grad, + ops::SegmentPoolGradKernel, + ops::SegmentPoolGradKernel); diff --git a/paddle/fluid/operators/segment_pool_op.h b/paddle/fluid/operators/segment_pool_op.h new file mode 100644 index 0000000000000000000000000000000000000000..a505946b9f5229425f724ae5469beb77863e9aaf 
--- /dev/null +++ b/paddle/fluid/operators/segment_pool_op.h @@ -0,0 +1,130 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/operators/math/segment_pooling.h" +#include "paddle/fluid/platform/macros.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +void SegmentKernelLaunchHelper(const framework::ExecutionContext& context) { + auto* input = context.Input("X"); + auto* segment = context.Input("SegmentIds"); + auto* output = context.Output("Out"); + std::string pooltype = context.Attr("pooltype"); + Tensor* summed_ids = nullptr; + + int64_t num_indices = segment->numel(); + PADDLE_ENFORCE_EQ( + num_indices, input->dims()[0], + platform::errors::InvalidArgument( + "Segment_ids should be the same size as dimension 0 of input X.")); + PADDLE_ENFORCE_EQ(num_indices, segment->dims()[0], + platform::errors::InvalidArgument( + "Segment_ids should be 1-D tensor, or it's other " + "dimension size is 1. 
Segment_ids's shape is: [%s].", + segment->dims())); + + if (input->numel() == 0 || segment->numel() == 0) { + return; + } + + bool cpu_place = context.GetPlace().type() == typeid(platform::CPUPlace); + if (cpu_place) { + auto dims = input->dims(); + auto* segment_ids = segment->data(); + dims[0] = static_cast(segment_ids[segment->numel() - 1] + 1); + PADDLE_ENFORCE_GT( + dims[0], 0, + platform::errors::InvalidArgument( + "Segment ids must be >= 0, but got last id %d", dims[0])); + output->Resize({dims}); + output->mutable_data(context.GetPlace()); + math::SetConstant set_zero; + auto& dev_ctx = context.template device_context(); + set_zero(dev_ctx, output, static_cast(0)); + } + + SegmentPoolFunctor pool; + + pool(context.template device_context(), *input, *segment, + output, summed_ids, pooltype); +} + +template +class SegmentPoolKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* segment = context.Input("SegmentIds"); + auto index_type = segment->type(); + if (index_type == framework::proto::VarType::INT32) { + SegmentKernelLaunchHelper(context); + } else if (index_type == framework::proto::VarType::INT64) { + SegmentKernelLaunchHelper(context); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Unsupported index type, Expected int, int64, but got %s.", + index_type)); + } + } +}; + +template +class SegmentPoolGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* input = context.Input("X"); + auto* output = context.Input("Out"); + auto* segment = context.Input("SegmentIds"); + auto* out_g = context.Input(framework::GradVarName("Out")); + auto* in_g = context.Output(framework::GradVarName("X")); + std::string pooltype = context.Attr("pooltype"); + + const Tensor* summed_ids = nullptr; + if (pooltype == "MEAN") { + summed_ids = context.Input("SummedIds"); + } + + 
in_g->mutable_data(context.GetPlace()); + math::SetConstant set_zero; + auto& dev_ctx = context.template device_context(); + set_zero(dev_ctx, in_g, static_cast(0)); + + auto index_type = segment->type(); + if (index_type == framework::proto::VarType::INT32) { + SegmentPoolGradFunctor pool; + pool(context.template device_context(), *input, *output, + *out_g, *segment, in_g, summed_ids, pooltype); + } else if (index_type == framework::proto::VarType::INT64) { + SegmentPoolGradFunctor pool; + pool(context.template device_context(), *input, *output, + *out_g, *segment, in_g, summed_ids, pooltype); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Unsupported index type, Expected int, int64, but got %s.", + index_type)); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/strided_memcpy.h b/paddle/fluid/operators/strided_memcpy.h index f20bada8ab288fe74fd8ca82a73522a22b234191..142b00b4de66caaedda5c4f0723d31e3a819b8a4 100644 --- a/paddle/fluid/operators/strided_memcpy.h +++ b/paddle/fluid/operators/strided_memcpy.h @@ -60,20 +60,33 @@ inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx, auto place = ctx.GetPlace(); PADDLE_ENFORCE_EQ(src_stride_numel.size(), dst_stride_numel.size(), - "src and dst tensor should have the same dims size."); + platform::errors::InvalidArgument( + "Source and destination tensor should have the same " + "dimension size, but source tensor dimension size is " + "%u, destination tensor size is %u.", + src_stride_numel.size(), dst_stride_numel.size())); for (int64_t i = 0; i < axis; ++i) { if (i < axis) { - PADDLE_ENFORCE_EQ(src_stride_numel[i] / src_stride_numel[axis], - dst_stride_numel[i] / dst_stride_numel[axis], - "src and dst should have the same elements " - "except the specified axis."); + PADDLE_ENFORCE_EQ( + src_stride_numel[i] / src_stride_numel[axis], + dst_stride_numel[i] / dst_stride_numel[axis], + platform::errors::InvalidArgument( + "Source and 
destination tensor should have the same number of " + "elements except the specified axis, but the source elements " + "number is %d, destination elements number is %d.", + src_stride_numel[i] / src_stride_numel[axis], + dst_stride_numel[i] / dst_stride_numel[axis])); } else if (i == axis) { continue; } else { - PADDLE_ENFORCE_EQ(src_stride_numel[i], dst_stride_numel[i], - "src and dst should have the same elements " - "except the specified axis."); + PADDLE_ENFORCE_EQ( + src_stride_numel[i], dst_stride_numel[i], + platform::errors::InvalidArgument( + "Source and destination tensor should have the same number of " + "elements except the specified axis, but the source elements " + "number is %d, destination elements number is %d.", + src_stride_numel[i], dst_stride_numel[i])); } } @@ -90,7 +103,8 @@ inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx, memory::Copy(gpu_place, dst + i * dst_after, gpu_place, src + i * src_after, sizeof(T) * size, cuda_ctx.stream()); #else - PADDLE_THROW("Paddle is not compiled with GPU"); + PADDLE_THROW(platform::errors::PreconditionNotMet( + "Paddle is not compiled with GPU.")); #endif } } diff --git a/paddle/fluid/operators/top_k_v2_op.cc b/paddle/fluid/operators/top_k_v2_op.cc index cc72d83411f5a34561a75e7e75f98077ee5a4e5d..0e3fcced19ea8eb1580ca93fa9d6616685601f75 100644 --- a/paddle/fluid/operators/top_k_v2_op.cc +++ b/paddle/fluid/operators/top_k_v2_op.cc @@ -32,7 +32,6 @@ class TopkV2Op : public framework::OperatorWithKernel { auto input_dims = ctx->GetInputDim("X"); const int& dim_size = input_dims.size(); - const int k = static_cast(ctx->Attrs().Get("k")); int axis = static_cast(ctx->Attrs().Get("axis")); PADDLE_ENFORCE_EQ((axis < dim_size) && (axis >= (-1 * dim_size)), true, "the axis of topk" @@ -41,8 +40,18 @@ class TopkV2Op : public framework::OperatorWithKernel { if (axis < 0) axis += dim_size; - PADDLE_ENFORCE_GE( - k, 1, "the attribute of k in the topk must >= 1, but received %d .", k); + int k; + 
auto k_is_tensor = ctx->HasInput("K"); + if (k_is_tensor) { + k = -1; + } else { + k = static_cast(ctx->Attrs().Get("k")); + PADDLE_ENFORCE_EQ(k >= 1, true, + "the attribute of k in the topk must >= 1 or be a " + "Tensor, but received %d .", + k); + } + PADDLE_ENFORCE_GE(input_dims.size(), 1, "input of topk must have >= 1d shape"); diff --git a/paddle/fluid/operators/var_conv_2d_op.cc b/paddle/fluid/operators/var_conv_2d_op.cc index f8a29a52d7a3d9332b9dcb8189dfd7c1df902faa..db8b2c30501bd7f291b23728a26dcd3ea27e0ec5 100644 --- a/paddle/fluid/operators/var_conv_2d_op.cc +++ b/paddle/fluid/operators/var_conv_2d_op.cc @@ -78,21 +78,35 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { platform::errors::NotFound("Col(Output) of VarConv2dOP is not found.")); auto x_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ(x_dims.size(), 2, - "The rank of X(Input) can't be less than 2."); + PADDLE_ENFORCE_EQ( + x_dims.size(), 2, + platform::errors::InvalidArgument( + "The rank of X(Input) can't be less than 2, but received rank is %u.", + x_dims.size())); auto w_dims = ctx->GetInputDim("W"); - PADDLE_ENFORCE_EQ(w_dims.size(), 2, "W should be 2-D tensor"); + PADDLE_ENFORCE_EQ( + w_dims.size(), 2, + platform::errors::InvalidArgument( + "Input W should be a 2-D tensor, but its actual dimension is %u.", + w_dims.size())); int output_channel = ctx->Attrs().Get("OutputChannel"); int input_channel = ctx->Attrs().Get("InputChannel"); int kernel_h = ctx->Attrs().Get("KernelH"); int kernel_w = ctx->Attrs().Get("KernelW"); - PADDLE_ENFORCE_EQ(w_dims[0], output_channel, - "W dim[0] should be equal to OutputChannel"); + PADDLE_ENFORCE_EQ( + w_dims[0], output_channel, + platform::errors::InvalidArgument( + "Input W's dimension[0] should be equal to OutputChannel, the " + "dimension[0] is %d, OutputChannel is %d.", + w_dims[0], output_channel)); PADDLE_ENFORCE_EQ( w_dims[1], input_channel * kernel_h * kernel_w, - "W dim[1] should be equal to InputChannel * StrideH * 
StrideW"); + platform::errors::InvalidArgument( + "Input W's dimension[1] should be equal to InputChannel * StrideH * " + "StrideW, the dimension[1] is %d, expected value is %d.", + w_dims[1], input_channel * kernel_h * kernel_w)); if (ctx->IsRuntime()) { framework::Variable* x_var = @@ -103,10 +117,14 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { platform::errors::InvalidArgument("The Input(X) Tensor of VarConv2dOP " "does not contain LoD information.")); - PADDLE_ENFORCE_GE(x_lod.size(), 1, "The Input(X)'s lod info is corrupted."); - PADDLE_ENFORCE_EQ( - x_dims[0], static_cast(x_lod[0].back()), - "The Input(X)'s lod info mismatches the actual tensor shape."); + PADDLE_ENFORCE_GE(x_lod.size(), 1, + platform::errors::InvalidArgument( + "The Input(X)'s lod info is corrupted.")); + PADDLE_ENFORCE_EQ(x_dims[0], static_cast(x_lod[0].back()), + platform::errors::InvalidArgument( + "The Input(X)'s lod info mismatches the actual " + "tensor shape, input lod is %s, tensor shape is %s.", + x_lod, x_dims)); framework::Variable* row_var = BOOST_GET(framework::Variable*, ctx->GetInputVarPtrs("ROW")[0]); diff --git a/paddle/fluid/platform/cuda_profiler.h b/paddle/fluid/platform/cuda_profiler.h index 957bdf1e698d0aedb86c5b0cb732ab545c260bcc..a9382f2c8adcb18e320ef44086a312f89c03ad09 100644 --- a/paddle/fluid/platform/cuda_profiler.h +++ b/paddle/fluid/platform/cuda_profiler.h @@ -24,7 +24,11 @@ namespace platform { void CudaProfilerInit(std::string output_file, std::string output_mode, std::string config_file) { - PADDLE_ENFORCE(output_mode == "kvp" || output_mode == "csv"); + PADDLE_ENFORCE(output_mode == "kvp" || output_mode == "csv", + platform::errors::InvalidArgument( + "Unsupported cuda profiler output mode, expect `kvp` or " + "`csv`, but received `%s`.", + output_mode)); cudaOutputMode_t mode = output_mode == "csv" ? 
cudaCSV : cudaKeyValuePair; PADDLE_ENFORCE_CUDA_SUCCESS( cudaProfilerInitialize(config_file.c_str(), output_file.c_str(), mode)); diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc index af8798a4b7cf5a8832ce9345cad45ce3096484e4..9116edd01b040e793d23c76a04b2c93ed4d2586b 100644 --- a/paddle/fluid/platform/flags.cc +++ b/paddle/fluid/platform/flags.cc @@ -521,3 +521,18 @@ DEFINE_int32( DEFINE_bool(sort_sum_gradient, false, "Sum gradients by the reverse order of " "the forward execution sequence."); + +/** + * Performance related FLAG + * Name: max_inplace_grad_add + * Since Version: 2.0.0 + * Value Range: int32, default=0 + * Example: + * Note: The maximum number of inplace grad_add. + */ +DEFINE_int32( + max_inplace_grad_add, 0, + "The maximum number of inplace grad_add. When doing " + "gradient accumulation, if the number of gradients need to that " + "less FLAGS_max_inplace_grad_add, than it will be use several grad_add" + "instead of sum. Default is 0."); diff --git a/paddle/fluid/pybind/global_value_getter_setter.cc b/paddle/fluid/pybind/global_value_getter_setter.cc index 318178d5eb927e45fa6472a695ce57f4b2a058b8..894740e25c018b09f8604006ae06fa5b9dc14bf0 100644 --- a/paddle/fluid/pybind/global_value_getter_setter.cc +++ b/paddle/fluid/pybind/global_value_getter_setter.cc @@ -62,6 +62,7 @@ DECLARE_bool(use_system_allocator); // others DECLARE_bool(benchmark); DECLARE_int32(inner_op_parallelism); +DECLARE_int32(max_inplace_grad_add); DECLARE_string(tracer_profile_fname); #ifdef PADDLE_WITH_CUDA // cudnn @@ -348,7 +349,7 @@ static void RegisterGlobalVarGetterSetter() { FLAGS_init_allocated_mem, FLAGS_initial_cpu_memory_in_mb, FLAGS_memory_fraction_of_eager_deletion, FLAGS_use_pinned_memory, FLAGS_benchmark, FLAGS_inner_op_parallelism, FLAGS_tracer_profile_fname, - FLAGS_paddle_num_threads, FLAGS_use_mkldnn); + FLAGS_paddle_num_threads, FLAGS_use_mkldnn, FLAGS_max_inplace_grad_add); #ifdef PADDLE_WITH_CUDA REGISTER_PUBLIC_GLOBAL_VAR( diff 
--git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc index f751136640caad6acd3230bc22cd0e3f0fafe9fb..d3052ebd351ef4844d7563935172ed4b7eb1654c 100644 --- a/paddle/fluid/pybind/op_function_generator.cc +++ b/paddle/fluid/pybind/op_function_generator.cc @@ -111,6 +111,7 @@ std::map> op_passing_outs_map = { {"fake_quantize_dequantize_moving_average_abs_max", {"Out", "OutScale", "OutAccum", "OutState"}}, {"fake_quantize_dequantize_abs_max", {"Out", "OutScale"}}, + {"fake_channel_wise_quantize_dequantize_abs_max", {"Out", "OutScale"}}, {"check_finite_and_unscale", {"Out", "FoundInfinite"}}, {"update_loss_scaling", {"Out", "LossScaling", "OutGoodSteps", "OutBadSteps"}}, diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 330254ecaafd29c00e8942765956ea065d2bb7cf..04087cb241c9cd4975773e646bc0ef6e1287518f 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include + #include #include #include @@ -22,6 +23,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/feed_fetch_type.h" @@ -2528,6 +2530,10 @@ All parameter, weight, gradient are variables in Paddle. 
"enable_inplace", [](const BuildStrategy &self) { return self.enable_inplace_; }, [](BuildStrategy &self, bool b) { self.enable_inplace_ = b; }) + .def_property( + "enable_addto", + [](const BuildStrategy &self) { return self.enable_addto_; }, + [](BuildStrategy &self, bool b) { self.enable_addto_ = b; }) .def_property( "fuse_all_reduce_ops", [](const BuildStrategy &self) { diff --git a/paddle/fluid/train/CMakeLists.txt b/paddle/fluid/train/CMakeLists.txt index 235d92ac4f9e88947cea04425b0916b8a0290979..d587081fbac8a27df18bdacba3d94f6adcd3b171 100644 --- a/paddle/fluid/train/CMakeLists.txt +++ b/paddle/fluid/train/CMakeLists.txt @@ -26,7 +26,7 @@ function(train_test TARGET_NAME) ARGS --dirname=${PYTHON_TESTS_DIR}/book/${TARGET_NAME}${arg}.train.model/) endif() set_tests_properties(test_train_${TARGET_NAME}${arg} - PROPERTIES DEPENDS test_${TARGET_NAME}) + PROPERTIES FIXTURES_REQUIRED test_${TARGET_NAME}_infer_model) if(NOT WIN32 AND NOT APPLE) set_tests_properties(test_train_${TARGET_NAME}${arg} PROPERTIES TIMEOUT 150) diff --git a/paddle/fluid/train/demo/README.md b/paddle/fluid/train/demo/README.md index bd53ab4b0c023b2591d792b504ab496a42d2835d..8a44c25aea9a0d7133ef915815d5e60227bd3e54 100644 --- a/paddle/fluid/train/demo/README.md +++ b/paddle/fluid/train/demo/README.md @@ -7,7 +7,7 @@ # WITH_MKLDNN=ON|OFF PADDLE_LIB=/paddle/lib/dir -cmake .. -DFLUID_INSTALL_DIR=$PADDLE_LIB \ +cmake .. -DPADDLE_INSTALL_DIR=$PADDLE_LIB \ -DCMAKE_BUILD_TYPE=Release \ -DWITH_GPU=OFF \ -DWITH_STYLE_CHECK=OFF \ @@ -41,7 +41,7 @@ cd build # WITH_MKLDNN=ON|OFF PADDLE_LIB=/paddle/lib/dir -# PADDLE_LIB is the same with FLUID_INSTALL_DIR when building the lib +# PADDLE_LIB is the same with PADDLE_INSTALL_DIR when building the lib cmake .. 
-DPADDLE_LIB=$PADDLE_LIB \ -DWITH_MKLDNN=OFF \ -DWITH_MKL=OFF diff --git a/paddle/fluid/train/demo/demo_trainer.cc b/paddle/fluid/train/demo/demo_trainer.cc index 1087f5672459506cc7b824127cd822c0df7ba566..1ef98720f83697715c05e868177faba489fd8760 100644 --- a/paddle/fluid/train/demo/demo_trainer.cc +++ b/paddle/fluid/train/demo/demo_trainer.cc @@ -29,7 +29,9 @@ namespace train { void ReadBinaryFile(const std::string& filename, std::string* contents) { std::ifstream fin(filename, std::ios::in | std::ios::binary); - PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", filename); + PADDLE_ENFORCE_EQ( + fin.is_open(), true, + platform::errors::Unavailable("Failed to open file %s.", filename)); fin.seekg(0, std::ios::end); contents->clear(); contents->resize(fin.tellg()); @@ -70,7 +72,8 @@ int main() { } } - PADDLE_ENFORCE_NE(loss_name, "", "loss not found"); + PADDLE_ENFORCE_NE(loss_name, "", + platform::errors::NotFound("Loss name is not found.")); // init all parameters executor.Run(*startup_program, &scope, 0); diff --git a/paddle/fluid/train/demo/run.sh b/paddle/fluid/train/demo/run.sh index f7efb3b3b7d5d9bf45e4b728006d7e24daa4be74..2955e7574daa2d2e41bbade95c3c213917d07d4f 100755 --- a/paddle/fluid/train/demo/run.sh +++ b/paddle/fluid/train/demo/run.sh @@ -14,12 +14,12 @@ function download() { download # build demo trainer -fluid_install_dir=${PADDLE_ROOT}/build/fluid_install_dir +paddle_install_dir=${PADDLE_ROOT}/build/paddle_install_dir mkdir -p build cd build rm -rf * -cmake .. -DPADDLE_LIB=$fluid_install_dir \ +cmake .. 
-DPADDLE_LIB=$paddle_install_dir \ -DWITH_MKLDNN=$TURN_ON_MKL \ -DWITH_MKL=$TURN_ON_MKL make diff --git a/paddle/fluid/train/imdb_demo/README.md b/paddle/fluid/train/imdb_demo/README.md index ecc985e13f8a7a2e9d2da037b98ccd2d1574794c..28fd66710f80dda06b1c87266362cb969b42534c 100644 --- a/paddle/fluid/train/imdb_demo/README.md +++ b/paddle/fluid/train/imdb_demo/README.md @@ -11,7 +11,7 @@ PADDLE_ROOT=./Paddle cd Paddle mkdir build cd build -cmake -DFLUID_INFERENCE_INSTALL_DIR=$PADDLE_ROOT \ +cmake -DPADDLE_INFERENCE_INSTALL_DIR=$PADDLE_ROOT \ -DCMAKE_BUILD_TYPE=Release \ -DWITH_PYTHON=OFF \ -DWITH_MKL=OFF \ @@ -40,7 +40,7 @@ see: [IMDB Dataset of 50K Movie Reviews | Kaggle](https://www.kaggle.com/lakshmi mkdir build cd build rm -rf * - PADDLE_LIB=path/to/Paddle/build/fluid_install_dir + PADDLE_LIB=path/to/Paddle/build/paddle_install_dir cmake .. -DPADDLE_LIB=$PADDLE_LIB -DWITH_MKLDNN=OFF -DWITH_MKL=OFF make ``` diff --git a/paddle/fluid/train/imdb_demo/demo_trainer.cc b/paddle/fluid/train/imdb_demo/demo_trainer.cc index d45edd563f03d7a1b156d063d5e7296290d0eaba..a08069a57ca824f307b4bf8836237f573ab3c429 100644 --- a/paddle/fluid/train/imdb_demo/demo_trainer.cc +++ b/paddle/fluid/train/imdb_demo/demo_trainer.cc @@ -45,7 +45,9 @@ namespace train { void ReadBinaryFile(const std::string& filename, std::string* contents) { std::ifstream fin(filename, std::ios::in | std::ios::binary); - PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", filename); + PADDLE_ENFORCE_EQ( + fin.is_open(), true, + platform::errors::Unavailable("Failed to open file %s.", filename)); fin.seekg(0, std::ios::end); contents->clear(); contents->resize(fin.tellg()); @@ -98,7 +100,11 @@ int main(int argc, char* argv[]) { file_vec.push_back(filename); } } - PADDLE_ENFORCE_GE(file_vec.size(), 1, "At least one file to train"); + PADDLE_ENFORCE_GE( + file_vec.size(), 1, + platform::errors::InvalidArgument( + "At least one file to train, but received number of file is %d.", + file_vec.size())); 
paddle::framework::InitDevices(false); const auto cpu_place = paddle::platform::CPUPlace(); paddle::framework::Executor executor(cpu_place); @@ -148,7 +154,9 @@ int main(int argc, char* argv[]) { const std::vector readers = dataset_ptr->GetReaders(); PADDLE_ENFORCE_EQ(readers.size(), 1, - "readers num should be equal to thread num"); + platform::errors::InvalidArgument( + "Readers num(%d) should be equal to thread num(1).", + readers.size())); readers[0]->SetPlace(paddle::platform::CPUPlace()); const std::vector& input_feed_names = readers[0]->GetUseSlotAlias(); diff --git a/paddle/fluid/train/test_train_recognize_digits.cc b/paddle/fluid/train/test_train_recognize_digits.cc index 45c438e8925b4e0a88e61ad509b88cd6226773a4..e7b698e1a34e267e392d696b67b92cd2e8c23f3b 100644 --- a/paddle/fluid/train/test_train_recognize_digits.cc +++ b/paddle/fluid/train/test_train_recognize_digits.cc @@ -51,7 +51,8 @@ void Train() { } } - PADDLE_ENFORCE_NE(loss_name, "", "loss not found"); + PADDLE_ENFORCE_NE(loss_name, "", + platform::errors::NotFound("Loss name is not found.")); // prepare data auto x_var = scope.Var("img"); diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index 9e150763dbb30ec6196ce2e62d28f737f42185fb..524c086c07925c880dfb46a70a1f930686bae867 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -20,14 +20,54 @@ rem Paddle CI Task On Windows Platform rem ================================================= rem -------clean up environment----------- -wmic process where name="op_function_generator.exe" call terminate 2>NUL set work_dir=%cd% -mkdir build +wmic process where name="op_function_generator.exe" call terminate 2>NUL + +rem ------initialize common variable------ +if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0" +if not defined BRANCH set BRANCH=develop +if not defined TENSORRT_ROOT set TENSORRT_ROOT="C:/TensorRT-5.1.5.0" +if not 
defined WITH_MKL set WITH_MKL=ON +if not defined WITH_GPU set WITH_GPU=OFF +if not defined WITH_AVX set WITH_AVX=ON +if not defined WITH_TESTING set WITH_TESTING=ON +if not defined WITH_PYTHON set WITH_PYTHON=ON +if not defined ON_INFER set ON_INFER=ON +if not defined WITH_INFERENCE_API_TEST set WITH_INFERENCE_API_TEST=ON +if not defined WITH_STATIC_LIB set WITH_STATIC_LIB=ON +if not defined WITH_CACHE set WITH_CACHE=ON +if not defined WITH_TPCACHE set WITH_TPCACHE=ON + + +rem -------set cache build work directory----------- +if "%WITH_CACHE%"=="OFF" ( + rmdir build /s/q + goto :mkbuild +) + +for /F %%# in ('wmic os get localdatetime^|findstr 20') do set datetime=%%# +set day_now=%datetime:~6,2% +set day_before=-1 +set /p day_before= day.txt + type day.txt + rmdir build /s/q +) +git diff origin/develop --stat --name-only | findstr "cmake CMakeLists.txt paddle_build.bat" +if %ERRORLEVEL% EQU 0 ( + rmdir build /s/q +) + +:mkbuild +if not exist build ( + mkdir build +) cd /d build -tree . +dir . 
dir paddle\fluid\pybind\Release -rem ------initialize the virtual environment------ +rem ------initialize the python environment------ if not defined PYTHON_ROOT set PYTHON_ROOT=C:\Python37 set PATH=%PYTHON_ROOT%;%PYTHON_ROOT%\Scripts;%PATH% @@ -38,7 +78,7 @@ rem %PYTHON_EXECUTABLE% -m pip install virtualenv rem %PYTHON_EXECUTABLE% -m virtualenv paddle_winci rem call paddle_winci\Scripts\activate.bat -rem ------pre install requirement---------- +rem ------pre install python requirement---------- where python where pip pip install --upgrade pip --user @@ -62,15 +102,6 @@ set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000 :: set maximum cache size to 20G clcache.exe -M 21474836480 -rem ------initialize common variable------ -if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0" -if not defined BRANCH set BRANCH=develop -if not defined WITH_AVX set WITH_AVX=ON -if not defined WITH_TESTING set WITH_TESTING=ON -if not defined WITH_PYTHON set WITH_PYTHON=ON -if not defined ON_INFER set ON_INFER=ON -if not defined WITH_INFERENCE_API_TEST set WITH_INFERENCE_API_TEST=ON -if not defined WITH_TPCACHE set WITH_TPCACHE=ON rem ------set cache third_party------ set cache_dir=%work_dir:Paddle=cache% @@ -111,6 +142,7 @@ exit /b 1 :CASE_wincheck_mkl set WITH_MKL=ON set WITH_GPU=OFF +set MSVC_STATIC_CRT=ON call :cmake || goto cmake_error call :build || goto build_error call :test_whl_pacakage || goto test_whl_pacakage_error @@ -122,11 +154,13 @@ goto:success :CASE_wincheck_openblas set WITH_MKL=OFF set WITH_GPU=ON +set MSVC_STATIC_CRT=OFF rem Temporarily turn off WITH_INFERENCE_API_TEST on GPU due to compile hang set WITH_INFERENCE_API_TEST=OFF call :cmake || goto cmake_error call :build || goto build_error call :test_whl_pacakage || goto test_whl_pacakage_error +:: call :test_inference || goto test_inference_error goto:success rem "Other configurations are added here" @@ -145,12 +179,14 @@ set start=%start:~4,10% echo 
cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% ^ -DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_TOOLKIT_ROOT_DIR% ^ -DON_INFER=%ON_INFER% -DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^ --DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% +-DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% -DWITH_STATIC_LIB=%WITH_STATIC_LIB% ^ +-DTENSORRT_ROOT=%TENSORRT_ROOT% -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT% cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% ^ -DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_TOOLKIT_ROOT_DIR% ^ -DON_INFER=%ON_INFER% -DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^ --DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% +-DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% -DWITH_STATIC_LIB=%WITH_STATIC_LIB% ^ +-DTENSORRT_ROOT=%TENSORRT_ROOT% -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT% goto:eof :cmake_error @@ -213,10 +249,10 @@ echo ======================================== for /F %%# in ('wmic os get localdatetime^|findstr 20') do set end=%%# set end=%end:~4,10% call :timestamp "%start%" "%end%" "Build" -tree /F %cd%\fluid_inference_install_dir\paddle -%cache_dir%\tools\busybox64.exe du -h -d 0 %cd%\fluid_inference_install_dir\paddle\lib > lib_size.txt +tree /F %cd%\paddle_inference_install_dir\paddle +%cache_dir%\tools\busybox64.exe du -h -d 0 %cd%\paddle_inference_install_dir\paddle\lib > lib_size.txt set /p libsize=< lib_size.txt -for /F %%i in ("%libsize%") do echo "Windows FLuid_Inference Size: %%i" +for /F %%i in ("%libsize%") do echo "Windows Paddle_Inference Size: %%i" %cache_dir%\tools\busybox64.exe du -h -d 0 %cd%\python\dist > whl_size.txt set /p whlsize=< whl_size.txt for /F %%i in ("%whlsize%") do echo "Windows PR whl Size: %%i" @@ -255,7 
+291,9 @@ dir %THIRD_PARTY_PATH:/=\%\install\mklml\lib dir %THIRD_PARTY_PATH:/=\%\install\mkldnn\bin dir %THIRD_PARTY_PATH:/=\%\install\warpctc\bin -set PATH=%THIRD_PARTY_PATH:/=\%\install\openblas\lib;%THIRD_PARTY_PATH:/=\%\install\openblas\bin;%THIRD_PARTY_PATH:/=\%\install\zlib\bin;%THIRD_PARTY_PATH:/=\%\install\mklml\lib;%THIRD_PARTY_PATH:/=\%\install\mkldnn\bin;%THIRD_PARTY_PATH:/=\%\install\warpctc\bin;%PATH% +set PATH=%THIRD_PARTY_PATH:/=\%\install\openblas\lib;%THIRD_PARTY_PATH:/=\%\install\openblas\bin;^ +%THIRD_PARTY_PATH:/=\%\install\zlib\bin;%THIRD_PARTY_PATH:/=\%\install\mklml\lib;^ +%THIRD_PARTY_PATH:/=\%\install\mkldnn\bin;%THIRD_PARTY_PATH:/=\%\install\warpctc\bin;%PATH% ctest.exe --output-on-failure -C Release -j 8 --repeat until-pass:4 after-timeout:4 goto:eof @@ -278,7 +316,7 @@ set end=%end:~4,10% call :timestamp "%start%" "%end%" "TestCases Total" cd %work_dir%\paddle\fluid\inference\api\demo_ci -%cache_dir%\tools\busybox64.exe bash run.sh %work_dir:\=/% %WITH_MKL% %WITH_GPU% %cache_dir:\=/%/inference_demo +%cache_dir%\tools\busybox64.exe bash run.sh %work_dir:\=/% %WITH_MKL% %WITH_GPU% %cache_dir:\=/%/inference_demo %TENSORRT_ROOT%/include %TENSORRT_ROOT%/lib %MSVC_STATIC_CRT% goto:eof :test_inference_error @@ -418,6 +456,7 @@ taskkill /f /im rc.exe 2>NUL wmic process where name="op_function_generator.exe" call terminate 2>NUL taskkill /f /im python.exe 2>NUL call paddle_winci\Scripts\deactivate.bat 2>NUL +del %PADDLE_WHL_FILE_WIN% taskkill /f /im python.exe 2>NUL echo Windows CI run successfully! 
exit /b 0 diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index ac89116fc499d456e1fab8db030eda1c8fce9de2..69303013d2a41a049276c0d1b03b9d902b555d23 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -140,18 +140,18 @@ function cmake_base() { if [ "$1" != "" ]; then echo "using python abi: $1" if [ "$1" == "cp27-cp27m" ]; then - export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:} + export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.15-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.15-ucs4/lib:} export PATH=/opt/python/cp27-cp27m/bin/:${PATH} PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7 - -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so" + -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.15-ucs2/lib/libpython2.7.so" pip install -r ${PADDLE_ROOT}/python/requirements.txt elif [ "$1" == "cp27-cp27mu" ]; then - export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:} + export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.15-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.15-ucs2/lib:} export PATH=/opt/python/cp27-cp27mu/bin/:${PATH} PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7 - -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so" + -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.15-ucs4/lib/libpython2.7.so" pip install -r ${PADDLE_ROOT}/python/requirements.txt elif [ "$1" == "cp27-cp27m-gcc82" ]; then export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.15-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.15-ucs4/lib:} @@ -362,12 +362,12 @@ function build_size() { Calculate /paddle/build size and 
PR whl size ============================================ EOF - if [ "$1" == "fluid_inference" ]; then + if [ "$1" == "paddle_inference" ]; then cd ${PADDLE_ROOT}/build - cp -r fluid_inference_install_dir fluid_inference - tar -czf fluid_inference.tgz fluid_inference - buildSize=$(du -h --max-depth=0 ${PADDLE_ROOT}/build/fluid_inference.tgz |awk '{print $1}') - echo "FLuid_Inference Size: $buildSize" + cp -r paddle_inference_install_dir paddle_inference + tar -czf paddle_inference.tgz paddle_inference + buildSize=$(du -h --max-depth=0 ${PADDLE_ROOT}/build/paddle_inference.tgz |awk '{print $1}') + echo "Paddle_Inference Size: $buildSize" else SYSTEM=`uname -s` if [ "$SYSTEM" == "Darwin" ]; then @@ -1446,7 +1446,7 @@ EOF fi endTime_s=`date +%s` echo "Build Time: $[ $endTime_s - $startTime_s ]s" - build_size "fluid_inference" + build_size "paddle_inference" } function tar_fluid_lib() { @@ -1456,10 +1456,10 @@ function tar_fluid_lib() { ======================================== EOF cd ${PADDLE_ROOT}/build - cp -r fluid_install_dir fluid + cp -r paddle_install_dir fluid tar -czf fluid.tgz fluid - cp -r fluid_inference_install_dir fluid_inference - tar -czf fluid_inference.tgz fluid_inference + cp -r paddle_inference_install_dir paddle_inference + tar -czf paddle_inference.tgz paddle_inference } function test_fluid_lib() { diff --git a/paddle/scripts/windows_build/build.bat b/paddle/scripts/windows_build/build.bat index 65d44877d12554c73f7d93dafb9cecb9fb55e60a..6f99c23ccd262f3cf15b1cac6b1c56a9cc2c79d8 100644 --- a/paddle/scripts/windows_build/build.bat +++ b/paddle/scripts/windows_build/build.bat @@ -118,8 +118,8 @@ call:Build echo PACKAGE INFERENCE LIBRARY mkdir inference_dist -%PYTHON_DIR%\python.exe -c "import shutil;shutil.make_archive('inference_dist/fluid_inference_install_dir', 'zip', root_dir='fluid_inference_install_dir')" -%PYTHON_DIR%\python.exe -c "import shutil;shutil.make_archive('inference_dist/fluid_install_dir', 'zip', root_dir='fluid_install_dir')" 
+%PYTHON_DIR%\python.exe -c "import shutil;shutil.make_archive('inference_dist/paddle_inference_install_dir', 'zip', root_dir='paddle_inference_install_dir')" +%PYTHON_DIR%\python.exe -c "import shutil;shutil.make_archive('inference_dist/paddle_install_dir', 'zip', root_dir='paddle_install_dir')" echo BUILD INFERENCE LIBRARY COMPLETE goto :END diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 661471599cb080da7a65c11fecc339830f2c00ee..e749cf88b6a49846b678c1c4258d2b3c2a8c01a4 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -90,6 +90,7 @@ from .tensor.linalg import cholesky #DEFINE_ALIAS # from .tensor.linalg import tensordot #DEFINE_ALIAS from .tensor.linalg import bmm #DEFINE_ALIAS from .tensor.linalg import histogram #DEFINE_ALIAS +from .tensor.linalg import mv #DEFINE_ALIAS from .tensor.logic import equal #DEFINE_ALIAS from .tensor.logic import greater_equal #DEFINE_ALIAS from .tensor.logic import greater_than #DEFINE_ALIAS @@ -203,7 +204,6 @@ from .tensor.math import prod #DEFINE_ALIAS from .tensor.random import standard_normal from .tensor.random import normal from .tensor.random import uniform #DEFINE_ALIAS -from .tensor.random import shuffle #DEFINE_ALIAS from .tensor.random import randn #DEFINE_ALIAS from .tensor.random import rand #DEFINE_ALIAS from .tensor.random import randint #DEFINE_ALIAS @@ -276,3 +276,5 @@ from .hapi import callbacks from .hapi import summary import paddle.text import paddle.vision + +disable_static() diff --git a/python/paddle/dataset/flowers.py b/python/paddle/dataset/flowers.py index 969ad3c922f9c15b2e39f71ae4359cd3d2fcdcce..bb60c58211c237c56bc89741e5d3cde11aa68e81 100644 --- a/python/paddle/dataset/flowers.py +++ b/python/paddle/dataset/flowers.py @@ -37,7 +37,7 @@ from .common import download import tarfile import scipy.io as scio from paddle.dataset.image import * -from paddle.reader import * +from paddle.reader import map_readers, xmap_readers from paddle import compat as cpt 
import os import numpy as np diff --git a/python/paddle/distributed/fleet/base/fleet_base.py b/python/paddle/distributed/fleet/base/fleet_base.py index aeb8cac98e23a1a8eda7df1708646d089c1da7bf..d00faac838504f5d68e9d44d9ffa9f25c7bf2ee5 100644 --- a/python/paddle/distributed/fleet/base/fleet_base.py +++ b/python/paddle/distributed/fleet/base/fleet_base.py @@ -180,7 +180,7 @@ class Fleet(object): raise ValueError( "`role_maker` should be subclass of `RoleMakerBase`, but got {}". format(type(role_maker))) - self._role_maker.generate_role() + self._role_maker._generate_role() self.strategy_compiler = StrategyCompiler() if paddle.fluid.framework.in_dygraph_mode(): @@ -207,7 +207,7 @@ class Fleet(object): fleet.is_first_worker() """ - return self._role_maker.is_first_worker() + return self._role_maker._is_first_worker() def worker_index(self): """ @@ -224,7 +224,7 @@ class Fleet(object): fleet.worker_index() """ - return self._role_maker.worker_index() + return self._role_maker._worker_index() def worker_num(self): """ @@ -241,7 +241,7 @@ class Fleet(object): fleet.worker_num() """ - return self._role_maker.worker_num() + return self._role_maker._worker_num() def is_worker(self): """ @@ -259,7 +259,7 @@ class Fleet(object): fleet.is_worker() """ - return self._role_maker.is_worker() + return self._role_maker._is_worker() def worker_endpoints(self, to_string=False): """ @@ -277,9 +277,9 @@ class Fleet(object): """ if to_string: - return ",".join(self._role_maker.get_trainer_endpoints()) + return ",".join(self._role_maker._get_trainer_endpoints()) else: - return self._role_maker.get_trainer_endpoints() + return self._role_maker._get_trainer_endpoints() def server_num(self): """ @@ -294,7 +294,7 @@ class Fleet(object): fleet.init() fleet.server_num() """ - return len(self._role_maker.get_pserver_endpoints()) + return len(self._role_maker._get_pserver_endpoints()) def server_index(self): """ @@ -311,7 +311,7 @@ class Fleet(object): fleet.server_index() """ - return 
self._role_maker.server_index() + return self._role_maker._server_index() def server_endpoints(self, to_string=False): """ @@ -330,9 +330,9 @@ class Fleet(object): """ if to_string: - return ",".join(self._role_maker.get_pserver_endpoints()) + return ",".join(self._role_maker._get_pserver_endpoints()) else: - return self._role_maker.get_pserver_endpoints() + return self._role_maker._get_pserver_endpoints() def is_server(self): """ @@ -350,7 +350,7 @@ class Fleet(object): fleet.is_server() """ - return self._role_maker.is_server( + return self._role_maker._is_server( ) or self._role_maker._is_heter_worker() def set_util(self, util): diff --git a/python/paddle/distributed/fleet/base/role_maker.py b/python/paddle/distributed/fleet/base/role_maker.py index d36c06047f5cafbf0f3ec31e13c8b15c2b88528a..f66f013e4dbaadd534d6859b7ba6530779c82a3b 100644 --- a/python/paddle/distributed/fleet/base/role_maker.py +++ b/python/paddle/distributed/fleet/base/role_maker.py @@ -361,19 +361,19 @@ class RoleMakerBase(object): self._heter_trainer_device = "CPU" self._is_heter_parameter_server_mode = False - def is_worker(self): + def _is_worker(self): """ return is_worker() of current process """ raise NotImplementedError("Please implement this method in child class") - def is_server(self): + def _is_server(self): """ return is_server() of current process """ raise NotImplementedError("Please implement this method in child class") - def is_first_worker(self): + def _is_first_worker(self): """ Check whether the node is the first instance of worker. Returns: @@ -382,7 +382,7 @@ class RoleMakerBase(object): """ raise NotImplementedError("Please implement this method in child class") - def worker_num(self): + def _worker_num(self): """ Get current total worker number. @@ -391,7 +391,7 @@ class RoleMakerBase(object): """ raise NotImplementedError("Please implement this method in child class") - def server_num(self): + def _server_num(self): """ Get current total server number. 
@@ -400,7 +400,7 @@ class RoleMakerBase(object): """ raise NotImplementedError("Please implement this method in child class") - def worker_index(self): + def _worker_index(self): """ Get current worker id. @@ -409,7 +409,7 @@ class RoleMakerBase(object): """ raise NotImplementedError("Please implement this method in child class") - def server_index(self): + def _server_index(self): """ Get current server id. @@ -418,7 +418,7 @@ class RoleMakerBase(object): """ raise NotImplementedError("Please implement this method in child class") - def role_id(self): + def _role_id(self): """ Get current id. @@ -427,7 +427,7 @@ class RoleMakerBase(object): """ raise NotImplementedError("Please implement this method in child class") - def node_num(self): + def _node_num(self): """ Get the training node number Returns: @@ -435,13 +435,13 @@ class RoleMakerBase(object): """ raise NotImplementedError("Please implement this method in child class") - def get_trainer_endpoints(self): + def _get_trainer_endpoints(self): """ return trainer endpoints """ return self._worker_endpoints - def get_pserver_endpoints(self): + def _get_pserver_endpoints(self): """ return pserver endpoints """ @@ -543,90 +543,93 @@ class PaddleCloudRoleMaker(RoleMakerBase): def _all_reduce(self, input, mode="sum", comm_world="worker"): return self._gloo.all_reduce(input, mode, comm_world) - def is_worker(self): + def _is_worker(self): """ whether current process is worker """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._role == Role.WORKER - def is_server(self): + def _is_server(self): """ whether current process is server """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._role == Role.SERVER - def is_first_worker(self): + def _is_first_worker(self): """ whether current process is worker of rank 0 """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._role == Role.WORKER and 
self._current_id == 0 - def worker_index(self): + def _worker_index(self): """ get index of current worker """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._current_id - def server_index(self): + def _server_index(self): """ get index of current server """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._current_id - def role_id(self): + def _role_id(self): """ get index of current node """ + if not self._role_is_generated: + self._generate_role() return self._current_id - def worker_num(self): + def _worker_num(self): """ retrun the current number of worker """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._trainers_num - def server_num(self): + def _server_num(self): """ return the current number of server """ if not self._role_is_generated: - self.generate_role() - return len(self.get_pserver_endpoints()) + self._generate_role() + return len(self._get_pserver_endpoints( + )) if self._get_pserver_endpoints() is not None else 0 - def node_num(self): + def _node_num(self): """ return the training node number """ if not self._role_is_generated: - self.generate_role() - return self._node_num + self._generate_role() + return self._nodes_num - def get_trainer_endpoints(self): + def _get_trainer_endpoints(self): """ get endpoint of all trainers """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._worker_endpoints - def get_pserver_endpoints(self): + def _get_pserver_endpoints(self): """ get endpoint of all pservers """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._server_endpoints def _is_non_distributed(self): @@ -635,7 +638,7 @@ class PaddleCloudRoleMaker(RoleMakerBase): (use python-run to launch fleet-code directly) """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._non_distributed def 
_heter_worker_num(self): @@ -643,7 +646,7 @@ class PaddleCloudRoleMaker(RoleMakerBase): get heter worker nums """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._heter_trainers_num def _is_heter_worker(self): @@ -651,25 +654,9 @@ class PaddleCloudRoleMaker(RoleMakerBase): whether current process is heter worker """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._role == Role.HETER_WORKER - def _get_rank(self): - """ - get current rank in all workers and pservers - """ - if not self._role_is_generated: - self.generate_role() - return self._rank - - def _get_size(self): - """ - get total num of all workers and pservers - """ - if not self._role_is_generated: - self.generate_role() - return self._size - def _ps_env(self): try: # Environment variable PADDLE_PSERVERS_IP_PORT_LIST must be set @@ -682,7 +669,7 @@ class PaddleCloudRoleMaker(RoleMakerBase): self._trainers_num = 1 self._role = Role.WORKER self._current_id = 0 - self._node_num = 1 + self._nodes_num = 1 self._heter_trainers_num = 0 self._heter_trainer_endpoints = None self._non_distributed = True @@ -757,7 +744,7 @@ class PaddleCloudRoleMaker(RoleMakerBase): self._trainers_num = trainers_num self._role = role self._current_id = current_id - self._node_num = len( + self._nodes_num = len( set([x.split(':')[0] for x in self._worker_endpoints])) self._heter_trainers_num = heter_trainers_num self._heter_trainer_endpoints = heter_trainer_eplist @@ -776,7 +763,7 @@ class PaddleCloudRoleMaker(RoleMakerBase): self._non_distributed = True self._worker_endpoints = self._worker_endpoints.split(",") self._trainers_num = len(self._worker_endpoints) - self._node_num = len( + self._nodes_num = len( set([x.split(':')[0] for x in self._worker_endpoints])) def _gloo_init(self): @@ -832,13 +819,13 @@ class PaddleCloudRoleMaker(RoleMakerBase): self._gloo.init( rendezvous=rendezvous_type, role=self._role, - role_id=self.role_id(), - 
worker_num=self.worker_num(), - server_num=self.server_num(), + role_id=self._role_id(), + worker_num=self._worker_num(), + server_num=self._server_num(), need_init_all=need_init_all, kwargs=kwargs) - def generate_role(self): + def _generate_role(self): """ generate role for role maker """ @@ -874,7 +861,7 @@ class UserDefinedRoleMaker(PaddleCloudRoleMaker): self._cur_endpoint = self._worker_endpoints[self._current_id] elif self._role == Role.SERVER: self._cur_endpoint = self._server_endpoints[self._current_id] - self._node_num = len( + self._nodes_num = len( set([x.split(':')[0] for x in self._worker_endpoints])) def _user_defined_collective_env(self): @@ -882,10 +869,10 @@ class UserDefinedRoleMaker(PaddleCloudRoleMaker): self._current_id = self._kwargs.get("current_id") self._trainers_num = len(self._worker_endpoints) self._training_role = Role.WORKER - self._node_num = len( + self._nodes_num = len( set([x.split(':')[0] for x in self._worker_endpoints])) - def generate_role(self): + def _generate_role(self): """ generate role for role maker """ diff --git a/python/paddle/distributed/fleet/base/util_factory.py b/python/paddle/distributed/fleet/base/util_factory.py index e822c3c92f47396388079dda649d299872cfc96d..efaa854c0879ddb57c7746cede68047ff82931a0 100644 --- a/python/paddle/distributed/fleet/base/util_factory.py +++ b/python/paddle/distributed/fleet/base/util_factory.py @@ -237,8 +237,8 @@ class UtilBase(object): if not isinstance(files, list): raise TypeError("files should be a list of file need to be read.") - trainer_id = self.role_maker.worker_index() - trainers = self.role_maker.worker_num() + trainer_id = self.role_maker._worker_index() + trainers = self.role_maker._worker_num() remainder = len(files) % trainers blocksize = int(len(files) / trainers) @@ -280,7 +280,7 @@ class UtilBase(object): fleet_util._set_role_maker(role) fleet_util.print_on_rank("I'm worker 0", 0) """ - if self.role_maker.worker_index() != rank_id: + if 
self.role_maker._worker_index() != rank_id: return print(message) diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index 4b629bc35ce59da9af0b72a2ab4ee44e587a86f1..d63c9f9184c0eb9aafec73df09b225d598f3413f 100644 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -156,7 +156,7 @@ def get_cluster_from_args(args, gpus): else: start_port = 6070 if os.environ.get('FLAGS_START_PORT') is not None: - start_port = os.environ.get('FLAGS_START_PORT') + start_port = int(os.environ.get('FLAGS_START_PORT')) free_ports = [x for x in range(start_port, start_port + len(gpus))] @@ -463,9 +463,8 @@ def launch(): cuda_device_num = 0 if len(has_ps_args) > 0 or cuda_device_num == 0: - logger.info( - "Run parameter-sever cpu mode. pserver arguments:{}, cuda count:{}". - format(has_ps_args, cuda_device_num)) + logger.info("Run parameter-sever cpu mode. pserver arguments:{}".format( + has_ps_args)) launch_ps(args) elif len(has_collective_args) > 0: logger.info("Run collective gpu mode. gpu arguments:{}, cuda count:{}". diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index 17d3b96cf4466e560381c20fe265b39cac6697f0..7540cd9f4c1f352804550561c6f75b63104f9381 100644 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -435,9 +435,17 @@ def start_local_trainers(cluster, len(pod.trainers), pretty_print_envs(proc_env, ("Distributed Envs", "Value")))) + logger.info( + "details abouts PADDLE_TRAINER_ENDPOINTS can be found in {}/endpoints.log.". 
+ format(log_dir)) fn = None if log_dir is not None: os.system("mkdir -p {}".format(log_dir)) + if os.path.exists("%s/endpoints.log" % log_dir): + os.system("rm -f {}/endpoints.log".format(log_dir)) + with open("%s/endpoints.log" % log_dir, "w") as f: + f.write("PADDLE_TRAINER_ENDPOINTS: \n") + f.write("\n".join(cluster.trainers_endpoints())) fn = open("%s/workerlog.%d" % (log_dir, idx), "a") proc = subprocess.Popen(cmd, env=current_env, stdout=fn, stderr=fn) else: diff --git a/python/paddle/distributed/fleet/meta_optimizers/common.py b/python/paddle/distributed/fleet/meta_optimizers/common.py index 70b010978bb4d5be98310efa8ff04a3f853602ab..8ff4114bf8eda4080c252a736d7b6ee69990faa4 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/common.py +++ b/python/paddle/distributed/fleet/meta_optimizers/common.py @@ -57,12 +57,12 @@ class CollectiveHelper(object): if startup_program is None: self.startup_program = fluid.default_startup_program() - endpoints = self.role_maker.get_trainer_endpoints() - current_endpoint = endpoints[self.role_maker.worker_index()] + endpoints = self.role_maker._get_trainer_endpoints() + current_endpoint = endpoints[self.role_maker._worker_index()] for ring_id in range(self.nrings): self._init_communicator( self.startup_program, current_endpoint, endpoints, - self.role_maker.worker_index(), ring_id, self.wait_port) + self.role_maker._worker_index(), ring_id, self.wait_port) self._broadcast_params() def _init_communicator(self, program, current_endpoint, endpoints, rank, diff --git a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py index 3f6ed1ed2f23d4595b3aadff6f259f9e27f129b2..6806a479d30f467bd8b6f6d5c6832dda63af4055 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py @@ -47,7 +47,7 @@ class DGCOptimizer(MetaOptimizerBase): sparsity=configs['sparsity'], 
parameter_list=opt._parameter_list, use_nesterov=opt._use_nesterov, - num_trainers=self.role_maker.worker_num(), + num_trainers=self.role_maker._worker_num(), regularization=opt.regularization, grad_clip=opt._grad_clip, name=opt._name) @@ -60,7 +60,7 @@ class DGCOptimizer(MetaOptimizerBase): if not isinstance(self.inner_opt, Momentum): logging.warn("dgc only works on Momentum optimizer") return False - if self.role_maker.worker_num() <= 1: + if self.role_maker._worker_num() <= 1: logging.warn("dgc only works on multi cards") return False diff --git a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py index 6c1cc3d7a9769a5c61997ab761a5458b7e8df4a3..0ad9e5680eab4a1beb340359e1af44fce9217097 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py @@ -50,12 +50,12 @@ class GraphExecutionOptimizer(MetaOptimizerBase): # should fix the variable def _setup_nccl_op(self, startup_program, main_program, build_strategy): - trainer_endpoints = self.role_maker.get_trainer_endpoints() + trainer_endpoints = self.role_maker._get_trainer_endpoints() trainers = trainer_endpoints - trainer_id = self.role_maker.worker_index() - current_endpoint = self.role_maker.get_trainer_endpoints()[trainer_id] + trainer_id = self.role_maker._worker_index() + current_endpoint = self.role_maker._get_trainer_endpoints()[trainer_id] trainer_endpoints_env = ",".join(trainer_endpoints) - trainers_num = self.role_maker.worker_num() + trainers_num = self.role_maker._worker_num() nccl_id_var = startup_program.global_block().create_var( name="NCCLID", persistable=True, type=core.VarDesc.VarType.RAW) for i in range(1, build_strategy.nccl_comm_num): @@ -127,8 +127,8 @@ class GraphExecutionOptimizer(MetaOptimizerBase): local_build_strategy.enable_sequential_execution = True exe_strategy = 
self.user_defined_strategy.execution_strategy - worker_num = self.role_maker.worker_num() - node_num = self.role_maker.node_num() + worker_num = self.role_maker._worker_num() + node_num = self.role_maker._node_num() if self.role_maker._is_collective: assert worker_num >= 1, "nccl2 worker_num must >= 1, now:{}" % worker_num @@ -170,9 +170,9 @@ class GraphExecutionOptimizer(MetaOptimizerBase): # TODO(guru4elephant): should be an independent optimizer self._setup_nccl_op(startup_program, main_program, local_build_strategy) - local_build_strategy.num_trainers = self.role_maker.worker_num() - local_build_strategy.trainer_id = self.role_maker.worker_index() - local_build_strategy.trainers_endpoints = self.role_maker.get_trainer_endpoints( + local_build_strategy.num_trainers = self.role_maker._worker_num() + local_build_strategy.trainer_id = self.role_maker._worker_index() + local_build_strategy.trainers_endpoints = self.role_maker._get_trainer_endpoints( ) local_build_strategy.enable_backward_optimizer_op_deps = True diff --git a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py index 4ebac20888dd708bd90f91abdef4a472bac2847c..9f094978d842a8ba194742b527dc6f3cd19234cd 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py @@ -38,7 +38,7 @@ class LocalSGDOptimizer(MetaOptimizerBase): if not self.user_defined_strategy.localsgd: return False - if self.role_maker.worker_num() <= 1: + if self.role_maker._worker_num() <= 1: return False return isinstance(self.inner_opt, paddle.optimizer.momentum.Momentum) \ @@ -168,7 +168,7 @@ class LocalSGDOptimizer(MetaOptimizerBase): inputs={'X': [param]}, outputs={'Out': [param]}, attrs={ - 'scale': 1.0 / self.role_maker.worker_num(), + 'scale': 1.0 / self.role_maker._worker_num(), OP_ROLE_KEY: OpRole.Optimize }) sub_block.append_op( @@ -208,7 
+208,7 @@ class AdaptiveLocalSGDOptimizer(MetaOptimizerBase): if not self.user_defined_strategy.adaptive_localsgd: return False - if self.role_maker.worker_num() <= 1: + if self.role_maker._worker_num() <= 1: return False return isinstance(self.inner_opt, paddle.optimizer.momentum.Momentum) \ @@ -275,7 +275,7 @@ class AdaptiveLocalSGDOptimizer(MetaOptimizerBase): inputs={'X': [avg_loss]}, outputs={'Out': [avg_loss]}, attrs={ - 'scale': 1.0 / self.role_maker.worker_num(), + 'scale': 1.0 / self.role_maker._worker_num(), OP_ROLE_KEY: OpRole.Optimize }) @@ -398,7 +398,7 @@ class AdaptiveLocalSGDOptimizer(MetaOptimizerBase): inputs={'X': [param]}, outputs={'Out': [param]}, attrs={ - 'scale': 1.0 / self.role_maker.worker_num(), + 'scale': 1.0 / self.role_maker._worker_num(), OP_ROLE_KEY: OpRole.Optimize }) sub_block.append_op( diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py index 7dc532c86ea681d8479710732ec33e96c58c35d5..dfa765364f357b6e685c3983c73cfb4f1b2cce61 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py @@ -31,7 +31,7 @@ class ParameterServerGraphOptimizer(ParameterServerOptimizer): if k_steps < 0: return False - if self.role_maker.is_server(): + if self.role_maker._is_server(): return False if self.role_maker._is_heter_parameter_server_mode: diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py index 51d4d343165b9057c803a22aa428081109d7d35f..38ad41f8836b4e8c3b304dbf539b47d5293a8221 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py @@ -239,10 +239,10 @@ class 
ParameterServerOptimizer(MetaOptimizerBase): strategy, self.role_maker) compiled_config.strategy = strategy - if self.role_maker.is_worker() or self.role_maker._is_heter_worker(): + if self.role_maker._is_worker() or self.role_maker._is_heter_worker(): main_program, startup_program = self._build_trainer_programs( compiled_config) - elif self.role_maker.is_server(): + elif self.role_maker._is_server(): main_program, startup_program = self._build_pserver_programs( compiled_config) diff --git a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py index 87fa70779111ea485319f50b58901c605fffa23c..889fec838ed3d6dc83d2c15e92138f49e62f01dd 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py @@ -126,11 +126,11 @@ class PipelineOptimizer(MetaOptimizerBase): optimize_ops, params_grads, prog_list = \ self.wrapped_opt.minimize(loss, startup_program, parameter_list, no_grad_set) - if self.role_maker.worker_num() == 1: + if self.role_maker._worker_num() == 1: return optimize_ops, params_grads - endpoints = self.role_maker.get_trainer_endpoints() - current_endpoint = endpoints[self.role_maker.worker_index()] + endpoints = self.role_maker._get_trainer_endpoints() + current_endpoint = endpoints[self.role_maker._worker_index()] self.startup_program = startup_program if startup_program is None: self.startup_program = fluid.default_startup_program() @@ -142,7 +142,7 @@ class PipelineOptimizer(MetaOptimizerBase): self.nranks = nranks self.nrings = len(self.main_program_list) - self.rank = self.role_maker.worker_index() + self.rank = self.role_maker._worker_index() self.endpoints = endpoints self.current_endpoint = current_endpoint diff --git a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py index 
5d882e0c122d62296cdbee4bc6dda2093e183d67..6dd4661f00062f55bb834bbee50daf1924a0c87a 100644 --- a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py +++ b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py @@ -104,9 +104,9 @@ class ParameterServerRuntime(RuntimeBase): def _init_worker(self): def sync_strategy_envs(): kwargs = {} - kwargs["pserver_endpoints"] = self.role_maker.get_pserver_endpoints( - ) - kwargs["trainer_id"] = self.role_maker.worker_index() + kwargs[ + "pserver_endpoints"] = self.role_maker._get_pserver_endpoints() + kwargs["trainer_id"] = self.role_maker._worker_index() return kwargs def geo_strategy_envs(): @@ -150,7 +150,7 @@ class ParameterServerRuntime(RuntimeBase): return "#".join(init_attrs) kwargs = {} - kwargs["trainers"] = self.role_maker.worker_num() + kwargs["trainers"] = self.role_maker._worker_num() kwargs["sparse_attrs"] = get_sparse_attrs() return kwargs @@ -338,7 +338,7 @@ class ParameterServerRuntime(RuntimeBase): block.append_op( type='recv_save', attrs={ - "trainer_id": self.role_maker.worker_index(), + "trainer_id": self.role_maker._worker_index(), "shape": var.shape, "slice_shapes": [",".join([str(i) for i in var.shape])], @@ -378,14 +378,15 @@ class ParameterServerRuntime(RuntimeBase): block.append_op( type='recv_save', attrs={ - "trainer_id": self.role_maker.worker_index(), + "trainer_id": self.role_maker._worker_index(), "shape": var.shape, "slice_shapes": slice_shapes, "slice_varnames": var_ctx.split_varnames(), "remote_varnames": var_ctx.split_varnames(), "is_sparse": True, "endpoints": var_ctx.split_endpoints(), - "pserver_num": len(self.role_maker.get_pserver_endpoints()), + "pserver_num": + len(self.role_maker._get_pserver_endpoints()), "file_path": os.path.join(dirname, var.name) }) @@ -403,7 +404,7 @@ class ParameterServerRuntime(RuntimeBase): block.append_op( type='recv_save', attrs={ - "trainer_id": self.role_maker.worker_index(), + "trainer_id": self.role_maker._worker_index(), 
"shape": var.shape, "slice_shapes": slice_shapes, "slice_varnames": slice_varnames, @@ -411,7 +412,7 @@ class ParameterServerRuntime(RuntimeBase): "is_sparse": True, "endpoints": var_ctx.split_endpoints(), "pserver_num": - len(self.role_maker.get_pserver_endpoints()), + len(self.role_maker._get_pserver_endpoints()), "file_path": os.path.join(dirname, var.name) }) @@ -422,7 +423,7 @@ class ParameterServerRuntime(RuntimeBase): block.append_op( type='recv_save', attrs={ - "trainer_id": self.role_maker.worker_index(), + "trainer_id": self.role_maker._worker_index(), "shape": var.shape, "slice_shapes": [",".join([str(i) for i in var.shape])], diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 9f748b7956f9faa6b1c948d87f0ef4659057a421..e8cc6ce99016075a950f13d9e23f2957c9686471 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -197,6 +197,7 @@ def __bootstrap__(): 'free_when_no_cache_hit', 'call_stack_level', 'sort_sum_gradient', + 'max_inplace_grad_add', ] if 'Darwin' not in sysstr: read_env_flags.append('use_pinned_memory') diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index d51cacd1a5cad53ef77b325e5380100c537e057e..478fecf74e4013e0d695c68af86a0e39a4a4e845 100644 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -251,12 +251,19 @@ def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None): begin_idx = 0 if end_idx is None: end_idx = len(op_descs) - for i in range(begin_idx, end_idx): - op_desc = op_descs[i] - if isinstance(op_desc, tuple): - op_desc = op_desc[0] - op_desc._rename_input(old_name, new_name) - op_desc._rename_output(old_name, new_name) + if isinstance(op_descs, (list, tuple)): + for i in range(begin_idx, end_idx): + op_desc = op_descs[i] + if isinstance(op_desc, tuple): + op_desc = op_desc[0] + op_desc._rename_input(old_name, new_name) + op_desc._rename_output(old_name, new_name) + if isinstance(op_descs, 
collections.OrderedDict): + for key, value in op_descs.items(): + if isinstance(value, (list, tuple)): + for op_desc in value: + op_desc._rename_input(old_name, new_name) + op_desc._rename_output(old_name, new_name) def _create_op_desc_(op_type, inputs, outputs, attrs): @@ -369,6 +376,41 @@ def _append_grad_suffix_(name): return cpt.to_text(name) + core.grad_var_suffix() +def _accumulate_gradients_by_sum_op_(var_name, renamed_vars, pending_sum_ops, + op_idx): + """ + Use sum op to accumulate_gradients, the gradients are stored in renamed_vars. + """ + if op_idx not in pending_sum_ops.keys(): + pending_sum_ops[op_idx] = [] + pending_sum_ops[op_idx].append( + _create_op_desc_("sum", {"X": renamed_vars[var_name]}, + {"Out": [var_name]}, {"use_mkldnn": False})) + renamed_vars[var_name] = [var_name] + + +def _accumulate_gradients_by_add_ops_(var_name, renamed_vars, pending_sum_ops, + op_idx): + """ + Use several inplace add op to accumulate_gradients, the gradients are stored in renamed_vars. + """ + if op_idx not in pending_sum_ops.keys(): + pending_sum_ops[op_idx] = [] + out_name = renamed_vars[var_name][0] + for i in range(1, len(renamed_vars[var_name])): + x_name = out_name + y_name = renamed_vars[var_name][i] + if i != len(renamed_vars[var_name]) - 1: + out_name = var_name + '@ADD@' + str(i) + else: + out_name = var_name + pending_sum_ops[op_idx].append( + _create_op_desc_("grad_add", {"X": [x_name], + "Y": [y_name]}, {"Out": [out_name]}, + {"use_mkldnn": False})) + renamed_vars[var_name] = [var_name] + + def _addup_repetitive_outputs_(op_descs, block_idx): """ In backward part, an variable may be the output of more than one ops. @@ -376,7 +418,9 @@ def _addup_repetitive_outputs_(op_descs, block_idx): In these cases, the variable should be the accumulation of all the outputs. `sum_op`s are added to implement the accumulate. 
""" - pending_sum_ops = [] + _MAX_ADD_NUM_ = core.globals()['FLAGS_max_inplace_grad_add'] + #pending_sum_ops = [] + pending_sum_ops = collections.OrderedDict() var_rename_count = collections.defaultdict(int) renamed_vars = collections.defaultdict(list) renamed_var_start_idx = collections.defaultdict(list) @@ -385,10 +429,13 @@ def _addup_repetitive_outputs_(op_descs, block_idx): if "@GRAD" not in var_name: continue if len(renamed_vars[var_name]) > 1: - pending_sum_ops.append((_create_op_desc_( - "sum", {"X": renamed_vars[var_name]}, {"Out": [var_name]}, - {"use_mkldnn": False}), idx)) - renamed_vars[var_name] = [var_name] + if len(renamed_vars[var_name]) > _MAX_ADD_NUM_: + _accumulate_gradients_by_sum_op_(var_name, renamed_vars, + pending_sum_ops, idx) + else: + _accumulate_gradients_by_add_ops_(var_name, renamed_vars, + pending_sum_ops, idx) + for param_idx, param_name in enumerate(op_desc.output_names()): arg_names = op_desc.output(param_name) for arg_idx, var_name in enumerate(arg_names): @@ -440,13 +487,26 @@ def _addup_repetitive_outputs_(op_descs, block_idx): renamed_vars[var_name].append(new_name) for var_name, inputs in six.iteritems(renamed_vars): - if len(inputs) > 1: - pending_sum_ops.append( - (_create_op_desc_("sum", {"X": inputs}, {"Out": [var_name]}, - {"use_mkldnn": False}), len(op_descs))) + if len(renamed_vars[var_name]) > 1: + if len(renamed_vars[var_name]) > _MAX_ADD_NUM_: + _accumulate_gradients_by_sum_op_(var_name, renamed_vars, + pending_sum_ops, len(op_descs)) + else: + _accumulate_gradients_by_add_ops_(var_name, renamed_vars, + pending_sum_ops, + len(op_descs)) + # sum_op descs are sorted according to their insert position - for p in reversed(pending_sum_ops): - op_descs.insert(p[1], p[0]) + for key, value in collections.OrderedDict( + reversed(list(pending_sum_ops.items()))).items(): + + # NOTE(zhiqiu): Since reversed, the idx of op_descs to be inserted will remains correct. 
+ # For example, [0, 1, 2], and we want to insert 'a' at idx 1, 'b' at idx 2, and the expected result is [0, 1, 'a', 2, 'b']. + # If reversed, we first insert 'b' at idx 2, it becomes [0, 1, 2, 'b'], and then insert 'a' at idx 1, it becomes [0, 1, 'a', 2, 'b']. + # If not reverse, we first insert 'a' at idx 1, it becomes [0, 1, 'a', 2], and then insert 'b' at idx 2, it becomes [0, 1, 'a', 'b', 2]. + idx = key + for i, op in enumerate(value): + op_descs.insert(idx + i, op) return op_descs diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py index 7b564b3f837c001673bdd272ba60edf31cde21fb..ac6493b1c2969a8c3319bc8d29983b0ccc3a67d9 100644 --- a/python/paddle/fluid/contrib/layers/nn.py +++ b/python/paddle/fluid/contrib/layers/nn.py @@ -45,6 +45,7 @@ from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer from paddle.fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype, convert_dtype from paddle.fluid import core +from paddle.fluid.param_attr import ParamAttr from paddle.fluid.entry_attr import ProbabilityEntry, CountFilterEntry from paddle.fluid.framework import Variable, convert_np_dtype_to_dtype_ @@ -57,7 +58,7 @@ __all__ = [ 'multiclass_nms2', 'search_pyramid_hash', 'shuffle_batch', 'partial_concat', 'sparse_embedding', 'partial_sum', 'tdm_child', 'rank_attention', 'tdm_sampler', 'batch_fc', '_pull_box_extended_sparse', 'bilateral_slice', - 'correlation' + 'correlation', 'fused_bn_add_act' ] @@ -1625,3 +1626,191 @@ def correlation(x, }, outputs={"Output": output}) return output + + +def fused_bn_add_act(x, + y, + momentum=0.9, + epsilon=1e-05, + param_attr=None, + bias_attr=None, + moving_mean_name=None, + moving_variance_name=None, + act=None, + name=None): + """ + This Op performs batch norm on input x, and adds the result to input y. Then + it performs activation on the sum. The data format of inputs must be NHWC + `[batch, in_height, in_width, in_channels]`. 
+ + Args: + x(Tensor): The rank of input tensor can be 2, 3, 4, 5. The data type + is float16. + y(Tensor): The rank of input tensor can be 2, 3, 4, 5. The data type + is float16. + momentum(float|Tensor, optional): The value used for the moving_mean and + moving_var computation. This should be a float number or a tensor with + shape [1] and data type as float32. The updated formula is: + :math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)` + :math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)` + Default is 0.9. + epsilon(float, optional): A value added to the denominator for + numerical stability. Default is 1e-5. + param_attr(ParamAttr, optional): The parameter attribute for Parameter `scale` + of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm + will create ParamAttr as param_attr, the name of scale can be set in ParamAttr. + If the Initializer of the param_attr is not set, the parameter is initialized + with Xavier. Default: None. + bias_attr(ParamAttr, optional): The parameter attribute for the bias of batch_norm. + If it is set to None or one attribute of ParamAttr, batch_norm + will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. + If the Initializer of the bias_attr is not set, the bias is initialized zero. + Default: None. + moving_mean_name(str, optional): The name of moving_mean which store the global Mean. If it + is set to None, batch_norm will save global mean with a random name, otherwise, batch_norm + will save global mean with the string. + moving_variance_name(str, optional): The name of the moving_variance which store the global Variance. + If it is set to None, batch_norm will save global variance with a random name, otherwise, batch_norm + will save global variance with the string. + act(string, optional): Activation type, linear|relu|prelu|... + name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. 
+ Usually name is no need to set and None by default. + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + + def build_program(main_program, startup_program): + with fluid.program_guard(main_program, startup_program): + x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') + y = fluid.layers.data(name="y", shape=[1], dtype='int64') + conv1_1 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + bias_attr=False, + data_format='NHWC') + conv1_2 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + bias_attr=False, + data_format='NHWC') + bn = fluid.layers.batch_norm( + input=conv1_1, + act=None, + data_layout='NHWC') + fused_bn_add_act = fluid.contrib.layers.fused_bn_add_act(conv1_2, bn) + prediction = fluid.layers.fc(input=fused_bn_add_act, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=prediction, label=y) + loss = fluid.layers.mean(loss) + sgd = fluid.optimizer.SGD(learning_rate=0.001) + sgd = fluid.contrib.mixed_precision.decorate( + sgd, use_dynamic_loss_scaling=True, init_loss_scaling=128.0) + sgd.minimize(loss) + + return x, y, loss + + iters = 5 + batch_size = 16 + support_gpu = fluid.is_compiled_with_cuda() + if support_gpu: + main_program = fluid.Program() + startup_program = fluid.Program() + place = fluid.CUDAPlace(0) + x, y, loss = build_program(main_program, startup_program) + + feeder = fluid.DataFeeder(feed_list=[x, y], place=place) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=batch_size) + exe = fluid.Executor(place) + scope = fluid.Scope() + with fluid.scope_guard(scope): + exe.run(startup_program) + for _ in range(iters): + data = next(train_reader()) + loss_v = exe.run(main_program, feed=feeder.feed(data), fetch_list=[loss]) + """ + helper = LayerHelper('fused_bn_add_act', **locals()) + + check_variable_and_dtype(x, 'input', ['float16', 'float32', 'float64'], 
+ 'fused_bn_add_act') + check_variable_and_dtype(y, 'input', ['float16', 'float32', 'float64'], + 'fused_bn_add_act') + bn_param_dtype = core.VarDesc.VarType.FP32 + + x_shape = x.shape + channel_num = x_shape[-1] + param_shape = [channel_num] + + # create parameter + scale = helper.create_parameter( + attr=helper.param_attr, + shape=param_shape, + dtype=bn_param_dtype, + default_initializer=Constant(1.0)) + bias = helper.create_parameter( + attr=helper.bias_attr, + shape=param_shape, + dtype=bn_param_dtype, + is_bias=True) + mean = helper.create_parameter( + attr=ParamAttr( + name=moving_mean_name, initializer=Constant(0.0), trainable=False), + shape=param_shape, + dtype=bn_param_dtype) + mean.stop_gradient = True + variance = helper.create_parameter( + attr=ParamAttr( + name=moving_variance_name, + initializer=Constant(1.0), + trainable=False), + shape=param_shape, + dtype=bn_param_dtype) + variance.stop_gradient = True + + # create output + # mean and mean_out share the same memory + mean_out = mean + # variance and variance out share the same memory + variance_out = variance + saved_mean = helper.create_variable_for_type_inference( + dtype=bn_param_dtype, stop_gradient=True) + saved_variance = helper.create_variable_for_type_inference( + dtype=bn_param_dtype, stop_gradient=True) + reserve_space = helper.create_variable_for_type_inference( + dtype=core.VarDesc.VarType.FP16, stop_gradient=True) + batch_norm_out = helper.create_variable_for_type_inference( + core.VarDesc.VarType.FP16) + + inputs = { + "X": x, + "Z": y, + "Scale": scale, + "Bias": bias, + } + attrs = {"epsilon": epsilon, 'momentum': momentum} + + outputs = { + "Y": batch_norm_out, + "MeanOut": mean_out, + "VarianceOut": variance_out, + "SavedMean": saved_mean, + "SavedVariance": saved_variance, + "ReserveSpace": reserve_space + } + + helper.append_op( + type="fused_bn_add_activation", + inputs=inputs, + outputs=outputs, + attrs=attrs) + + return batch_norm_out diff --git 
a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py index 1f301b7148d005d4e3d5d272fd78f78af6dc1e6a..a9f080c514dff078b0068bce262fa177fd0b0db2 100644 --- a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py +++ b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py @@ -135,6 +135,7 @@ gray_list = { 'get_tensor_from_selected_rows', 'sign', 'cast', + 'fused_bn_add_activation', } ''' # The set of ops that don't support fp16 calculation diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py index 0b142ff33de55f36410eb9c23cb75210fc9d6321..0ff166d8dc89ac79c36343df9bc379cb171c36fd 100644 --- a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py +++ b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py @@ -69,8 +69,10 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype): ] for in_name in op.input_names: - if src_dtype == core.VarDesc.VarType.FP32 and op.type == 'batch_norm': - if in_name != 'X': + if src_dtype == core.VarDesc.VarType.FP32 and op.type in [ + 'batch_norm', 'fused_bn_add_activation' + ]: + if in_name not in {'X', 'Z'}: continue for in_var_name in op.input(in_name): in_var = block.var(in_var_name) @@ -102,7 +104,8 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype): op._set_attr('in_dtype', dest_dtype) if src_dtype == core.VarDesc.VarType.FP32 and dest_dtype == core.VarDesc.VarType.FP16: for out_name in op.output_names: - if op.type == 'batch_norm' and out_name != 'Y': + if op.type in ['batch_norm', 'fused_bn_add_activation' + ] and out_name != 'Y': continue for out_var_name in op.output(out_name): out_var = block.var(out_var_name) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index 7b276293638189d304e5c33b2cd4497bb4256bab..8d7ebcf4caa53929c5dd97159e63cf3cd02f5636 100644 --- 
a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -99,7 +99,12 @@ class ImperativeQuantAware(object): self._activation_bits = activation_bits self._moving_rate = moving_rate - quant_type = {'abs_max', 'moving_average_abs_max'} + quant_type = { + 'abs_max', 'moving_average_abs_max', 'channel_wise_abs_max' + } + + assert activation_quantize_type != 'channel_wise_abs_max', \ + "The activation quantization type does not support 'channel_wise_abs_max'." if activation_quantize_type not in quant_type: raise ValueError( "Unknown activation_quantize_type : '%s'. It can only be " @@ -108,8 +113,8 @@ class ImperativeQuantAware(object): if weight_quantize_type not in quant_type: raise ValueError( "Unknown weight_quantize_type: '%s'. It can only be " - "'abs_max' or 'moving_average_abs_max' now." % - (str(weight_quantize_type))) + "'abs_max' or 'moving_average_abs_max' or 'channel_wise_abs_max' now." + % (str(weight_quantize_type))) self._activation_quantize_type = activation_quantize_type self._weight_quantize_type = weight_quantize_type diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index e22c980b0a7c6030c5d6a2fbc4fd58d2ec66958a..2e35ac288c7158a220e3b96babb146e28d50a5ee 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -24,7 +24,7 @@ from paddle.fluid.data_feeder import check_variable_and_dtype __all__ = [ 'FakeQuantMovingAverage', 'FakeQuantAbsMax', 'QuantizedConv2D', - 'QuantizedLinear' + 'QuantizedLinear', 'FakeChannelWiseQuantDequantAbsMax' ] @@ -209,6 +209,89 @@ class FakeQuantAbsMax(layers.Layer): return quant_out +class FakeChannelWiseQuantDequantAbsMax(layers.Layer): + def __init__(self, + name=None, + channel_num=None, + quant_bits=8, + quant_axis=0, + 
dtype='float32', + quant_on_weight=False): + assert quant_on_weight == True, "Channel_wise only can be used on weight quantization." + super(FakeChannelWiseQuantDequantAbsMax, self).__init__() + self._quant_bits = quant_bits + self._quant_axis = quant_axis + self._dtype = dtype + self._name = name + self._channel_num = channel_num + scale_prefix = "{}.scale".format( + name) if name else 'quant_dequant.scale' + self._scale_name = unique_name.generate(scale_prefix) + if quant_on_weight: + scale_attr = ParamAttr( + name=self._scale_name, + initializer=Constant(0.0), + trainable=False) + self._scale = self.create_parameter( + shape=[self._channel_num], attr=scale_attr, dtype=self._dtype) + self._scale.stop_gradient = True + else: + self._scale = None + + def forward(self, input): + if in_dygraph_mode(): + attrs = ('bit_length', self._quant_bits, 'quant_axis', + self._quant_axis) + quant_out = _varbase_creator( + type=input.type, + name="{}.quantized.dequantized".format(input.name), + shape=input.shape, + dtype=input.dtype, + persistable=False) + + out_scale = self._scale + if out_scale is None: + out_scale = _varbase_creator( + type=core.VarDesc.VarType.LOD_TENSOR, + name=self._scale_name, + shape=[self._channel_num], + dtype=self._dtype, + persistable=False) + out_scale.stop_gradient = True + + out, _, = core.ops.fake_channel_wise_quantize_dequantize_abs_max( + input, quant_out, out_scale, *attrs) + return out + + check_variable_and_dtype(input, 'input', ['float32'], + "FakeChannelWiseQuantDequantAbsMax") + attrs = {'bit_length': self._quant_bits, 'quant_axis': self._quant_axis} + inputs = {"X": [input]} + quant_out = self._helper.create_variable( + name="{}.quantized.dequantized".format(input.name), + dtype=input.dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=False) + out_scale = self._scale + if not out_scale: + out_scale = self._helper.create_variable( + name=self._scale_name, + dtype=self._dtype, + 
type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=True) + outputs = {"Out": [quant_out], "OutScale": [out_scale]} + + self._helper.append_op( + type="fake_channel_wise_quantize_dequantize_abs_max", + inputs=inputs, + outputs=outputs, + attrs=attrs) + + return quant_out + + def _get_fake_quant_type(quant_type, **kwargs): call_args = { "name": kwargs.get("name", None), @@ -220,10 +303,17 @@ def _get_fake_quant_type(quant_type, **kwargs): call_args["quant_on_weight"] = kwargs.get("quant_on_weight", False) elif quant_type == 'moving_average_abs_max': call_args["moving_rate"] = kwargs.get("moving_rate", 0.9) - + elif quant_type == 'channel_wise_abs_max': + call_args["quant_on_weight"] = kwargs.get("quant_on_weight", False) + call_args["channel_num"] = kwargs.get("channel_num", None) + call_args["quant_axis"] = kwargs.get("quant_axis", 0) + assert call_args["channel_num"] is not None, ( + "You need to input channel_num" + "when you use channel_wise_abs_max strategy.") fake_quant_map = { 'abs_max': FakeQuantAbsMax, - 'moving_average_abs_max': FakeQuantMovingAverage + 'moving_average_abs_max': FakeQuantMovingAverage, + 'channel_wise_abs_max': FakeChannelWiseQuantDequantAbsMax } return fake_quant_map[quant_type](**call_args) @@ -255,19 +345,23 @@ class QuantizedConv2D(layers.Layer): self.weight = getattr(layer, 'weight') self.bias = getattr(layer, 'bias') # For FakeQuant + self._conv2d_quant_axis = 0 self._fake_quant_weight = _get_fake_quant_type( weight_quantize_type, name=self.weight.name, moving_rate=moving_rate, quant_bits=weight_bits, dtype=self._dtype, - quant_on_weight=True) + quant_on_weight=True, + channel_num=self.weight.shape[self._conv2d_quant_axis], + quant_axis=self._conv2d_quant_axis) self._fake_quant_input = _get_fake_quant_type( activation_quantize_type, name=layer.full_name(), moving_rate=moving_rate, quant_bits=activation_bits, - dtype=self._dtype) + dtype=self._dtype, + quant_on_weight=False) def forward(self, input): quant_input 
= self._fake_quant_input(input) @@ -341,19 +435,23 @@ class QuantizedLinear(layers.Layer): self.weight = getattr(layer, 'weight') self.bias = getattr(layer, 'bias') # For FakeQuant + self._linear_quant_axis = 1 self._fake_quant_weight = _get_fake_quant_type( weight_quantize_type, name=self.weight.name, moving_rate=moving_rate, quant_bits=weight_bits, dtype=self._dtype, - quant_on_weight=True) + quant_on_weight=True, + channel_num=self.weight.shape[self._linear_quant_axis], + quant_axis=self._linear_quant_axis) self._fake_quant_input = _get_fake_quant_type( activation_quantize_type, name=layer.full_name(), moving_rate=moving_rate, quant_bits=activation_bits, - dtype=self._dtype) + dtype=self._dtype, + quant_on_weight=False) def forward(self, input): quant_input = self._fake_quant_input(input) diff --git a/python/paddle/fluid/contrib/slim/tests/convert_model2dot.py b/python/paddle/fluid/contrib/slim/tests/convert_model2dot.py index 877897c0a0e7282546727d56b54c0af506e18bc0..0018d81dbf248726186cf3170fa9f5d32fa785fd 100644 --- a/python/paddle/fluid/contrib/slim/tests/convert_model2dot.py +++ b/python/paddle/fluid/contrib/slim/tests/convert_model2dot.py @@ -19,6 +19,9 @@ import argparse import paddle.fluid as fluid from paddle.fluid.framework import IrGraph from paddle.fluid import core +import paddle + +paddle.enable_static() def parse_args(): diff --git a/python/paddle/fluid/contrib/slim/tests/quant2_int8_image_classification_comparison.py b/python/paddle/fluid/contrib/slim/tests/quant2_int8_image_classification_comparison.py index 17e0f452e98220b2de97e9567311efeffdee27b4..3fba0e892184953b300a54dd8590e07e81bc5f2d 100644 --- a/python/paddle/fluid/contrib/slim/tests/quant2_int8_image_classification_comparison.py +++ b/python/paddle/fluid/contrib/slim/tests/quant2_int8_image_classification_comparison.py @@ -27,6 +27,8 @@ from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass from paddle.fluid import core 
+paddle.enable_static() + logging.basicConfig(format='%(asctime)s-%(levelname)s: %(message)s') _logger = logging.getLogger(__name__) _logger.setLevel(logging.INFO) diff --git a/python/paddle/fluid/contrib/slim/tests/quant2_int8_nlp_comparison.py b/python/paddle/fluid/contrib/slim/tests/quant2_int8_nlp_comparison.py index a534edb7efd51f5eb7fd0c20540d531a44a84f53..12d1cfcc41d53f1a4e979128631559f89c6c299b 100644 --- a/python/paddle/fluid/contrib/slim/tests/quant2_int8_nlp_comparison.py +++ b/python/paddle/fluid/contrib/slim/tests/quant2_int8_nlp_comparison.py @@ -25,6 +25,8 @@ from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass from paddle.fluid import core +paddle.enable_static() + logging.basicConfig(format='%(asctime)s-%(levelname)s: %(message)s') _logger = logging.getLogger(__name__) _logger.setLevel(logging.INFO) diff --git a/python/paddle/fluid/contrib/slim/tests/quant_int8_image_classification_comparison.py b/python/paddle/fluid/contrib/slim/tests/quant_int8_image_classification_comparison.py index 5f0a8f2d6fa9818481096249aaf74da27a852531..b81ef7b30ed4783133e46f7b895569db68438912 100644 --- a/python/paddle/fluid/contrib/slim/tests/quant_int8_image_classification_comparison.py +++ b/python/paddle/fluid/contrib/slim/tests/quant_int8_image_classification_comparison.py @@ -27,6 +27,8 @@ from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import QuantInt8MkldnnPass from paddle.fluid import core +paddle.enable_static() + logging.basicConfig(format='%(asctime)s-%(levelname)s: %(message)s') _logger = logging.getLogger(__name__) _logger.setLevel(logging.INFO) diff --git a/python/paddle/fluid/contrib/slim/tests/save_quant_model.py b/python/paddle/fluid/contrib/slim/tests/save_quant_model.py index dab4b63cda4cca8036b4236d44cb54660258c0d4..e38148250af2177801995d263dc6d3c9502bc501 100644 --- a/python/paddle/fluid/contrib/slim/tests/save_quant_model.py +++ 
b/python/paddle/fluid/contrib/slim/tests/save_quant_model.py @@ -27,6 +27,8 @@ from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass from paddle.fluid import core +paddle.enable_static() + def parse_args(): parser = argparse.ArgumentParser() diff --git a/python/paddle/fluid/contrib/slim/tests/test_graph.py b/python/paddle/fluid/contrib/slim/tests/test_graph.py index 2cf897ec418fa75a70cfa7fa3fe0a4b9e79d3c65..435cefd73e733379eb96821519a5687dfba50046 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_graph.py +++ b/python/paddle/fluid/contrib/slim/tests/test_graph.py @@ -22,6 +22,8 @@ import paddle.fluid as fluid from paddle.fluid.framework import IrGraph from paddle.fluid import core +paddle.enable_static() + os.environ["CUDA_VISIBLE_DEVICES"] = "0" os.environ["CPU_NUM"] = "1" diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py index f076d274b643367a2703910dfa6899c5bfd1317c..df505cf2435e73d4c30f641451fb1225a21816c6 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py @@ -32,6 +32,8 @@ from paddle.fluid.dygraph.nn import Pool2D from paddle.fluid.dygraph.nn import Linear from paddle.fluid.log_helper import get_logger +paddle.enable_static() + os.environ["CPU_NUM"] = "1" if core.is_compiled_with_cuda(): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) @@ -181,7 +183,6 @@ class TestImperativeQat(unittest.TestCase): img = fluid.dygraph.to_variable(x_data) label = fluid.dygraph.to_variable(y_data) - out = lenet(img) acc = fluid.layers.accuracy(out, label) loss = fluid.layers.cross_entropy(out, label) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py new file mode 100644 index 
0000000000000000000000000000000000000000..80d388ac0da6219bda8e485aabaaf7fea44f6cd0 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py @@ -0,0 +1,430 @@ +# copyright (c) 2018 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. +# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. + +from __future__ import print_function + +import os +import numpy as np +import random +import unittest +import logging +import paddle +import paddle.fluid as fluid +from paddle.fluid import core +from paddle.fluid.optimizer import AdamOptimizer +from paddle.fluid.framework import IrGraph +from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware +from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass +from paddle.fluid.dygraph.container import Sequential +from paddle.fluid.dygraph.nn import Conv2D +from paddle.fluid.dygraph.nn import Pool2D +from paddle.fluid.dygraph.nn import Linear +from paddle.fluid.log_helper import get_logger + +paddle.enable_static() + +os.environ["CPU_NUM"] = "1" +if core.is_compiled_with_cuda(): + fluid.set_flags({"FLAGS_cudnn_deterministic": True}) + +_logger = get_logger( + __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') + + +def StaticLenet(data, num_classes=10, classifier_activation='softmax'): + conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") + conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") + fc_w1_attr = fluid.ParamAttr(name="fc_w_1") + fc_w2_attr = 
fluid.ParamAttr(name="fc_w_2") + fc_w3_attr = fluid.ParamAttr(name="fc_w_3") + conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") + conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") + fc_b1_attr = fluid.ParamAttr(name="fc_b_1") + fc_b2_attr = fluid.ParamAttr(name="fc_b_2") + fc_b3_attr = fluid.ParamAttr(name="fc_b_3") + conv1 = fluid.layers.conv2d( + data, + num_filters=6, + filter_size=3, + stride=1, + padding=1, + param_attr=conv2d_w1_attr, + bias_attr=conv2d_b1_attr) + pool1 = fluid.layers.pool2d( + conv1, pool_size=2, pool_type='max', pool_stride=2) + conv2 = fluid.layers.conv2d( + pool1, + num_filters=16, + filter_size=5, + stride=1, + padding=0, + param_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr) + pool2 = fluid.layers.pool2d( + conv2, pool_size=2, pool_type='max', pool_stride=2) + + fc1 = fluid.layers.fc(input=pool2, + size=120, + param_attr=fc_w1_attr, + bias_attr=fc_b1_attr) + fc2 = fluid.layers.fc(input=fc1, + size=84, + param_attr=fc_w2_attr, + bias_attr=fc_b2_attr) + fc3 = fluid.layers.fc(input=fc2, + size=num_classes, + act=classifier_activation, + param_attr=fc_w3_attr, + bias_attr=fc_b3_attr) + + return fc3 + + +class ImperativeLenet(fluid.dygraph.Layer): + def __init__(self, num_classes=10, classifier_activation='softmax'): + super(ImperativeLenet, self).__init__() + conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") + conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") + fc_w1_attr = fluid.ParamAttr(name="fc_w_1") + fc_w2_attr = fluid.ParamAttr(name="fc_w_2") + fc_w3_attr = fluid.ParamAttr(name="fc_w_3") + conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") + conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") + fc_b1_attr = fluid.ParamAttr(name="fc_b_1") + fc_b2_attr = fluid.ParamAttr(name="fc_b_2") + fc_b3_attr = fluid.ParamAttr(name="fc_b_3") + self.features = Sequential( + Conv2D( + num_channels=1, + num_filters=6, + filter_size=3, + stride=1, + padding=1, + param_attr=conv2d_w1_attr, + bias_attr=conv2d_b1_attr), + Pool2D( + 
pool_size=2, pool_type='max', pool_stride=2), + Conv2D( + num_channels=6, + num_filters=16, + filter_size=5, + stride=1, + padding=0, + param_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr), + Pool2D( + pool_size=2, pool_type='max', pool_stride=2)) + + self.fc = Sequential( + Linear( + input_dim=400, + output_dim=120, + param_attr=fc_w1_attr, + bias_attr=fc_b1_attr), + Linear( + input_dim=120, + output_dim=84, + param_attr=fc_w2_attr, + bias_attr=fc_b2_attr), + Linear( + input_dim=84, + output_dim=num_classes, + act=classifier_activation, + param_attr=fc_w3_attr, + bias_attr=fc_b3_attr)) + + def forward(self, inputs): + x = self.features(inputs) + + x = fluid.layers.flatten(x, 1) + x = self.fc(x) + return x + + +class TestImperativeQat(unittest.TestCase): + """ + QAT = quantization-aware training + """ + + def test_qat_save(self): + imperative_qat = ImperativeQuantAware( + weight_quantize_type='channel_wise_abs_max', + activation_quantize_type='moving_average_abs_max') + + with fluid.dygraph.guard(): + lenet = ImperativeLenet() + imperative_qat.quantize(lenet) + adam = AdamOptimizer( + learning_rate=0.001, parameter_list=lenet.parameters()) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=32, drop_last=True) + test_reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=32) + + epoch_num = 1 + for epoch in range(epoch_num): + lenet.train() + for batch_id, data in enumerate(train_reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = fluid.dygraph.to_variable(x_data) + label = fluid.dygraph.to_variable(y_data) + out = lenet(img) + acc = fluid.layers.accuracy(out, label) + loss = fluid.layers.cross_entropy(out, label) + avg_loss = fluid.layers.mean(loss) + avg_loss.backward() + adam.minimize(avg_loss) + lenet.clear_gradients() + if batch_id % 100 == 0: + _logger.info( + "Train | At epoch {} step {}: loss = {:}, 
acc= {:}". + format(epoch, batch_id, + avg_loss.numpy(), acc.numpy())) + + lenet.eval() + for batch_id, data in enumerate(test_reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = fluid.dygraph.to_variable(x_data) + label = fluid.dygraph.to_variable(y_data) + + out = lenet(img) + acc_top1 = fluid.layers.accuracy( + input=out, label=label, k=1) + acc_top5 = fluid.layers.accuracy( + input=out, label=label, k=5) + + if batch_id % 100 == 0: + _logger.info( + "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}". + format(epoch, batch_id, + acc_top1.numpy(), acc_top5.numpy())) + + # save weights + model_dict = lenet.state_dict() + fluid.save_dygraph(model_dict, "save_temp") + + # test the correctness of `paddle.jit.save` + data = next(test_reader()) + test_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + test_img = fluid.dygraph.to_variable(test_data) + lenet.eval() + before_save = lenet(test_img) + + # save inference quantized model + path = "./mnist_infer_model" + paddle.jit.save( + layer=lenet, + model_path=path, + input_spec=[ + paddle.static.InputSpec( + shape=[None, 1, 28, 28], dtype='float32') + ]) + + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + exe = fluid.Executor(place) + [inference_program, feed_target_names, fetch_targets] = ( + fluid.io.load_inference_model( + dirname=path, + executor=exe, + model_filename="__model__", + params_filename="__variables__")) + after_save, = exe.run(inference_program, + feed={feed_target_names[0]: test_data}, + fetch_list=fetch_targets) + + self.assertTrue( + np.allclose(after_save, before_save.numpy()), + msg='Failed to save the inference quantized model.') + + def test_qat_acc(self): + def _build_static_lenet(main, startup, is_test=False, seed=1000): + with fluid.unique_name.guard(): + with fluid.program_guard(main, 
startup): + main.random_seed = seed + startup.random_seed = seed + img = fluid.layers.data( + name='image', shape=[1, 28, 28], dtype='float32') + label = fluid.layers.data( + name='label', shape=[1], dtype='int64') + prediction = StaticLenet(img) + if not is_test: + loss = fluid.layers.cross_entropy( + input=prediction, label=label) + avg_loss = fluid.layers.mean(loss) + else: + avg_loss = prediction + return img, label, avg_loss + + reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=32, drop_last=True) + weight_quantize_type = 'channel_wise_abs_max' + activation_quant_type = 'moving_average_abs_max' + param_init_map = {} + seed = 1000 + lr = 0.1 + + # imperative train + _logger.info( + "--------------------------dynamic graph qat--------------------------" + ) + imperative_qat = ImperativeQuantAware( + weight_quantize_type=weight_quantize_type, + activation_quantize_type=activation_quant_type) + + with fluid.dygraph.guard(): + np.random.seed(seed) + fluid.default_main_program().random_seed = seed + fluid.default_startup_program().random_seed = seed + lenet = ImperativeLenet() + fixed_state = {} + for name, param in lenet.named_parameters(): + p_shape = param.numpy().shape + p_value = param.numpy() + if name.endswith("bias"): + value = np.zeros_like(p_value).astype('float32') + else: + value = np.random.normal( + loc=0.0, scale=0.01, size=np.product(p_shape)).reshape( + p_shape).astype('float32') + fixed_state[name] = value + param_init_map[param.name] = value + lenet.set_dict(fixed_state) + + imperative_qat.quantize(lenet) + adam = AdamOptimizer( + learning_rate=lr, parameter_list=lenet.parameters()) + dynamic_loss_rec = [] + lenet.train() + for batch_id, data in enumerate(reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = fluid.dygraph.to_variable(x_data) + label = fluid.dygraph.to_variable(y_data) + + out = lenet(img) + 
loss = fluid.layers.cross_entropy(out, label) + avg_loss = fluid.layers.mean(loss) + avg_loss.backward() + adam.minimize(avg_loss) + lenet.clear_gradients() + dynamic_loss_rec.append(avg_loss.numpy()[0]) + if batch_id % 100 == 0: + _logger.info('{}: {}'.format('loss', avg_loss.numpy())) + + paddle.jit.save( + layer=lenet, + model_path="./dynamic_mnist", + input_spec=[ + paddle.static.InputSpec( + shape=[None, 1, 28, 28], dtype='float32') + ]) + + # static graph train + _logger.info( + "--------------------------static graph qat--------------------------" + ) + static_loss_rec = [] + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + exe = fluid.Executor(place) + + main = fluid.Program() + infer = fluid.Program() + startup = fluid.Program() + static_img, static_label, static_loss = _build_static_lenet( + main, startup, False, seed) + infer_img, _, infer_pre = _build_static_lenet(infer, startup, True, + seed) + with fluid.unique_name.guard(): + with fluid.program_guard(main, startup): + opt = AdamOptimizer(learning_rate=lr) + opt.minimize(static_loss) + + scope = core.Scope() + with fluid.scope_guard(scope): + exe.run(startup) + for param in main.all_parameters(): + param_tensor = scope.var(param.name).get_tensor() + param_tensor.set(param_init_map[param.name], place) + + main_graph = IrGraph(core.Graph(main.desc), for_test=False) + infer_graph = IrGraph(core.Graph(infer.desc), for_test=True) + transform_pass = QuantizationTransformPass( + scope=scope, + place=place, + activation_quantize_type=activation_quant_type, + weight_quantize_type=weight_quantize_type, + quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']) + transform_pass.apply(main_graph) + transform_pass.apply(infer_graph) + build_strategy = fluid.BuildStrategy() + build_strategy.fuse_all_reduce_ops = False + binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel( + loss_name=static_loss.name, build_strategy=build_strategy) + + feeder = 
fluid.DataFeeder( + feed_list=[static_img, static_label], place=place) + with fluid.scope_guard(scope): + for batch_id, data in enumerate(reader()): + loss_v, = exe.run(binary, + feed=feeder.feed(data), + fetch_list=[static_loss]) + static_loss_rec.append(loss_v[0]) + if batch_id % 100 == 0: + _logger.info('{}: {}'.format('loss', loss_v)) + + save_program = infer_graph.to_program() + with fluid.scope_guard(scope): + fluid.io.save_inference_model("./static_mnist", [infer_img.name], + [infer_pre], exe, save_program) + rtol = 1e-05 + atol = 1e-08 + for i, (loss_d, + loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)): + diff = np.abs(loss_d - loss_s) + if diff > (atol + rtol * np.abs(loss_s)): + _logger.info( + "diff({}) at {}, dynamic loss = {}, static loss = {}". + format(diff, i, loss_d, loss_s)) + break + + self.assertTrue( + np.allclose( + np.array(dynamic_loss_rec), + np.array(static_loss_rec), + rtol=rtol, + atol=atol, + equal_nan=True), + msg='Failed to do the imperative qat.') + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py index 3ac1590b8aa6eaefbccd3907b314fb438386ffc6..3ea1c84f976a85850a2496218a248eb09ae20022 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py @@ -25,6 +25,8 @@ import paddle.fluid as fluid from paddle.dataset.common import download from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization +paddle.enable_static() + random.seed(0) np.random.seed(0) diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py index 
864631ec27829e29aabb1a00a858cd0ce85e8389..18389d9433b9a5dd81e2f7e1725ce484a26d7a4a 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py @@ -26,6 +26,8 @@ import paddle.fluid as fluid from paddle.dataset.common import download from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization +paddle.enable_static() + random.seed(0) np.random.seed(0) diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py index a6c19b5e45a41ba8f30648befb44de5ad30d6fe8..12b5a2458a4da055710d4af08b97cdfff052ed8d 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py @@ -15,6 +15,9 @@ import sys import unittest from test_post_training_quantization_mobilenetv1 import TestPostTrainingQuantization +import paddle + +paddle.enable_static() class TestPostTrainingForResnet50(TestPostTrainingQuantization): diff --git a/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py index 7b51973131496172d61b7ad968417eb41fa11c08..7f9209c8b3ff8c20040bdd80bb4302f39c621546 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py @@ -18,6 +18,9 @@ import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass +import paddle + +paddle.enable_static() class TestQuant2Int8MkldnnPass(unittest.TestCase): diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_mkldnn_pass.py 
b/python/paddle/fluid/contrib/slim/tests/test_quantization_mkldnn_pass.py index 3acbd8974195854da014990b13f3b1ba38e4c2c1..7ee0fd1d3e28f206b3c3a33fc0a2ceb25b0b4ab3 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantization_mkldnn_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_mkldnn_pass.py @@ -25,6 +25,7 @@ from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass from paddle.fluid.contrib.slim.quantization import QuantInt8MkldnnPass from paddle.fluid import core +paddle.enable_static() os.environ["CPU_NUM"] = "1" diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py index dc9b83e44355342dde132f498354394fc9390af1..768a9ba7cfc3e769fe66c1deaffb1e60fc1a5689 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py @@ -27,6 +27,8 @@ from paddle.fluid.contrib.slim.quantization import TransformForMobilePass from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass from paddle.fluid import core +paddle.enable_static() + os.environ["CUDA_VISIBLE_DEVICES"] = "0" os.environ["CPU_NUM"] = "1" diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py index 9e8c5027ebbf9b365b2a8f7e80f56fb2d202fe97..b03281546a59b4118a5a32b131ea7f66b208e6f0 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py @@ -27,6 +27,8 @@ from paddle.fluid.contrib.slim.quantization import OutScaleForInferencePass from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass from paddle.fluid import core +paddle.enable_static() + os.environ["CUDA_VISIBLE_DEVICES"] = "0" os.environ["CPU_NUM"] = "1" diff --git 
a/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py b/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py index 32292c8a47b50bc5e7eb2d7833823e586eea8909..f03d0faa3981b5767eef1c5fde0f583f08686c13 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py +++ b/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py @@ -29,6 +29,8 @@ from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass from paddle.fluid import core from paddle.fluid.layer_helper import LayerHelper +paddle.enable_static() + os.environ["CUDA_VISIBLE_DEVICES"] = "0" os.environ["CPU_NUM"] = "1" diff --git a/python/paddle/fluid/contrib/slim/tests/test_weight_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_weight_quantization_mobilenetv1.py index ff22b1b61e68f9c7d364b34a3b6b185a766f8c64..1e8fa51d635e32d5d0169cf23ca0681051028ae9 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_weight_quantization_mobilenetv1.py +++ b/python/paddle/fluid/contrib/slim/tests/test_weight_quantization_mobilenetv1.py @@ -17,6 +17,9 @@ import os import time from paddle.dataset.common import download, DATA_HOME from paddle.fluid.contrib.slim.quantization import WeightQuantization +import paddle + +paddle.enable_static() class TestWeightQuantization(unittest.TestCase): diff --git a/python/paddle/fluid/contrib/tests/test_correlation.py b/python/paddle/fluid/contrib/tests/test_correlation.py index 7fcef4dbcd1efd3655b6339ed5ec880d8cd33fc0..50b091415a52a2b2c09907e45435361cbc79795c 100644 --- a/python/paddle/fluid/contrib/tests/test_correlation.py +++ b/python/paddle/fluid/contrib/tests/test_correlation.py @@ -16,6 +16,9 @@ import unittest import numpy as np import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable +import paddle + +paddle.enable_static() def corr(x_1, diff --git a/python/paddle/fluid/contrib/tests/test_fp16_utils.py 
b/python/paddle/fluid/contrib/tests/test_fp16_utils.py index e286bb0150e996de156eb2ab6d594b1e9c6dfe8d..0b51f2dcc869ea073eb05c908cb30963eb5c2033 100644 --- a/python/paddle/fluid/contrib/tests/test_fp16_utils.py +++ b/python/paddle/fluid/contrib/tests/test_fp16_utils.py @@ -16,6 +16,9 @@ import unittest import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.contrib.mixed_precision import fp16_utils +import paddle + +paddle.enable_static() class AMPTest(unittest.TestCase): diff --git a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py index 5fb1dba40a3c69bd3419640a404c580c8375f215..1bf1a234834670d680e3f13a0206b17d216db8fd 100644 --- a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py +++ b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py @@ -25,6 +25,8 @@ import os import copy import numpy as np +paddle.enable_static() + def resnet_cifar10(input, depth=32): def conv_bn_layer(input, diff --git a/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py b/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py index 77fdf0087b93c3ad44a2492de68f8f57ce243ef3..342be7db3ed30d9b7d1af9133d289b933fb23c45 100644 --- a/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py +++ b/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py @@ -20,6 +20,9 @@ import paddle import paddle.fluid as fluid from paddle.fluid.contrib.quantize.quantize_transpiler import _original_var_name from paddle.fluid.contrib.quantize.quantize_transpiler import QuantizeTranspiler +import paddle + +paddle.enable_static() def linear_fc(num): diff --git a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py index a5f08ca969ac43f47899395aeb588ddaf2f1e394..906d83fff4fd61390a68133170cb1c43f6b74251 100644 --- a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py 
+++ b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py @@ -21,6 +21,8 @@ import paddle import paddle.fluid as fluid import contextlib +paddle.enable_static() + def get_places(): places = [fluid.CPUPlace()] diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py index 5218c0aac957422a665513b5eb2a0391c5c7a01f..3b3b9bbe96f2929257d99b924af9770605b287f4 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py @@ -370,6 +370,7 @@ class StaticLayer(object): Returns: Traced ConcreteProgram and executable translated Layer. """ + # 1. unify args/kwargs and replace Tensor with InputSpec if len(args) != len(self._function_spec.args_name): args, kwargs = self._function_spec.unified_args_and_kwargs(args, @@ -522,6 +523,19 @@ def _switch_declarative_mode_guard_(is_declarative=True): _in_declarative_mode_ = original_val +def _verify_init_in_dynamic_mode(class_instance): + """ + Verifies the instance is initialized in dynamic mode. + """ + if isinstance(class_instance, layers.Layer): + if not class_instance._init_in_dynamic_mode: + raise RuntimeError( + " `paddle.jit.to_static` is only available in dynamic mode. Please call `paddle.disable_static()` before " + "initializing your Layer class `{}` . Because parameters of Layer class should be initialized firstly " + "in dynamic mode while applying transformation.".format( + class_instance)) + + class ConcreteProgram(object): __slots__ = [ @@ -554,6 +568,9 @@ class ConcreteProgram(object): func_spec(FunctionSpec): A FunctionSpec instance for decorated function. input_spec(list[InputSpec]): """ + # verify the instance is initialized in imperative mode. + _verify_init_in_dynamic_mode(class_instance) + # Transforms dygraph function into static function and caches it. 
dygraph_function = func_spec.dygraph_function static_func = convert_to_static(dygraph_function) diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index 7075024369f328b59ecac014b0960fc26f447ff2..9c79deaab73ff7bde9a2414ceb67ad0d04103498 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -91,6 +91,7 @@ class Layer(core.Layer): self._helper = LayerObjectHelper(self._full_name) self._built = False self._dtype = dtype + self._init_in_dynamic_mode = framework.in_dygraph_mode() self._parameters = collections.OrderedDict() # Buffers the variable (not parameter) created in layer diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py index 216478479a7cfdcffac5f21855d0974309842c89..e348c67ae0461674358fa6d34ee8a73648862a6d 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py @@ -170,22 +170,40 @@ class CompileTimeStrategy(object): return trainer.mode == DistributedMode.ASYNC def get_role_id(self): - return self.role_maker.role_id() + try: + return self.role_maker._role_id() + except Exception: + return self.role_maker.role_id() def get_trainers(self): - return self.role_maker.worker_num() + try: + return self.role_maker._worker_num() + except Exception: + return self.role_maker.worker_num() def get_ps_endpoint(self): - return self.role_maker.get_pserver_endpoints()[self.get_role_id()] + try: + return self.role_maker._get_pserver_endpoints()[self.get_role_id()] + except Exception: + return self.role_maker.get_pserver_endpoints()[self.get_role_id()] def get_ps_endpoints(self): - return self.role_maker.get_pserver_endpoints() + try: + return self.role_maker._get_pserver_endpoints() + except Exception: + return self.role_maker.get_pserver_endpoints() def get_heter_worker_endpoints(self): - return 
self.role_maker._get_heter_worker_endpoints() + try: + return self.role_maker._get_heter_worker_endpoints() + except Exception: + return self.role_maker.get_heter_worker_endpoints() def get_heter_worker_endpoint(self): - return self.role_maker._get_heter_worker_endpoint() + try: + return self.role_maker._get_heter_worker_endpoint() + except Exception: + return self.role_maker.get_heter_worker_endpoint() def get_origin_programs(self): return self.origin_main_program, self.origin_startup_program diff --git a/python/paddle/fluid/install_check.py b/python/paddle/fluid/install_check.py index ef469377acfbc0c2c521de61f8eacc0f7c9f0854..51fa1677b868e59f6c8c027d849d0b6bc45aef0f 100644 --- a/python/paddle/fluid/install_check.py +++ b/python/paddle/fluid/install_check.py @@ -62,6 +62,8 @@ def run_check(): # Your Paddle Fluid works well on MUTIPLE GPU or CPU. # Your Paddle Fluid is installed successfully! Let's start deep Learning with Paddle Fluid now """ + paddle.enable_static() + print("Running Verify Fluid Program ... ") device_list = [] @@ -157,3 +159,5 @@ def run_check(): print( "Your Paddle Fluid is installed successfully ONLY for SINGLE GPU or CPU! 
" "\n Let's start deep Learning with Paddle Fluid now") + + paddle.disable_static() diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 6e5f7fd035acfeab975f63b0794829d57f9bb239..fe5b683bdeaa3b997cc506ad99f1a74010808f62 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -26,13 +26,13 @@ from functools import reduce import numpy as np import paddle -import paddle.reader -from paddle.reader import * from paddle.fluid import layers from paddle.fluid.executor import Executor, global_scope from paddle.fluid.evaluator import Evaluator from paddle.fluid.framework import Program, Parameter, default_main_program, default_startup_program, Variable, \ program_guard, dygraph_not_support +from paddle.reader import cache, map_readers, buffered, compose, chain, shuffle, \ + ComposeNotAligned, firstn, xmap_readers, multiprocess_reader from .wrapped_decorator import signature_safe_contextmanager from paddle.fluid.compiler import CompiledProgram from paddle.fluid.log_helper import get_logger @@ -62,7 +62,7 @@ __all__ = [ 'set_program_state', 'get_program_parameter', 'get_program_persistable_vars', -] + reader.__all__ + paddle.reader.__all__ +] + reader.__all__ _logger = get_logger( __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 4a750f301a02c1a8f90e4103103c174baf32ead9..3e7d10f8d1a02126c3d4bec490fcd2f3194123ee 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -3167,7 +3167,7 @@ def instance_norm(input, param_shape = [channel_num] - if param_attr and bias_attr: + if param_attr != False and bias_attr != False: # create parameter scale = helper.create_parameter( attr=helper.param_attr, @@ -3190,7 +3190,7 @@ def instance_norm(input, instance_norm_out = helper.create_variable_for_type_inference(dtype) inputs = {"X": input} - if param_attr and bias_attr: + if param_attr != False and bias_attr != False: 
inputs["Scale"] = scale inputs["Bias"] = bias diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 89acfc6075be0b625da04d187cd46dd47ac699c9..2fba578ec077f2a74388d433bf3ab5b3098e81ad 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -680,8 +680,10 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None): if not isinstance(value, Variable): if dtype in ['int64', 'int32']: attrs['str_value'] = str(int(value)) + attrs['value'] = int(value) else: attrs['str_value'] = str(float(value)) + attrs['value'] = float(value) if in_dygraph_mode(): shape = utils.convert_shape_to_list(shape) @@ -1422,7 +1424,7 @@ def linspace(start, stop, num, dtype=None, name=None): stop(int|float|Tensor): The input :attr:`stop` is start variable of range. It is a scalar, \ or a Tensor of shape [1] with input data type int32, int64, float32 or float64. num(int|Tensor): The input :attr:`num` is given num of the sequence. It is an int scalar, \ - or a Tensor of shape [1] with data type int32 or int64. + or a Tensor of shape [1] with data type int32. dtype(np.dtype|str, optional): The data type of output tensor, it could be int32, int64, float32 and float64. Default: if None, the data type is float32. name(str, optional): Normally there is no need for user to set this property. 
@@ -1451,11 +1453,14 @@ def linspace(start, stop, num, dtype=None, name=None): if not isinstance(dtype, core.VarDesc.VarType): dtype = convert_np_dtype_to_dtype_(dtype) if not isinstance(start, Variable): - tensor_start = fill_constant([1], dtype, start) + with device_guard("cpu"): + tensor_start = fill_constant([1], dtype, start) if not isinstance(stop, Variable): - tensor_stop = fill_constant([1], dtype, stop) + with device_guard("cpu"): + tensor_stop = fill_constant([1], dtype, stop) if not isinstance(num, Variable): - tensor_num = fill_constant([1], 'int32', num) + with device_guard("cpu"): + tensor_num = fill_constant([1], 'int32', num) if in_dygraph_mode(): return core.ops.linspace(tensor_start, tensor_stop, tensor_num, 'dtype', dtype) diff --git a/python/paddle/fluid/tests/book/CMakeLists.txt b/python/paddle/fluid/tests/book/CMakeLists.txt index 673c965b662a022739f8d489c331f4de9455a926..96321aae566d1f910042f4e348d0be8b3e88c341 100644 --- a/python/paddle/fluid/tests/book/CMakeLists.txt +++ b/python/paddle/fluid/tests/book/CMakeLists.txt @@ -4,4 +4,5 @@ string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") # default test foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) + set_tests_properties(${src} PROPERTIES FIXTURES_SETUP ${src}_infer_model) endforeach() diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py index a7d5a0305993a637ba2ce7d59f91a0c03b700a69..9a2cc4ab1a1b9071825f92d7ed50d9db6f13a385 100644 --- a/python/paddle/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/test_fit_a_line.py @@ -23,6 +23,8 @@ import math import sys import os +paddle.enable_static() + def train(use_cuda, save_dirname, is_local): x = fluid.layers.data(name='x', shape=[13], dtype='float32') diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py index 
22b74f2922887eb972806eac15904795b5a48ca7..7c2d5c693a9fdcea8f6249eaa8f418f87da1790e 100644 --- a/python/paddle/fluid/tests/book/test_image_classification.py +++ b/python/paddle/fluid/tests/book/test_image_classification.py @@ -24,6 +24,8 @@ import unittest import os import numpy as np +paddle.enable_static() + def resnet_cifar10(input, depth=32): def conv_bn_layer(input, diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index ef14600e6446505228b5cd70c9d9288cdae44a39..568d7518a1e0b161fe6b46c6a845c10681234c4b 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -25,6 +25,8 @@ import paddle import paddle.dataset.conll05 as conll05 import paddle.fluid as fluid +paddle.enable_static() + word_dict, verb_dict, label_dict = conll05.get_dict() word_dict_len = len(word_dict) label_dict_len = len(label_dict) diff --git a/python/paddle/fluid/tests/book/test_machine_translation.py b/python/paddle/fluid/tests/book/test_machine_translation.py index 5e241aaa32727686b84a0354a11d5a92f9576a90..a0056ba3bab06bb90ddc8b0ffe7587cf1a1d59b1 100644 --- a/python/paddle/fluid/tests/book/test_machine_translation.py +++ b/python/paddle/fluid/tests/book/test_machine_translation.py @@ -24,6 +24,8 @@ from paddle.fluid.executor import Executor import unittest import os +paddle.enable_static() + dict_size = 30000 source_dict_dim = target_dict_dim = dict_size hidden_dim = 32 diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py index 4fbb146752e73358a02a19fd5109e84ad00ecbae..71c57b851600d097ca4c6f13b6ba2050af9c825b 100644 --- a/python/paddle/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/fluid/tests/book/test_recognize_digits.py @@ -26,6 +26,8 @@ import paddle import paddle.fluid as fluid from paddle.fluid.layers.device import get_places 
+paddle.enable_static() + BATCH_SIZE = 64 diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index 433b5498de718d46395676b70b0abd0ab9240336..c2ab249f5713d419b95ff848f061568f3d058457 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -26,6 +26,8 @@ import paddle.fluid.nets as nets from paddle.fluid.executor import Executor from paddle.fluid.optimizer import SGDOptimizer +paddle.enable_static() + IS_SPARSE = True USE_GPU = False BATCH_SIZE = 256 diff --git a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py b/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py index 0d65513c122d3ea9effcc391f6049b9c1b462546..3791e386ecfdefde15207926a6b43f0a14d4060e 100644 --- a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py +++ b/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py @@ -25,6 +25,9 @@ import math import sys import unittest from paddle.fluid.executor import Executor +import paddle + +paddle.enable_static() dict_size = 30000 source_dict_dim = target_dict_dim = dict_size diff --git a/python/paddle/fluid/tests/book/test_word2vec.py b/python/paddle/fluid/tests/book/test_word2vec.py index c919584554b1613b6b3b125cf7beaddda931c47f..aae4de70aca19fbbfb9aa303bf2a9049b05854f1 100644 --- a/python/paddle/fluid/tests/book/test_word2vec.py +++ b/python/paddle/fluid/tests/book/test_word2vec.py @@ -23,6 +23,8 @@ import numpy as np import math import sys +paddle.enable_static() + def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True): PASS_NUM = 100 diff --git a/python/paddle/fluid/tests/custom_op/test_custom_op.py b/python/paddle/fluid/tests/custom_op/test_custom_op.py index 0d02da53d66d3a0ad3160f130153f013db92e1c9..c9f7d0b7c966ad1f99160de4b879f09b013bc513 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_op.py +++ 
b/python/paddle/fluid/tests/custom_op/test_custom_op.py @@ -21,6 +21,8 @@ import contextlib import paddle import paddle.fluid as fluid +paddle.enable_static() + file_dir = os.path.dirname(os.path.abspath(__file__)) fluid.load_op_library(os.path.join(file_dir, 'librelu2_op.so')) diff --git a/python/paddle/fluid/tests/test_beam_search_decoder.py b/python/paddle/fluid/tests/test_beam_search_decoder.py index fe8a9daa3bea4b99bb42edc78538685c5ce11fe3..69f3ff46b3ac9c50f588a64182d02783cbc93aed 100644 --- a/python/paddle/fluid/tests/test_beam_search_decoder.py +++ b/python/paddle/fluid/tests/test_beam_search_decoder.py @@ -29,6 +29,8 @@ from paddle.fluid.contrib.decoder.beam_search_decoder import * import unittest import os +paddle.enable_static() + dict_size = 30000 source_dict_dim = target_dict_dim = dict_size src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size) diff --git a/python/paddle/fluid/tests/test_data_feeder.py b/python/paddle/fluid/tests/test_data_feeder.py index 16a33fd3ab3c794494687ba39278e327560686ec..d50c57e670b070238ba67f0a68930841159bc9ed 100644 --- a/python/paddle/fluid/tests/test_data_feeder.py +++ b/python/paddle/fluid/tests/test_data_feeder.py @@ -16,6 +16,9 @@ from __future__ import print_function import paddle.fluid as fluid import unittest +import paddle + +paddle.enable_static() class TestDataFeeder(unittest.TestCase): diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index 425c4e3c7e38cff2f892eff28428082b57b3727d..05b9067ec400f8be4da49bad31423767b2e876ea 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -24,6 +24,9 @@ import numpy as np from unittests.test_imperative_base import new_program_scope from paddle.fluid.dygraph import base from paddle.fluid import core +import paddle + +paddle.enable_static() class LayerTest(unittest.TestCase): diff --git a/python/paddle/fluid/tests/test_error_clip.py 
b/python/paddle/fluid/tests/test_error_clip.py index 3c977afc7c813908fbe2dfb7445d9ca183cf2231..7859fca15f643fa00384ae4387ca07074b2ed868 100644 --- a/python/paddle/fluid/tests/test_error_clip.py +++ b/python/paddle/fluid/tests/test_error_clip.py @@ -22,6 +22,7 @@ BATCH_SIZE = 128 CLIP_MAX = 2e-6 CLIP_MIN = -1e-6 +paddle.enable_static() prog = fluid.framework.Program() with fluid.program_guard(main_program=prog): diff --git a/python/paddle/fluid/tests/test_if_else_op.py b/python/paddle/fluid/tests/test_if_else_op.py index 1c992b9d8cd38a3851f99b1fc78ef5639c7f6eef..b7792e5ce27a55c9862d1e9a751fc6599d83dc7e 100644 --- a/python/paddle/fluid/tests/test_if_else_op.py +++ b/python/paddle/fluid/tests/test_if_else_op.py @@ -28,6 +28,8 @@ from paddle.fluid.layers.control_flow import ConditionalBlock import unittest import numpy as np +paddle.enable_static() + class TestMNISTIfElseOp(unittest.TestCase): # FIXME: https://github.com/PaddlePaddle/Paddle/issues/12245#issuecomment-406462379 diff --git a/python/paddle/fluid/tests/test_python_operator_overriding.py b/python/paddle/fluid/tests/test_python_operator_overriding.py index 5f92c437ec726f510d9194d23f1a01a5478827d6..fd9dc961988df750144e089a971938148c21940a 100644 --- a/python/paddle/fluid/tests/test_python_operator_overriding.py +++ b/python/paddle/fluid/tests/test_python_operator_overriding.py @@ -21,6 +21,9 @@ import numpy as np import paddle.fluid.layers as layers import paddle.fluid.framework as framework import paddle.fluid as fluid +import paddle + +paddle.enable_static() class TestPythonOperatorOverride(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/c_comm_init_op.py b/python/paddle/fluid/tests/unittests/c_comm_init_op.py index db77477cca62d10ff6692013a64a8d2ce5a38ec1..ed6a75230c60d194783cffec117b8d1d2bb9cda0 100644 --- a/python/paddle/fluid/tests/unittests/c_comm_init_op.py +++ b/python/paddle/fluid/tests/unittests/c_comm_init_op.py @@ -19,6 +19,9 @@ import os import paddle.fluid.core as core 
import paddle.fluid as fluid from paddle.distributed.fleet.base.private_helper_function import wait_server_ready +import paddle + +paddle.enable_static() class TestCCommInitOp(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py index 8e75b3c3438c0afb24871838b66e1285da78c592..c682c795019caff14e17f808ceac3fa5a5162562 100644 --- a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py +++ b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py @@ -28,6 +28,8 @@ import paddle import paddle.fluid as fluid import paddle.compat as cpt +paddle.enable_static() + np.random.seed(0) diff --git a/python/paddle/fluid/tests/unittests/collective_allgather_api.py b/python/paddle/fluid/tests/unittests/collective_allgather_api.py index bdf4ca07ae9b57e083137945f58aaabb571e20ec..63d7f52c11a8ad3ad041ad82e30b8124a899fd61 100644 --- a/python/paddle/fluid/tests/unittests/collective_allgather_api.py +++ b/python/paddle/fluid/tests/unittests/collective_allgather_api.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveAllgatherAPI(TestCollectiveAPIRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_allgather_op.py b/python/paddle/fluid/tests/unittests/collective_allgather_op.py index 349996547687657453497956007d2431b11ea45f..f77a97aa915f6fd63a4d5ed0d95752c6ca022eb1 100644 --- a/python/paddle/fluid/tests/unittests/collective_allgather_op.py +++ b/python/paddle/fluid/tests/unittests/collective_allgather_op.py @@ -34,6 +34,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveAllGather(TestCollectiveRunnerBase): def __init__(self): diff --git 
a/python/paddle/fluid/tests/unittests/collective_allreduce_api.py b/python/paddle/fluid/tests/unittests/collective_allreduce_api.py index aea429ae5e3e622ee1b584796ef87edc1d4c8d72..67242b274fcb154273127ef020fc14896af6ad8e 100644 --- a/python/paddle/fluid/tests/unittests/collective_allreduce_api.py +++ b/python/paddle/fluid/tests/unittests/collective_allreduce_api.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveAllreduceAPI(TestCollectiveAPIRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_allreduce_op.py b/python/paddle/fluid/tests/unittests/collective_allreduce_op.py index 9aef8879cab15ade735195ab173d9386764fb690..eef59ee3dde92c6ceaecbe15e997fb958b1fab19 100644 --- a/python/paddle/fluid/tests/unittests/collective_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/collective_allreduce_op.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveAllreduce(TestCollectiveRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_barrier_api.py b/python/paddle/fluid/tests/unittests/collective_barrier_api.py index 09b3c27126d926ac7175f6045f385adf4d530b44..dbcc70d540bd6acff89342f2a44a751757c39494 100644 --- a/python/paddle/fluid/tests/unittests/collective_barrier_api.py +++ b/python/paddle/fluid/tests/unittests/collective_barrier_api.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveBarrierAPI(TestCollectiveAPIRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_broadcast_api.py 
b/python/paddle/fluid/tests/unittests/collective_broadcast_api.py index a879a027b50688234c8efb8468e6eac660d8a145..08a3d948906a8bb40299bd0aed645b8425f1e7ae 100644 --- a/python/paddle/fluid/tests/unittests/collective_broadcast_api.py +++ b/python/paddle/fluid/tests/unittests/collective_broadcast_api.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveBroadcastAPI(TestCollectiveAPIRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_broadcast_op.py b/python/paddle/fluid/tests/unittests/collective_broadcast_op.py index 18f0485f923e4f72f76be3b0b34ebeb1d89c926c..127f48be61851a8264b2a6d4db57fcbd984f1d53 100644 --- a/python/paddle/fluid/tests/unittests/collective_broadcast_op.py +++ b/python/paddle/fluid/tests/unittests/collective_broadcast_op.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveBroadcast(TestCollectiveRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_reduce_api.py b/python/paddle/fluid/tests/unittests/collective_reduce_api.py index 3e89b1cb3ee8550d3dbb4e1a055f092e57126c7f..41e31146a22297fc9328ebc804638c729bd423f0 100644 --- a/python/paddle/fluid/tests/unittests/collective_reduce_api.py +++ b/python/paddle/fluid/tests/unittests/collective_reduce_api.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveReduceAPI(TestCollectiveAPIRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_reduce_op.py b/python/paddle/fluid/tests/unittests/collective_reduce_op.py index 
da61284344b58d44c5ba02af5ed42c553f857c94..0448c66d1323405abe3fb468583073caf260bb6e 100644 --- a/python/paddle/fluid/tests/unittests/collective_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/collective_reduce_op.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveReduce(TestCollectiveRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py b/python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py index 7e6904286234364e7ae84a5c21b9826885f99dc4..7a9e0b148d55667622470a4ad117991fc7ad4c0a 100644 --- a/python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py +++ b/python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveReduce(TestCollectiveRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_reducescatter.py b/python/paddle/fluid/tests/unittests/collective_reducescatter.py index 2f14277ae1e549b0b8dc075694752c18b395d230..8b989c73d4deb69e85b821ef0b2091ef0af7a0c4 100644 --- a/python/paddle/fluid/tests/unittests/collective_reducescatter.py +++ b/python/paddle/fluid/tests/unittests/collective_reducescatter.py @@ -34,6 +34,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveReduceScatter(TestCollectiveRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py b/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py index 
3e286d7f43db6e9cd290b88a0be5a4ae1215737a..91712e2b50f230b68743a4fcd3a7cba767c1f304 100644 --- a/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py +++ b/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveReduceScatter(TestCollectiveRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_scatter_api.py b/python/paddle/fluid/tests/unittests/collective_scatter_api.py index f68929ad3b36d5a0bf145a93b30172f0422dc9f9..ca36c8c83a5e26c74de88a701cc9421ddf0d81d2 100644 --- a/python/paddle/fluid/tests/unittests/collective_scatter_api.py +++ b/python/paddle/fluid/tests/unittests/collective_scatter_api.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveScatterAPI(TestCollectiveAPIRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_scatter_op.py b/python/paddle/fluid/tests/unittests/collective_scatter_op.py index efe5e17bcce1ecddf859edbb3543876fe5fc9f89..7afa4aec63990372d69f1d16c133e6698aef4dc9 100644 --- a/python/paddle/fluid/tests/unittests/collective_scatter_op.py +++ b/python/paddle/fluid/tests/unittests/collective_scatter_op.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveScatter(TestCollectiveRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py index 88a3cd14c43334f2abed9c8b435b64d47a65dc85..de52072d4a8388aaf7d90428a2704e984360b7ba 100644 --- 
a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py @@ -30,6 +30,8 @@ import signal from functools import reduce from test_dist_base import TestDistRunnerBase, runtime_main +paddle.enable_static() + DTYPE = "float32" paddle.dataset.mnist.fetch() diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py index 4c90ffdf4e26e3ba0f72d9c3f424125b8aa08465..5721445c414cf94379f44cab6bd01cca511938bf 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py @@ -30,6 +30,8 @@ import ctr_dataset_reader from test_dist_fleet_base import runtime_main, FleetDistRunnerBase from paddle.distributed.fleet.base.util_factory import fleet_util +paddle.enable_static() + # Fix seed for test fluid.default_startup_program().random_seed = 1 fluid.default_main_program().random_seed = 1 diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py index f82ee4a613b12a7d011c6dd90c9b7ca94501e014..470fb98d7991cf0cbffa47f6d5129b045f59ae97 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py @@ -31,6 +31,8 @@ from test_dist_fleet_heter_base import runtime_main, FleetDistHeterRunnerBase from dist_fleet_ctr import TestDistCTR2x2, fake_ctr_reader from paddle.distributed.fleet.base.util_factory import fleet_util +paddle.enable_static() + # Fix seed for test fluid.default_startup_program().random_seed = 1 fluid.default_main_program().random_seed = 1 diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py index 0114b0fee207d11129fc0a552a36c763bf975c9a..ff84848873924c52b0f7e8f5bc71ec2a266b73f1 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py +++ 
b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py @@ -34,6 +34,8 @@ from functools import reduce from test_dist_fleet_base import runtime_main, FleetDistRunnerBase from paddle.distributed.fleet.base.util_factory import fleet_util +paddle.enable_static() + DTYPE = "int64" DATA_URL = 'http://paddle-dist-ce-data.bj.bcebos.com/simnet.train.1000' DATA_MD5 = '24e49366eb0611c552667989de2f57d5' diff --git a/python/paddle/fluid/tests/unittests/dist_mnist.py b/python/paddle/fluid/tests/unittests/dist_mnist.py index 20e89bd46c67b557cd6ab1ad0fd531a6b22f947d..f63139464e7552ed82c74171717b1b32f33caa09 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist.py @@ -31,6 +31,8 @@ from functools import reduce from test_dist_base import TestDistRunnerBase, runtime_main from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy +paddle.enable_static() + DTYPE = "float32" paddle.dataset.mnist.fetch() diff --git a/python/paddle/fluid/tests/unittests/dist_se_resnext.py b/python/paddle/fluid/tests/unittests/dist_se_resnext.py index a2fd61e2387ee362946c15788d76cba4dec46055..5ba40c7c8388c45810852946f5e790bc1213767d 100644 --- a/python/paddle/fluid/tests/unittests/dist_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/dist_se_resnext.py @@ -30,6 +30,8 @@ import sys import signal from test_dist_base import TestDistRunnerBase, runtime_main +paddle.enable_static() + # Fix seed for test fluid.default_startup_program().random_seed = 1 fluid.default_main_program().random_seed = 1 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py index 5582a65304d3e9bad2d4621e11f8a4f312189a9a..450ef7557bc1574c31a00d05154aead19083c1bc 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py @@ -358,5 
+358,24 @@ class TestDecorateModelDirectly(unittest.TestCase): self.assertListEqual(list(input_shape), [-1, 16, 10]) +class TestErrorWithInitFromStaticMode(unittest.TestCase): + def test_raise_error(self): + # disable imperative + paddle.enable_static() + + net = SimpleNet() + with self.assertRaisesRegexp(RuntimeError, + "only available in dynamic mode"): + net.forward.concrete_program + + with self.assertRaisesRegexp(RuntimeError, + "only available in dynamic mode"): + net.forward.inputs + + with self.assertRaisesRegexp(RuntimeError, + "only available in dynamic mode"): + net.forward.outputs + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py index 873d9ecb53549e9d6a3982ca4528e63526bd3a0d..b0ab55758ee7d9eeb5a9bd747934e6f7a1992f7b 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py @@ -21,6 +21,7 @@ import numpy as np import textwrap import unittest +import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.dygraph_to_static import ProgramTranslator from paddle.fluid.dygraph.jit import declarative @@ -279,5 +280,33 @@ class TestEnableDeclarative(unittest.TestCase): static_output.numpy(), dygraph_output.numpy(), atol=1e-4)) +class Net(fluid.dygraph.layers.Layer): + def __init__(self): + super(Net, self).__init__() + + def forward(self, x): + return x + 1 + + +class TestErrorWithInitFromStaticMode(unittest.TestCase): + def setUp(self): + self.program_translator = ProgramTranslator() + self.x = np.random.randn(10, 32).astype('float32') + + def test_raise_error(self): + # disable imperative + paddle.enable_static() + net = Net() + + self.program_translator.enable(True) + with self.assertRaisesRegexp(RuntimeError, + "only available in dynamic mode"): + 
self.program_translator.get_output(net.forward, self.x) + + with self.assertRaisesRegexp(RuntimeError, + "only available in dynamic mode"): + self.program_translator.get_program(net.forward, self.x) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py index 4453dff892fcaacd65ed5f1bdf81817db51c6fe1..6aa9156a0d4cb8e737f395d04521257ccb95559e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py @@ -17,12 +17,14 @@ import random import time import unittest +import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import ProgramTranslator from paddle.fluid.dygraph import to_variable from yolov3 import cfg, YOLOv3 +paddle.enable_static() random.seed(0) np.random.seed(0) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_fuse_pass.py new file mode 100644 index 0000000000000000000000000000000000000000..a62adcea3f94379aa81643e26a7df53ab92fe676 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_fuse_pass.py @@ -0,0 +1,54 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import PassVersionChecker + + +class FcFusePassTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 128, 768], dtype="float32") + data_y = fluid.data(name="y", shape=[-1, 128, 768], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=3072, + num_flatten_dims=2, + act="relu") + fc_out2 = fluid.layers.fc(input=fc_out1, + size=768, + num_flatten_dims=2) + + self.feeds = {"data": np.random.random((4, 128, 768)).astype("float32")} + self.fetch_list = [fc_out2] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + self.assertTrue(PassVersionChecker.IsCompatible('fc_fuse_pass')) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_gru_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_gru_fuse_pass.py new file mode 100644 index 0000000000000000000000000000000000000000..f7b43470d402f8671091365db04237797a012e78 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_gru_fuse_pass.py @@ -0,0 +1,86 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker + + +class FcGruFusePassTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + dict_dim, emb_dim = 128, 64 + data = fluid.data( + name='step_data', shape=[None], dtype='int64', lod_level=1) + emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) + hidden_dim = 512 + x = fluid.layers.fc(input=emb, size=hidden_dim * 3) + hidden = fluid.layers.dynamic_gru( + input=x, + size=hidden_dim, + bias_attr=True, + origin_mode=False, + is_reverse=True) + + batch = 16 + lod_tensor = fluid.LoDTensor() + lod_tensor.set(np.random.randint( + 0, dict_dim, size=[batch]).astype("int64"), + fluid.CPUPlace()) + lod_tensor.set_lod([[0, batch]]) + self.feeds = {"step_data": lod_tensor} + self.fetch_list = [hidden] + + def test_check_output(self): + use_gpu = False + self.check_output_with_option(use_gpu) + self.assertTrue(PassVersionChecker.IsCompatible('fc_gru_fuse_pass')) + + +class MulGruFusePassTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + dict_dim, emb_dim = 128, 64 + data = fluid.data( + name='step_data', shape=[None], dtype='int64', lod_level=1) + emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) + hidden_dim = 512 + x = fluid.layers.fc(input=emb, size=hidden_dim * 3, bias_attr=False) + hidden = fluid.layers.dynamic_gru( + input=x, 
+ size=hidden_dim, + bias_attr=True, + origin_mode=False, + is_reverse=True) + + batch = 16 + lod_tensor = fluid.LoDTensor() + lod_tensor.set(np.random.randint( + 0, dict_dim, size=[batch]).astype("int64"), + fluid.CPUPlace()) + lod_tensor.set_lod([[0, batch]]) + self.feeds = {"step_data": lod_tensor} + self.fetch_list = [hidden] + + def test_check_output(self): + use_gpu = False + self.check_output_with_option(use_gpu) + self.assertTrue(PassVersionChecker.IsCompatible('mul_gru_fuse_pass')) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_lstm_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_lstm_fuse_pass.py new file mode 100644 index 0000000000000000000000000000000000000000..fbb4373dae2c44148a5ac6b65c11a3d47adfd1a1 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_lstm_fuse_pass.py @@ -0,0 +1,52 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker + + +class MulLstmFusePassTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + dict_dim, emb_dim = 128, 64 + hidden_dim = 512 + + data = fluid.data( + name='data', shape=[1], dtype='int64', lod_level=1) + emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) + x = fluid.layers.fc(input=emb, size=hidden_dim * 4, bias_attr=False) + forward, cell = fluid.layers.dynamic_lstm( + input=x, size=hidden_dim * 4) + + batch = 16 + lod_tensor = fluid.LoDTensor() + lod_tensor.set(np.random.randint( + 0, dict_dim, size=[batch]).astype("int64"), + fluid.CPUPlace()) + lod_tensor.set_lod([[0, batch]]) + self.feeds = {"data": lod_tensor} + self.fetch_list = [forward, cell] + + def test_check_output(self): + use_gpu = False + self.check_output_with_option(use_gpu) + self.assertTrue(PassVersionChecker.IsCompatible('mul_lstm_fuse_pass')) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_seqconv_eltadd_relu_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_seqconv_eltadd_relu_fuse_pass.py new file mode 100644 index 0000000000000000000000000000000000000000..eadda5ba06a79f061bcf87f9b0bf2c0770c763f5 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_seqconv_eltadd_relu_fuse_pass.py @@ -0,0 +1,140 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import PassVersionChecker + + +class SeqconvEltaddReluFusePassTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[100, 100], dtype="float32") + param_attr = fluid.ParamAttr( + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + conv_out = fluid.layers.sequence_conv( + input=data, + num_filters=16, + filter_size=4, + padding_start=0, + act="relu", + bias_attr=param_attr) + + np_data = np.random.random((80, 100)).astype('float32') + x_lod_tensor = fluid.create_lod_tensor(np_data, [[10, 20, 30, 20]], + fluid.CPUPlace()) + self.feeds = {"data": x_lod_tensor} + self.fetch_list = [conv_out] + self.enable_mkldnn = True + + def test_check_output(self): + self.check_output() + self.assertTrue( + PassVersionChecker.IsCompatible('seqconv_eltadd_relu_fuse_pass')) + + +class SeqconvEltaddReluFusePassTestPaddingStartPositive(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[-1, 4], dtype="float32") + param_attr = fluid.ParamAttr( + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + conv_out = fluid.layers.sequence_conv( + input=data, + num_filters=16, + 
filter_size=3, + padding_start=2, + act="relu", + bias_attr=param_attr) + + np_data = np.array([[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3], + [4, 4, 4, 4], [5, 5, 5, 5], [6, 6, 6, 6], + [7, 7, 7, 7]]).astype('float32') + x_lod_tensor = fluid.create_lod_tensor(np_data, [[5, 2]], + fluid.CPUPlace()) + self.feeds = {"data": x_lod_tensor} + self.fetch_list = [conv_out] + self.enable_mkldnn = True + + def test_check_output(self): + self.check_output() + self.assertTrue( + PassVersionChecker.IsCompatible('seqconv_eltadd_relu_fuse_pass')) + + +class SeqconvEltaddReluFusePassTestPaddingStartNegative(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[100, 100], dtype="float32") + param_attr = fluid.ParamAttr( + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + conv_out = fluid.layers.sequence_conv( + input=data, + num_filters=16, + filter_size=4, + padding_start=-1, + act="relu", + bias_attr=param_attr) + + np_data = np.random.random((80, 100)).astype('float32') + x_lod_tensor = fluid.create_lod_tensor(np_data, [[10, 20, 30, 20]], + fluid.CPUPlace()) + self.feeds = {"data": x_lod_tensor} + self.fetch_list = [conv_out] + self.enable_mkldnn = True + + def test_check_output(self): + self.check_output() + self.assertTrue( + PassVersionChecker.IsCompatible('seqconv_eltadd_relu_fuse_pass')) + + +class SeqconvEltaddReluFusePassTestPaddingStartNone(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[100, 100], dtype="float32") + param_attr = fluid.ParamAttr( + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + conv_out = fluid.layers.sequence_conv( + input=data, + num_filters=16, + filter_size=4, + act="relu", + bias_attr=param_attr) + + np_data = np.random.random((80, 100)).astype('float32') + x_lod_tensor = 
fluid.create_lod_tensor(np_data, [[10, 20, 30, 20]], + fluid.CPUPlace()) + self.feeds = {"data": x_lod_tensor} + self.fetch_list = [conv_out] + self.enable_mkldnn = True + + def test_check_output(self): + self.check_output() + self.assertTrue( + PassVersionChecker.IsCompatible('seqconv_eltadd_relu_fuse_pass')) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_squared_mat_sub_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_squared_mat_sub_fuse_pass.py new file mode 100644 index 0000000000000000000000000000000000000000..5fa242df4e412fb9c2f3af08b3a186c3e086f2d6 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_squared_mat_sub_fuse_pass.py @@ -0,0 +1,63 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import PassVersionChecker + + +class SquaredMatSubFusePassTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data_a = fluid.data(name="data_a", shape=[128, 1], dtype="float32") + data_b = fluid.data(name="data_b", shape=[256, 1], dtype="float32") + + fc_a = fluid.layers.fc(data_a, size=256) + fc_b = fluid.layers.fc(data_b, size=64) + + data_a_square = paddle.square(fc_a) + data_b_square = paddle.square(fc_b) + + matmul_ab = paddle.matmul(fc_a, fc_b) + matmul_ab_square = paddle.square(matmul_ab) + matmul_square_ab = paddle.matmul(data_a_square, data_b_square) + + scale = paddle.fill_constant(shape=[1], value=0.5, dtype='float32') + + sub_val = paddle.elementwise_sub(matmul_ab_square, matmul_square_ab) + squared_mat_sub_out = fluid.layers.elementwise_mul(sub_val, scale) + + self.feeds = { + "data_a": np.random.random((128, 1)).astype("float32"), + "data_b": np.random.random((256, 1)).astype("float32") + } + self.fetch_list = [squared_mat_sub_out] + + def test_check_output(self): + use_gpu = False + self.check_output_with_option(use_gpu) + + self.assertTrue( + PassVersionChecker.IsCompatible('squared_mat_sub_fuse_pass')) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py index 34a52e7aed342ac8db471ad94b277efd0faf9d27..83d4b7091cb3276ba8e2c1ff9fd7dca9b1692c63 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py +++ 
b/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py @@ -75,7 +75,9 @@ class TransposeFlattenConcatFusePassWithAxisTest(InferencePassTest): use_gpu = True self.check_output_with_option(use_gpu) - PassVersionChecker.IsCompatible('transpose_flatten_concat_fuse_pass') + self.assertTrue( + PassVersionChecker.IsCompatible( + 'transpose_flatten_concat_fuse_pass')) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_allgather.py b/python/paddle/fluid/tests/unittests/test_allgather.py index 877ae6f6e16c2269d7674c38b1ec30ad02f453c0..9bb34d3db4388d5a4f109ef20d2199ee7431dae8 100644 --- a/python/paddle/fluid/tests/unittests/test_allgather.py +++ b/python/paddle/fluid/tests/unittests/test_allgather.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_base import TestDistBase +paddle.enable_static() + class TestAllGatherOp(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_allreduce.py b/python/paddle/fluid/tests/unittests/test_allreduce.py index e0b6422a67b408840be9b96210b6003165dcb3a8..660f559535cd8f7f81499d1ea7244b033c12f08c 100644 --- a/python/paddle/fluid/tests/unittests/test_allreduce.py +++ b/python/paddle/fluid/tests/unittests/test_allreduce.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_base import TestDistBase +paddle.enable_static() + class TestAllReduceOp(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_argsort_op.py b/python/paddle/fluid/tests/unittests/test_argsort_op.py index 2a8e0e6c7f0bcf4a779b4c098cd4af816e976205..e324f0ec3d37f6ea1cf257cac9c7e72969cd8971 100644 --- a/python/paddle/fluid/tests/unittests/test_argsort_op.py +++ b/python/paddle/fluid/tests/unittests/test_argsort_op.py @@ -348,57 +348,99 @@ class 
TestArgsortErrorOnGPU(TestArgsortErrorOnCPU): class TestArgsort(unittest.TestCase): + def init(self): + self.input_shape = [10000, ] + self.axis = 0 + def setUp(self): + self.init() if core.is_compiled_with_cuda(): self.place = core.CUDAPlace(0) else: self.place = core.CPUPlace() - self.data = np.random.rand(2, 3, 4).astype("float32") + self.data = np.random.rand(*self.input_shape) - def test_api_0(self): + def test_api(self): with fluid.program_guard(fluid.Program()): - input = fluid.data(name="input", shape=[2, 3, 4], dtype="float32") - output = paddle.argsort(x=input) - exe = fluid.Executor(self.place) - result, = exe.run(feed={'input': self.data}, fetch_list=[output]) - np_result = np.argsort(self.data) - self.assertEqual((result == np_result).all(), True) + input = fluid.data( + name="input", shape=self.input_shape, dtype="float64") + + output = paddle.argsort(input, axis=self.axis) + output2 = paddle.argsort(input, axis=self.axis, descending=True) - def test_api_1(self): - with fluid.program_guard(fluid.Program()): - input = fluid.data(name="input", shape=[2, 3, 4], dtype="float32") - output = paddle.argsort(x=input, axis=1) exe = fluid.Executor(self.place) - result, = exe.run(feed={'input': self.data}, fetch_list=[output]) - np_result = np.argsort(self.data, axis=1) + result, result2 = exe.run(feed={'input': self.data}, + fetch_list=[output, output2]) + + np_result = np.argsort(self.data, axis=self.axis) self.assertEqual((result == np_result).all(), True) + np_result2 = np.argsort(-self.data, axis=self.axis) + self.assertEqual((result2 == np_result2).all(), True) + + +class TestArgsort2(TestArgsort): + def init(self): + self.input_shape = [10000, 1] + self.axis = 0 + + +class TestArgsort3(TestArgsort): + def init(self): + self.input_shape = [1, 10000] + self.axis = 1 + + +class TestArgsort4(TestArgsort): + def init(self): + self.input_shape = [2, 3, 4] + self.axis = 1 + + +class TestArgsortImperative(unittest.TestCase): + def init(self): + self.input_shape = 
[10000, ] + self.axis = 0 -class TestArgsortDygraph(unittest.TestCase): def setUp(self): - self.input_data = np.random.rand(10, 10) + self.init() + self.input_data = np.random.rand(*self.input_shape) if core.is_compiled_with_cuda(): self.place = core.CUDAPlace(0) else: self.place = core.CPUPlace() - def test_api_0(self): + def test_api(self): paddle.disable_static(self.place) - var_x = paddle.to_variable(self.input_data) - out = paddle.argsort(var_x) - self.assertEqual((np.argsort(self.input_data) == out.numpy()).all(), - True) - paddle.enable_static() + var_x = paddle.to_tensor(self.input_data) + out = paddle.argsort(var_x, axis=self.axis) + expect = np.argsort(self.input_data, axis=self.axis) + self.assertEqual((expect == out.numpy()).all(), True) + + out2 = paddle.argsort(var_x, axis=self.axis, descending=True) + expect2 = np.argsort(-self.input_data, axis=self.axis) + self.assertEqual((expect2 == out2.numpy()).all(), True) - def test_api_1(self): - paddle.disable_static(self.place) - var_x = paddle.to_variable(self.input_data) - out = paddle.argsort(var_x, axis=-1) - self.assertEqual( - (np.argsort( - self.input_data, axis=-1) == out.numpy()).all(), True) paddle.enable_static() +class TestArgsortImperative2(TestArgsortImperative): + def init(self): + self.input_shape = [10000, 1] + self.axis = 0 + + +class TestArgsortImperative3(TestArgsortImperative): + def init(self): + self.input_shape = [1, 10000] + self.axis = 1 + + +class TestArgsortImperative4(TestArgsortImperative): + def init(self): + self.input_shape = [2, 3, 4] + self.axis = 1 + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py index fd009db5fd00133c5bad7c8c52662002ebd03fa8..3f33120d1f79f089d7511621611141683f0a03cd 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py @@ -31,6 +31,7 @@
from paddle.io import Dataset, BatchSampler, DataLoader from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger +paddle.enable_static() logger = get_logger() diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py index 55173325f621f7333a7c3ca32a9c55becee72e5a..fca1baf85e56e1f531dc3c5f64a7af0bda18836c 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py @@ -32,6 +32,7 @@ from paddle.io import Dataset, BatchSampler, DataLoader from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger from paddle.fluid.tests.unittests.test_auto_checkpoint import AutoCheckPointACLBase +paddle.enable_static() logger = get_logger() diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py index 5d72fa01008af55a83d7b9a19747a8d96fb74b2b..0c17807a689e6793af6d81467d73a5727d546698 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py @@ -32,6 +32,7 @@ from paddle.io import Dataset, BatchSampler, DataLoader from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger from paddle.fluid.tests.unittests.test_auto_checkpoint import AutoCheckPointACLBase +paddle.enable_static() logger = get_logger() diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py index 5382f7e328ed1afa2d7516cd0d8db2db659aadd7..ca103be59b96714fe6762e517a665c298082334f 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py @@ -32,6 +32,7 @@ from paddle.io import Dataset, BatchSampler, DataLoader from 
paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger from paddle.fluid.tests.unittests.test_auto_checkpoint import AutoCheckPointACLBase +paddle.enable_static() logger = get_logger() diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint_dist_basic.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_dist_basic.py index 3c78438bdf68538da598f19270d8812e1286474d..3eeff91ff2d830f6dcedbae291342f9a6ecf4878 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint_dist_basic.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_dist_basic.py @@ -32,6 +32,7 @@ from paddle.io import Dataset, BatchSampler, DataLoader from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger from paddle.fluid.tests.unittests.test_auto_checkpoint import AutoCheckPointACLBase +paddle.enable_static() logger = get_logger() diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint_multiple.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_multiple.py index 8c10cd0e9922859bf3bad2015587fc0a6b2ba5da..f8c12f8905112cd5f768ea04cae21b19c90f46f6 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint_multiple.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_multiple.py @@ -32,6 +32,7 @@ from paddle.io import Dataset, BatchSampler, DataLoader from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger from paddle.fluid.tests.unittests.test_auto_checkpoint import AutoCheckPointACLBase +paddle.enable_static() logger = get_logger() diff --git a/python/paddle/fluid/tests/unittests/test_broadcast.py b/python/paddle/fluid/tests/unittests/test_broadcast.py index 029e881d6f69ec0781c1d8ad8e66a9b6fd48cec1..8b8cdb1235ce3830277cfe661bad84aba423e24b 100644 --- a/python/paddle/fluid/tests/unittests/test_broadcast.py +++ b/python/paddle/fluid/tests/unittests/test_broadcast.py @@ -15,9 +15,12 @@ from __future__ 
import print_function import unittest import numpy as np +import paddle from test_collective_base import TestDistBase +paddle.enable_static() + class TestCBroadcastOp(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py b/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py index 43d485a0a6d24be6e8db32f16fe96a70bb229858..2c9168df472f493a16c19ad1b121ec0d126b6306 100644 --- a/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py +++ b/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py @@ -36,6 +36,7 @@ class InplaceTestBase(unittest.TestCase): self.fuse_all_optimizer_ops = False def setUp(self): + paddle.enable_static() self.initParameter() if self.use_cuda and fluid.core.is_compiled_with_cuda(): self.device_count = fluid.core.get_cuda_device_count() diff --git a/python/paddle/fluid/tests/unittests/test_collective_allgather_api.py b/python/paddle/fluid/tests/unittests/test_collective_allgather_api.py index 71777df4651ea26c7cf5dfc7231018288c2670e2..dbf77fafcc47d0b45b95e02819384c2a1d10f98f 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_allgather_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_allgather_api.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_api_base import TestDistBase +paddle.enable_static() + class TestCollectiveAllgatherAPI(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_collective_allreduce_api.py b/python/paddle/fluid/tests/unittests/test_collective_allreduce_api.py index 24dd7cacff6adc56eb059a7bec016a1d3e322825..a405da80adaf0f2c3b6698bd175797670a748c62 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_allreduce_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_allreduce_api.py @@ -15,9 +15,12 @@ from __future__ 
import print_function import unittest import numpy as np +import paddle from test_collective_api_base import TestDistBase +paddle.enable_static() + class TestCollectiveAllreduceAPI(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_collective_barrier_api.py b/python/paddle/fluid/tests/unittests/test_collective_barrier_api.py index ebf86f6ae14f1ecbdb3711378c84a3c1ce4967fb..d0a67baa61e69b09cc6578e2edb9df46df03549f 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_barrier_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_barrier_api.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_api_base import TestDistBase +paddle.enable_static() + class TestCollectiveBarrierAPI(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_collective_broadcast_api.py b/python/paddle/fluid/tests/unittests/test_collective_broadcast_api.py index b1cf4f1ac4c822ad578f5ee0e0268324de5e5e25..702e04311570ef5cdd450a59f471c7688579a494 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_broadcast_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_broadcast_api.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_api_base import TestDistBase +paddle.enable_static() + class TestCollectiveBroadcastAPI(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_collective_reduce.py b/python/paddle/fluid/tests/unittests/test_collective_reduce.py index 36837d6a227febd02e6ef1e2aeb905de19ca8acc..c0627467428109891ed71e1bd6f5576694ff59d6 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_reduce.py +++ b/python/paddle/fluid/tests/unittests/test_collective_reduce.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle 
from test_collective_base import TestDistBase +paddle.enable_static() + class TestCReduceOp(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_collective_reduce_api.py b/python/paddle/fluid/tests/unittests/test_collective_reduce_api.py index bf3975f3fc1c6959ffbb28a51543ebfef00c52e5..8d28c794f023a6945893342a53386f6ffb8a6052 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_reduce_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_reduce_api.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_api_base import TestDistBase +paddle.enable_static() + class TestCollectiveReduceAPI(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_collective_scatter.py b/python/paddle/fluid/tests/unittests/test_collective_scatter.py index 7fe3ce73359559c0f9b4e0e3990032ce693aab8a..ea34d1cab5a5a573c7053b956eb1474e5fb44179 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_scatter.py +++ b/python/paddle/fluid/tests/unittests/test_collective_scatter.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_base import TestDistBase +paddle.enable_static() + class TestCScatterOp(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_collective_scatter_api.py b/python/paddle/fluid/tests/unittests/test_collective_scatter_api.py index cae842b396111f004b7ce52ce3f40c20ebe57263..3a37da52b8e9270c27749eb10252134ea97a6b46 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_scatter_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_scatter_api.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_api_base import TestDistBase +paddle.enable_static() + class 
TestCollectiveScatterAPI(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py b/python/paddle/fluid/tests/unittests/test_communicator_geo.py index f5b1350065ecce19299375364edb75dd48364e47..5916000fba79fc0da2ef545beac634a3edfe01df 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py @@ -28,6 +28,8 @@ import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet +paddle.enable_static() + class TestCommunicatorGeoEnd2End(unittest.TestCase): def net(self): @@ -140,6 +142,7 @@ import paddle.distributed.fleet as fleet from test_communicator_geo import TestCommunicatorGeoEnd2End +paddle.enable_static() class RunServer(TestCommunicatorGeoEnd2End): def runTest(self): diff --git a/python/paddle/fluid/tests/unittests/test_communicator_half_async.py b/python/paddle/fluid/tests/unittests/test_communicator_half_async.py index 991d34e42ae5d1bfed1b0afa0a0d051d9f75e357..b0f55f2939dc94af603f4cc5851dbb5e6317774f 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_half_async.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_half_async.py @@ -29,6 +29,8 @@ import paddle.fluid.incubate.fleet.base.role_maker as role_maker from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory +paddle.enable_static() + class TestCommunicatorHalfAsyncEnd2End(unittest.TestCase): def net(self): @@ -120,6 +122,7 @@ from test_communicator_half_async import TestCommunicatorHalfAsyncEnd2End from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory +paddle.enable_static() class 
RunServer(TestCommunicatorHalfAsyncEnd2End): def runTest(self): diff --git a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py index 4d767709ef56f11d6790c85206b544d63883841e..b2cb3141aad48ddb59887b99a7d02ce56ca74493 100644 --- a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py +++ b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py @@ -37,7 +37,7 @@ class TestClass(unittest.TestCase): low=0, high=9, size=label_shape).astype('int64') yield img, label - reader = fluid.io.cache(fake_reader) + reader = paddle.reader.cache(fake_reader) batch_reader = fluid.io.batch(reader, batch_size=batch_size) places = [fluid.CPUPlace()] diff --git a/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py b/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py index fbeff20c63b2f4a3f01ac4131ac7063aff0204cf..2adf6e41931816688051132ee38215814a427378 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py @@ -15,6 +15,9 @@ from __future__ import print_function import unittest from test_dist_base import TestDistBase +import paddle + +paddle.enable_static() class TestDistMnistNCCL2(TestDistBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py index 62c372b6034b738e565435a9e014df16aa33630c..7f55e956a94aee79dda07762e953e71807899bff 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py @@ -19,6 +19,8 @@ import unittest import paddle import paddle.distributed.fleet.base.role_maker as role_maker +paddle.enable_static() + class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): def setUp(self): diff 
--git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py index 5a5d8afc55bac4c0ea862e75b728c6c1a37b3188..5b7e0fb94c662f4aa47fbaad964e03c576c97807 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py @@ -18,6 +18,8 @@ import os import paddle.distributed.fleet.base.role_maker as role_maker import time +paddle.enable_static() + class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py index 9085556c04c356e5b703ec0b36c3884100ad73f8..3dff9d0f9d82530cade09a737d448fca4bf4f960 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py @@ -18,6 +18,8 @@ import os import paddle.distributed.fleet.base.role_maker as role_maker import time +paddle.enable_static() + class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py index 4787d048bd2566fe063073867bcbd4138d25ff21..bdfa3a9a7d57869466b895f23674b6e8ef83310f 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py @@ -18,6 +18,8 @@ import os import paddle.distributed.fleet.base.role_maker as role_maker import time +paddle.enable_static() + class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): def setUp(self): diff --git 
a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py index 59ca41a11e325cfb66a3a3eaadb4eca6f9764212..db73069bf7d42ac008f14b804bd7d31b808d92b9 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py @@ -18,6 +18,8 @@ import os import paddle.distributed.fleet.base.role_maker as role_maker import time +paddle.enable_static() + class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py index 9f7974b5f970710c50b954dedf3beb2694067621..db3f2afb3668bc1831286f8d13b274895e7632fd 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py @@ -19,6 +19,8 @@ import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker import time +paddle.enable_static() + class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py index 7d18e935f58b6588adbef913c10d3ad497f07b53..82a8f46a945b9d97a7c6c662f11edf82fbc68111 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py @@ -22,6 +22,9 @@ from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import f from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory from test_dist_fleet_base import TestFleetBase from dist_fleet_simnet_bow import train_network +import paddle + 
+paddle.enable_static() class TestDistGeoCtr_2x2(TestFleetBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py index 02a739c060cd2bd58ecec4d7dc65b65e8a3a35a7..b3e38a421287611c43bb82d93b4df166e23f6484 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py @@ -18,6 +18,9 @@ import os import unittest import tempfile from test_dist_fleet_heter_base import TestFleetHeterBase +import paddle + +paddle.enable_static() class TestDistHeterDatasetAsync2x2(TestFleetHeterBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py index 3369039661205ef78a3ec0254241c3ed80b771a9..00301f9b1c61dd12dc993e0b4c735479fe16daed 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py @@ -21,6 +21,9 @@ import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker from paddle.distributed.fleet.base.util_factory import fleet_util from paddle.distributed.fleet import fleet +import paddle + +paddle.enable_static() class TestDistFleetHeterProgram(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py index 8132add37a673d9035ca108cc124f075b53226f1..d766e6bf2af714e04c6a04d8a8e627bcc631cee9 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py @@ -19,6 +19,9 @@ import paddle.fluid as fluid import paddle.fluid.incubate.fleet.base.role_maker as role_maker from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from 
paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory +import paddle + +paddle.enable_static() # For Net base_lr = 0.2 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py index e7b10be2349cce755267297025ca8520b6d494ee..218eb77d0b5653fb80bceae6714f85f2674df6cb 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py @@ -24,6 +24,8 @@ import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet +paddle.enable_static() + # For Net base_lr = 0.2 emb_lr = base_lr * 3 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py index de4363f255ba8fd80b7caea11a03a28899c1c9e7..8d101a34b68e4b9b84caa7de8921bd1096e71944 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py @@ -19,6 +19,9 @@ import paddle.fluid as fluid import paddle.fluid.incubate.fleet.base.role_maker as role_maker from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory +import paddle + +paddle.enable_static() # For Net base_lr = 0.2 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py index f1176aea34ea88d821326597e34cd064fdbad26c..6fe52ba9fe61ad83341ece5c29fcafa89095de82 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py @@ -19,6 +19,9 @@ import paddle.fluid as fluid import paddle.fluid.incubate.fleet.base.role_maker as role_maker from 
paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory +import paddle + +paddle.enable_static() # For Net base_lr = 0.2 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py index 33a17f1489d3af7fe63f7589ffe76823fdeb5a0e..c570c4d8cd01dd7e7b113b1f5f35c9887f4a4376 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py @@ -19,6 +19,9 @@ import paddle.fluid as fluid import paddle.fluid.incubate.fleet.base.role_maker as role_maker from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory +import paddle + +paddle.enable_static() # For Net base_lr = 0.2 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py index ec34993905e3cfc4603ac48987a690b7fa8a5439..e0fa590db2abdd3d3c0ccaca2d599e66c75102ba 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py @@ -18,6 +18,9 @@ import os import unittest import tempfile from test_dist_fleet_base import TestFleetBase +import paddle + +paddle.enable_static() class TestDistSimnetASync2x2(TestFleetBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py index 1f6274ec16488323c9f7e6b14a94e0d9182d7aca..23a2b8fd306070083a0fbec11c0709748b6ed6ac 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py @@ -15,6 +15,9 @@ from __future__ 
import print_function import unittest from test_dist_base import TestDistBase +import paddle + +paddle.enable_static() class TestDistMnistNCCL2BackWardDeps(TestDistBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py index 24c9b9a139733c0428e99fb8dfdc02c9cb38393e..4cf2cf5f3675480b6ef6f8e04561102fbfd1dccf 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py @@ -16,6 +16,9 @@ from __future__ import print_function import unittest from test_dist_base import TestDistBase import os +import paddle + +paddle.enable_static() flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py index 0b9b85d5d52c38f748679a92a99ec61c3dec7903..9bc48ac0a1b2d4eca90acc1cd9792696bfcb7a2e 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py @@ -18,6 +18,9 @@ from test_dist_base import TestDistBase import os import subprocess +import paddle + +paddle.enable_static() flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py index 7dac11535629379639e86f2a4d2583fb703d5bfb..7336794578ed7b80a182b6175ebb0eda4252041d 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py @@ -17,6 +17,9 @@ import shutil import os import unittest from test_dist_base import TestDistBase +import paddle + +paddle.enable_static() class TestDistMnistFleetSave(TestDistBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py 
b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py index d5ebe09adca01a339dd5da1c6e73c621a4a21a2d..255fd9b2855af579f419d1ada9044a445258746e 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py @@ -15,6 +15,9 @@ from __future__ import print_function import unittest from test_dist_base import TestDistBase +import paddle + +paddle.enable_static() class TestDistMnistNCCL2FleetApi(TestDistBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py index cc002582371d33fa29c0d738568212855e025023..356c5573f95308d9d2cbf93b4232b199f5ee2a5e 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py @@ -17,6 +17,9 @@ import unittest from test_dist_base import TestDistBase import os +import paddle + +paddle.enable_static() flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py index f43ccc8becb8fd76735618e75c80a27f1f54c8c3..d9e6be8609d273dd7a149ff59a350da4c9dede20 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py @@ -17,6 +17,9 @@ import unittest from test_dist_base import TestDistBase import os +import paddle + +paddle.enable_static() flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py index d063f8473e0f50256dc424429ce1244a4b893ccf..28ef31875dbdeda83ab1d8de272e0b515c3cda83 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py @@ -15,6 +15,9 @@ from __future__ 
import print_function import unittest from test_dist_base import TestDistBase +import paddle + +paddle.enable_static() class TestDistMnistNCCL2(TestDistBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py index fd15020275bdce1a6424f3134ff089bd761ee1b1..4436064dc28ed1276481378c70aa3b306486e0c8 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py @@ -15,6 +15,9 @@ from __future__ import print_function import unittest from test_dist_base import TestDistBase +import paddle + +paddle.enable_static() class TestDistMnistNCCL2(TestDistBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py index 4f4941aa217b985c829391e9e8652d91f72b0c98..d55582fbb4dbb51b8b541579543015909e85aad8 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py @@ -15,6 +15,9 @@ from __future__ import print_function import unittest from test_dist_base import TestDistBase +import paddle + +paddle.enable_static() class TestDistMnistLocalSGDFleetApi(TestDistBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_op.py b/python/paddle/fluid/tests/unittests/test_dist_op.py index 1f46e0e7f9ca97409a7c6ea634ed96421e593f5f..0f71027d274018a48e769a28ff9679204251c1d3 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_op.py +++ b/python/paddle/fluid/tests/unittests/test_dist_op.py @@ -19,6 +19,8 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core +paddle.enable_static() + def dist(x, y, p): if p == 0.: diff --git a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py index 
dbf0319d3054f097f9e3b0e85a81a47581fddbbc..64217135be735cb0bd752e240a787c42c2bb4944 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py +++ b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py @@ -18,6 +18,9 @@ from test_dist_base import TestDistBase import os import os +import paddle + +paddle.enable_static() flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler_async_decay.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler_async_decay.py index 761d57408b9a8f9e52419331bfb0bca5b0135c30..dd5c393f49c3f2a52414091fa3d3349e25362ae8 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler_async_decay.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler_async_decay.py @@ -17,6 +17,9 @@ from __future__ import print_function import unittest import gc import paddle.fluid as fluid +import paddle + +paddle.enable_static() class TranspilerAsyncLRDecayTest(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler_config.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler_config.py index c8d0d840872a8af4dd5230fd3a33961490ebdb0a..e6bc99fc2257c6561d24cac71a37fa840ff966ab 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler_config.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler_config.py @@ -15,6 +15,9 @@ import unittest import paddle.fluid as fluid import gc +import paddle + +paddle.enable_static() gc.set_debug(gc.DEBUG_COLLECTABLE) diff --git a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py index 7835fd3f53ddb7f9a95313c6cc5fc7b72ae6d664..01f0abe0f217c342c4ea14cb55b4c40b5d273284 100644 --- a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py +++ b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py @@ -306,5 +306,70 @@ class TestFakeQuantDequantAbsOp(OpTest): 
self.check_grad(["X"], "Out", user_defined_grads=gradient) +class TestChannelWiseFakeQuantDequantOp(OpTest): + def setUp(self): + self.set_arg() + assert self.quant_axis in [0, 1], "quant_axis should be 0 or 1." + + self.op_type = "fake_channel_wise_quantize_dequantize_abs_max" + self.attrs = {'bit_length': 8, 'quant_axis': self.quant_axis} + + scales = [] + outputs = self.inputs['X'].copy() + range_v = (1 << (self.attrs['bit_length'] - 1)) - 1 + if self.quant_axis == 0: + for i in range(self.inputs['X'].shape[0]): + scale_v = np.max(np.abs(self.inputs['X'][i])).astype("float32") + scales.append(scale_v) + outputs[i] = np.round(outputs[i] * range_v / + scale_v) * scale_v / range_v + elif self.quant_axis == 1: + for i in range(self.inputs['X'].shape[1]): + scale_v = np.max(np.abs(self.inputs['X'][:, i])).astype( + "float32") + scales.append(scale_v) + outputs[:, i] = np.round(outputs[:, i] * range_v / + scale_v) * scale_v / range_v + + self.outputs = { + 'Out': outputs, + 'OutScale': np.array(scales).astype("float32"), + } + + def set_arg(self): + self.quant_axis = 0 + self.inputs = { + 'X': np.random.random((3, 4, 64, 64)).astype("float32"), + } + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + x = self.inputs["X"] + gradient = [np.ones(x.shape) / np.product(x.shape)] + self.check_grad(["X"], "Out", user_defined_grads=gradient) + + +class TestChannelWiseFakeQuantDequantOp1(TestChannelWiseFakeQuantDequantOp): + def set_arg(self): + self.quant_axis = 1 + self.inputs = { + 'X': np.random.random((15, 20, 5, 5)).astype("float32"), + } + + +class TestChannelWiseFakeQuantDequantOp2(TestChannelWiseFakeQuantDequantOp): + def set_arg(self): + self.quant_axis = 0 + self.inputs = {'X': np.random.random((30, 15)).astype("float32"), } + + +class TestChannelWiseFakeQuantDequantOp3(TestChannelWiseFakeQuantDequantOp): + def set_arg(self): + self.quant_axis = 1 + self.inputs = {'X': np.random.random((30, 15)).astype("float32"), } + + if __name__ 
== "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base.py b/python/paddle/fluid/tests/unittests/test_fleet_base.py index 3a90b363f2744f421bfab8eb4d55dd2c6e51e7e9..45597e7253c4d5bab50aa58f5f58e13e89ce1c1e 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base.py @@ -24,10 +24,10 @@ import numpy as np class TestFleetBase(unittest.TestCase): def setUp(self): os.environ["POD_IP"] = "127.0.0.1" - os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" + os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36000" os.environ["PADDLE_TRAINERS_NUM"] = "2" os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = \ - "127.0.0.1:36001,127.0.0.2:36001" + "127.0.0.1:36001,127.0.0.2:36002" def test_init(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) @@ -58,32 +58,51 @@ class TestFleetBase(unittest.TestCase): def test_worker_endpoints(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - print(fleet.worker_endpoints(to_string=True)) + self.assertEqual( + "127.0.0.1:36000", fleet.worker_endpoints(to_string=True)) + self.assertEqual(["127.0.0.1:36000"], fleet.worker_endpoints()) def test_server_num(self): - role = role_maker.PaddleCloudRoleMaker(is_collective=True) + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PORT"] = "36001" + os.environ["POD_IP"] = "127.0.0.1" + + role = role_maker.PaddleCloudRoleMaker() fleet.init(role) - if fleet.is_server(): - print("fleet server num: {}".format(fleet.server_num())) + os.environ["PADDLE_TRAINERS_NUM"] = "2" + self.assertEqual(2, fleet.server_num()) def test_server_index(self): - role = role_maker.PaddleCloudRoleMaker(is_collective=True) + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PORT"] = "36001" + os.environ["POD_IP"] = "127.0.0.1" + + role = role_maker.PaddleCloudRoleMaker() fleet.init(role) - if fleet.is_server(): - print("fleet server index: 
{}".format(fleet.server_index())) + self.assertEqual(0, fleet.server_index()) def test_server_endpoints(self): - role = role_maker.PaddleCloudRoleMaker(is_collective=True) + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PORT"] = "36001" + os.environ["POD_IP"] = "127.0.0.1" + + role = role_maker.PaddleCloudRoleMaker() fleet.init(role) if fleet.is_server(): - print("fleet server index: {}".format( - fleet.server_endpoints(to_string=True))) + self.assertEqual( + "127.0.0.1:36001,127.0.0.2:36002", + fleet.server_endpoints(to_string=True)) + self.assertEqual(["127.0.0.1:36001", "127.0.0.2:36002"], + fleet.server_endpoints()) def test_is_server(self): - role = role_maker.PaddleCloudRoleMaker(is_collective=True) + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PORT"] = "36001" + os.environ["POD_IP"] = "127.0.0.1" + + role = role_maker.PaddleCloudRoleMaker() fleet.init(role) - if fleet.is_server(): - print("test fleet is server") + self.assertTrue(fleet.is_server()) def test_util(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py index 927c155ff1116a821a13730a9d2a779a7c68b254..f06f1eaefaeb3ee56b849e062dd4e3b0b581d119 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py @@ -17,6 +17,8 @@ import paddle import os from launch_function_helper import launch_func, wait, _find_free_port +paddle.enable_static() + class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py index a831f6e838e950f9955c762544c312ed2d8766a9..dae7907161697107a50eaf1b1501881f74509b76 100644 --- 
a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py @@ -87,7 +87,7 @@ class TestCloudRoleMaker2(unittest.TestCase): role2._all_gather(1) role2._all_gather(1) role2._barrier_server() - role2.all_gather(1) + role2._all_gather(1) role3 = GeneralRoleMaker(path="./test_gloo_3") role3._worker_gather(1) role3._worker_gather(1) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py index d786fa1eba8901f53ac76a47632f63f6fb6641eb..4dd254af251ae955878f9846e0f0e06f65c3ec90 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py @@ -30,19 +30,19 @@ class TestRoleMakerBase(unittest.TestCase): def test_rolemaker_base(self): role = role_maker.RoleMakerBase() - self.assertRaises(Exception, role.is_worker) - self.assertRaises(Exception, role.is_server) - self.assertRaises(Exception, role.is_first_worker) - self.assertRaises(Exception, role.worker_num) - self.assertRaises(Exception, role.server_num) - self.assertRaises(Exception, role.worker_index) - self.assertRaises(Exception, role.server_index) - self.assertRaises(Exception, role.role_id) - self.assertRaises(Exception, role.node_num) - - trainer_endpoints = role.get_trainer_endpoints() + self.assertRaises(Exception, role._is_worker) + self.assertRaises(Exception, role._is_server) + self.assertRaises(Exception, role._is_first_worker) + self.assertRaises(Exception, role._worker_num) + self.assertRaises(Exception, role._server_num) + self.assertRaises(Exception, role._worker_index) + self.assertRaises(Exception, role._server_index) + self.assertRaises(Exception, role._role_id) + self.assertRaises(Exception, role._node_num) + + trainer_endpoints = role._get_trainer_endpoints() self.assertTrue(len(trainer_endpoints) == 0) - pserver_endpoints = role.get_pserver_endpoints() + 
pserver_endpoints = role._get_pserver_endpoints() self.assertTrue(len(pserver_endpoints) == 0) print(role.to_string()) @@ -77,20 +77,32 @@ class TestCloudRoleMaker(unittest.TestCase): return ro = role_maker.PaddleCloudRoleMaker(is_collective=False) - - self.assertTrue(ro.is_worker()) - self.assertFalse(ro.is_server()) - self.assertEqual(ro.worker_num(), 2) - self.assertTrue(ro.is_first_worker()) - worker_endpoints = ro.get_trainer_endpoints() + self.assertTrue(ro._is_worker()) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertFalse(ro._is_server()) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertEqual(ro._worker_num(), 2) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertTrue(ro._is_first_worker()) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + worker_endpoints = ro._get_trainer_endpoints() self.assertEqual(worker_endpoints[0], '127.0.0.1:36001') - self.assertEqual(ro.role_id(), 0) - self.assertEqual(ro.node_num(), 2) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertEqual(ro._role_id(), 0) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertEqual(ro._node_num(), 2) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertFalse(ro._is_non_distributed()) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertEqual(ro._heter_worker_num(), 0) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertFalse(ro._is_heter_worker()) def test_tr_rolemaker_collective(self): ro = role_maker.PaddleCloudRoleMaker(is_collective=True) - self.assertEqual(ro.worker_num(), 2) - self.assertEqual(ro.node_num(), 2) + self.assertEqual(ro._worker_num(), 2) + ro = role_maker.PaddleCloudRoleMaker(is_collective=True) + self.assertEqual(ro._node_num(), 2) def test_ps_rolemaker(self): """Test ps rolemaker.""" @@ -106,11 +118,11 @@ class TestCloudRoleMaker(unittest.TestCase): ro = role_maker.PaddleCloudRoleMaker( 
is_collective=False, init_gloo=False) - self.assertEqual(ro.server_index(), 0) - self.assertFalse(ro.is_worker()) - self.assertTrue(ro.is_server()) - self.assertEqual(ro.server_num(), 2) - pserver_endpoints = ro.get_pserver_endpoints() + self.assertEqual(ro._server_index(), 0) + self.assertFalse(ro._is_worker()) + self.assertTrue(ro._is_server()) + self.assertEqual(ro._server_num(), 2) + pserver_endpoints = ro._get_pserver_endpoints() self.assertEqual(pserver_endpoints[0], '127.0.0.1:36001') self.assertEqual(ro._all_gather(1, "worker"), 1) @@ -126,7 +138,7 @@ class TestCloudRoleMaker(unittest.TestCase): return ro = role_maker.PaddleCloudRoleMaker(is_collective=False) - self.assertRaises(ValueError, ro.generate_role) + self.assertRaises(ValueError, ro._generate_role) class TestUserDefinedRoleMaker(unittest.TestCase): @@ -151,10 +163,10 @@ class TestUserDefinedRoleMaker(unittest.TestCase): role=role_maker.Role.SERVER, current_id=0, worker_num=2) - self.assertEqual(ro.server_num(), 2) - ro.generate_role() - self.assertTrue(ro.is_server()) - self.assertEqual(ro.role_id(), 0) + self.assertEqual(ro._server_num(), 2) + ro._generate_role() + self.assertTrue(ro._is_server()) + self.assertEqual(ro._role_id(), 0) def test_tr_rolemaker(self): try: @@ -171,9 +183,9 @@ class TestUserDefinedRoleMaker(unittest.TestCase): current_id=0, worker_num=2) - self.assertIn("127.0.0.1:36001", ro.get_pserver_endpoints()) - self.assertTrue(ro.is_worker()) - self.assertEqual(ro.role_id(), 0) + self.assertIn("127.0.0.1:36001", ro._get_pserver_endpoints()) + self.assertTrue(ro._is_worker()) + self.assertEqual(ro._role_id(), 0) class TestGlooWithCloudRoleMaker(unittest.TestCase): @@ -216,7 +228,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_FS_PATH"] = tmp role = role_maker.PaddleCloudRoleMaker() - role.generate_role() + role._generate_role() self.case(role, "worker") self.clean(tmp) @@ -234,7 +246,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): 
os.environ["PADDLE_GLOO_FS_PATH"] = tmp role = role_maker.PaddleCloudRoleMaker() - role.generate_role() + role._generate_role() self.case(role, "worker") self.clean(tmp) @@ -256,7 +268,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_FS_PATH"] = tmp role = role_maker.PaddleCloudRoleMaker() - role.generate_role() + role._generate_role() self.case(role, "server") self.clean(tmp) @@ -280,7 +292,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_FS_PATH"] = tmp role = role_maker.PaddleCloudRoleMaker() - role.generate_role() + role._generate_role() self.case(role, "server") self.clean(tmp) @@ -302,7 +314,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_HTTP_PORT"] = "30019" role = role_maker.PaddleCloudRoleMaker() - role.generate_role() + role._generate_role() import time time.sleep(3) @@ -326,7 +338,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_FS_PATH"] = tmp role = role_maker.PaddleCloudRoleMaker() - role.generate_role() + role._generate_role() self.case(role, "server") self.case(role, "all") self.clean(tmp) @@ -354,7 +366,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_FS_PATH"] = tmp role = role_maker.PaddleCloudRoleMaker() - role.generate_role() + role._generate_role() self.case(role, "server") self.case(role, "all") self.clean(tmp) @@ -377,7 +389,323 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_RENDEZVOUS"] = "5" role = role_maker.PaddleCloudRoleMaker() - self.assertRaises(ValueError, role.generate_role) + self.assertRaises(ValueError, role._generate_role) + + def test_fs_gloo8(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + 
os.environ["PADDLE_PORT"] = "36001" + os.environ["PADDLE_TRAINERS_NUM"] = "0" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + + os.environ["PADDLE_WITH_GLOO"] = "2" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "1" + os.environ["PADDLE_GLOO_FS_NAME"] = "NULL" + os.environ["PADDLE_GLOO_FS_UGI"] = "NULL" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + def net(): + x = paddle.fluid.layers.data(name='x', shape=[13], dtype='float32') + y_predict = paddle.fluid.layers.fc(input=x, size=1, act=None) + y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = paddle.fluid.layers.square_error_cost( + input=y_predict, label=y) + avg_cost = paddle.fluid.layers.mean(cost) + return avg_cost + + from paddle.distributed import fleet + + role = role_maker.PaddleCloudRoleMaker() + fleet.init(role) + avg_cost = net() + + strategy = paddle.distributed.fleet.DistributedStrategy() + strategy.a_sync = False + + optimizer = paddle.optimizer.SGD(0.01) + optimizer = fleet.distributed_optimizer(optimizer, strategy) + optimizer.minimize(avg_cost) + + comm_world = "server" + fleet.util().barrier(comm_world) + + gather = fleet.util().all_gather(1, comm_world) + self.assertEqual(gather[0], 1) + + all_reduce = fleet.util().all_reduce(1, "sum", comm_world) + self.assertEqual(1, all_reduce) + + self.clean(tmp) + + +class TestGlooWithCloudRoleMaker(unittest.TestCase): + def setUp(self): + os.environ["PADDLE_TRAINERS_NUM"] = "1" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_TRAINER_ID"] = "0" + + def case(self, role, comm_world): + role._barrier(comm_world) + + gather = role._all_gather(1, comm_world) + self.assertEqual(gather[0], 1) + + all_reduce = role._all_reduce(1, "sum", comm_world) + self.assertEqual(1, all_reduce) + + def mkdir(self): + tmp = tempfile.mkdtemp() + return tmp + + def clean(self, tmp): + shutil.rmtree(tmp) + + def 
test_hdfs_gloo(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + os.environ["TRAINING_ROLE"] = "TRAINER" + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "1" + os.environ["PADDLE_GLOO_FS_NAME"] = "NULL" + os.environ["PADDLE_GLOO_FS_UGI"] = "NULL" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role._generate_role() + self.case(role, "worker") + self.clean(tmp) + + def test_fs_gloo(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + os.environ["TRAINING_ROLE"] = "TRAINER" + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "2" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role._generate_role() + self.case(role, "worker") + self.clean(tmp) + + def test_fs_gloo2(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "2" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role._generate_role() + self.case(role, "server") + self.clean(tmp) + + def test_fs_gloo3(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + 
os.environ["PADDLE_PORT"] = "36001" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "1" + os.environ["PADDLE_GLOO_FS_NAME"] = "NULL" + os.environ["PADDLE_GLOO_FS_UGI"] = "NULL" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role._generate_role() + self.case(role, "server") + self.clean(tmp) + + def test_fs_gloo4(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "3" + os.environ["PADDLE_GLOO_HTTP_HOST"] = "127.0.0.1" + os.environ["PADDLE_GLOO_HTTP_PORT"] = "30019" + + role = role_maker.PaddleCloudRoleMaker() + role._generate_role() + import time + time.sleep(3) + + def test_fs_gloo5(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + os.environ["PADDLE_TRAINERS_NUM"] = "0" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "2" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "2" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role._generate_role() + self.case(role, "server") + self.case(role, "all") + self.clean(tmp) + + def test_fs_gloo6(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + + os.environ["TRAINING_ROLE"] = "PSERVER" + 
os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + os.environ["PADDLE_TRAINERS_NUM"] = "0" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + + os.environ["PADDLE_WITH_GLOO"] = "2" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "1" + os.environ["PADDLE_GLOO_FS_NAME"] = "NULL" + os.environ["PADDLE_GLOO_FS_UGI"] = "NULL" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role._generate_role() + self.case(role, "server") + self.case(role, "all") + self.clean(tmp) + + def test_fs_gloo7(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + os.environ["PADDLE_TRAINERS_NUM"] = "0" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "5" + + role = role_maker.PaddleCloudRoleMaker() + self.assertRaises(ValueError, role._generate_role) + + def test_hdfs_gloo_v2(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + os.environ["TRAINING_ROLE"] = "TRAINER" + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "1" + os.environ["PADDLE_GLOO_FS_NAME"] = "" + os.environ["PADDLE_GLOO_FS_UGI"] = "" + os.environ["PADDLE_GLOO_FS_PATH"] = "" + + role = role_maker.PaddleCloudRoleMaker() + self.assertRaises(ValueError, role._generate_role) + + def test_fs_gloo_v2(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + 
os.environ["PADDLE_PORT"] = "36001" + os.environ["PADDLE_TRAINERS_NUM"] = "0" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "2" + os.environ["PADDLE_GLOO_FS_PATH"] = "" + + role = role_maker.PaddleCloudRoleMaker() + self.assertRaises(ValueError, role._generate_role) + + def test_http_gloo_v2(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "3" + os.environ["PADDLE_GLOO_HTTP_HOST"] = "" + os.environ["PADDLE_GLOO_HTTP_PORT"] = "" + + role = role_maker.PaddleCloudRoleMaker() + self.assertRaises(ValueError, role._generate_role) def test_fs_gloo8(self): plats = platform.platform() diff --git a/python/paddle/fluid/tests/unittests/test_fused_bn_add_act.py b/python/paddle/fluid/tests/unittests/test_fused_bn_add_act.py new file mode 100644 index 0000000000000000000000000000000000000000..1bc305cd1f4dcd3faaaf8ccbe813bdf08e966d6e --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_fused_bn_add_act.py @@ -0,0 +1,215 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest +import paddle +import paddle.fluid as fluid +from paddle.fluid import core + + +@unittest.skipIf(not core.is_compiled_with_cuda(), + "Paddle core is not compiled with CUDA") +class TestFusedBnAddActAPI(unittest.TestCase): + def setUp(self): + self.conv_param_attr1 = fluid.ParamAttr( + name='conv2d_1.weight', + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + self.conv_param_attr2 = fluid.ParamAttr( + name='conv2d_2.weight', + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + self.bn_param_attr1 = fluid.ParamAttr( + name='batch_norm_w_1', + initializer=fluid.initializer.Constant(value=1.0)) + self.bn_bias_attr1 = fluid.ParamAttr( + name='batch_norm_b_1', + initializer=fluid.initializer.Constant(value=0.0)) + self.bn_param_attr2 = fluid.ParamAttr( + name='batch_norm_w_2', + initializer=fluid.initializer.Constant(value=1.0)) + self.bn_bias_attr2 = fluid.ParamAttr( + name='batch_norm_b_2', + initializer=fluid.initializer.Constant(value=0.0)) + self.fc_param_attr = fluid.ParamAttr( + name='fc.weight', + initializer=fluid.initializer.Xavier(uniform=False)) + + def build_fused_program(self, + main_program, + startup_program, + use_cuda, + seed=1): + with fluid.program_guard(main_program, startup_program): + x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') + y = fluid.layers.data(name="y", shape=[1], dtype='int64') + conv1_1 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + param_attr=self.conv_param_attr1, + bias_attr=False, + data_format='NHWC') + conv1_2 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + param_attr=self.conv_param_attr2, + bias_attr=False, + 
data_format='NHWC') + bn = fluid.layers.batch_norm( + input=conv1_1, + param_attr=self.bn_param_attr1, + bias_attr=self.bn_bias_attr1, + act=None, + data_layout='NHWC') + fused_bn_add_act = fluid.contrib.layers.fused_bn_add_act( + conv1_2, + bn, + param_attr=self.bn_param_attr2, + bias_attr=self.bn_bias_attr2) + prediction = fluid.layers.fc(input=fused_bn_add_act, + size=10, + act='softmax', + param_attr=self.fc_param_attr) + loss = fluid.layers.cross_entropy(input=prediction, label=y) + loss = fluid.layers.mean(loss) + sgd = fluid.optimizer.SGD(learning_rate=0.001) + sgd = fluid.contrib.mixed_precision.decorate( + sgd, use_dynamic_loss_scaling=True, init_loss_scaling=128.0) + sgd.minimize(loss) + + return x, y, loss + + def build_origin_program(self, + main_program, + startup_program, + use_cuda, + seed=1): + with fluid.program_guard(main_program, startup_program): + x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') + y = fluid.layers.data(name="y", shape=[1], dtype='int64') + conv1_1 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + param_attr=self.conv_param_attr1, + bias_attr=False, + data_format='NHWC') + conv1_2 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + param_attr=self.conv_param_attr2, + bias_attr=False, + data_format='NHWC') + bn1 = fluid.layers.batch_norm( + input=conv1_1, + param_attr=self.bn_param_attr1, + bias_attr=self.bn_bias_attr1, + act=None, + data_layout='NHWC') + bn2 = fluid.layers.batch_norm( + input=conv1_2, + param_attr=self.bn_param_attr2, + bias_attr=self.bn_bias_attr2, + act=None, + data_layout='NHWC') + out = bn1 + bn2 + out = fluid.layers.relu(out) + prediction = fluid.layers.fc(input=out, + size=10, + act='softmax', + param_attr=self.fc_param_attr) + loss = fluid.layers.cross_entropy(input=prediction, label=y) + loss = fluid.layers.mean(loss) + sgd = fluid.optimizer.SGD(learning_rate=0.001) + sgd = 
fluid.contrib.mixed_precision.decorate( + sgd, use_dynamic_loss_scaling=True, init_loss_scaling=128.0) + sgd.minimize(loss) + + return x, y, loss + + def check(self, place, use_cuda): + paddle.manual_seed(1) + paddle.framework.random._manual_program_seed(1) + iters = 5 + batch_size = 16 + + # build_fused_program + main_program = fluid.Program() + startup_program = fluid.Program() + x, y, loss = self.build_fused_program(main_program, startup_program, + use_cuda) + feeder = fluid.DataFeeder(feed_list=[x, y], place=place) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=batch_size) + exe = fluid.Executor(place) + loss_vals_fused = [] + scope = fluid.Scope() + with fluid.scope_guard(scope): + exe.run(startup_program) + for _ in range(iters): + data = next(train_reader()) + loss_v = exe.run(main_program, + feed=feeder.feed(data), + fetch_list=[loss]) + loss_vals_fused.append(loss_v[0][0]) + + # build_origin_program + main_program = fluid.Program() + startup_program = fluid.Program() + x, y, loss = self.build_origin_program(main_program, startup_program, + use_cuda) + feeder = fluid.DataFeeder(feed_list=[x, y], place=place) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=batch_size) + loss_vals = [] + scope = fluid.Scope() + with fluid.scope_guard(scope): + exe.run(startup_program) + for _ in range(iters): + data = next(train_reader()) + loss_v = exe.run(main_program, + feed=feeder.feed(data), + fetch_list=[loss]) + loss_vals.append(loss_v[0][0]) + + # check loss + for i in range(iters): + self.assertAlmostEqual(loss_vals[i], loss_vals_fused[i], delta=1e-5) + + def test_fuse_bn_add_act(self): + place = fluid.CUDAPlace(0) + self.check(place, use_cuda=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py b/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py index 
c7476a8a74256d8eb656778c945c96ee0aa88df4..c176ff09e024db90ea5a81bcf2afe18939c4f538 100644 --- a/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py +++ b/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py @@ -16,6 +16,7 @@ from __future__ import print_function import ast import gast +import sys import textwrap import unittest @@ -143,47 +144,60 @@ class TestPythonCompatibility(unittest.TestCase): """ self._check_compatibility(source, target) - def test_with(self): - """ - The fileds `context_expr/optional_vars` of `ast.With` in PY2 - is moved into `ast.With.items.withitem` in PY3. - """ - source = """ - with guard(): - a = 1 - """ - target = """ - with guard_new(): - a = 1 - """ - self._check_compatibility(source, target) - - def test_subscript_Index(self): - source = """ - x = y()[10] - """ - target = """ - x = y()[20] - """ - self._check_compatibility(source, target) - - def test_subscript_Slice(self): - source = """ - x = y()[10:20] - """ - target = """ - x = y()[20:40] - """ - self._check_compatibility(source, target) - - def test_call(self): - source = """ - y = foo(*arg) - """ - target = """ - y = foo(*arg_new) - """ - self._check_compatibility(source, target) + # The 0.3.3 version of gast has a bug in python3.8 that + # would cause the following tests to fail. But this + # problem doesn't affect the use of Paddle's related + # functions, therefore, the following tests would be + # disable in python3.8. + # + # This problem had been fixed and updated to version + # 0.4.1 of gast. + # + # More information please refer to: + # https://github.com/serge-sans-paille/gast/issues/49 + if sys.version_info < (3, 8): + + def test_with(self): + """ + The fileds `context_expr/optional_vars` of `ast.With` in PY2 + is moved into `ast.With.items.withitem` in PY3. 
+ """ + source = """ + with guard(): + a = 1 + """ + target = """ + with guard_new(): + a = 1 + """ + self._check_compatibility(source, target) + + def test_subscript_Index(self): + source = """ + x = y()[10] + """ + target = """ + x = y()[20] + """ + self._check_compatibility(source, target) + + def test_subscript_Slice(self): + source = """ + x = y()[10:20] + """ + target = """ + x = y()[20:40] + """ + self._check_compatibility(source, target) + + def test_call(self): + source = """ + y = foo(*arg) + """ + target = """ + y = foo(*arg_new) + """ + self._check_compatibility(source, target) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py index 720c9f95c251ec54c7e7fa74c8e59e135a8c6be7..39c6fca89ccbef8c61055cd7d1547d3450ae96cb 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py @@ -346,7 +346,7 @@ class TestRaiseNoDoubleGradOp(TestCase): with fluid.dygraph.guard(): x = fluid.layers.ones(shape=[2, 3, 2, 2], dtype='float32') x.stop_gradient = False - y = paddle.fluid.layers.batch_norm(x) + y = paddle.fluid.layers.group_norm(x, groups=1) dx = fluid.dygraph.grad( outputs=[y], inputs=[x], create_graph=True, diff --git a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py index e94157fa047eef065bc4bd0bfb3d6b6c778ea7b9..1ab37aaed23530f7cd886193dbf02d0a94fa61e2 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py @@ -592,7 +592,7 @@ class TestStarGANWithGradientPenalty(unittest.TestCase): cfg = Config(place) dataset = create_mnist_dataset(cfg) - dataset = fluid.io.cache(dataset) + dataset = 
paddle.reader.cache(dataset) static_graph_model = StaticGraphTrainModel(cfg) static_loss = [] diff --git a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py new file mode 100644 index 0000000000000000000000000000000000000000..c75acd7c15b1e96c49fba61b9f8348b62ab73894 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py @@ -0,0 +1,114 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest + +import paddle +import paddle.fluid as fluid +import paddle.fluid.layers as layers +from paddle.fluid.backward import calc_gradient +import numpy as np + + +class ConvBNLayer(fluid.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + use_cudnn=False): + super(ConvBNLayer, self).__init__() + + self._conv = fluid.dygraph.Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + bias_attr=False, + use_cudnn=use_cudnn) + + self._batch_norm = fluid.dygraph.BatchNorm(num_filters, act=act) + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +def create_program(): + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + x = fluid.data(name='img', shape=[-1, 3, 224, 224]) + x.stop_gradient = False + x = fluid.layers.prelu(x, mode="channel") + conv = ConvBNLayer( + num_channels=3, + num_filters=3, + filter_size=1, + act='relu', + use_cudnn=True) + y = conv(x) + x + + loss = fluid.layers.reduce_sum(y) + + sgd = fluid.optimizer.SGD(learning_rate=0.01) + sgd.minimize(loss) + + return loss, main, startup, conv._conv.weight + + +class TestInplaceAddto(unittest.TestCase): + def test_result(self): + def run_program(enable_addto): + np.random.seed(10) + paddle.manual_seed(10) + paddle.framework.random._manual_program_seed(10) + if fluid.core.is_compiled_with_cuda(): + fluid.set_flags({"FLAGS_cudnn_deterministic": True}) + fluid.set_flags({"FLAGS_max_inplace_grad_add": 2}) + loss, main, startup, w = create_program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + exe = fluid.Executor(place) + + strategy = fluid.BuildStrategy() + strategy.enable_addto = enable_addto + compiled = 
fluid.CompiledProgram(main).with_data_parallel( + loss_name=loss.name, build_strategy=strategy) + + exe.run(startup) + img = np.random.uniform(-128, 128, + [8, 3, 224, 224]).astype(np.float32) + for i in range(2): + res = exe.run(compiled, + feed={'img': img}, + fetch_list=[loss.name, w.name]) + return res + + res1, w1 = run_program(True) + res2, w2 = run_program(False) + print(res1, res2) + self.assertTrue(np.array_equal(res1, res2)) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py index 6da37fe4d294b426ba5e494c35396fb01a43a559..6751c8870615438bb051b53f64095e5eb1937892 100644 --- a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py +++ b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py @@ -28,6 +28,8 @@ import unittest from multiprocessing import Process from op_test import OpTest +paddle.enable_static() + def run_pserver(use_cuda, sync_mode, ip, port, trainers, trainer_id): remove_ps_flag(os.getpid()) diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py b/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py index 39cb6651a4b7e7a31c90110771676641a14be292..9634f5af30a4649768ddcaf3ae117548d29b1726 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import paddle import paddle.fluid as fluid -from paddle.fluid.io import multiprocess_reader +from paddle.reader import multiprocess_reader import unittest import numpy as np import six diff --git a/python/paddle/fluid/tests/unittests/test_mv_op.py b/python/paddle/fluid/tests/unittests/test_mv_op.py new file mode 100644 index 0000000000000000000000000000000000000000..6b930e59aa554c57ba1ecae2c01aaefabbe578e9 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_mv_op.py @@ -0,0 +1,94 @@ +#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy as np +import paddle +import paddle.fluid as fluid +import paddle.fluid.layers as layers +import paddle.fluid.core as core +from op_test import OpTest + + +class TestMVOp(OpTest): + def setUp(self): + self.op_type = "mv" + self.init_config() + self.inputs = {'X': self.x, 'Vec': self.vec} + self.outputs = {'Out': np.dot(self.x, self.vec)} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X', 'Vec'], 'Out') + + def init_config(self): + self.x = np.random.random((5, 100)).astype("float64") + self.vec = np.random.random((100)).astype("float64") + + +class TestMVAPI(unittest.TestCase): + def test_dygraph_api_out(self): + paddle.disable_static() + + self.x_data = np.random.random((5, 100)).astype("float64") + self.x = paddle.to_tensor(self.x_data) + self.vec_data = np.random.random((100)).astype("float64") + self.vec = paddle.to_tensor(self.vec_data) + z = paddle.mv(self.x, self.vec) + np_z = z.numpy() + z_expected = np.array(np.dot(self.x_data, self.vec_data)) + self.assertTrue(np.allclose(np_z, z_expected)) + + paddle.enable_static() + + def test_static_graph(self): + paddle.enable_static() + + self.input_x = np.random.rand(5, 100).astype("float64") + self.input_vec = np.random.rand(100).astype("float64") + + data_x = paddle.static.data("x", shape=[5, 100], dtype="float64") + data_vec = paddle.static.data("vec", shape=[100], dtype="float64") + result_vec = paddle.mv(data_x, data_vec) + self.place = paddle.CPUPlace() + exe = paddle.static.Executor(self.place) + res, = exe.run(feed={"x": self.input_x, + "vec": self.input_vec}, + fetch_list=[result_vec]) + z_expected = np.array(np.dot(self.input_x, self.input_vec)) + self.assertTrue(np.allclose(res, z_expected)) + + +class TestMVError(unittest.TestCase): + def test_input(self): + def test_shape(): + paddle.enable_static() + + self.input_x = np.random.rand(5, 100).astype("float64") + 
self.input_vec = np.random.rand(100).astype("float64") + + data_x = paddle.static.data("x", shape=[5, 100], dtype="float64") + data_vec = paddle.static.data( + "vec", shape=[100, 2], dtype="float64") + result_vec = paddle.mv(data_x, data_vec) + + self.assertRaises(ValueError, test_shape) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_nan_inf.py b/python/paddle/fluid/tests/unittests/test_nan_inf.py index d4a971d25bc334906ce1737d963fcf419d452df3..dc9ea5d957aed42e11e978ce6d221c873696030c 100644 --- a/python/paddle/fluid/tests/unittests/test_nan_inf.py +++ b/python/paddle/fluid/tests/unittests/test_nan_inf.py @@ -19,6 +19,9 @@ import unittest import os import sys import subprocess +import paddle + +paddle.enable_static() class TestNanInf(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py index c44ea454271f3aa6cb12451cd85490b57284ea35..a89b9fde7f92de0d493ad87a2f0950548ba8ff98 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py @@ -68,5 +68,67 @@ class TestInstanceNormDoubleGradCheckWithoutParamBias( [x], z, x_init=x_arr, atol=atol, place=place, eps=eps) +class TestBatchNormDoubleGradCheck(unittest.TestCase): + def setUp(self): + self.init_test() + + def init_test(self): + self.data_layout = 'NCHW' + self.use_global_stats = False + self.shape = [2, 3, 4, 5] + + @prog_scope() + def func(self, place): + prog = fluid.Program() + with fluid.program_guard(prog): + np.random.seed() + dtype = "float32" + eps = 0.005 + atol = 1e-4 + x = layers.create_parameter(dtype=dtype, shape=self.shape, name='x') + z = fluid.layers.batch_norm( + input=x, + data_layout=self.data_layout, + use_global_stats=self.use_global_stats) + x_arr = np.random.uniform(-1, 1, self.shape).astype(dtype) + gradient_checker.double_grad_check( + [x], z, x_init=x_arr, atol=atol, 
place=place, eps=eps) + + def test_grad(self): + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + +class TestBatchNormDoubleGradCheckCase1(TestBatchNormDoubleGradCheck): + def init_test(self): + self.data_layout = 'NHWC' + self.use_global_stats = False + self.shape = [2, 3, 4, 5] + + +class TestBatchNormDoubleGradCheckCase2(TestBatchNormDoubleGradCheck): + def init_test(self): + self.data_layout = 'NCHW' + self.use_global_stats = True + self.shape = [2, 3, 4, 5] + + +class TestBatchNormDoubleGradCheckCase3(TestBatchNormDoubleGradCheck): + def init_test(self): + self.data_layout = 'NHWC' + self.use_global_stats = True + self.shape = [2, 3, 4, 5] + + +class TestBatchNormDoubleGradCheckCase4(TestBatchNormDoubleGradCheck): + def init_test(self): + self.data_layout = 'NCHW' + self.use_global_stats = False + self.shape = [2, 2, 3, 4, 5] + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_combination.py b/python/paddle/fluid/tests/unittests/test_py_reader_combination.py index 2d977caa03369840d4ac31344195878a9998f685..624927d809fba4f13e30a62748c8cb6747d4eda3 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_combination.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_combination.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import paddle import paddle.fluid as fluid import unittest import numpy as np @@ -60,8 +61,8 @@ class TestPyReaderCombination(unittest.TestCase): py_reader2 = fluid.io.PyReader( feed_list=[image, label], capacity=16, iterable=True) - reader1 = fluid.io.cache(self.create_reader(self.n1)) - reader2 = fluid.io.cache(self.create_reader(self.n2)) + reader1 = paddle.reader.cache(self.create_reader(self.n1)) + reader2 = paddle.reader.cache(self.create_reader(self.n2)) py_reader1.decorate_batch_generator(reader1, places=place) py_reader2.decorate_batch_generator(reader2, places=place) diff --git a/python/paddle/fluid/tests/unittests/test_reducescatter.py b/python/paddle/fluid/tests/unittests/test_reducescatter.py index 58bcc11cd89c0573bc572008eb174e7070937cad..7c355d46285c59197759689d9a457aec96b89135 100644 --- a/python/paddle/fluid/tests/unittests/test_reducescatter.py +++ b/python/paddle/fluid/tests/unittests/test_reducescatter.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_base import TestDistBase +paddle.enable_static() + class TestReduceScatterOp(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_reducescatter_api.py b/python/paddle/fluid/tests/unittests/test_reducescatter_api.py index 5fa75cc3effe37197195da7555a1a3266e30754b..5a494b5529efbef420c6e65532352fd58cc1db11 100644 --- a/python/paddle/fluid/tests/unittests/test_reducescatter_api.py +++ b/python/paddle/fluid/tests/unittests/test_reducescatter_api.py @@ -16,9 +16,12 @@ from __future__ import print_function import unittest import numpy as np import paddle.fluid as fluid +import paddle from test_collective_base import TestDistBase +paddle.enable_static() + class TestReduceScatterAPI(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_segment_ops.py b/python/paddle/fluid/tests/unittests/test_segment_ops.py new file mode 100644 index 
0000000000000000000000000000000000000000..b58d66676b05524766366d9587d395aadc32a7b4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_segment_ops.py @@ -0,0 +1,202 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import sys +from op_test import OpTest + + +def compute_segment_sum(x, segment_ids): + length = segment_ids[-1] + 1 + target_shape = list(x.shape) + target_shape[0] = length + results = np.zeros(target_shape, dtype=x.dtype) + for index, ids in enumerate(segment_ids): + results[ids, :] += x[index, :] + return results + + +def compute_segment_mean(x, segment_ids): + length = segment_ids[-1] + 1 + target_shape = list(x.shape) + target_shape[0] = length + results = np.zeros(target_shape, dtype=x.dtype) + count = np.zeros(length, dtype=x.dtype) + 1e-8 + for index, ids in enumerate(segment_ids): + results[ids, :] += x[index, :] + count[ids] += 1 + results = results / count.reshape([-1, 1]) + return results + + +def compute_segment_min_max(x, segment_ids, pooltype="MAX"): + length = segment_ids[-1] + 1 + target_shape = list(x.shape) + target_shape[0] = length + gradient = np.zeros_like(x) + results = np.zeros(target_shape, dtype=x.dtype) + last_idx = 0 + current_id = segment_ids[0] + for idx in range(1, len(segment_ids) + 1): + if idx < len(segment_ids): + if segment_ids[idx] == current_id: + continue + sub_x = 
x[last_idx:idx, :] + if pooltype == "MAX": + results[current_id] = np.amax(sub_x, axis=0) + elif pooltype == "MIN": + results[current_id] = np.amin(sub_x, axis=0) + else: + raise ValueError("Invalid pooltype, only MAX, MIN supported!") + gradient[last_idx:idx, :][sub_x == results[current_id]] = 1 + last_idx = idx + if idx < len(segment_ids): + current_id = segment_ids[idx] + + return results, gradient / results.size + + +class TestSegmentOps(OpTest): + def set_data(self): + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) + segment_ids = self.set_segment(len(x), len(x) // 5 + 1) + return x, segment_ids + + def set_segment(self, origin_len, reduce_len): + segment = np.zeros(reduce_len, dtype='int64') + segment = np.random.randint(0, reduce_len, size=[origin_len]) + segment = np.sort(segment) + return segment.astype('int64') + + def compute(self, x, segment_ids): + return compute_segment_sum(x, segment_ids) + + def prepare(self): + self.op_type = "segment_pool" + self.dtype = np.float64 + self.shape = [30, 15] + self.attrs = {"pooltype": "SUM"} + + def setUp(self): + self.prepare() + x, segment_ids = self.set_data() + result = self.compute(x, segment_ids) + self.inputs = { + 'X': x.astype(self.dtype), + 'SegmentIds': segment_ids.astype(np.int64) + } + self.outputs = {'Out': result.astype(self.dtype)} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Out") + + +class TestSegmentSum2(TestSegmentOps): + def prepare(self): + super(TestSegmentSum2, self).prepare() + self.shape = [40, 20] + self.dtype = np.float32 + + def setUp(self): + self.prepare() + x, segment_ids = self.set_data() + result = self.compute(x, segment_ids) + self.inputs = { + 'X': x.astype(self.dtype), + 'SegmentIds': segment_ids.astype(np.int32) + } + self.outputs = {'Out': result.astype(self.dtype)} + + +class TestSegmentMax(TestSegmentOps): + def compute(self, x, segment_ids): + return compute_segment_min_max(x, segment_ids, 
pooltype="MAX") + + def prepare(self): + super(TestSegmentMax, self).prepare() + self.shape = [40, 20] + self.attrs = {'pooltype': "MAX"} + + def setUp(self): + self.prepare() + x, segment_ids = self.set_data() + result, self.gradient = self.compute(x, segment_ids) + self.inputs = { + 'X': x.astype(self.dtype), + 'SegmentIds': segment_ids.astype(np.int32) + } + self.outputs = {'Out': result.astype(self.dtype)} + + def test_check_grad(self): + self.check_grad(["X"], "Out", user_defined_grads=[self.gradient]) + + +class TestSegmentMax2(TestSegmentMax): + def prepare(self): + super(TestSegmentMax2, self).prepare() + self.dtype = np.float32 + + +class TestSegmentMin(TestSegmentMax): + def compute(self, x, segment_ids): + return compute_segment_min_max(x, segment_ids, pooltype="MIN") + + def prepare(self): + super(TestSegmentMin, self).prepare() + self.attrs = {'pooltype': "MIN"} + + +class TestSegmentMin2(TestSegmentMin): + def prepare(self): + super(TestSegmentMin2, self).prepare() + self.dtype = np.float32 + + +class TestSegmentMean(TestSegmentOps): + def compute(self, x, segment_ids): + return compute_segment_mean(x, segment_ids) + + def prepare(self): + super(TestSegmentMean, self).prepare() + self.shape = [40, 20] + self.attrs = {'pooltype': "MEAN"} + + def setUp(self): + self.prepare() + x, segment_ids = self.set_data() + result = self.compute(x, segment_ids) + self.inputs = {'X': x, 'SegmentIds': segment_ids} + self.outputs = { + 'Out': result, + 'SummedIds': compute_segment_sum( + np.ones([len(x), 1]).astype(self.dtype), segment_ids) + } + + +class TestSegmentMean2(TestSegmentMean): + def prepare(self): + super(TestSegmentMean2, self).prepare() + self.dtype = np.float32 + self.shape = [30, 20] + self.attrs = {'pooltype': "MEAN"} + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py b/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py index 
54e7765c0fb76844a6123fceea6c1ef79dc0c2bf..b9d96f329b5bb48f7167d005f11f64136fdf5d01 100644 --- a/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py @@ -63,28 +63,28 @@ class TestTopkOp(OpTest): self.check_grad(set(['X']), 'Out') -class TestTopOp1(TestTopkOp): +class TestTopkOp1(TestTopkOp): def init_args(self): self.k = 3 self.axis = 0 self.largest = True -class TestTopOp2(TestTopkOp): +class TestTopkOp2(TestTopkOp): def init_args(self): self.k = 3 self.axis = 0 self.largest = False -class TestTopOp3(TestTopkOp): +class TestTopkOp3(TestTopkOp): def init_args(self): self.k = 4 self.axis = 0 self.largest = False -class TestTopOp4(TestTopkOp): +class TestTopkOp4(TestTopkOp): def init_args(self): self.k = 4 self.axis = 0 @@ -189,6 +189,8 @@ class TestTopKAPI(unittest.TestCase): result1 = paddle.topk(input_tensor, k=2) result2 = paddle.topk(input_tensor, k=2, axis=-1) result3 = paddle.topk(input_tensor, k=k_tensor, axis=1) + self.assertEqual(result3[0].shape, (6, -1, 8)) + self.assertEqual(result3[1].shape, (6, -1, 8)) result4 = paddle.topk(input_tensor, k=2, axis=1, largest=False) result5 = paddle.topk(input_tensor, k=2, axis=-1, largest=False) result6 = paddle.topk(large_input_tensor, k=1, axis=-1) @@ -239,6 +241,15 @@ class TestTopKAPI(unittest.TestCase): self.run_dygraph(place) self.run_static(place) + def test_errors(self): + paddle.disable_static() + x = paddle.to_tensor([1, 2, 3]) + with self.assertRaises(BaseException): + paddle.topk(x, k=-1) + + with self.assertRaises(BaseException): + paddle.topk(x, k=0) + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/inference/__init__.py b/python/paddle/inference/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c388301ec3408e436eacb2567e8e529d0bbc03bb --- /dev/null +++ b/python/paddle/inference/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ..fluid.inference import Config, DataType, PlaceType, PrecisionType, Tensor, \ + Predictor, create_predictor, get_version, get_num_bytes_of_data_type, PredictorPool diff --git a/python/paddle/io/__init__.py b/python/paddle/io/__init__.py index b67779cb2a2ae699c8206dc717670bf6eb23b25e..6f0b0f3c9c135e00a01c69869742a40ff615a96b 100644 --- a/python/paddle/io/__init__.py +++ b/python/paddle/io/__init__.py @@ -31,15 +31,6 @@ __all__ = [ 'set_program_state', 'load_inference_model', 'save_inference_model', - 'batch', - 'shuffle', - 'buffered', - 'cache', - 'chain', - 'firstn', - 'compose', - 'map_readers', - 'xmap_readers' ] from ..fluid.io import DataLoader @@ -47,4 +38,3 @@ from ..fluid.dataloader import Dataset, IterableDataset, BatchSampler, get_worke TensorDataset, Sampler, SequenceSampler, RandomSampler, DistributedBatchSampler from ..fluid.io import load, save, load_program_state, set_program_state, \ load_inference_model, save_inference_model, batch -from ..reader import shuffle, buffered, cache, chain, firstn, compose, map_readers, xmap_readers diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index 042625a3dbd6b07487d6f77442621959f7492af6..1eb9167d0352f36bfcb87db79ba23dce14bac507 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -713,7 +713,7 @@ def max_pool2d(x, 'data_format', data_format) return output - op_type = 
'max_pool2d_with_index' if data_format == "NCHW" else "max_pool2d" + op_type = 'max_pool2d_with_index' if data_format == "NCHW" else "pool2d" helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) @@ -839,7 +839,7 @@ def max_pool3d(x, 'data_format', data_format) return output - op_type = "max_pool3d_with_index" if data_format == "NCDHW" else "max_pool3d" + op_type = "max_pool3d_with_index" if data_format == "NCDHW" else "pool3d" helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index 708aaa788f60d56a2adb41c8a571079354b3c192..24cebf8e6e6388a2d1e9711e3f862090918876a3 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -282,14 +282,13 @@ class Adam(Optimizer): for param in self._parameter_list: if not param.trainable: continue - if hasattr( - param, "_is_sparse" - ) and param._is_sparse and self.regularization is not None: - raise RuntimeError( - "Adam don't support weight_decay with sparse parameters, please set it to None." - ) if param._grad_ivar() is not None: grad_var = param._grad_ivar() + if hasattr(grad_var, "_is_sparse") and grad_var._is_sparse( + ) and self.regularization is not None: + raise RuntimeError( + "Adam don't support weight_decay with sparse parameters, please set it to None." 
+ ) params_grads.append((param, grad_var)) optimize_ops = self._apply_optimize( diff --git a/python/paddle/reader/__init__.py b/python/paddle/reader/__init__.py index 29337cf06682f5f5bf8e0e6d9b1bf8ec32512d45..881cfd813141653fed8e7d9107cdebe54c9df791 100644 --- a/python/paddle/reader/__init__.py +++ b/python/paddle/reader/__init__.py @@ -66,4 +66,4 @@ An example implementation for multiple item data reader creator: import paddle.reader.decorator from paddle.reader.decorator import * -__all__ = decorator.__all__ +__all__ = [] diff --git a/python/paddle/reader/decorator.py b/python/paddle/reader/decorator.py index aadfb3f49ed61367b9502e1a00ad5b9c027a32b7..8ee4d73ea847ea116ea4401b5b05ef1b925950fe 100644 --- a/python/paddle/reader/decorator.py +++ b/python/paddle/reader/decorator.py @@ -42,7 +42,7 @@ import paddle.compat as cpt # For more details, please refer to # https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods # https://bugs.python.org/issue33725 -if sys.version_info >= (3, 8): +if sys.version_info >= (3, 8) and sys.platform == 'darwin': fork_context = multiprocessing.get_context('fork') else: fork_context = multiprocessing @@ -62,6 +62,22 @@ def cache(reader): Returns: generator: a decorated reader object which yields data from cached memory. + + Examples: + .. code-block:: python + + import paddle + + def reader(): + for i in range(3): + yield i + + # All data is cached into memory + cached_reader = paddle.io.cache(reader) + + # Output: 0 1 2 + for i in cached_reader(): + print(i) """ all_data = tuple(reader()) @@ -296,12 +312,28 @@ def buffered(reader, size): buffer. Reading from the buffered data reader will proceed as long as the buffer is not empty. - :param reader: the data reader to read from. - :type reader: callable - :param size: max buffer size. - :type size: int + Args: + reader(generator): the data reader to read from. + size(int): max buffer size. + + Returns: + generator: the buffered data reader. + + Examples: + .. 
code-block:: python - :returns: the buffered data reader. + import paddle + + def reader(): + for i in range(3): + yield i + + # Create a buffered reader, and the buffer size is 2. + buffered_reader = paddle.io.buffered(reader, 2) + + # Output: 0 1 2 + for i in buffered_reader(): + print(i) """ class EndSignal(): diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py index a713663e1822d4af2d09efb2986aeb513930bbc0..cec989fba8b0887499876f94bb862f72ba0e18d5 100755 --- a/python/paddle/tensor/__init__.py +++ b/python/paddle/tensor/__init__.py @@ -56,6 +56,7 @@ from .linalg import cholesky #DEFINE_ALIAS # from .linalg import tensordot #DEFINE_ALIAS from .linalg import bmm #DEFINE_ALIAS from .linalg import histogram #DEFINE_ALIAS +from .linalg import mv #DEFINE_ALIAS from .logic import equal #DEFINE_ALIAS from .logic import greater_equal #DEFINE_ALIAS from .logic import greater_than #DEFINE_ALIAS @@ -170,7 +171,6 @@ from .math import prod #DEFINE_ALIAS from .random import standard_normal from .random import normal from .random import uniform #DEFINE_ALIAS -from .random import shuffle #DEFINE_ALIAS from .random import randn #DEFINE_ALIAS from .random import rand #DEFINE_ALIAS from .random import randint #DEFINE_ALIAS diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index 67e3ce21ffba0c312eb01163cdf32f87c6433ee1..f27cfba487d78f284408815eaba933b18f303df9 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -32,7 +32,8 @@ __all__ = [ 'cholesky', # 'tensordot', 'bmm', - 'histogram' + 'histogram', + 'mv' ] @@ -920,3 +921,64 @@ def histogram(input, bins=100, min=0, max=0): 'min': min, 'max': max}) return out + + +def mv(x, vec, name=None): + """ + Performs a matrix-vector product of the matrix x and the vector vec. + + Args: + x (Variable): A tensor with shape :math:`[M, N]` , The data type of the input Tensor x + should be one of float32, float64. 
+ vec (Variable): A tensor with shape :math:`[N]` , The data type of the input Tensor x + should be one of float32, float64. + name(str, optional): The default value is None. Normally there is no need for user to set this + property. For more information, please refer to :ref:`api_guide_Name`. + + Returns: + Tensor: The tensor which is producted by x and vec. + + Examples: + .. code-block:: python + + # x: [M, N], vec: [N] + # paddle.mv(x, vec) # out: [M] + + import numpy as np + import paddle + + paddle.disable_static() + x_data = np.array([[2, 1, 3], [3, 0, 1]]).astype("float64") + x = paddle.to_tensor(x_data) + vec_data = np.array([3, 5, 1]) + vec = paddle.to_tensor(vec_data).astype("float64") + out = paddle.mv(x, vec) + paddle.enable_static() + """ + if in_dygraph_mode(): + out = core.ops.mv(x, vec) + return out + + def __check_input(x, vec): + var_names = {'x': x, 'vec': vec} + for name, val in var_names.items(): + check_variable_and_dtype(val, name, ['float32', 'float64'], 'mv') + x_shape = list(x.shape) + vec_shape = list(vec.shape) + if len(x_shape) != 2: + raise ValueError( + "x should be 2-dimensional. But received x's dimention: {}". + format(x_shape)) + if len(vec_shape) != 1: + raise ValueError( + "vec should be 1-dimensional. But received vec's dimention: {}". 
+ format(vec_shape)) + + __check_input(x, vec) + + helper = LayerHelper('mv', **locals()) + out = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type='mv', inputs={'X': x, + 'Vec': vec}, outputs={'Out': out}) + return out diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py index b38a1d0f5b7e92b0eac907170aad76a2b5c69bc1..9ffd81995eda407740fce03b488375e06a3ae37b 100644 --- a/python/paddle/tensor/random.py +++ b/python/paddle/tensor/random.py @@ -21,14 +21,11 @@ from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtyp from ..fluid.layers import utils import paddle -from ..fluid.io import shuffle #DEFINE_ALIAS - __all__ = [ 'bernoulli', 'standard_normal', 'normal', 'uniform', - 'shuffle', 'randn', 'rand', 'randint', diff --git a/python/paddle/tests/test_dataset_cifar.py b/python/paddle/tests/test_dataset_cifar.py index 2ecc41c3f0a81a56cc34e826483ea4f5cc6681d9..672de7ae8e94eceded92dfa0e77621eedac0e3b0 100644 --- a/python/paddle/tests/test_dataset_cifar.py +++ b/python/paddle/tests/test_dataset_cifar.py @@ -27,8 +27,10 @@ class TestCifar10Train(unittest.TestCase): # long time, randomly check 1 sample idx = np.random.randint(0, 50000) data, label = cifar[idx] - self.assertTrue(len(data.shape) == 1) - self.assertTrue(data.shape[0] == 3072) + self.assertTrue(len(data.shape) == 3) + self.assertTrue(data.shape[0] == 3) + self.assertTrue(data.shape[1] == 32) + self.assertTrue(data.shape[2] == 32) self.assertTrue(0 <= int(label) <= 9) @@ -41,8 +43,10 @@ class TestCifar10Test(unittest.TestCase): # long time, randomly check 1 sample idx = np.random.randint(0, 10000) data, label = cifar[idx] - self.assertTrue(len(data.shape) == 1) - self.assertTrue(data.shape[0] == 3072) + self.assertTrue(len(data.shape) == 3) + self.assertTrue(data.shape[0] == 3) + self.assertTrue(data.shape[1] == 32) + self.assertTrue(data.shape[2] == 32) self.assertTrue(0 <= int(label) <= 9) @@ -55,8 +59,10 @@ class 
TestCifar100Train(unittest.TestCase): # long time, randomly check 1 sample idx = np.random.randint(0, 50000) data, label = cifar[idx] - self.assertTrue(len(data.shape) == 1) - self.assertTrue(data.shape[0] == 3072) + self.assertTrue(len(data.shape) == 3) + self.assertTrue(data.shape[0] == 3) + self.assertTrue(data.shape[1] == 32) + self.assertTrue(data.shape[2] == 32) self.assertTrue(0 <= int(label) <= 99) @@ -69,8 +75,10 @@ class TestCifar100Test(unittest.TestCase): # long time, randomly check 1 sample idx = np.random.randint(0, 10000) data, label = cifar[idx] - self.assertTrue(len(data.shape) == 1) - self.assertTrue(data.shape[0] == 3072) + self.assertTrue(len(data.shape) == 3) + self.assertTrue(data.shape[0] == 3) + self.assertTrue(data.shape[1] == 32) + self.assertTrue(data.shape[2] == 32) self.assertTrue(0 <= int(label) <= 99) diff --git a/python/paddle/tests/test_datasets.py b/python/paddle/tests/test_datasets.py index 1e50ff60aa5c3039c21d6e1e3a714c32000462c7..1e0d6dbacf6c4c5a781aaa40440921fe1a281ca9 100644 --- a/python/paddle/tests/test_datasets.py +++ b/python/paddle/tests/test_datasets.py @@ -103,12 +103,14 @@ class TestMNISTTest(unittest.TestCase): class TestMNISTTrain(unittest.TestCase): def test_main(self): - mnist = MNIST(mode='train', chw_format=False) + mnist = MNIST(mode='train') self.assertTrue(len(mnist) == 60000) for i in range(len(mnist)): image, label = mnist[i] - self.assertTrue(image.shape[0] == 784) + self.assertTrue(image.shape[0] == 1) + self.assertTrue(image.shape[1] == 28) + self.assertTrue(image.shape[2] == 28) self.assertTrue(label.shape[0] == 1) self.assertTrue(0 <= int(label) <= 9) diff --git a/python/paddle/tests/test_text.py b/python/paddle/tests/test_text.py index 43968896c18bda6445de46773899128e1bedff53..fa83b0cc6f3408bb8fdf33522b17664e35b8f503 100644 --- a/python/paddle/tests/test_text.py +++ b/python/paddle/tests/test_text.py @@ -28,6 +28,8 @@ from paddle import Model, set_device from paddle.static import InputSpec as Input 
from paddle.text import * +paddle.enable_static() + class ModuleApiTest(unittest.TestCase): @classmethod diff --git a/python/paddle/text/datasets/uci_housing.py b/python/paddle/text/datasets/uci_housing.py index a0d465eb1775431ffa0527dfae8031bebd6fc340..a8dfbc44a97127dd074ef5cbfc727aa535d56872 100644 --- a/python/paddle/text/datasets/uci_housing.py +++ b/python/paddle/text/datasets/uci_housing.py @@ -17,6 +17,7 @@ from __future__ import print_function import six import numpy as np +import paddle from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download @@ -88,6 +89,8 @@ class UCIHousing(Dataset): # read dataset into memory self._load_data() + self.dtype = paddle.get_default_dtype() + def _load_data(self, feature_num=14, ratio=0.8): data = np.fromfile(self.data_file, sep=' ') data = data.reshape(data.shape[0] // feature_num, feature_num) @@ -103,7 +106,8 @@ class UCIHousing(Dataset): def __getitem__(self, idx): data = self.data[idx] - return np.array(data[:-1]), np.array(data[-1:]) + return np.array(data[:-1]).astype(self.dtype), \ + np.array(data[-1:]).astype(self.dtype) def __len__(self): return len(self.data) diff --git a/python/paddle/vision/datasets/cifar.py b/python/paddle/vision/datasets/cifar.py index 1193be26da56780058beadfe15640bc76533114a..c531f3d0e4e3d276d9831b2ac868af9b0761107d 100644 --- a/python/paddle/vision/datasets/cifar.py +++ b/python/paddle/vision/datasets/cifar.py @@ -19,6 +19,7 @@ import numpy as np import six from six.moves import cPickle as pickle +import paddle from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download @@ -113,6 +114,8 @@ class Cifar10(Dataset): # read dataset into memory self._load_data() + self.dtype = paddle.get_default_dtype() + def _init_url_md5_flag(self): self.data_url = CIFAR10_URL self.data_md5 = CIFAR10_MD5 @@ -139,9 +142,10 @@ class Cifar10(Dataset): def __getitem__(self, idx): image, label = self.data[idx] + image = np.reshape(image, [3, 32, 32]) 
if self.transform is not None: image = self.transform(image) - return image, label + return image.astype(self.dtype), np.array(label).astype('int64') def __len__(self): return len(self.data) diff --git a/python/paddle/vision/datasets/flowers.py b/python/paddle/vision/datasets/flowers.py index 1c0f41123e2313d9db6f5e846d133ecdebc7f1af..2251333fd8d281bd07402fbbf3a05fea47a69cce 100644 --- a/python/paddle/vision/datasets/flowers.py +++ b/python/paddle/vision/datasets/flowers.py @@ -21,6 +21,7 @@ import numpy as np import scipy.io as scio from PIL import Image +import paddle from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download @@ -104,6 +105,8 @@ class Flowers(Dataset): # read dataset into memory self._load_anno() + self.dtype = paddle.get_default_dtype() + def _load_anno(self): self.name2mem = {} self.data_tar = tarfile.open(self.data_file) @@ -124,7 +127,7 @@ class Flowers(Dataset): if self.transform is not None: image = self.transform(image) - return image, label.astype('int64') + return image.astype(self.dtype), label.astype('int64') def __len__(self): return len(self.indexes) diff --git a/python/paddle/vision/datasets/folder.py b/python/paddle/vision/datasets/folder.py index 8a3053abefc1b28ba36150a1ff68a4dd4c3469c9..19d913504bdf7b09de9d888c0caa5cc1c049ac57 100644 --- a/python/paddle/vision/datasets/folder.py +++ b/python/paddle/vision/datasets/folder.py @@ -15,6 +15,7 @@ import os import sys +import paddle from paddle.io import Dataset from paddle.utils import try_import @@ -143,6 +144,8 @@ class DatasetFolder(Dataset): self.samples = samples self.targets = [s[1] for s in samples] + self.dtype = paddle.get_default_dtype() + def _find_classes(self, dir): """ Finds the class folders in a dataset. 
diff --git a/python/paddle/vision/datasets/mnist.py b/python/paddle/vision/datasets/mnist.py index a98561333921d182c0b3a3f486c90a94e79b6a3d..16c39e56ef0d65ba89bb611c62e0e957b840a826 100644 --- a/python/paddle/vision/datasets/mnist.py +++ b/python/paddle/vision/datasets/mnist.py @@ -19,6 +19,7 @@ import gzip import struct import numpy as np +import paddle from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download @@ -44,8 +45,6 @@ class MNIST(Dataset): :attr:`download` is True. Default None label_path(str): path to label file, can be set None if :attr:`download` is True. Default None - chw_format(bool): If set True, the output shape is [1, 28, 28], - otherwise, output shape is [1, 784]. Default True. mode(str): 'train' or 'test' mode. Default 'train'. download(bool): whether to download dataset automatically if :attr:`image_path` :attr:`label_path` is not set. Default True @@ -70,14 +69,12 @@ class MNIST(Dataset): def __init__(self, image_path=None, label_path=None, - chw_format=True, mode='train', transform=None, download=True): assert mode.lower() in ['train', 'test'], \ "mode should be 'train' or 'test', but got {}".format(mode) self.mode = mode.lower() - self.chw_format = chw_format self.image_path = image_path if self.image_path is None: assert download, "image_path is not set and downloading automatically is disabled" @@ -99,6 +96,8 @@ class MNIST(Dataset): # read dataset into memory self._parse_dataset() + self.dtype = paddle.get_default_dtype() + def _parse_dataset(self, buffer_size=100): self.images = [] self.labels = [] @@ -139,10 +138,6 @@ class MNIST(Dataset): cols)).astype('float32') offset_img += struct.calcsize(fmt_images) - images = images / 255.0 - images = images * 2.0 - images = images - 1.0 - for i in range(buffer_size): self.images.append(images[i, :]) self.labels.append( @@ -150,11 +145,10 @@ class MNIST(Dataset): def __getitem__(self, idx): image, label = self.images[idx], self.labels[idx] - if self.chw_format: 
- image = np.reshape(image, [1, 28, 28]) + image = np.reshape(image, [1, 28, 28]) if self.transform is not None: image = self.transform(image) - return image, label + return image.astype(self.dtype), label.astype('int64') def __len__(self): return len(self.labels) diff --git a/python/paddle/vision/datasets/voc2012.py b/python/paddle/vision/datasets/voc2012.py index ae14ea3016363c828d17ba34aca8e1a6663ecf76..5fc9d7c38153e5d8c10da5275f3bb11164b12e54 100644 --- a/python/paddle/vision/datasets/voc2012.py +++ b/python/paddle/vision/datasets/voc2012.py @@ -19,6 +19,7 @@ import tarfile import numpy as np from PIL import Image +import paddle from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download @@ -96,6 +97,8 @@ class VOC2012(Dataset): # read dataset into memory self._load_anno() + self.dtype = paddle.get_default_dtype() + def _load_anno(self): self.name2mem = {} self.data_tar = tarfile.open(self.data_file) @@ -127,7 +130,7 @@ class VOC2012(Dataset): label = np.array(label) if self.transform is not None: data = self.transform(data) - return data, label + return data.astype(self.dtype), label.astype(self.dtype) def __len__(self): return len(self.data) diff --git a/python/setup.py.in b/python/setup.py.in index d85a23a5edd31f77514b468731097759f47533c1..467c5cb86779b80e51794cf800226d64534e8676 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -155,6 +155,7 @@ packages=['paddle', 'paddle.distributed.fleet.utils', 'paddle.framework', 'paddle.jit', + 'paddle.inference', 'paddle.fluid', 'paddle.fluid.inference', 'paddle.fluid.dygraph', diff --git a/setup.py b/setup.py deleted file mode 100644 index af558c2ef0b42b68e47fe98ebd626c9b9034bef9..0000000000000000000000000000000000000000 --- a/setup.py +++ /dev/null @@ -1,577 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import subprocess -import os -import os.path -import errno -import re -import shutil -import sys -import fnmatch -import errno -import platform - -from contextlib import contextmanager -from setuptools import Command -from setuptools import setup, Distribution, Extension -from setuptools.command.install import install as InstallCommandBase - - -class BinaryDistribution(Distribution): - def has_ext_modules(foo): - return True - - -RC = 0 - -ext_name = '.dll' if os.name == 'nt' else ('.dylib' if sys.platform == 'darwin' - else '.so') - - -def git_commit(): - try: - cmd = ['git', 'rev-parse', 'HEAD'] - git_commit = subprocess.Popen( - cmd, stdout=subprocess.PIPE, - cwd="@PADDLE_SOURCE_DIR@").communicate()[0].strip() - except: - git_commit = 'Unknown' - git_commit = git_commit.decode() - return str(git_commit) - - -def _get_version_detail(idx): - assert idx < 3, "vesion info consists of %(major)d.%(minor)d.%(patch)d, \ - so detail index must less than 3" - - if re.match('@TAG_VERSION_REGEX@', '@PADDLE_VERSION@'): - version_details = '@PADDLE_VERSION@'.split('.') - - if len(version_details) >= 3: - return version_details[idx] - - return 0 - - -def get_major(): - return int(_get_version_detail(0)) - - -def get_minor(): - return int(_get_version_detail(1)) - - -def get_patch(): - return str(_get_version_detail(2)) - - -def is_taged(): - try: - cmd = [ - 'git', 'describe', '--exact-match', '--tags', 'HEAD', '2>/dev/null' - ] - git_tag = subprocess.Popen( - cmd, stdout=subprocess.PIPE, - cwd="@PADDLE_SOURCE_DIR@").communicate()[0].strip() - git_tag = 
git_tag.decode() - except: - return False - - if str(git_tag).replace('v', '') == '@PADDLE_VERSION@': - return True - else: - return False - - -def write_version_py(filename='paddle/version.py'): - cnt = '''# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY -# -full_version = '%(major)d.%(minor)d.%(patch)s' -major = '%(major)d' -minor = '%(minor)d' -patch = '%(patch)s' -rc = '%(rc)d' -istaged = %(istaged)s -commit = '%(commit)s' -with_mkl = '%(with_mkl)s' - -def show(): - if istaged: - print('full_version:', full_version) - print('major:', major) - print('minor:', minor) - print('patch:', patch) - print('rc:', rc) - else: - print('commit:', commit) - -def mkl(): - return with_mkl -''' - commit = git_commit() - with open(filename, 'w') as f: - f.write(cnt % { - 'major': get_major(), - 'minor': get_minor(), - 'patch': get_patch(), - 'rc': RC, - 'version': '${PADDLE_VERSION}', - 'commit': commit, - 'istaged': is_taged(), - 'with_mkl': '@WITH_MKL@' - }) - - -write_version_py(filename='@PADDLE_BINARY_DIR@/python/paddle/version.py') - - -def write_distributed_training_mode_py( - filename='paddle/fluid/incubate/fleet/parameter_server/version.py'): - cnt = '''from __future__ import print_function - -# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY - -from paddle.fluid.incubate.fleet.base.mode import Mode - -BUILD_MODE=Mode.%(mode)s - -def is_transpiler(): - return Mode.TRANSPILER == BUILD_MODE - -''' - - dirname = os.path.dirname(filename) - - try: - os.makedirs(dirname) - except OSError as e: - if e.errno != errno.EEXIST: - raise - - with open(filename, 'w') as f: - f.write(cnt % - {'mode': 'PSLIB' if '${WITH_PSLIB}' == 'ON' else 'TRANSPILER'}) - - -write_distributed_training_mode_py( - filename='@PADDLE_BINARY_DIR@/python/paddle/fluid/incubate/fleet/parameter_server/version.py' -) - -packages = [ - 'paddle', - 'paddle.libs', - 'paddle.utils', - 'paddle.dataset', - 'paddle.reader', - 'paddle.distributed', - 'paddle.incubate', - 'paddle.incubate.complex', - 
'paddle.incubate.complex.tensor', - 'paddle.distributed.fleet', - 'paddle.distributed.fleet.base', - 'paddle.distributed.fleet.meta_optimizers', - 'paddle.distributed.fleet.runtime', - 'paddle.distributed.fleet.dataset', - 'paddle.distributed.fleet.metrics', - 'paddle.distributed.fleet.proto', - 'paddle.distributed.fleet.utils', - 'paddle.framework', - 'paddle.jit', - 'paddle.fluid', - 'paddle.fluid.inference', - 'paddle.fluid.dygraph', - 'paddle.fluid.dygraph.dygraph_to_static', - 'paddle.fluid.dygraph.amp', - 'paddle.fluid.proto', - 'paddle.fluid.proto.profiler', - 'paddle.fluid.distributed', - 'paddle.fluid.layers', - 'paddle.fluid.dataloader', - 'paddle.fluid.contrib', - 'paddle.fluid.contrib.decoder', - 'paddle.fluid.contrib.quantize', - 'paddle.fluid.contrib.reader', - 'paddle.fluid.contrib.slim', - 'paddle.fluid.contrib.slim.quantization', - 'paddle.fluid.contrib.slim.quantization.imperative', - 'paddle.fluid.contrib.utils', - 'paddle.fluid.contrib.extend_optimizer', - 'paddle.fluid.contrib.mixed_precision', - 'paddle.fluid.contrib.layers', - 'paddle.fluid.transpiler', - 'paddle.fluid.transpiler.details', - 'paddle.fluid.incubate', - 'paddle.fluid.incubate.data_generator', - 'paddle.fluid.incubate.fleet', - 'paddle.fluid.incubate.checkpoint', - 'paddle.fluid.incubate.fleet.base', - 'paddle.fluid.incubate.fleet.parameter_server', - 'paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler', - 'paddle.fluid.incubate.fleet.parameter_server.pslib', - 'paddle.fluid.incubate.fleet.parameter_server.ir', - 'paddle.fluid.incubate.fleet.collective', - 'paddle.fluid.incubate.fleet.utils', - 'paddle.hapi', - 'paddle.vision', - 'paddle.vision.models', - 'paddle.vision.transforms', - 'paddle.vision.datasets', - 'paddle.text', - 'paddle.text.datasets', - 'paddle.incubate', - 'paddle.io', - 'paddle.optimizer', - 'paddle.nn', - 'paddle.nn.functional', - 'paddle.nn.layer', - 'paddle.nn.initializer', - 'paddle.nn.utils', - 'paddle.metric', - 'paddle.static', - 
'paddle.static.nn', - 'paddle.tensor', -] - -with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f: - setup_requires = f.read().splitlines() - -# Note(wangzhongpu): -# When compiling paddle under python36, the dependencies belonging to python2.7 will be imported, resulting in errors when installing paddle -if sys.version_info >= (3, 6) and sys.version_info < (3, 7): - setup_requires_tmp = [] - for setup_requires_i in setup_requires: - if "<\"3.6\"" in setup_requires_i or "<\"3.5\"" in setup_requires_i or "<=\"3.5\"" in setup_requires_i: - continue - setup_requires_tmp += [setup_requires_i] - setup_requires = setup_requires_tmp -if sys.version_info >= (3, 5) and sys.version_info < (3, 6): - setup_requires_tmp = [] - for setup_requires_i in setup_requires: - if "<\"3.5\"" in setup_requires_i: - continue - setup_requires_tmp += [setup_requires_i] - setup_requires = setup_requires_tmp -if sys.version_info >= (3, 7): - setup_requires_tmp = [] - for setup_requires_i in setup_requires: - if "<\"3.6\"" in setup_requires_i or "<=\"3.6\"" in setup_requires_i or "<\"3.5\"" in setup_requires_i or "<=\"3.5\"" in setup_requires_i or "<\"3.7\"" in setup_requires_i: - continue - setup_requires_tmp += [setup_requires_i] - setup_requires = setup_requires_tmp - -if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: - setup_requires += ['opencv-python'] - -# the prefix is sys.prefix which should always be usr -paddle_bins = '' - -if not '${WIN32}': - paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/scripts/paddle'] -package_data = { - 'paddle.fluid': - ['${FLUID_CORE_NAME}' + ('.so' if os.name != 'nt' else '.pyd')] -} -if '${HAS_NOAVX_CORE}' == 'ON': - package_data['paddle.fluid'] += [ - 'core_noavx' + ('.so' if os.name != 'nt' else '.pyd') - ] - -package_dir = { - '': '${PADDLE_BINARY_DIR}/python', - # The paddle.fluid.proto will be generated while compiling. - # So that package points to other directory. 
- 'paddle.fluid.proto.profiler': '${PADDLE_BINARY_DIR}/paddle/fluid/platform', - 'paddle.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework', - 'paddle.fluid': '${PADDLE_BINARY_DIR}/python/paddle/fluid', -} - -# put all thirdparty libraries in paddle.libs -libs_path = '${PADDLE_BINARY_DIR}/python/paddle/libs' - -package_data['paddle.libs'] = [] -package_data['paddle.libs'] = [('libwarpctc' - if os.name != 'nt' else 'warpctc') + ext_name] -shutil.copy('${WARPCTC_LIBRARIES}', libs_path) - -if '${WITH_MKL}' == 'ON': - shutil.copy('${MKLML_SHARED_LIB}', libs_path) - shutil.copy('${MKLML_SHARED_IOMP_LIB}', libs_path) - package_data['paddle.libs'] += [ - ('libmklml_intel' if os.name != 'nt' else 'mklml') + ext_name, - ('libiomp5' if os.name != 'nt' else 'libiomp5md') + ext_name - ] -else: - if os.name == 'nt': - # copy the openblas.dll - shutil.copy('${OPENBLAS_SHARED_LIB}', libs_path) - package_data['paddle.libs'] += ['openblas' + ext_name] - -if '${WITH_LITE}' == 'ON': - shutil.copy('${LITE_SHARED_LIB}', libs_path) - package_data['paddle.libs'] += ['libpaddle_full_api_shared' + ext_name] - -if '${WITH_PSLIB}' == 'ON': - shutil.copy('${PSLIB_LIB}', libs_path) - if os.path.exists('${PSLIB_VERSION_PY}'): - shutil.copy( - '${PSLIB_VERSION_PY}', - '${PADDLE_BINARY_DIR}/python/paddle/fluid/incubate/fleet/parameter_server/pslib/' - ) - package_data['paddle.libs'] += ['libps' + ext_name] - -if '${WITH_MKLDNN}' == 'ON': - if '${CMAKE_BUILD_TYPE}' == 'Release' and os.name != 'nt': - # only change rpath in Release mode. - # TODO(typhoonzero): use install_name_tool to patch mkl libs once - # we can support mkl on mac. - # - # change rpath of libdnnl.so.1, add $ORIGIN/ to it. - # The reason is that all thirdparty libraries in the same directory, - # thus, libdnnl.so.1 will find libmklml_intel.so and libiomp5.so. 
- command = "patchelf --set-rpath '$ORIGIN/' ${MKLDNN_SHARED_LIB}" - if os.system(command) != 0: - raise Exception("patch libdnnl.so failed, command: %s" % command) - shutil.copy('${MKLDNN_SHARED_LIB}', libs_path) - if os.name != 'nt': - shutil.copy('${MKLDNN_SHARED_LIB_1}', libs_path) - package_data['paddle.libs'] += ['libmkldnn.so.0', 'libdnnl.so.1'] - else: - package_data['paddle.libs'] += ['mkldnn.dll'] - -if '${WITH_XPU}' == 'ON': - # only change rpath in Release mode, - if '${CMAKE_BUILD_TYPE}' == 'Release': - if os.name != 'nt': - if "@APPLE@" == "1": - command = "install_name_tool -id \"@loader_path/\" ${XPU_API_LIB}" - else: - command = "patchelf --set-rpath '$ORIGIN/' ${XPU_API_LIB}" - if os.system(command) != 0: - raise Exception("patch ${XPU_API_LIB} failed, command: %s" % - command) - shutil.copy('${XPU_API_LIB}', libs_path) - shutil.copy('${XPU_RT_LIB}', libs_path) - shutil.copy('${XPU_SIM_LIB}', libs_path) - package_data['paddle.libs'] += [ - '${XPU_API_LIB_NAME}', '${XPU_RT_LIB_NAME}', '${XPU_SIM_LIB_NAME}' - ] - -# copy libfuild_framework.so to libs -if os.name != 'nt' and sys.platform != 'darwin': - paddle_framework_lib = '${FLUID_FRAMEWORK_SHARED_LIB}' - shutil.copy(paddle_framework_lib, libs_path) - package_data['paddle.libs'] += [ - ('libpaddle_framework' - if os.name != 'nt' else 'paddle_framework') + ext_name - ] - -# remove unused paddle/libs/__init__.py -if os.path.isfile(libs_path + '/__init__.py'): - os.remove(libs_path + '/__init__.py') -package_dir['paddle.libs'] = libs_path - -# change rpath of ${FLUID_CORE_NAME}.ext, add $ORIGIN/../libs/ to it. -# The reason is that libwarpctc.ext, libiomp5.ext etc are in paddle.libs, and -# ${FLUID_CORE_NAME}.ext is in paddle.fluid, thus paddle/fluid/../libs will pointer to above libraries. 
-# This operation will fix https://github.com/PaddlePaddle/Paddle/issues/3213 -if '${CMAKE_BUILD_TYPE}' == 'Release': - if os.name != 'nt': - # only change rpath in Release mode, since in Debug mode, ${FLUID_CORE_NAME}.xx is too large to be changed. - if "@APPLE@" == "1": - command = "install_name_tool -id \"@loader_path/../libs/\" ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so' - else: - command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so' - # The dynamic library compiled under aarch64 is greater than 64M, - # and an oversize error will be reported when using patchelf. - if platform.machine() != 'aarch64': - if os.system(command) != 0: - raise Exception( - "patch ${FLUID_CORE_NAME}.%s failed, command: %s" % - (ext_name, command)) - -ext_modules = [Extension('_foo', ['stub.cc'])] -if os.name == 'nt': - # fix the path separator under windows - fix_package_dir = {} - for k, v in package_dir.items(): - fix_package_dir[k] = v.replace('/', '\\') - package_dir = fix_package_dir - ext_modules = [] -elif sys.platform == 'darwin': - ext_modules = [] - - -def find_files(pattern, root): - for dirpath, _, files in os.walk(root): - for filename in fnmatch.filter(files, pattern): - yield os.path.join(dirpath, filename) - - -headers = ( - list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/framework')) + - list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/imperative')) + - list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/memory')) + - list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/platform')) + - list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/string')) + - list(find_files('*.pb.h', '${PADDLE_BINARY_DIR}/paddle/fluid/platform')) + - list(find_files('*.pb.h', '${PADDLE_BINARY_DIR}/paddle/fluid/framework')) + - list(find_files('*.pb', '${cudaerror_INCLUDE_DIR}')) - + # errorMessage.pb for errormessage - ['${EIGEN_INCLUDE_DIR}/Eigen/Core'] + # eigen - 
list(find_files('*', '${EIGEN_INCLUDE_DIR}/Eigen/src')) + # eigen - list(find_files('*', '${EIGEN_INCLUDE_DIR}/unsupported/Eigen')) + # eigen - list(find_files('*', '${GFLAGS_INSTALL_DIR}/include')) + # gflags - list(find_files('*', '${GLOG_INSTALL_DIR}/include')) + # glog - list(find_files('*', '${BOOST_INCLUDE_DIR}/boost')) + # boost - list(find_files('*', '${XXHASH_INSTALL_DIR}/include')) + # xxhash - list(find_files('*', '${PROTOBUF_INCLUDE_DIR}')) + # protobuf - list(find_files('*', '${DLPACK_INCLUDE_DIR}')) + # dlpack - list(find_files('*.h', '${THREADPOOL_INCLUDE_DIR}'))) # threadpool - -if '${WITH_MKLDNN}' == 'ON': - headers += list(find_files('*', '${MKLDNN_INSTALL_DIR}/include')) # mkldnn - -if '${WITH_GPU}' == 'ON': - headers += list(find_files( - '*.pb', '${cudaerror_INCLUDE_DIR}')) # errorMessage.pb for errormessage - - -class InstallCommand(InstallCommandBase): - def finalize_options(self): - ret = InstallCommandBase.finalize_options(self) - self.install_headers = os.path.join(self.install_purelib, 'paddle', - 'include') - self.install_lib = self.install_platlib - return ret - - -class InstallHeaders(Command): - """Override how headers are copied. 
- """ - description = 'install C/C++ header files' - - user_options = [ - ('install-dir=', 'd', 'directory to install header files to'), - ('force', 'f', 'force installation (overwrite existing files)'), - ] - - boolean_options = ['force'] - - def initialize_options(self): - self.install_dir = None - self.force = 0 - self.outfiles = [] - - def finalize_options(self): - self.set_undefined_options( - 'install', ('install_headers', 'install_dir'), ('force', 'force')) - - def mkdir_and_copy_file(self, header): - if 'pb.h' in header: - install_dir = re.sub('${PADDLE_BINARY_DIR}/', '', header) - elif 'third_party' not in header: - # framework - install_dir = re.sub('@PADDLE_SOURCE_DIR@/', '', header) - else: - # third_party - install_dir = re.sub('${THIRD_PARTY_PATH}', 'third_party', header) - patterns = [ - 'eigen3/src/extern_eigen3', 'boost/src/extern_boost', - 'dlpack/src/extern_dlpack/include', 'install/protobuf/include', - 'install/gflags/include', 'install/glog/include', - 'install/xxhash/include', 'install/mkldnn/include', - 'threadpool/src/extern_threadpool' - ] - for pattern in patterns: - install_dir = re.sub(pattern, '', install_dir) - install_dir = os.path.join(self.install_dir, - os.path.dirname(install_dir)) - if not os.path.exists(install_dir): - self.mkpath(install_dir) - return self.copy_file(header, install_dir) - - def run(self): - # only copy third_party/cudaErrorMessage.pb for cudaErrorMessage on mac or windows - if os.name == 'nt' or sys.platform == 'darwin': - if '${WITH_GPU}' == 'ON': - self.mkdir_and_copy_file( - '${cudaerror_INCLUDE_DIR}/cudaErrorMessage.pb') - return - hdrs = self.distribution.headers - if not hdrs: - return - self.mkpath(self.install_dir) - for header in hdrs: - (out, _) = self.mkdir_and_copy_file(header) - self.outfiles.append(out) - - def get_inputs(self): - return self.distribution.headers or [] - - def get_outputs(self): - return self.outfiles - - -# we redirect setuptools log for non-windows -if sys.platform != 'win32': - 
- @contextmanager - def redirect_stdout(): - f_log = open('${SETUP_LOG_FILE}', 'w') - origin_stdout = sys.stdout - sys.stdout = f_log - yield - f_log = sys.stdout - sys.stdout = origin_stdout - f_log.close() -else: - - @contextmanager - def redirect_stdout(): - yield - - -if '${WITH_GPU}' == 'ON': - os.environ['PACKAGE_NAME'] = "paddlepaddle-gpu" -else: - os.environ['PACKAGE_NAME'] = "paddlepaddle" - -with redirect_stdout(): - setup( - name='${PACKAGE_NAME}', - version='${PADDLE_VERSION}', - description='Parallel Distributed Deep Learning', - install_requires=setup_requires, - packages=packages, - ext_modules=ext_modules, - package_data=package_data, - package_dir=package_dir, - scripts=paddle_bins, - distclass=BinaryDistribution, - headers=headers, - cmdclass={ - 'install_headers': InstallHeaders, - 'install': InstallCommand, - }, - entry_points={ - 'console_scripts': - ['fleetrun = paddle.distributed.fleet.launch:launch'] - }) - -# As there are a lot of files in purelib which causes many logs, -# we don't print them on the screen, and you can open `setup.py.log` -# for the full logs. 
-if os.path.exists('${SETUP_LOG_FILE}'): - os.system('grep -v "purelib" ${SETUP_LOG_FILE}') diff --git a/tools/check_api_approvals.sh b/tools/check_api_approvals.sh index b787ae625017d783a7221006ddd6867c21e238e8..943b8c01e8cc0c0e0a41e9b01951939f454c3181 100644 --- a/tools/check_api_approvals.sh +++ b/tools/check_api_approvals.sh @@ -39,9 +39,9 @@ fi api_spec_diff=`python ${PADDLE_ROOT}/tools/check_api_source_without_core_ops.py ${PADDLE_ROOT}/paddle/fluid/API_DEV.source.md5 ${PADDLE_ROOT}/paddle/fluid/API_PR.source.md5` if [ "$api_spec_diff" != "" ]; then + echo_line="${echo_line}Related APIs: ${api_spec_diff}\n" echo_line="You must have one RD (zhiqiu (Recommend) or phlrain) approval for the api change for the opreator-related api without 'core.ops'.\n" echo_line="${echo_line}For more details, please click [https://github.com/PaddlePaddle/Paddle/wiki/paddle_api_development_manual.md]\n" - echo_line="${echo_line}Related APIs: ${api_spec_diff}\n" check_approval 1 6888866 43953930 fi diff --git a/tools/enforce/count_enforce_by_file.sh b/tools/enforce/count_enforce_by_file.sh index 1858bd0fd17aac7273318ddbb37fc0d9c512f48d..c1e2903c092ce4124c55566679e081dbe3a03445 100644 --- a/tools/enforce/count_enforce_by_file.sh +++ b/tools/enforce/count_enforce_by_file.sh @@ -57,7 +57,14 @@ FILE_WHITE_LIST="\ random_crop_op.h \ elementwise_op_function.cu.h \ fused_elemwise_activation_op.cc \ - auc_op.cu" + auc_op.cu \ + unsqueeze_op.h \ + unsqueeze_op.cc \ + enforce.h \ + errors_test.cc \ + cross_entropy.cu \ + cross_entropy.h \ + unpooling.cu" function count_file_recursively(){ dir_name=$1 diff --git a/tools/test_runner.py b/tools/test_runner.py index 9b9f165e7368364bbb0a78d6dcbbe4be0d6bf98b..bad98f9b5c3e80c80277528cf03519bc9ffac375 100644 --- a/tools/test_runner.py +++ b/tools/test_runner.py @@ -17,12 +17,14 @@ from __future__ import print_function import unittest import os import sys +import paddle import paddle.fluid as fluid import importlib from six.moves import cStringIO def 
main(): + paddle.enable_static() sys.path.append(os.getcwd()) some_test_failed = False for module_name in sys.argv[1:]: @@ -44,6 +46,7 @@ def main(): 'failed\n', buffer.getvalue(), file=sys.stderr) + paddle.disable_static() if some_test_failed: exit(1) diff --git a/tools/wlist.json b/tools/wlist.json index 20f6a9cbaedb391995b3757612ec24f2061a8a81..5591f90da4ba807871663e56fe4e3b11bf2fbd8f 100644 --- a/tools/wlist.json +++ b/tools/wlist.json @@ -105,8 +105,6 @@ "convert_dist_to_sparse_program", "load_persistables_for_increment", "load_persistables_for_inference", - "cache", - "buffered", "xmap_readers", "Metric.reset", "Metric.update",