diff --git a/CMakeLists.txt b/CMakeLists.txt index fb796103350ac4403d4151cf08eb4315bcde68fd..b1554fba5e1fa48b5cbdfe2e5b9f317a4f7fefb3 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,8 +63,29 @@ if(WIN32) set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endif() + endforeach(flag_var) endif() - + + # windows build turn off warnings. + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) + string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}") + endforeach(flag_var) + foreach(flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS) + set(${flag_var} "${${flag_var}} /w") + endforeach(flag_var) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838 /MP") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838 /MP") message(STATUS "Using parallel compiling (/MP)") diff --git a/cmake/external/cryptopp.cmake b/cmake/external/cryptopp.cmake index af5dd0e2c9b2d19929f58363d08e7ff40d43b013..351ef1c7c7aebb698a5d41689352a913d0b950e8 100644 --- a/cmake/external/cryptopp.cmake +++ b/cmake/external/cryptopp.cmake @@ -22,23 +22,8 @@ SET(CRYPTOPP_TAG CRYPTOPP_8_2_0) IF(WIN32) SET(CRYPTOPP_LIBRARIES "${CRYPTOPP_INSTALL_DIR}/lib/cryptopp-static.lib" CACHE FILEPATH 
"cryptopp library." FORCE) - SET(CRYPTOPP_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd") - set(CompilerFlags - CMAKE_CXX_FLAGS - CMAKE_CXX_FLAGS_DEBUG - CMAKE_CXX_FLAGS_RELEASE - CMAKE_C_FLAGS - CMAKE_C_FLAGS_DEBUG - CMAKE_C_FLAGS_RELEASE - ) - foreach(CompilerFlag ${CompilerFlags}) - string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}") - endforeach() ELSE(WIN32) SET(CRYPTOPP_LIBRARIES "${CRYPTOPP_INSTALL_DIR}/lib/libcryptopp.a" CACHE FILEPATH "cryptopp library." FORCE) - SET(CRYPTOPP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) ENDIF(WIN32) set(CRYPTOPP_CMAKE_ARGS ${COMMON_CMAKE_ARGS} @@ -48,7 +33,7 @@ set(CRYPTOPP_CMAKE_ARGS ${COMMON_CMAKE_ARGS} -DCMAKE_INSTALL_LIBDIR=${CRYPTOPP_INSTALL_DIR}/lib -DCMAKE_INSTALL_PREFIX=${CRYPTOPP_INSTALL_DIR} -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - -DCMAKE_CXX_FLAGS=${CRYPTOPP_CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 415e07c75425345f5f1ad29a8544e02a5bfb12e4..ed0bf8396b3faa22350811cf1711f5d1e5b89998 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -90,20 +90,6 @@ macro(safe_set_nvflag flag_name) endif() endmacro() -macro(safe_set_static_flag) # set c_flags and cxx_flags to static or shared - if (BUILD_SHARED_LIBS) - return() # if build shared libs, the flags keep same with '/MD' - endif(BUILD_SHARED_LIBS) - foreach(flag_var - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO - CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE - CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) - if(${flag_var} MATCHES "/MD") - string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") - endif(${flag_var} MATCHES "/MD") - 
endforeach(flag_var) -endmacro() CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS) if(NOT UINT64_MAX_EXISTS) @@ -229,20 +215,3 @@ endforeach() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${SAFE_GPU_COMMON_FLAGS}") - -if(WIN32) - # windows build turn off warnings. - if(MSVC_STATIC_CRT) - safe_set_static_flag() - endif() - foreach(flag_var - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO - CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE - CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) - string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}") - endforeach(flag_var) - foreach(flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS) - set(${flag_var} "${${flag_var}} /w") - endforeach(flag_var) -endif() diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index e3c2409f103d36befed29176b354f77257fea9ec..f19f0eb43d34bd0f3748d7beb1fcf403fa1c9037 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -24,7 +24,7 @@ set(PADDLE_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_install_d # so the generation of static lib is temporarily turned off. if(WIN32) #todo: remove the option - option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." OFF) + option(WITH_STATIC_LIB "Compile demo with static/shared library, default use dynamic." 
OFF) if(NOT PYTHON_EXECUTABLE) FIND_PACKAGE(PythonInterp REQUIRED) endif() @@ -165,25 +165,22 @@ copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_INSTALL_DIR}) set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") if(WIN32) if(WITH_STATIC_LIB) - set(paddle_fluid_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/libpaddle_fluid.lib) + set(paddle_fluid_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/libpaddle_fluid.lib + ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/paddle_fluid.*) else() set(paddle_fluid_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/paddle_fluid.dll - ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/paddle_fluid.lib) + ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/paddle_fluid.lib) endif() + copy(inference_lib_dist + SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_fluid_lib} + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) else(WIN32) set(paddle_fluid_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*) -endif(WIN32) - -if(WIN32 AND NOT WITH_STATIC_LIB) - copy(inference_lib_dist - SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_fluid_lib} - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib - ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) -else() - copy(inference_lib_dist + copy(inference_lib_dist SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_fluid_lib} DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) -endif() +endif(WIN32) copy(inference_lib_dist SRCS ${CMAKE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h @@ -211,12 +208,12 @@ add_custom_target(fluid_lib_dist ALL DEPENDS ${fluid_lib_deps}) set(dst_dir "${PADDLE_INSTALL_DIR}/paddle/fluid") set(module "inference") -if(WIN32 AND NOT WITH_STATIC_LIB) +if(WIN32) copy(fluid_lib_dist 
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/api/paddle_*.h ${paddle_fluid_lib} DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ) -else() + else() copy(fluid_lib_dist SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/api/paddle_*.h ${paddle_fluid_lib} DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} diff --git a/cmake/operators.cmake b/cmake/operators.cmake index aea972ab3db2af862f5230ea6c1eabeed8b611c5..21080fbe8fd2e14cf7fd805e01948f2f28535c22 100644 --- a/cmake/operators.cmake +++ b/cmake/operators.cmake @@ -127,7 +127,8 @@ function(op_library TARGET) "tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op" "fusion_transpose_flatten_concat_op" "fusion_conv_inception_op" "sync_batch_norm_op" "dgc_op" "fused_fc_elementwise_layernorm_op" -"multihead_matmul_op" "fusion_group_op" "fused_bn_activation_op" "fused_embedding_eltwise_layernorm_op" "fusion_gru_op") +"multihead_matmul_op" "fusion_group_op" "fused_bn_activation_op" "fused_embedding_eltwise_layernorm_op" "fusion_gru_op" +"fused_bn_add_activation_op") if ("${TARGET}" STREQUAL "${manual_pybind_op}") set(pybind_flag 1) endif() diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 9dc96fdfe8622e3e78673664637ab50970fe93c6..cf6fcb7b64365b382c648dd83639e0c44670014d 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -44,10 +44,11 @@ add_subdirectory(api) set(STATIC_INFERENCE_API paddle_inference_api analysis_predictor zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder activation_functions ${mkldnn_quantizer_cfg}) -if(WIN32) +# TODO(xingzhaolong, jiweibo): remove this and create_static_lib(paddle_fluid) on windows GPU +if(WIN32 AND WITH_GPU) cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_API}) else() - create_static_lib(paddle_fluid ${fluid_modules} ${STATIC_INFERENCE_API}) + create_static_lib(paddle_fluid 
${fluid_modules} ${STATIC_INFERENCE_API}) endif() if(NOT APPLE AND NOT WIN32) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index ac914700643af2e7e8eca5dcf0bdf8de88e320d6..42e62011f84c18b875a3fa48b95a05f152fb5791 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1048,6 +1048,7 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) { template <> std::unique_ptr CreatePaddlePredictor( const AnalysisConfig &config) { + LOG(WARNING) << "Deprecated. Please use CreatePredictor instead."; return CreatePaddlePredictor( config); } diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index ca0a5148f0622a8c848cb18afb94f600a547bbfe..c78cdf24dec561f5fd5643cb50ee243a58b3ab6a 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -373,6 +373,7 @@ std::unique_ptr CreatePaddlePredictor< template <> std::unique_ptr CreatePaddlePredictor( const NativeConfig &config) { + LOG(WARNING) << "Deprecated. 
Please use CreatePredictor instead."; return CreatePaddlePredictor(config); } diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt index 08a1a5428193c2d506f511112e4a26d73c382ff1..6a3760e1f749b2b4875df00b01def57c979b3c93 100644 --- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt +++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt @@ -51,8 +51,8 @@ if (WIN32) set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") + safe_set_static_flag() if (WITH_STATIC_LIB) - safe_set_static_flag() add_definitions(-DSTATIC_LIB) endif() endif() @@ -136,7 +136,7 @@ else() set(DEPS ${DEPS} ${MATH_LIB} ${MKLDNN_LIB} glog gflags_static libprotobuf xxhash ${EXTERNAL_LIB}) - set(DEPS ${DEPS} libcmt shlwapi.lib) + set(DEPS ${DEPS} shlwapi.lib) endif(NOT WIN32) if(WITH_GPU) diff --git a/paddle/fluid/inference/api/demo_ci/run.sh b/paddle/fluid/inference/api/demo_ci/run.sh index 6b7fb0f619a67cc01dac2b09525bb2bfa05207ba..a3e7bec398af7e193a75395ad40175336f5f7503 100755 --- a/paddle/fluid/inference/api/demo_ci/run.sh +++ b/paddle/fluid/inference/api/demo_ci/run.sh @@ -6,7 +6,7 @@ TEST_GPU_CPU=$3 # test both GPU/CPU mode or only CPU mode DATA_DIR=$4 # dataset TENSORRT_INCLUDE_DIR=$5 # TensorRT header file dir, default to /usr/local/TensorRT/include TENSORRT_LIB_DIR=$6 # TensorRT lib file dir, default to /usr/local/TensorRT/lib - +MSVC_STATIC_CRT=$7 inference_install_dir=${PADDLE_ROOT}/build/paddle_inference_install_dir cd `dirname $0` @@ -66,43 +66,54 @@ mkdir -p build cd build rm -rf * -if [ $(echo `uname` | grep "Win") != "" ]; then - # -----simple_on_word2vec on windows----- - cmake .. 
-G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \ - -DWITH_MKL=$TURN_ON_MKL \ - -DDEMO_NAME=simple_on_word2vec \ - -DWITH_GPU=$TEST_GPU_CPU \ - -DWITH_STATIC_LIB=OFF - msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln - Release/simple_on_word2vec.exe \ - --dirname=$DATA_DIR/word2vec/word2vec.inference.model \ - --use_gpu=False - if [ $? -ne 0 ]; then - echo "simple_on_word2vec demo runs fail." - exit 1 - fi - - # -----vis_demo on windows----- - rm -rf * - cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \ - -DWITH_MKL=$TURN_ON_MKL \ - -DDEMO_NAME=vis_demo \ - -DWITH_GPU=$TEST_GPU_CPU \ - -DWITH_STATIC_LIB=OFF - msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln - for vis_demo_name in $vis_demo_list; do - Release/vis_demo.exe \ - --modeldir=$DATA_DIR/$vis_demo_name/model \ - --data=$DATA_DIR/$vis_demo_name/data.txt \ - --refer=$DATA_DIR/$vis_demo_name/result.txt \ - --use_gpu=False - if [ $? -ne 0 ]; then - echo "vis demo $vis_demo_name runs fail." - exit 1 +for WITH_STATIC_LIB in ON OFF; do + if [ $(echo `uname` | grep "Win") != "" ]; then + # TODO(xingzhaolong, jiweibo): remove this if windows GPU library is ready. + if [ "$TEST_GPU_CPU" == ON ] && [ "$WITH_STATIC_LIB" == ON ]; then + continue fi - done -else - for WITH_STATIC_LIB in ON OFF; do + + # -----simple_on_word2vec on windows----- + cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \ + -DWITH_MKL=$TURN_ON_MKL \ + -DDEMO_NAME=simple_on_word2vec \ + -DWITH_GPU=$TEST_GPU_CPU \ + -DWITH_STATIC_LIB=$WITH_STATIC_LIB \ + -DMSVC_STATIC_CRT=$MSVC_STATIC_CRT + msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln + for use_gpu in $use_gpu_list; do + Release/simple_on_word2vec.exe \ + --dirname=$DATA_DIR/word2vec/word2vec.inference.model \ + --use_gpu=$use_gpu + if [ $? -ne 0 ]; then + echo "simple_on_word2vec demo runs fail." 
+ exit 1 + fi + done + + # -----vis_demo on windows----- + rm -rf * + cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \ + -DWITH_MKL=$TURN_ON_MKL \ + -DDEMO_NAME=vis_demo \ + -DWITH_GPU=$TEST_GPU_CPU \ + -DWITH_STATIC_LIB=$WITH_STATIC_LIB \ + -DMSVC_STATIC_CRT=$MSVC_STATIC_CRT + msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln + for use_gpu in $use_gpu_list; do + for vis_demo_name in $vis_demo_list; do + Release/vis_demo.exe \ + --modeldir=$DATA_DIR/$vis_demo_name/model \ + --data=$DATA_DIR/$vis_demo_name/data.txt \ + --refer=$DATA_DIR/$vis_demo_name/result.txt \ + --use_gpu=$use_gpu + if [ $? -ne 0 ]; then + echo "vis demo $vis_demo_name runs fail." + exit 1 + fi + done + done + else # -----simple_on_word2vec on linux/mac----- rm -rf * cmake .. -DPADDLE_LIB=${inference_install_dir} \ @@ -123,7 +134,6 @@ else fi done fi - # ---------vis_demo on linux/mac--------- rm -rf * cmake .. -DPADDLE_LIB=${inference_install_dir} \ @@ -145,7 +155,6 @@ else fi done done - # --------tensorrt mobilenet on linux/mac------ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then rm -rf * @@ -167,6 +176,6 @@ else exit 1 fi fi - done -fi + fi +done set +x diff --git a/paddle/fluid/inference/api/paddle_infer_declare.h b/paddle/fluid/inference/api/paddle_infer_declare.h index 39c9653f16cefb71a9f2a0ddcc08723d189d411c..e8525f440fe7f2d54d045eedb79aed228513e550 100644 --- a/paddle/fluid/inference/api/paddle_infer_declare.h +++ b/paddle/fluid/inference/api/paddle_infer_declare.h @@ -17,11 +17,7 @@ #if defined(_WIN32) #ifndef PD_INFER_DECL #ifdef PADDLE_DLL_INFERENCE -#ifndef PADDLE_ON_INFERENCE -#define PD_INFER_DECL -#else #define PD_INFER_DECL __declspec(dllexport) -#endif // PADDLE_ON_INFERENCE #else #define PD_INFER_DECL __declspec(dllimport) #endif // PADDLE_DLL_INFERENCE diff --git a/paddle/fluid/inference/capi/pd_predictor.cc b/paddle/fluid/inference/capi/pd_predictor.cc index 
31915496893e6242dc7cd10ffd48af278d124245..c1bf4c974fac8c80c3e8e31fbd247332a325e2aa 100644 --- a/paddle/fluid/inference/capi/pd_predictor.cc +++ b/paddle/fluid/inference/capi/pd_predictor.cc @@ -131,7 +131,9 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, PADDLE_ENFORCE_EQ( input_names.size(), in_size, paddle::platform::errors::InvalidArgument( - "The number of input and the number of model's input must match.")); + "The number of input and the number of model's input must match. The " + "number of input is %d, the number of model's input is %d.", + input_names.size(), in_size)); for (int i = 0; i < in_size; ++i) { auto input_t = predictor->GetInputTensor(inputs[i].name); std::vector tensor_shape; diff --git a/paddle/fluid/inference/io.cc b/paddle/fluid/inference/io.cc index c497ab384b5fac74b5241d61517485fd8f2b40c4..84e011c6505a8fe974effbecf54101e0e51d29fa 100644 --- a/paddle/fluid/inference/io.cc +++ b/paddle/fluid/inference/io.cc @@ -47,7 +47,9 @@ void Init(const std::vector argv) { void ReadBinaryFile(const std::string& filename, std::string* contents) { std::ifstream fin(filename, std::ios::in | std::ios::binary); - PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", filename); + PADDLE_ENFORCE_EQ( + fin.is_open(), true, + platform::errors::Unavailable("Failed to open file %s.", filename)); fin.seekg(0, std::ios::end); contents->clear(); contents->resize(fin.tellg()); @@ -133,9 +135,10 @@ std::unique_ptr Load(framework::Executor* executor, std::unique_ptr main_program( new framework::ProgramDesc(program_desc_str)); - PADDLE_ENFORCE(framework::IsProgramVersionSupported(main_program->Version()), - "model version %ld is not supported.", - main_program->Version()); + PADDLE_ENFORCE_EQ( + framework::IsProgramVersionSupported(main_program->Version()), true, + platform::errors::Unavailable("Model version %ld is not supported.", + main_program->Version())); // model_from_memory is false in separate parameters. 
LoadPersistables(executor, scope, *main_program, dirname, "", @@ -151,9 +154,10 @@ std::unique_ptr Load( std::unique_ptr main_program( new framework::ProgramDesc(program_desc_str)); - PADDLE_ENFORCE(framework::IsProgramVersionSupported(main_program->Version()), - "model version %ld is not supported.", - main_program->Version()); + PADDLE_ENFORCE_EQ( + framework::IsProgramVersionSupported(main_program->Version()), true, + platform::errors::Unavailable("Model version %ld is not supported.", + main_program->Version())); LoadPersistables(executor, scope, *main_program, "", param_filename, false /* model_from_memory */); @@ -165,9 +169,10 @@ std::unique_ptr LoadFromMemory( const std::string& prog_buffer, const std::string& param_buffer) { std::unique_ptr main_program( new framework::ProgramDesc(prog_buffer)); - PADDLE_ENFORCE(framework::IsProgramVersionSupported(main_program->Version()), - "model version %ld is not supported.", - main_program->Version()); + PADDLE_ENFORCE_EQ( + framework::IsProgramVersionSupported(main_program->Version()), true, + platform::errors::Unavailable("Model version %ld is not supported.", + main_program->Version())); LoadPersistables(executor, scope, *main_program, "", param_buffer, true /* model_filename */); diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc index 76b0832c546b92068364ba6b2eda65a04742e5f0..0bf8a1691e2192b278fcd209162135027ed24e71 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc +++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc @@ -27,8 +27,8 @@ PluginTensorRT* PluginFactoryTensorRT::createPlugin(const char* layer_name, PADDLE_ENFORCE_EQ( Has(plugin_type), true, - platform::errors::NotFound( - "trt plugin type %s does not exists, check it.", plugin_type)); + platform::errors::NotFound("TensorRT plugin type `%s` does not exists.", + plugin_type)); auto plugin = 
plugin_registry_[plugin_type](serial_data, serial_length); owned_plugins_.emplace_back(plugin); diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h index 6fcb70c6d3299f830e1e95e328b2645aedf9cc31..16751c764bd03af9bbb7cbd77dd9287c17150dd5 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h +++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h @@ -103,12 +103,11 @@ struct Serializer, DeserializeValue(buffer, buffer_size, &size); value->resize(size); size_t nbyte = value->size() * sizeof(T); - PADDLE_ENFORCE_GE( - *buffer_size, nbyte, - platform::errors::InvalidArgument("Expect buffer size >= value size in " - "trt plugin deserialization, but got " - "buffer size = %d, value size = %d.", - *buffer_size, nbyte)); + PADDLE_ENFORCE_GE(*buffer_size, nbyte, + platform::errors::InvalidArgument( + "Insufficient data in buffer, expect contains %d " + "byte, but actually only contains %d byte.", + nbyte, *buffer_size)); std::memcpy(value->data(), *buffer, nbyte); reinterpret_cast(*buffer) += nbyte; *buffer_size -= nbyte; diff --git a/paddle/fluid/inference/utils/singleton.h b/paddle/fluid/inference/utils/singleton.h index 990bef359499834c3a7cb025c3fb1d94ceea958e..6828924c300fdfec6640e7b19a2c06b0826aa455 100644 --- a/paddle/fluid/inference/utils/singleton.h +++ b/paddle/fluid/inference/utils/singleton.h @@ -46,7 +46,9 @@ struct Registry { template void Register(const std::string& name) { - PADDLE_ENFORCE_EQ(items_.count(name), 0); + PADDLE_ENFORCE_EQ(items_.count(name), 0, + platform::errors::AlreadyExists( + "Item `%s` has been registered.", name)); items_[name] = new ItemChild; } diff --git a/paddle/fluid/operators/add_position_encoding_op.cc b/paddle/fluid/operators/add_position_encoding_op.cc index 629fedba6e3db474869ebddc02470c2ff007e658..e5fcd270eb8b8fa58175e11e955161ebfbb2846c 100644 --- a/paddle/fluid/operators/add_position_encoding_op.cc +++ 
b/paddle/fluid/operators/add_position_encoding_op.cc @@ -69,12 +69,18 @@ class AddPositionEncodingOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("alpha", "The scale of Original Embedding.") .SetDefault(1.0f) .AddCustomChecker([](const float& alpha) { - PADDLE_ENFORCE(alpha >= 0.0f, "'alpha' must be above 0.0."); + PADDLE_ENFORCE_GE( + alpha, 0.0f, + platform::errors::InvalidArgument( + "Attribute 'alpha' must be greater than or equal to 0.0.")); }); AddAttr("beta", "The scale of Position Embedding.") .SetDefault(1.0f) .AddCustomChecker([](const float& beta) { - PADDLE_ENFORCE(beta >= 0.0f, "'beta' must be between 0.0."); + PADDLE_ENFORCE_GE( + beta, 0.0f, + platform::errors::InvalidArgument( + "Attribute 'beta' must be greater than or equal to 0.0.")); }); AddComment(R"DOC( Add Position Encoding Operator. diff --git a/paddle/fluid/operators/assign_value_op.h b/paddle/fluid/operators/assign_value_op.h index b462c43d23a534c3520a2a852252fe0333222d77..1418d96b67b75ea3a2d4b3d95d3e4bdfb17618ee 100644 --- a/paddle/fluid/operators/assign_value_op.h +++ b/paddle/fluid/operators/assign_value_op.h @@ -76,7 +76,10 @@ class AssignValueKernel : public framework::OpKernel { value_name = "int64_values"; break; default: - PADDLE_THROW("Unsupported dtype for assign_value_op: %d", dtype); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported data type(code %d) for AssignValue operator, only " + "supports bool, int32, float32 and int64.", + dtype)); break; } CopyVecotorToTensor(value_name, out, ctx); diff --git a/paddle/fluid/operators/coalesce_tensor_op.cc b/paddle/fluid/operators/coalesce_tensor_op.cc index 5b7bcde21a99f23b653cc8b822aa3e22539e9d82..d67d90c348e6f1db9fff604b3eff7b6a79141d07 100644 --- a/paddle/fluid/operators/coalesce_tensor_op.cc +++ b/paddle/fluid/operators/coalesce_tensor_op.cc @@ -33,29 +33,37 @@ class CoalesceTensorOpKernel : public framework::OpKernel { auto out_vars = context.MultiOutputVar("Output"); 
PADDLE_ENFORCE_GT(in_var_names.size(), static_cast(0), - "The CoalesceTensorOp has no input."); - PADDLE_ENFORCE_EQ( - in_var_names.size(), out_var_names.size(), - "The number of CoalesceTensorOp's input and output is not match."); + platform::errors::InvalidArgument( + "The CoalesceTensor operator has no input.")); + PADDLE_ENFORCE_EQ(in_var_names.size(), out_var_names.size(), + platform::errors::InvalidArgument( + "The number of CoalesceTensor operator's input and " + "output is not match, " + "input number is %u, output number is %u.", + in_var_names.size(), out_var_names.size())); // Input & Output check: only support LoDTensor for (size_t i = 0; i < in_var_names.size(); ++i) { PADDLE_ENFORCE_NOT_NULL( in_vars[i], - "The input variable %s of CoalesceTensorOp does not exist.", - in_var_names[i]); + platform::errors::NotFound("The input variable %s of CoalesceTensor " + "operator does not exist.", + in_var_names[i])); PADDLE_ENFORCE_NOT_NULL( out_vars[i], - "The output variable %s of CoalesceTensorOp does not exist.", - out_var_names[i]); - PADDLE_ENFORCE_EQ( - in_vars[i]->IsType(), true, - "The input variable %s of CoalesceTensorOp is not LoDTensor.", - in_var_names[i]); - PADDLE_ENFORCE_EQ( - out_vars[i]->IsType(), true, - "The output variable %s of CoalesceTensorOp is not LoDTensor.", - in_var_names[i]); + platform::errors::NotFound("The output variable %s of CoalesceTensor " + "operator does not exist.", + out_var_names[i])); + PADDLE_ENFORCE_EQ(in_vars[i]->IsType(), true, + platform::errors::InvalidArgument( + "The input variable %s of CoalesceTensor operator " + "is not LoDTensor.", + in_var_names[i])); + PADDLE_ENFORCE_EQ(out_vars[i]->IsType(), true, + platform::errors::InvalidArgument( + "The output variable %s of CoalesceTensor operator " + "is not LoDTensor.", + out_var_names[i])); } auto in_tensors = context.MultiInput("Input"); @@ -64,7 +72,10 @@ class CoalesceTensorOpKernel : public framework::OpKernel { for (size_t i = 0; i < in_var_names.size(); 
++i) { PADDLE_ENFORCE_EQ( in_var_names[i], out_var_names[i], - "The input and output variable of CoalesceTensorOp is different."); + platform::errors::InvalidArgument( + "The input and output variable of CoalesceTensor operator is " + "different, %dth input is %s, %dth output is %s.", + i, in_var_names[i], i, out_var_names[i])); } } else { // Init the output as input @@ -134,16 +145,25 @@ class CoalesceTensorOpKernel : public framework::OpKernel { const std::vector &lod_tensors, const std::vector var_names, size_t *numel, const size_t &size_of_dtype, const platform::Place &place) const { - PADDLE_ENFORCE_EQ(lod_tensors.size(), var_names.size()); + PADDLE_ENFORCE_EQ( + lod_tensors.size(), var_names.size(), + platform::errors::InvalidArgument( + "The number of input tensor and variable does not match, the " + "number of input tensor is %u, the number of input variable is %u.", + lod_tensors.size(), var_names.size())); *numel = 0; std::stringstream ss; ss << "alloc_space_for_vars: "; for (size_t i = 0; i < var_names.size(); ++i) { PADDLE_ENFORCE_EQ(lod_tensors[i]->IsInitialized(), true, - "%s is not initialized.", var_names[i]); + platform::errors::InvalidArgument( + "Tensor `%s` is not initialized.", var_names[i])); auto size = lod_tensors[i]->numel(); - PADDLE_ENFORCE_GT(size, 0); + PADDLE_ENFORCE_GT( + size, 0, + platform::errors::InvalidArgument( + "The number of tensor `%s`'s elements is 0.", var_names[i])); ss << "input(" << var_names[i] << ") dim:(" << lod_tensors[i]->dims() << ") " << " addres:" << lod_tensors[i]->data() << ", "; diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc index 4f337c03599a548ac3d95ddd06c726be30d7c13f..7937e432d22faa3ffd93e46a39b7b1cc5500dbf8 100644 --- a/paddle/fluid/operators/concat_op.cc +++ b/paddle/fluid/operators/concat_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/concat_op.h" + #include #include #include @@ -78,7 +79,8 @@ class ConcatOp : public framework::OperatorWithKernel { } } if (flag == 0) { - PADDLE_THROW("All Inputs of Concat OP are Empty!"); + PADDLE_THROW(platform::errors::InvalidArgument( + "All Inputs of Concat OP are Empty!")); } #ifdef PADDLE_WITH_MKLDNN if (platform::CanMKLDNNBeUsed(ctx)) { diff --git a/paddle/fluid/operators/conv_cudnn_helper.h b/paddle/fluid/operators/conv_cudnn_helper.h index 25b45f281a799ade12ec9cbfb8fb262dbc572196..fac8e24251033c301c911f35dcfd0ddb82b713ce 100644 --- a/paddle/fluid/operators/conv_cudnn_helper.h +++ b/paddle/fluid/operators/conv_cudnn_helper.h @@ -162,7 +162,20 @@ struct SearchAlgorithm { workspace_size = GetWorkspaceSize(args, algo); if (workspace_size > workspace_size_limit) { +#if CUDNN_VERSION >= 8000 workspace_size_limit = workspace_size; +#else + VLOG(1) << "Fallback to non-v7 method to find conv algorithm becasue " + "the workspace size request(" + << workspace_size << ") exceeds the limit(" + << workspace_size_limit << ")"; + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnGetConvolutionForwardAlgorithm( + args.handle, args.idesc.desc(), args.wdesc.desc(), + args.cdesc.desc(), args.odesc.desc(), + CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + workspace_size_limit, &algo)); +#endif } #else PADDLE_ENFORCE_CUDA_SUCCESS( @@ -291,8 +304,23 @@ struct SearchAlgorithm { #endif workspace_size = GetWorkspaceSize(args, algo); if (workspace_size > workspace_size_limit) { - workspace_size_limit = workspace_size; has_got_workspace_size = false; +#if CUDNN_VERSION >= 8000 + // There is no cudnnGetConvolutionBackwardDataAlgorithm in CUDNN 8 + // version. 
+ workspace_size_limit = workspace_size; +#else + VLOG(1) << "Fallback to non-v7 method to find conv algorithm becasue " + "the workspace size request(" + << workspace_size << ") exceeds the limit(" + << workspace_size_limit << ")"; + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm( + args.handle, args.wdesc.desc(), args.odesc.desc(), + args.cdesc.desc(), args.idesc.desc(), + CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, + workspace_size_limit, &algo)); +#endif } #else PADDLE_ENFORCE_CUDA_SUCCESS( diff --git a/paddle/fluid/operators/dequantize_abs_max_op.cc b/paddle/fluid/operators/dequantize_abs_max_op.cc index 48743f2e48c8a7686497adff52f23f31346aeda7..0d4d68d9f622fef9df4819d6092411a4d7db65f7 100644 --- a/paddle/fluid/operators/dequantize_abs_max_op.cc +++ b/paddle/fluid/operators/dequantize_abs_max_op.cc @@ -45,10 +45,8 @@ class DequantizeMaxAbsOp : public framework::OperatorWithKernel { : OperatorWithKernel(type, inputs, outputs, attrs) {} void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - "Input(X) of DequantizeMaxAbsOp should not be null."); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - "Output(Out) of DequantizeMaxAbsOp should not be null."); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "DequantizeMaxAbs"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "DequantizeMaxAbs"); ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out"); diff --git a/paddle/fluid/operators/detection/gpc.cc b/paddle/fluid/operators/detection/gpc.cc index b46d231d0ff7774c64745b3b77953cf2ed8d82f7..6b1b0cd8b3578a344978afae642b66759589ffde 100644 --- a/paddle/fluid/operators/detection/gpc.cc +++ b/paddle/fluid/operators/detection/gpc.cc @@ -532,7 +532,8 @@ static int count_contours(polygon_node *polygon) { } static void add_left(polygon_node *p, double x, double y) { - PADDLE_ENFORCE_NOT_NULL(p); + PADDLE_ENFORCE_NOT_NULL(p, 
paddle::platform::errors::InvalidArgument( + "Input polygon node is nullptr.")); vertex_node *nv = NULL; /* Create a new vertex node and set its fields */ @@ -588,7 +589,8 @@ static void add_right(polygon_node *p, double x, double y) { } static void merge_right(polygon_node *p, polygon_node *q, polygon_node *list) { - PADDLE_ENFORCE_NOT_NULL(p); + PADDLE_ENFORCE_NOT_NULL(p, paddle::platform::errors::InvalidArgument( + "Input polygon node is nullptr.")); polygon_node *target = NULL; /* Label contour as external */ @@ -664,7 +666,8 @@ void add_vertex(vertex_node **t, double x, double y) { } void gpc_vertex_create(edge_node *e, int p, int s, double x, double y) { - PADDLE_ENFORCE_NOT_NULL(e); + PADDLE_ENFORCE_NOT_NULL(e, paddle::platform::errors::InvalidArgument( + "Input edge node is nullptr.")); add_vertex(&(e->outp[p]->v[s]), x, y); e->outp[p]->active++; } @@ -693,7 +696,8 @@ static bbox *create_contour_bboxes(gpc_polygon *p) { gpc_malloc(box, p->num_contours * sizeof(bbox), const_cast("Bounding box creation")); - PADDLE_ENFORCE_NOT_NULL(box); + PADDLE_ENFORCE_NOT_NULL(box, paddle::platform::errors::ResourceExhausted( + "Failed to malloc box memory.")); /* Construct contour bounding boxes */ for (c = 0; c < p->num_contours; c++) { @@ -857,7 +861,9 @@ void gpc_add_contour(gpc_polygon *p, gpc_vertex_list *new_contour, int hole) { /* Create an extended hole array */ gpc_malloc(extended_hole, (p->num_contours + 1) * sizeof(int), const_cast("contour hole addition")); - PADDLE_ENFORCE_NOT_NULL(extended_hole); + PADDLE_ENFORCE_NOT_NULL(extended_hole, + paddle::platform::errors::ResourceExhausted( + "Failed to malloc extended hole memory.")); /* Create an extended contour array */ gpc_malloc(extended_contour, @@ -975,7 +981,9 @@ void gpc_polygon_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, /* Build scanbeam table from scanbeam tree */ gpc_malloc(sbt, sbt_entries * sizeof(double), const_cast("sbt creation")); - PADDLE_ENFORCE_NOT_NULL(sbt); + 
PADDLE_ENFORCE_NOT_NULL(sbt, paddle::platform::errors::ResourceExhausted( + "Failed to malloc scanbeam table memory.")); + build_sbt(&scanbeam, sbt, sbtree); scanbeam = 0; free_sbtree(&sbtree); @@ -1017,7 +1025,9 @@ void gpc_polygon_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, e0 = aet; e1 = aet; /* Set up bundle fields of first edge */ - PADDLE_ENFORCE_NOT_NULL(aet); + PADDLE_ENFORCE_NOT_NULL(aet, paddle::platform::errors::InvalidArgument( + "Edge node AET is nullptr.")); + aet->bundle[ABOVE][aet->type] = (aet->top.y != yb); aet->bundle[ABOVE][!aet->type] = 0; aet->bstate[ABOVE] = UNBUNDLED; @@ -1612,7 +1622,8 @@ void gpc_tristrip_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, /* Build scanbeam table from scanbeam tree */ gpc_malloc(sbt, sbt_entries * sizeof(double), const_cast("sbt creation")); - PADDLE_ENFORCE_NOT_NULL(sbt); + PADDLE_ENFORCE_NOT_NULL(sbt, paddle::platform::errors::ResourceExhausted( + "Failed to malloc scanbeam table memory.")); build_sbt(&scanbeam, sbt, sbtree); scanbeam = 0; free_sbtree(&sbtree); @@ -1650,7 +1661,8 @@ void gpc_tristrip_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, e1 = aet; /* Set up bundle fields of first edge */ - PADDLE_ENFORCE_NOT_NULL(aet); + PADDLE_ENFORCE_NOT_NULL(aet, paddle::platform::errors::InvalidArgument( + "Edge node AET is nullptr.")); aet->bundle[ABOVE][aet->type] = (aet->top.y != yb); aet->bundle[ABOVE][!aet->type] = 0; aet->bstate[ABOVE] = UNBUNDLED; diff --git a/paddle/fluid/operators/diag_v2_op.cu b/paddle/fluid/operators/diag_v2_op.cu index 4386cc6b8183c03b4d4a19aba7d1126eac2ab495..12ea31945f8d032e1f395c2fb92d9ef31d10c7e8 100644 --- a/paddle/fluid/operators/diag_v2_op.cu +++ b/paddle/fluid/operators/diag_v2_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include +#include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/diag_v2_op.h" @@ -58,6 +59,17 @@ class DiagV2CUDAKernel : public framework::OpKernel { auto out_dims = out->dims(); auto& dev_ctx = context.template device_context(); + auto GetBlockGridSize = [&dev_ctx](int64_t size) { + const int64_t block_size = + std::min(size, static_cast(dev_ctx.GetMaxThreadsPerBlock())); + int64_t max_threads = dev_ctx.GetMaxPhysicalThreadCount(); + const int64_t max_blocks = std::max(((max_threads - 1) / block_size + 1), + static_cast(1)); + const int64_t grid_size = + std::min(max_blocks, (size + block_size - 1) / block_size); + return std::tuple{block_size, grid_size}; + }; + if (x_dims.size() == 1) { float padding_value = context.Attr("padding_value"); math::SetConstant set_padding_value; @@ -67,26 +79,23 @@ class DiagV2CUDAKernel : public framework::OpKernel { auto size = (offset > 0) ? x_length + offset : x_length - offset; const int& x_stride = ComputeStride(0, x_dims); if (size > 0) { - const int block_num = std::min(static_cast(size), - dev_ctx.GetMaxPhysicalThreadCount()); - int size_ = static_cast(size); - int block_num_ = static_cast(block_num); - const int grid_num = - std::min(1024, (size_ + block_num_ - 1) / block_num_); const auto& out_stride_0 = ComputeStride(0, out_dims); const auto& out_stride_1 = ComputeStride(1, out_dims); auto start = (offset >= 0 ? 
offset * out_stride_1 : -offset * out_stride_0); - PasteDiagonalKernel<<>>( - out_data, x_data, start, x_length, out_stride_0 + out_stride_1, - x_stride); + std::tuple block_grid_size = GetBlockGridSize(size); + + PasteDiagonalKernel< + T><<(block_grid_size), std::get<0>(block_grid_size), 0, + dev_ctx.stream()>>>(out_data, x_data, start, x_length, + out_stride_0 + out_stride_1, x_stride); } } else { const int& x_stride_0 = ComputeStride(0, x_dims); const int& x_stride_1 = ComputeStride(1, x_dims); - int size; + int64_t size; if (offset > 0) { size = std::min(x_dims[0], x_dims[1] - offset); } else { @@ -94,18 +103,15 @@ class DiagV2CUDAKernel : public framework::OpKernel { } if (size > 0) { - const int block_num = std::min(static_cast(size), - dev_ctx.GetMaxPhysicalThreadCount()); - int size_ = static_cast(size); - int block_num_ = static_cast(block_num); - const int grid_num = - std::min(1024, (size_ + block_num_ - 1) / block_num_); auto start = (offset >= 0 ? offset * x_stride_1 : -offset * x_stride_0); const auto& out_stride_0 = ComputeStride(0, out_dims); - ExtractDiagonalKernel<<>>( - out_data, x_data, start, size, x_stride_0 + x_stride_1, - out_stride_0); + std::tuple block_grid_size = GetBlockGridSize(size); + + ExtractDiagonalKernel< + T><<(block_grid_size), std::get<0>(block_grid_size), 0, + dev_ctx.stream()>>>(out_data, x_data, start, size, + x_stride_0 + x_stride_1, out_stride_0); } } } diff --git a/paddle/fluid/operators/distributed/communicator.cc b/paddle/fluid/operators/distributed/communicator.cc index b2cc9390fa2267404ac246c6b36800833d0dd679..a0ac82a6f4a432ee0f0427a90508c88a262799e3 100644 --- a/paddle/fluid/operators/distributed/communicator.cc +++ b/paddle/fluid/operators/distributed/communicator.cc @@ -74,8 +74,12 @@ void AsyncCommunicator::InitImpl(const RpcCtxMap &send_varname_to_ctx, } else { recv_threadpool_.reset(new ::ThreadPool(thread_pool_size_)); } + + InitParams(); } +void AsyncCommunicator::InitParams() { RecvNoBarrier(); } + 
AsyncCommunicator::~AsyncCommunicator() { running_ = false; if (main_thread_) main_thread_->join(); @@ -157,16 +161,18 @@ void AsyncCommunicator::MainThread() { } while (running_) { - int meet = Meet(); - - VLOG(1) << "async_meet: " << meet; - - SendGlobalStep(meet); - SendByCommunicator(meet); - BarrierSend(); - RecvByCommunicator(); - BarrierRecv(); - BarrierWeakUp(); + int batches = BatchesCounter(); + + if (batches > 0) { + SendGlobalStep(batches); + SendByCommunicator(batches); + BarrierSend(); + RecvByCommunicator(); + BarrierRecv(); + BarrierWeakUp(); + } else { + VLOG(1) << "get nothing from sending queue, will skip send/recv"; + } } VLOG(1) << "communicator stopped, send thread exit"; } @@ -187,7 +193,7 @@ void AsyncCommunicator::RecvNoBarrier() { auto &var_name = iter.first; VLOG(4) << "recv var " << var_name; auto recv_functor = distributed::ParameterRecv(); - recv_functor(iter.second, *recv_scope_, false); + recv_functor(iter.second, *recv_scope_); }; task_futures.emplace_back(recv_threadpool_->enqueue(std::move(recv_task))); } @@ -197,7 +203,7 @@ void AsyncCommunicator::RecvNoBarrier() { } } -int AsyncCommunicator::Meet() { +int AsyncCommunicator::BatchesCounter() { auto &step_queue = send_varname_to_queue_.at(STEP_COUNTER); size_t merged_var_num = 0; @@ -316,7 +322,7 @@ void HalfAsyncCommunicator::Clean() { } } -int HalfAsyncCommunicator::Meet() { +int HalfAsyncCommunicator::BatchesCounter() { while (running_) { if (barrier_counter_.load() >= barrier_trigger_.load() && barrier_trigger_.load() != 0) { @@ -443,7 +449,7 @@ void GeoCommunicator::InitImpl(const RpcCtxMap &send_varname_to_ctx, old_scope_.reset(new Scope()); pserver_scope_.reset(new Scope()); - Init(); + InitParams(); } void GeoCommunicator::Send(const std::vector &var_names, @@ -626,9 +632,7 @@ void GeoCommunicator::RecvByCommunicator() { if (recv_ctx.is_sparse) { RecvSparse(var_name); } else { - VLOG(1) << "recv dense " << var_name << " begin"; RecvDense(var_name); - VLOG(1) << "recv dense 
" << var_name << " done"; } }; tasks.emplace_back(send_threadpool_->enqueue(std::move(recv_task))); @@ -696,7 +700,7 @@ void GeoCommunicator::RecvDense(const std::string &varname) { auto &ctx = recv_varname_to_ctx_.at(varname); auto recv = distributed::ParameterRecv(); - recv(ctx, *pserver_scope_, true); + recv(ctx, *pserver_scope_); PADDLE_ENFORCE_EQ( var_psrever->IsInitialized(), true, @@ -721,7 +725,7 @@ void GeoCommunicator::RecvDense(const std::string &varname) { t_timestamp->data()); } -void GeoCommunicator::Init() { +void GeoCommunicator::InitParams() { std::vector> tasks; tasks.reserve(recv_varname_to_ctx_.size()); @@ -744,12 +748,17 @@ void GeoCommunicator::Init() { } void GeoCommunicator::InitDense(const std::string varname) { - auto *var = old_scope_->Var(varname); - var->GetMutable(); - auto &ctx = recv_varname_to_ctx_.at(varname); auto recv = distributed::ParameterRecv(); - recv(ctx, *old_scope_); + recv(ctx, *recv_scope_); + + auto *global_var = recv_scope_->FindVar(varname); + global_var->GetMutable(); + + auto *old_var = old_scope_->Var(varname); + old_var->GetMutable(); + + framework::CopyVariable(*global_var, old_var); VLOG(1) << "init dense variable " << varname << " done"; } @@ -781,22 +790,41 @@ void GeoCommunicator::InitSparse() { LargeScaleKV::Init(metas); - for (size_t i = 0; i < metas.size(); i++) { - auto &varname = metas[i].name; - auto &dict = dicts[i]; + for (auto &meta : metas) { + auto &ctx = recv_varname_to_ctx_.at(meta.name); + auto recv = distributed::ParameterRecv(); - std::vector ids; - ids.reserve(dict); + auto *global_var = recv_scope_->FindVar(meta.name); + auto global_value = global_var->Get(); + auto rows = global_value.dims()[0]; + auto dim1 = global_value.dims()[1]; - for (auto j = 0; j < dict; ++j) { - ids.push_back(j); - } + recv(ctx, *recv_scope_); + VLOG(1) << "recv " << meta.name << " with global scope for init"; + + auto n_rows = global_var->Get().dims()[0]; + + PADDLE_ENFORCE_EQ( + rows, n_rows, + 
platform::errors::InvalidArgument( + "global var: %s origin dim must equal recved rows", meta.name)); + + std::vector ids(rows); + std::iota(ids.begin(), ids.end(), 0); auto *ins = distributed::LargeScaleKV::GetInstance(); - ins->Get(varname)->Init(ids); + std::vector *>> values; + + ins->Get(meta.name)->Init(ids); + ins->Get(meta.name)->Get(ids, {"Param"}, &values); - VLOG(3) << "GeoCommunicator init sparse " << varname << " with size " - << ids.size(); + auto blas = math::GetBlas( + paddle::platform::CPUDeviceContext()); + + for (auto &id : ids) { + blas.VCOPY(dim1, global_value.data() + id * dim1, + values[id][0]->data()); + } } VLOG(3) << "init sparse variable done"; diff --git a/paddle/fluid/operators/distributed/communicator.h b/paddle/fluid/operators/distributed/communicator.h index 2f6da150d1e1375c332f7e55ea5b16c07f067a40..4a9a9eb1701f5a9102de9de164a7679999ee2a3e 100644 --- a/paddle/fluid/operators/distributed/communicator.h +++ b/paddle/fluid/operators/distributed/communicator.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include #include +#include #include #include #include @@ -29,6 +30,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/operators/distributed/communicator_common.h" #include "paddle/fluid/operators/distributed/distributed.h" #include "paddle/fluid/operators/distributed/large_scale_kv.h" @@ -279,6 +281,8 @@ class AsyncCommunicator : public Communicator { const RpcCtxMap &recv_varname_to_ctx, Scope *recv_scope) override; + void InitParams(); + void MainThread(); void Send(const std::vector &var_names, @@ -293,7 +297,7 @@ class AsyncCommunicator : public Communicator { virtual void RecvNoBarrier(); - virtual int Meet(); + virtual int BatchesCounter(); virtual void BarrierSend() {} @@ -350,7 +354,7 @@ class HalfAsyncCommunicator : public AsyncCommunicator { void BarrierTriggerReset(int initial_val) override; - int Meet(); + int BatchesCounter(); void BarrierWeakUp(); @@ -435,7 +439,7 @@ class GeoCommunicator : public AsyncCommunicator { void RecvDense(const std::string &varname); - void Init(); + void InitParams(); void InitSparse(); diff --git a/paddle/fluid/operators/distributed/parameter_recv.cc b/paddle/fluid/operators/distributed/parameter_recv.cc index 5409ec54987fbb7ad89f61cc1655a4c3ef302ac0..3b8479c91b0b619430ebde26b26f0ae6c9fc59cb 100644 --- a/paddle/fluid/operators/distributed/parameter_recv.cc +++ b/paddle/fluid/operators/distributed/parameter_recv.cc @@ -41,8 +41,67 @@ using SelectedRows = framework::SelectedRows; using DDim = framework::DDim; template -void RecvSelectedRows(const CommContext &rpc_ctx, - const framework::Scope &scope) { +void RecvSparseLodTensor(const CommContext &rpc_ctx, + const framework::Scope &scope) { + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto cpu_place = platform::CPUPlace(); + auto &cpu_ctx = *pool.Get(cpu_place); + + distributed::RPCClient *rpc_client = + distributed::RPCClient::GetInstance(rpc_ctx.trainer_id); + + std::unique_ptr local_scope = 
scope.NewTmpScope(); + std::vector tensors; + std::vector rets; + for (size_t i = 0; i < rpc_ctx.splited_varnames.size(); i++) { + auto &recv_var_name = rpc_ctx.splited_varnames[i]; + auto *local_var = local_scope->Var(recv_var_name); + VLOG(4) << "recv " << recv_var_name << " from " << rpc_ctx.epmap[i]; + // sparse param in recv_scope is LoDTensor + rets.push_back(rpc_client->AsyncGetVarNoBarrier( + rpc_ctx.epmap[i], cpu_ctx, *local_scope.get(), recv_var_name, + recv_var_name)); + + const auto *value = local_var->Get().data(); + tensors.push_back(value); + } + + for (size_t i = 0; i < rets.size(); i++) { + PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, platform::errors::ExecutionTimeout( + "internal error in RPCClient")); + } + + auto *merged_var = scope.FindVar(rpc_ctx.var_name); + + if (merged_var == nullptr || !merged_var->IsInitialized()) { + PADDLE_THROW( + platform::errors::InvalidArgument("%s must initialized at first.")); + } + auto dims1 = merged_var->Get().dims()[1]; + int64_t height = 0; + for (size_t i = 0; i < rpc_ctx.splited_varnames.size(); i++) { + auto *splited_var = local_scope->FindVar(rpc_ctx.splited_varnames[i]); + height += splited_var->Get().dims()[0]; + } + + PADDLE_ENFORCE_EQ(merged_var->Get().dims()[0], height, + "recved var must has same dims with local var"); + + auto *merged_t = merged_var->GetMutable(); + auto *merged_d = merged_t->mutable_data(cpu_place); + + auto pserver_num = rpc_ctx.splited_varnames.size(); + for (int x = 0; x < height; ++x) { + auto id = x % pserver_num; + auto idx = x / pserver_num; + std::memcpy(merged_d + x * dims1, tensors[id] + idx * dims1, + sizeof(float) * dims1); + } +} + +template +void RecvGeoSparseRecords(const CommContext &rpc_ctx, + const framework::Scope &scope) { platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto cpu_place = platform::CPUPlace(); auto &cpu_ctx = *pool.Get(cpu_place); @@ -84,9 +143,14 @@ void RecvSelectedRows(const CommContext &rpc_ctx, ids_num += 
recv_t.rows().size(); width = recv_t.value().dims()[1]; - std::transform(recv_t.rows().begin(), recv_t.rows().end(), - std::back_inserter(all_ids), - [&](int64_t id) { return id * pserver_num + i; }); + if (rpc_ctx.is_distributed) { + std::copy(recv_t.rows().begin(), recv_t.rows().end(), + std::back_inserter(all_ids)); + } else { + std::transform(recv_t.rows().begin(), recv_t.rows().end(), + std::back_inserter(all_ids), + [&](int64_t id) { return id * pserver_num + i; }); + } } auto *var = scope.FindVar(rpc_ctx.var_name); @@ -146,7 +210,8 @@ void RecvLodTensor(const CommContext &rpc_ctx, const framework::Scope &scope) { template void ParameterRecv::operator()(const CommContext &rpc_ctx, - const framework::Scope &scope, bool barrier) { + const framework::Scope &scope, + bool geo_records) { VLOG(3) << "ParameterRecv in " << rpc_ctx.var_name; PADDLE_ENFORCE_GE(rpc_ctx.origin_varnames.size(), 1, @@ -154,18 +219,21 @@ void ParameterRecv::operator()(const CommContext &rpc_ctx, "origin_varnames.size() >= 1 is permitted")); if (rpc_ctx.is_sparse) { - RecvSelectedRows(rpc_ctx, scope); + if (geo_records) { + RecvGeoSparseRecords(rpc_ctx, scope); + } else { + RecvSparseLodTensor(rpc_ctx, scope); + } } else { RecvLodTensor(rpc_ctx, scope); } VLOG(3) << "ParameterRecv out " << rpc_ctx.var_name; } - template void ParameterRecv::operator()(const CommContext &rpc_ctx, const framework::Scope &scope) { - this->operator()(rpc_ctx, scope, true); + this->operator()(rpc_ctx, scope, false); } template struct ParameterRecv; diff --git a/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc b/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc index b064265917b2a36b2261c6c43d355f9891aa9811..c9f9daf3b3c0442e379cd7a22fcf48dbe3acbb5d 100644 --- a/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc +++ b/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc @@ -48,7 +48,9 @@ class FetchBarrierOp : public framework::OperatorBase { } for (size_t i = 0; i < rets.size(); 
i++) { - PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, "internal error in RPCClient"); + PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, + platform::errors::Unavailable( + "Internal error occurred in RPCClient.")); } } }; diff --git a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.cc b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..e53ce8cc67c08269e15a20e2cd2fc57a2c5ace17 --- /dev/null +++ b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.cc @@ -0,0 +1,153 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h" + +#include +namespace paddle { +namespace operators { + +class LargeScaleFuseAdamOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Grad"), + "Input(Grad) of LargeScaleFuseAdamOp should not be null."); + PADDLE_ENFORCE( + ctx->HasInput("LearningRate"), + "Input(LearningRate) of LargeScaleFuseAdamOp should not be null."); + + auto lr_dims = ctx->GetInputDim("LearningRate"); + + PADDLE_ENFORCE_NE(framework::product(lr_dims), 0, + "Maybe the Input variable LearningRate has not " + "been initialized. 
You may need to confirm " + "if you put exe.run(startup_program) " + "after optimizer.minimize function."); + + PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, + "Learning rate should have 1 element"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext &ctx) const override { + auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Grad"); + return framework::OpKernelType(data_type, ctx.device_context()); + } + + framework::OpKernelType GetKernelTypeForVar( + const std::string &var_name, const framework::Tensor &tensor, + const framework::OpKernelType &expected_kernel_type) const { + if (var_name == "LearningRate") { + return framework::OpKernelType(tensor.type(), tensor.place(), + tensor.layout()); + } + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), tensor.layout()); + } +}; + +class LargeScaleFuseAdamOpInferVarType : public framework::VarTypeInference { + public: + void operator()(framework::InferVarTypeContext *ctx) const override { + auto in_var_type = ctx->GetInputType("Grad"); + PADDLE_ENFORCE_EQ(in_var_type == framework::proto::VarType::SELECTED_ROWS || + in_var_type == framework::proto::VarType::LOD_TENSOR, + true, platform::errors::InvalidArgument( + "The input Var's type should be LoDtensor or " + "SelectedRows, but the received type is %s", + in_var_type)); + } +}; + +class LargeScaleFuseAdamOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("Grad", + "(SelectedRows) Ids's type should be SelectedRows" + "THe ids to be looked up in W."); + + AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator"); + AddInput("Beta2Pow", "(Tensor) Input beta2 power accumulator"); + AddInput("LearningRate", "(Tensor) Learning rate of SGD"); + AddOutput("Beta1PowOut", "(Tensor) Output beta1 power accumulator"); + AddOutput("Beta2PowOut", "(Tensor) Output beta2 power accumulator"); + + AddAttr("beta1", + "(float, default 0.9) 
" + "Exponential decay rate for the " + "first moment estimates.") + .SetDefault(0.9f); + + AddAttr("beta2", + "(float, default 0.999) " + "exponential decay rate for the " + "second moment estimates.") + .SetDefault(0.999f); + + AddAttr("epsilon", + "(float, default 1.0e-8) " + "Constant for numerical stability") + .SetDefault(1.0e-8f); + + AddAttr("is_entry", + "(bool)" + "sparse table need entry"); + + AddAttr("tablename", + "(string)" + "sparse table name"); + + AddAttr>("value_names", + "(strings)" + "sparse table name"); + + AddComment(R"DOC( +Adam Optimizer. + +This implements the Adam optimizer from Section 2 of the Adam +paper : https://arxiv.org/abs/1412.6980. +Adam is a first-order gradient-based optimization method based on +adaptive estimates of lower-order moments. + +Adam updates: + +$$ +moment\_1\_out = \beta_1 * moment\_1 + (1 - \beta_1) * grad \\ +moment\_2_\out = \beta_2 * moment\_2 + (1 - \beta_2) * grad * grad \\ +learning\_rate = learning\_rate * + \frac{\sqrt{1 - \beta_{2\_pow}}}{1 - \beta_{1\_pow}} \\ +param\_out = param - learning\_rate * \frac{moment\_1}{\sqrt{moment\_2} + \epsilon} +$$ + +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR( + lookup_sparse_table_fuse_adam, ops::LargeScaleFuseAdamOp, + ops::LargeScaleFuseAdamOpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker, + ops::LargeScaleFuseAdamOpInferVarType); + +REGISTER_OP_CPU_KERNEL( + lookup_sparse_table_fuse_adam, + ops::LargeScaleFuseAdamOpKernel); diff --git a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h new file mode 100644 index 0000000000000000000000000000000000000000..89b8d54a463b03076c9489b842540ea4a4f68a82 --- /dev/null +++ b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h @@ -0,0 +1,142 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include // for sqrt in CPU and CUDA +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/selected_rows.h" +#include "paddle/fluid/operators/distributed/large_scale_kv.h" +#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/operators/math/selected_rows_functor.h" + +namespace paddle { +namespace operators { + +template +class LargeScaleFuseAdamOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override; +}; + +template +class LargeScaleFuseAdamOpKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + using paddle::framework::LoDTensor; + + const auto *learning_rate = ctx.Input("LearningRate"); + const auto *grad_var = ctx.InputVar("Grad"); + + PADDLE_ENFORCE( + grad_var->IsType(), + platform::errors::InvalidArgument( + "in large scale optimize, gradient should only be SelectedRows")); + + const auto &grad = grad_var->Get(); + + // for distributed training, a sparse var may be empty, + // just skip updating. 
+ if (grad.rows().size() == 0) { + return; + } + + framework::SelectedRows tmp_grad_merge; + const framework::SelectedRows *grad_merge_ptr; + math::scatter::MergeAdd merge_func; + merge_func(ctx.template device_context(), grad, + &tmp_grad_merge, true); + grad_merge_ptr = &tmp_grad_merge; + + std::vector in_rows; + in_rows.reserve(grad_merge_ptr->rows().size()); + std::copy(grad_merge_ptr->rows().begin(), grad_merge_ptr->rows().end(), + std::back_inserter(in_rows)); + + const auto *lr = learning_rate->data(); + auto grad_v = grad_merge_ptr->value(); + auto grad_width = grad_v.dims()[1]; + + // auto is_entry = context.Attr("is_entry"); + auto tablename = ctx.Attr("tablename"); + auto value_names = ctx.Attr>("value_names"); + + auto *beta1_pow = ctx.Input("Beta1Pow"); + auto *beta2_pow = ctx.Input("Beta2Pow"); + auto *beta1_pow_out = ctx.Output("Beta1PowOut"); + auto *beta2_pow_out = ctx.Output("Beta2PowOut"); + T epsilon = static_cast(ctx.Attr("epsilon")); + T beta1 = static_cast(ctx.Attr("beta1")); + T beta2 = static_cast(ctx.Attr("beta2")); + + PADDLE_ENFORCE_EQ(beta1_pow_out->numel(), 1, + platform::errors::InvalidArgument( + "beta1 pow output size should be 1, but received " + "value is:%d.", + beta1_pow_out->numel())); + + PADDLE_ENFORCE_EQ(beta2_pow_out->numel(), 1, + platform::errors::InvalidArgument( + "beta2 pow output size should be 1, but received " + "value is:%d.", + beta2_pow_out->numel())); + + // update beta1 and beta2 + beta1_pow_out->mutable_data(ctx.GetPlace())[0] = + beta1 * beta1_pow->data()[0]; + beta2_pow_out->mutable_data(ctx.GetPlace())[0] = + beta2 * beta2_pow->data()[0]; + + std::vector *>> values; + std::vector dims; + + auto *ins = distributed::LargeScaleKV::GetInstance(); + auto *table = ins->Get(tablename); + table->Get(in_rows, value_names, &values); + table->Dims({"Param"}, &dims); + + PADDLE_ENFORCE_EQ(dims[0], grad_width, + platform::errors::InvalidArgument( + "param_row should have the same size with grad_row")); + + T lr_ = 
lr[0]; + T beta1_pow_ = beta1_pow->data()[0]; + T beta2_pow_ = beta2_pow->data()[0]; + + lr_ *= sqrt(1 - beta2_pow_) / (1 - beta1_pow_); + + for (size_t i = 0; i < in_rows.size(); i++) { + auto ¶ms = values[i][0]; + auto &moment_1 = values[i][1]; + auto &moment_2 = values[i][2]; + + auto *p_data = params->data(); + auto *m1_data = moment_1->data(); + auto *m2_data = moment_2->data(); + + for (int x = 0; x < grad_width; ++x) { + auto g = grad_v.data()[grad_width * i + x]; + m1_data[x] = beta1 * m1_data[x] + (1 - beta1) * g; + m2_data[x] = beta2 * m2_data[x] + (1 - beta2) * g * g; + p_data[x] -= lr_ * (m1_data[x] / (sqrt(m2_data[x]) + epsilon)); + } + } + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.cc b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..010658b5280d7feeb683112b401dbcaaa265daac --- /dev/null +++ b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.cc @@ -0,0 +1,120 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.h" + +#include +namespace paddle { +namespace operators { + +class LargeScaleFuseSGDOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Grad"), + "Input(Grad) of LargeScaleFuseSGDOp should not be null."); + PADDLE_ENFORCE( + ctx->HasInput("LearningRate"), + "Input(LearningRate) of LargeScaleFuseSGDOp should not be null."); + + auto lr_dims = ctx->GetInputDim("LearningRate"); + + PADDLE_ENFORCE_NE(framework::product(lr_dims), 0, + "Maybe the Input variable LearningRate has not " + "been initialized. You may need to confirm " + "if you put exe.run(startup_program) " + "after optimizer.minimize function."); + + PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, + "Learning rate should have 1 element"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext &ctx) const override { + auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Grad"); + return framework::OpKernelType(data_type, ctx.device_context()); + } + + framework::OpKernelType GetKernelTypeForVar( + const std::string &var_name, const framework::Tensor &tensor, + const framework::OpKernelType &expected_kernel_type) const { + if (var_name == "LearningRate") { + return framework::OpKernelType(tensor.type(), tensor.place(), + tensor.layout()); + } + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), tensor.layout()); + } +}; + +class LargeScaleFuseSGDOpInferVarType : public framework::VarTypeInference { + public: + void operator()(framework::InferVarTypeContext *ctx) const override { + auto in_var_type = ctx->GetInputType("Grad"); + PADDLE_ENFORCE_EQ(in_var_type == framework::proto::VarType::SELECTED_ROWS || + in_var_type == framework::proto::VarType::LOD_TENSOR, + true, 
platform::errors::InvalidArgument( + "The input Var's type should be LoDtensor or " + "SelectedRows, but the received type is %s", + in_var_type)); + } +}; + +class LargeScaleFuseSGDOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("Grad", + "(SelectedRows) Ids's type should be SelectedRows" + "THe ids to be looked up in W."); + AddInput("LearningRate", "(Tensor) Learning rate of SGD"); + AddAttr("is_entry", + "(bool)" + "sparse table need entry"); + + AddAttr("tablename", + "(string)" + "sparse table name"); + + AddAttr>("value_names", + "(strings)" + "sparse table name"); + + AddComment(R"DOC( + +LargeScaleFuseSGD operator + +This operator implements one step of the stochastic gradient descent algorithm. + +$$param\_out = param - learning\_rate * grad$$ + +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR( + lookup_sparse_table_fuse_sgd, ops::LargeScaleFuseSGDOp, + ops::LargeScaleFuseSGDOpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker, + ops::LargeScaleFuseSGDOpInferVarType); + +REGISTER_OP_CPU_KERNEL( + lookup_sparse_table_fuse_sgd, + ops::LargeScaleFuseSGDOpKernel); diff --git a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.h b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.h new file mode 100644 index 0000000000000000000000000000000000000000..5d4bf1015fa3a8c2c8fb102fcd890f41b296269d --- /dev/null +++ b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.h @@ -0,0 +1,105 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/selected_rows.h" +#include "paddle/fluid/operators/distributed/large_scale_kv.h" +#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/operators/math/selected_rows_functor.h" + +namespace paddle { +namespace operators { + +template +class LargeScaleFuseSGDOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override; +}; + +template +class LargeScaleFuseSGDOpKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + const auto *learning_rate = ctx.Input("LearningRate"); + + const auto *grad_var = ctx.InputVar("Grad"); + + PADDLE_ENFORCE( + grad_var->IsType(), + platform::errors::InvalidArgument( + "in large scale optimize, gradient should only be SelectedRows")); + + const auto &grad = grad_var->Get(); + + // for distributed training, a sparse var may be empty, + // just skip updating. 
+ if (grad.rows().size() == 0) { + return; + } + + framework::SelectedRows tmp_grad_merge; + const framework::SelectedRows *grad_merge_ptr; + math::scatter::MergeAdd merge_func; + merge_func(ctx.template device_context(), grad, + &tmp_grad_merge, true); + grad_merge_ptr = &tmp_grad_merge; + + std::vector in_rows; + in_rows.reserve(grad_merge_ptr->rows().size()); + std::copy(grad_merge_ptr->rows().begin(), grad_merge_ptr->rows().end(), + std::back_inserter(in_rows)); + + const auto *lr = learning_rate->data(); + auto grad_v = grad_merge_ptr->value(); + auto grad_width = grad_v.dims()[1]; + + // auto is_entry = context.Attr("is_entry"); + auto tablename = ctx.Attr("tablename"); + auto value_names = ctx.Attr>("value_names"); + + std::vector *>> values; + std::vector dims; + + auto *ins = distributed::LargeScaleKV::GetInstance(); + auto *table = ins->Get(tablename); + table->Get(in_rows, value_names, &values); + table->Dims({"Param"}, &dims); + + PADDLE_ENFORCE_EQ(dims[0], grad_width, + platform::errors::InvalidArgument( + "param_row should have the same size with grad_row")); + + auto blas = math::GetBlas(ctx); + + std::vector grads; + framework::TensorToVector(grad_v, ctx.device_context(), &grads); + + blas.SCAL(grads.size(), lr[0], grads.data()); + + for (int x = 0; x < static_cast(in_rows.size()); ++x) { + auto ¶ms = values[x][0]; + blas.VSUB(grad_width, params->data(), grads.data() + grad_width * x, + params->data()); + } + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/distributed_ops/recv_op.cc b/paddle/fluid/operators/distributed_ops/recv_op.cc index 15b36baeada300e1ab472737b4e35538f9882cb7..2547ba3acb16031245ceae622e11893597bb9b9b 100644 --- a/paddle/fluid/operators/distributed_ops/recv_op.cc +++ b/paddle/fluid/operators/distributed_ops/recv_op.cc @@ -37,12 +37,6 @@ class RecvOp : public framework::OperatorBase { void RunImpl(const framework::Scope &scope, const platform::Place &place) const override { - int 
do_not_run = Attr("do_not_run"); - if (do_not_run) { - VLOG(3) << "recv do not run!"; - return; - } - std::vector epmap = Attr>("epmap"); std::vector varnames = Attr>("varnames"); @@ -63,11 +57,10 @@ class RecvOp : public framework::OperatorBase { if (recv_varnames.size() > 0) { auto *communicator = distributed::Communicator::GetInstance(); - if (communicator == nullptr) { + if (communicator != nullptr) { PADDLE_THROW(platform::errors::InvalidArgument( - "need run fleet.init_worker first")); + "execute startup program must before fleet.init_worker")); } - communicator->RecvNoBarrier(); } else { std::vector rets; if (with_barrier) { diff --git a/paddle/fluid/operators/distributed_ops/send_recv_util.h b/paddle/fluid/operators/distributed_ops/send_recv_util.h index c05a1ff1da8803c1ef3161d0e9d8604f9f1e5f3b..7dc0596ac31e2506ae02de11b33bd0532f02cc7a 100644 --- a/paddle/fluid/operators/distributed_ops/send_recv_util.h +++ b/paddle/fluid/operators/distributed_ops/send_recv_util.h @@ -34,16 +34,16 @@ inline bool NeedSend(const framework::Scope& scope, std::string::npos) return false; auto* var = scope.FindVar(varname); - PADDLE_ENFORCE_NOT_NULL(var, "Can not find variable '%s' in the send side.", - varname); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound( + "Can not find variable '%s' in the send side.", varname)); if (var->IsType()) { return var->Get().IsInitialized(); } else if (var->IsType()) { return var->Get().rows().size() > 0UL; } else { - PADDLE_THROW( - "Variable type in send side should be in " - "[LodTensor, SelectedRows]"); + PADDLE_THROW(platform::errors::Unimplemented( + "Variable type in send side should be LodTensor or SelectedRows.")); } return false; } diff --git a/paddle/fluid/operators/fused/CMakeLists.txt b/paddle/fluid/operators/fused/CMakeLists.txt index 3fc5f3bfc6b1633ffe835606bbac6118e6b32ca6..477a9162fe3f779d4006deb2e20b3a16f70cdf47 100644 --- a/paddle/fluid/operators/fused/CMakeLists.txt +++ 
b/paddle/fluid/operators/fused/CMakeLists.txt @@ -8,7 +8,8 @@ register_operators(EXCLUDES multihead_matmul_op fused_embedding_eltwise_layernorm_op fusion_group_op - fusion_gru_op) + fusion_gru_op + fused_bn_add_activation_op) # fusion_gru_op does not have CUDA kernel op_library(fusion_gru_op) @@ -47,4 +48,9 @@ if (WITH_GPU) file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fusion_group);\n") cc_test(test_fusion_group_op SRCS fusion_group_op_test.cc DEPS fusion_group_op) endif() + # fused_bn_add_activation + if (NOT ${CUDNN_VERSION} VERSION_LESS 7401) + op_library(fused_bn_add_activation_op) + file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fused_bn_add_activation);\n") + endif() endif() diff --git a/paddle/fluid/operators/fused/conv_fusion_op.cu b/paddle/fluid/operators/fused/conv_fusion_op.cu index b22f28fbbe3ce8ce178a3d9c17a048817cb750e7..49fded886a0339a0456ee55d0d4d1249461f93b9 100644 --- a/paddle/fluid/operators/fused/conv_fusion_op.cu +++ b/paddle/fluid/operators/fused/conv_fusion_op.cu @@ -204,6 +204,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { auto x_dims = framework::vectorize(transformed_input.dims()); auto f_dims = framework::vectorize(filter->dims()); if (!exhaustive_search) { +#if CUDNN_VERSION >= 8000 int perf_count; int best_algo_idx = 0; size_t tmp_size = 0; @@ -215,13 +216,20 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { cudnn_output_desc, kNUM_CUDNN_FWD_ALGS, &perf_count, perf_results.get())); algo = (perf_results.get())[best_algo_idx].algo; - VLOG(3) << "cuDNN forward algo " << algo; PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnGetConvolutionForwardWorkspaceSize( handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc, cudnn_output_desc, algo, &workspace_size_in_bytes)); if (workspace_size_in_bytes > workspace_size_limit) workspace_size_limit = workspace_size_in_bytes; +#else + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnGetConvolutionForwardAlgorithm( + handle, cudnn_input_desc, 
cudnn_filter_desc, cudnn_conv_desc, + cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + workspace_size_limit, &algo)); + VLOG(3) << "cuDNN forward algo " << algo; +#endif } else { std::function search_func = [&]() -> cudnnConvolutionFwdAlgo_t { diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..5b3ed03bb6419cd3c36f6ee2e856f1816d314c75 --- /dev/null +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc @@ -0,0 +1,255 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/fused/fused_bn_add_activation_op.h" +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using LoDTensor = framework::LoDTensor; + +void FusedBatchNormAddActOp::InferShape( + framework::InferShapeContext *ctx) const { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasInput("Z"), "Input", "Z", "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", + "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasInput("Bias"), "Input", "Bias", + "FusedBatchNormAddActOp"); + + // check output + OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Y", "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasOutput("MeanOut"), "Output", "MeanOut", + "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasOutput("VarianceOut"), "Output", "VarianceOut", + "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasOutput("SavedMean"), "Output", "SavedMean", + "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasOutput("SavedVariance"), "Output", "SavedVariance", + "FusedBatchNormAddActOp"); + + const auto x_dims = ctx->GetInputDim("X"); + const auto z_dims = ctx->GetInputDim("Z"); + PADDLE_ENFORCE_EQ(x_dims, z_dims, + platform::errors::InvalidArgument( + "ShapeError: the shapes of input " + "must be equal. But received: the shape " + "of input X = [%s], and the shape of " + "input Y = [%s]", + x_dims, z_dims)); + PADDLE_ENFORCE_GE(x_dims.size(), 2, platform::errors::InvalidArgument( + "ShapeError: the dimensions of input " + "must greater than or equal to 2." + "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x_dims, x_dims.size())); + PADDLE_ENFORCE_LE(x_dims.size(), 5, platform::errors::InvalidArgument( + "ShapeError: the dimensions of input " + "must smaller than or equal to 5." 
+ "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x_dims, x_dims.size())); + + const int64_t C = x_dims[x_dims.size() - 1]; + + auto scale_dim = ctx->GetInputDim("Scale"); + auto bias_dim = ctx->GetInputDim("Bias"); + + PADDLE_ENFORCE_EQ( + scale_dim.size(), 1UL, + platform::errors::InvalidArgument( + "ShapeError: the dimension of scale must equal to 1." + "But received: the shape of scale is [%s], the dimension " + "of scale is [%d]", + scale_dim, scale_dim.size())); + PADDLE_ENFORCE_EQ(bias_dim.size(), 1UL, + platform::errors::InvalidArgument( + "ShapeError: the dimension of bias must equal to 1." + "But received: the shape of bias is [%s],the dimension " + "of bias is [%d]", + bias_dim, bias_dim.size())); + + bool check = true; + if ((!ctx->IsRuntime()) && (framework::product(scale_dim) <= 0 || + framework::product(bias_dim) <= 0)) { + check = false; + } + + if (check) { + PADDLE_ENFORCE_EQ(scale_dim[0], C, + platform::errors::InvalidArgument( + "ShapeError: the shape of scale must equal to [%d]" + "But received: the shape of scale is [%d]", + C, scale_dim[0])); + PADDLE_ENFORCE_EQ(bias_dim[0], C, + platform::errors::InvalidArgument( + "ShapeError: the shape of bias must equal to [%d]" + "But received: the shape of bias is [%d]", + C, bias_dim[0])); + } + ctx->SetOutputDim("Y", x_dims); + ctx->SetOutputDim("MeanOut", {C}); + ctx->SetOutputDim("VarianceOut", {C}); + ctx->SetOutputDim("SavedMean", {C}); + ctx->SetOutputDim("SavedVariance", {C}); + ctx->ShareLoD("X", "Y"); +} + +framework::OpKernelType FusedBatchNormAddActOp::GetExpectedKernelType( + const framework::ExecutionContext &ctx) const { + auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); + // By default, the type of the scale, bias, mean, + // and var tensors should be float when input tensor's dtype is float16. 
+ auto bn_param_type = framework::proto::VarType::FP32; + + PADDLE_ENFORCE_EQ( + bn_param_type, ctx.Input("Scale")->type(), + platform::errors::InvalidArgument("Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, ctx.Input("Bias")->type(), + platform::errors::InvalidArgument("Bias input should be of float type")); + + framework::LibraryType library = framework::LibraryType::kPlain; + framework::DataLayout layout = framework::DataLayout::kAnyLayout; + + return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout, + library); +} + +void FusedBatchNormAddActOpMaker::Make() { + AddInput("X", "The input tensor"); + AddInput("Z", "The input tensor"); + AddInput("Scale", + "Scale is a 1-dimensional tensor of size C " + "that is applied to the output"); + AddInput("Bias", + "Bias is a 1-dimensional tensor of size C " + "that is applied to the output"); + AddOutput("Y", "result after normalization"); + AddOutput("MeanOut", + "Share memory with Mean. " + "Store the global mean when training"); + AddOutput("VarianceOut", + "Share memory with Variance. " + "Store the global Variance when training"); + AddOutput("SavedMean", + "Mean of the current mini batch, " + "will apply to output when training") + .AsIntermediate(); + AddOutput("SavedVariance", + "Variance of the current mini batch, " + "will apply to output when training") + .AsIntermediate(); + AddOutput("ReserveSpace", + "Reserve GPU space for triggering the new semi-persistent " + "NHWC kernel"); + AddAttr("momentum", "").SetDefault(0.9); + AddAttr("epsilon", "") + .SetDefault(1e-5) + .AddCustomChecker([](const float &epsilon) { + PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f, true, + platform::errors::InvalidArgument( + "'epsilon' should be between 0.0 and 0.001.")); + }); + AddAttr("act_type", "The activation type to be fused.") + .SetDefault("relu"); + AddComment(R"DOC( +Fused Batch Normalization with activation. 
+ +Batch Norm has been implemented as discussed in the paper: +https://arxiv.org/pdf/1502.03167.pdf +Batch Norm can be used as a normalizer function for conv2d and fully_connected operations. +Now, the required data format for FusedBatchNormAddActOp is NHWC `[batch, in_height, in_width, in_channels]`. + +)DOC"); +} + +void FusedBatchNormAddActGradOp::InferShape( + framework::InferShapeContext *ctx) const { + // check input + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", + "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasInput("Z"), "Input", "Z", + "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", + "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasInput("SavedMean"), "Input", "SavedMean", + "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasInput("SavedVariance"), "Input", "SavedVariance", + "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Y")), "Input", + framework::GradVarName("Y"), "FusedBatchNormAddActGradOp"); + + // check output + OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), "Output", + framework::GradVarName("X"), "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Z")), "Output", + framework::GradVarName("Z"), "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Scale")), "Output", + framework::GradVarName("Scale"), "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Bias")), "Output", + framework::GradVarName("Bias"), "FusedBatchNormAddActGradOp"); + + const auto in_dims = ctx->GetInputDim("X"); + const int C = in_dims[in_dims.size() - 1]; + + ctx->SetOutputDim(framework::GradVarName("X"), in_dims); + ctx->SetOutputDim(framework::GradVarName("Z"), in_dims); + ctx->SetOutputDim(framework::GradVarName("Scale"), {C}); + ctx->SetOutputDim(framework::GradVarName("Bias"), {C}); +} + +framework::OpKernelType 
FusedBatchNormAddActGradOp::GetExpectedKernelType( + const framework::ExecutionContext &ctx) const { + const auto *var = ctx.InputVar(framework::GradVarName("Y")); + if (var == nullptr) { + PADDLE_THROW(platform::errors::NotFound( + "Can not find Y@GRAD in the execution context.")); + } + const Tensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); + } + if (t == nullptr) { + PADDLE_THROW( + platform::errors::NotFound("Can not get the tensor value of Y@GRAD.")); + } + + framework::LibraryType library = framework::LibraryType::kPlain; + framework::DataLayout layout = framework::DataLayout::kAnyLayout; + + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace(), layout, + library); +} + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR( + fused_bn_add_activation, ops::FusedBatchNormAddActOp, + ops::FusedBatchNormAddActOpMaker, ops::FusedBatchNormAddActOpInferVarType, + ops::FusedBatchNormAddActGradOpMaker, + ops::FusedBatchNormAddActGradOpMaker); +REGISTER_OPERATOR(fused_bn_add_activation_grad, + ops::FusedBatchNormAddActGradOp); diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..7f1d297cda3fae54cdde089f25ccdf6715142c5f --- /dev/null +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu @@ -0,0 +1,338 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include "paddle/fluid/framework/data_layout.h" +#include "paddle/fluid/operators/activation_op.h" +#include "paddle/fluid/operators/fused/fused_bn_add_activation_op.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/operators/norm_utils.h" +#include "paddle/fluid/platform/cudnn_helper.h" +#include "paddle/fluid/platform/float16.h" + +DECLARE_bool(cudnn_batchnorm_spatial_persistent); + +namespace paddle { +namespace operators { +using Tensor = framework::Tensor; +template +using CudnnDataType = platform::CudnnDataType; +template +using BatchNormParamType = typename CudnnDataType::BatchNormParamType; + +template +class FusedBatchNormAddActKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), true, + platform::errors::PreconditionNotMet("It must use CUDAPlace.")); + double epsilon = static_cast(ctx.Attr("epsilon")); + float momentum = ctx.Attr("momentum"); + std::string act_type = ctx.Attr("act_type"); + + if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) { + LOG(ERROR) << "Provided epsilon is smaller than " + << "CUDNN_BN_MIN_EPSILON. Setting it to " + << "CUDNN_BN_MIN_EPSILON instead."; + } + epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON); + + // Get the size for each dimension. 
+ // NHWC [batch_size, in_height, in_width, in_channels] + const auto *x = ctx.Input("X"); + const auto *z = ctx.Input("Z"); + const auto &in_dims = x->dims(); + + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); + + auto *mean_out = ctx.Output("MeanOut"); + auto *variance_out = ctx.Output("VarianceOut"); + mean_out->mutable_data>(ctx.GetPlace()); + variance_out->mutable_data>(ctx.GetPlace()); + + auto *saved_mean = ctx.Output("SavedMean"); + auto *saved_variance = ctx.Output("SavedVariance"); + saved_mean->mutable_data>(ctx.GetPlace()); + saved_variance->mutable_data>(ctx.GetPlace()); + + auto *y = ctx.Output("Y"); + y->mutable_data(ctx.GetPlace()); + + int N, C, H, W, D; + const DataLayout data_layout = DataLayout::kNHWC; + ExtractNCWHD(in_dims, data_layout, &N, &C, &H, &W, &D); + + auto &dev_ctx = ctx.template device_context(); + + // ------------------- cudnn descriptors --------------------- + auto handle = dev_ctx.cudnn_handle(); + cudnnTensorDescriptor_t data_desc_; + cudnnTensorDescriptor_t bn_param_desc_; + cudnnBatchNormMode_t mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; + + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnCreateTensorDescriptor(&data_desc_)); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_)); + + std::vector dims = {N, C, H, W, D}; + std::vector strides = {H * W * D * C, 1, W * D * C, D * C, C}; + + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( + data_desc_, CudnnDataType::type, + in_dims.size() > 3 ? in_dims.size() : 4, dims.data(), strides.data())); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDeriveBNTensorDescriptor(bn_param_desc_, + data_desc_, mode_)); + + double this_factor = 1. 
- momentum; + cudnnBatchNormOps_t bnOps_ = CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION; + platform::ScopedActivationDescriptor scope_act_desc; + cudnnActivationDescriptor_t activation_desc_ = + scope_act_desc.descriptor(act_type); + size_t workspace_size = 0; + size_t reserve_space_size = 0; + void *reserve_space_ptr = nullptr; + void *workspace_ptr = nullptr; + Tensor workspace_tensor; + // Create reserve space and workspace for batch norm. + // Create tensor for each batchnorm op, it will be used in the + // backward. Thus this tensor shouldn't be temp. + auto *reserve_space = ctx.Output("ReserveSpace"); + PADDLE_ENFORCE_NOT_NULL( + reserve_space, + platform::errors::NotFound( + "The argument ReserveSpace of batch_norm op is not found.")); + + // --------------- cudnn batchnorm workspace --------------- + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload:: + cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( + /*handle=*/handle, + /*mode=*/mode_, + /*bnOps=*/bnOps_, + /*xDesc=*/data_desc_, + /*zDesc=*/data_desc_, + /*yDesc=*/data_desc_, + /*bnScaleBiasMeanVarDesc=*/bn_param_desc_, + /*activationDesc=*/activation_desc_, + /*sizeInBytes=*/&workspace_size)); + + // -------------- cudnn batchnorm reserve space -------------- + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnGetBatchNormalizationTrainingExReserveSpaceSize( + /*handle=*/handle, + /*mode=*/mode_, + /*bnOps=*/bnOps_, + /*activationDesc=*/activation_desc_, + /*xDesc=*/data_desc_, + /*sizeInBytes=*/&reserve_space_size)); + + reserve_space_ptr = reserve_space->mutable_data(ctx.GetPlace(), x->type(), + reserve_space_size); + workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->type(), + workspace_size); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnBatchNormalizationForwardTrainingEx( + handle, mode_, bnOps_, CudnnDataType::kOne(), + CudnnDataType::kZero(), data_desc_, x->template data(), + data_desc_, z->template data(), data_desc_, + y->template data(), bn_param_desc_, + 
scale->template data>(), + bias->template data>(), this_factor, + mean_out->template mutable_data>( + ctx.GetPlace()), + variance_out->template mutable_data>( + ctx.GetPlace()), + epsilon, saved_mean->template mutable_data>( + ctx.GetPlace()), + saved_variance->template mutable_data>( + ctx.GetPlace()), + activation_desc_, workspace_ptr, workspace_size, reserve_space_ptr, + reserve_space_size)); + + // clean when exit. + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDestroyTensorDescriptor(data_desc_)); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_)); + } +}; + +template +class FusedBatchNormAddActGradKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), true, + platform::errors::PreconditionNotMet("It must use CUDAPlace.")); + double epsilon = static_cast(ctx.Attr("epsilon")); + std::string act_type = ctx.Attr("act_type"); + + const auto *x = ctx.Input("X"); + const auto *z = ctx.Input("Z"); + const auto *y = ctx.Input("Y"); + const auto *d_y = ctx.Input(framework::GradVarName("Y")); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); + const auto *reserve_space = ctx.Input("ReserveSpace"); + + const auto &in_dims = x->dims(); + + int N, C, H, W, D; + const DataLayout data_layout = DataLayout::kNHWC; + ExtractNCWHD(in_dims, data_layout, &N, &C, &H, &W, &D); + + // init output + auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_z = ctx.Output(framework::GradVarName("Z")); + auto *d_scale = ctx.Output(framework::GradVarName("Scale")); + auto *d_bias = ctx.Output(framework::GradVarName("Bias")); + + d_x->mutable_data(ctx.GetPlace()); + d_z->mutable_data(ctx.GetPlace()); + PADDLE_ENFORCE_EQ( + d_scale && d_bias, true, + platform::errors::PreconditionNotMet( + "Both the scale grad and the bias grad must not be null.")); + 
d_scale->mutable_data>(ctx.GetPlace()); + d_bias->mutable_data>(ctx.GetPlace()); + PADDLE_ENFORCE_EQ(scale->dims().size(), 1UL, + platform::errors::PreconditionNotMet( + "The scale only has one dimension.")); + PADDLE_ENFORCE_EQ( + scale->dims()[0], C, + platform::errors::PreconditionNotMet( + "The size of scale is equal to the channel of Input(X).")); + + auto &dev_ctx = ctx.template device_context(); + + std::vector dims = {N, C, H, W, D}; + std::vector strides = {H * W * C * D, 1, W * D * C, D * C, C}; + // ------------------- cudnn descriptors --------------------- + cudnnTensorDescriptor_t data_desc_; + cudnnTensorDescriptor_t bn_param_desc_; + cudnnBatchNormMode_t mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; + + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnCreateTensorDescriptor(&data_desc_)); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_)); + if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) { + LOG(ERROR) << "Provided epsilon is smaller than " + << "CUDNN_BN_MIN_EPSILON. Setting it to " + << "CUDNN_BN_MIN_EPSILON instead."; + } + epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON); + + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( + data_desc_, CudnnDataType::type, + in_dims.size() > 3 ? 
in_dims.size() : 4, dims.data(), strides.data())); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDeriveBNTensorDescriptor(bn_param_desc_, + data_desc_, mode_)); + + const auto *saved_mean = ctx.Input("SavedMean"); + const auto *saved_var = ctx.Input("SavedVariance"); + const auto *saved_mean_data = + saved_mean->template data>(); + const auto *saved_var_data = + saved_var->template data>(); + + size_t workspace_size = 0; + void *workspace_ptr = nullptr; + Tensor workspace_tensor; + auto reserve_space_size = reserve_space->memory_size(); + cudnnBatchNormOps_t bnOps_ = CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION; + platform::ScopedActivationDescriptor scope_act_desc; + cudnnActivationDescriptor_t activation_desc_ = + scope_act_desc.descriptor(act_type); + // --------------- cudnn batchnorm workspace --------------- + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnGetBatchNormalizationBackwardExWorkspaceSize( + /*handle=*/dev_ctx.cudnn_handle(), + /*mode=*/mode_, + /*bnOps=*/bnOps_, + /*xDesc=*/data_desc_, + /*yDesc=*/data_desc_, + /*dyDesc=*/data_desc_, + /*dzDesc=*/data_desc_, + /*dxDesc=*/data_desc_, + /*bnScaleBiasMeanVarDesc=*/bn_param_desc_, + /*activationDesc=*/activation_desc_, + /*sizeInBytes=*/&workspace_size)); + + workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->type(), + workspace_size); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnBatchNormalizationBackwardEx( + /*handle=*/dev_ctx.cudnn_handle(), + /*mode=*/mode_, + /*bnOps=*/bnOps_, + /*alphaDataDiff=*/CudnnDataType::kOne(), + /*betaDataDiff=*/CudnnDataType::kZero(), + /*alphaParamDiff=*/CudnnDataType::kOne(), + /*betaParamDiff=*/CudnnDataType::kZero(), + /*xDesc=*/data_desc_, + /*xData=*/x->template data(), + /*yDesc=*/data_desc_, + /*yData=*/y->template data(), + /*dyDesc=*/data_desc_, + /*dyData=*/d_y->template data(), + /*dzDesc=*/data_desc_, + /*dzData=*/d_z->template data(), + /*dxDesc=*/data_desc_, + /*dxData=*/d_x->template data(), + 
/*dBnScaleBiasDesc=*/bn_param_desc_, + /*bnScaleData=*/scale->template data>(), + /*bnBiasData=*/bias->template data>(), + /*dBnScaleData=*/d_scale->template data>(), + /*dBnBiasData=*/d_bias->template data>(), + /*epsilon=*/epsilon, + /*savedMean=*/saved_mean_data, + /*savedInvVariance=*/saved_var_data, + /*activationDesmc=*/activation_desc_, + /*workspace=*/workspace_ptr, + /*workSpaceSizeInBytes=*/workspace_size, + /*reserveSpace=*/const_cast(reserve_space->template data()), + /*reserveSpaceSizeInBytes=*/reserve_space_size)); + + // clean when exit. + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDestroyTensorDescriptor(data_desc_)); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_)); + } +}; + +} // namespace operators +} // namespace paddle + +#if CUDNN_VERSION >= 7401 +namespace ops = paddle::operators; +namespace plat = paddle::platform; +REGISTER_OP_CUDA_KERNEL( + fused_bn_add_activation, + ops::FusedBatchNormAddActKernel); +REGISTER_OP_CUDA_KERNEL(fused_bn_add_activation_grad, + ops::FusedBatchNormAddActGradKernel< + plat::CUDADeviceContext, plat::float16>); +#endif diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.h b/paddle/fluid/operators/fused/fused_bn_add_activation_op.h new file mode 100644 index 0000000000000000000000000000000000000000..5c7df96e60dd89b74058ead837bb75555f3674ad --- /dev/null +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.h @@ -0,0 +1,106 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include "paddle/fluid/framework/grad_op_desc_maker.h" +#include "paddle/fluid/framework/op_proto_maker.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/framework/var_type_inference.h" + +namespace paddle { +namespace operators { +using Tensor = framework::Tensor; + +class FusedBatchNormAddActOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; +}; + +class FusedBatchNormAddActGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; +}; + +class FusedBatchNormAddActOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override; +}; + +template +class FusedBatchNormAddActGradOpMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType(this->ForwardOpType() + "_grad"); + op->SetInput("X", this->Input("X")); + op->SetInput("Z", this->Input("Z")); + op->SetInput("Y", this->Output("Y")); + op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y")); + + op->SetInput("Scale", this->Input("Scale")); + op->SetInput("Bias", this->Input("Bias")); + op->SetInput("SavedMean", this->Output("SavedMean")); + op->SetInput("SavedVariance", this->Output("SavedVariance")); + op->SetInput("ReserveSpace", 
this->Output("ReserveSpace")); + + op->SetAttrMap(this->Attrs()); + + op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); + op->SetOutput(framework::GradVarName("Z"), this->InputGrad("Z")); + op->SetOutput(framework::GradVarName("Scale"), this->InputGrad("Scale")); + op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias")); + } +}; + +class FusedBatchNormAddActOpInferVarType + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map& GetInputOutputWithSameType() + const override { + static std::unordered_map m{{"X", /*->*/ "Y"}}; + return m; + } +}; + +template +class FusedBatchNormAddActKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override; +}; + +template +class FusedBatchNormAddActGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override; +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/gru_unit_op.h b/paddle/fluid/operators/gru_unit_op.h index 712ef05d8631ac74b92795321202cb5590286e82..4865a02c5292ffb9d079d0711f0bf7d6e927c441 100644 --- a/paddle/fluid/operators/gru_unit_op.h +++ b/paddle/fluid/operators/gru_unit_op.h @@ -47,7 +47,9 @@ class GRUUnitKernel : public framework::OpKernel { else if (act_type == relu) ReluFunctor()(d, x, y); else - PADDLE_THROW("unsupported activation type"); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported activation type, only supports identity, sigmoid, tanh " + "and relu.")); } void Compute(const framework::ExecutionContext& context) const override { @@ -137,7 +139,9 @@ class GRUUnitGradKernel : public framework::OpKernel { else if (act_type == relu) ReluGradFunctor()(d, x, y, dy, dx); else - PADDLE_THROW("unsupported activation type"); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported activation type, only supports identity, sigmoid, tanh " + "and relu.")); } void Compute(const 
framework::ExecutionContext& context) const override { diff --git a/paddle/fluid/operators/interpolate_op.cc b/paddle/fluid/operators/interpolate_op.cc index 1e99e22e12b2a23685dad742f175fd2b0684d334..e8a9ed878e9bd502b9bd7e7d82f574fb5740bb5d 100644 --- a/paddle/fluid/operators/interpolate_op.cc +++ b/paddle/fluid/operators/interpolate_op.cc @@ -104,12 +104,13 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { auto dim_x = ctx->GetInputDim("X"); auto interp_method = ctx->Attrs().Get("interp_method"); - PADDLE_ENFORCE( - "bilinear" == interp_method || "nearest" == interp_method || - "bicubic" == interp_method, - "Interpolation method can only be \"bilinear\" or \"nearest\" when " - "Input(X) dimension is 4, but got method = %s .", - interp_method); + PADDLE_ENFORCE_EQ("bilinear" == interp_method || "nearest" == interp_method || + "bicubic" == interp_method, + true, platform::errors::InvalidArgument( + "Interpolation method can only be \"bilinear\" " + "or \"nearest\" or \"bicubic\" when " + "Input(X) dimension is 4, but got method is %s.", + interp_method)); const DataLayout data_layout = framework::StringToDataLayout( ctx->Attrs().Get("data_layout")); @@ -169,13 +170,13 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { auto out_size_dim = ctx->GetInputDim("OutSize"); PADDLE_ENFORCE_EQ( out_size_dim.size(), 1, - platform::errors::InvalidArgument( - "OutSize's dimension size must be 1, but got dimension = %d .", - out_size_dim.size())); + platform::errors::InvalidArgument("OutSize's dimension size must be 1, " + "but got dimension size is %d .", + out_size_dim.size())); PADDLE_ENFORCE_EQ( out_size_dim[0], 2, platform::errors::InvalidArgument( - "OutSize's dim[0] must be 2, but got dimention = %d .", + "OutSize's dimension[0] must be 2, but got dimension[0] is %d .", out_size_dim[0])); ctx->ShareLoD("X", "Out"); return; @@ -264,12 +265,15 @@ static void 
Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { if (ctx->HasInput("OutSize") && ctx->IsRuntime()) { auto out_size_dim = ctx->GetInputDim("OutSize"); - PADDLE_ENFORCE_EQ(out_size_dim.size(), 1, - "OutSize's dimension size must be 1, but got size =%d .", - out_size_dim.size()); + PADDLE_ENFORCE_EQ( + out_size_dim.size(), 1, + platform::errors::InvalidArgument( + "OutSize's dimension size must be 1, but got size is %d.", + out_size_dim.size())); PADDLE_ENFORCE_EQ(out_size_dim[0], 3, - "OutSize's dim[0] must be 3, but got size = %d .", - out_size_dim[0]); + platform::errors::InvalidArgument( + "OutSize's dim[0] must be 3, but got size is %d.", + out_size_dim[0])); ctx->ShareLoD("X", "Out"); return; } @@ -289,10 +293,8 @@ class InterpolateOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), - "Input(X) of InterpolateOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Output(Out) of InterpolationOp should not be null."); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Interpolate"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Interpolate"); auto dim_x = ctx->GetInputDim("X"); // NCHW format PADDLE_ENFORCE( @@ -534,9 +536,10 @@ class InterpolateOpGrad : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), - "Input(Out@GRAD) should not be null"); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "InterpolateGrad"); + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), "Input", + "Out@GRAD", "InterpolateGrad"); + auto dim_x = ctx->GetInputDim("X"); if (ctx->HasOutput(framework::GradVarName("X"))) { ctx->SetOutputDim(framework::GradVarName("X"), dim_x); diff --git a/paddle/fluid/operators/linspace_op.cc 
b/paddle/fluid/operators/linspace_op.cc index 667c6e892956e29478f1401c3cb2622713433037..7cc07383bfa5f67a2404b220cb481d9017b40fd8 100644 --- a/paddle/fluid/operators/linspace_op.cc +++ b/paddle/fluid/operators/linspace_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/linspace_op.h" +#include namespace paddle { namespace operators { @@ -21,7 +22,7 @@ class LinspaceOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { + void InferShape(framework::InferShapeContext *ctx) const override { OP_INOUT_CHECK(ctx->HasInput("Start"), "Input", "Start", "linspace"); OP_INOUT_CHECK(ctx->HasInput("Stop"), "Input", "Stop", "linspace"); OP_INOUT_CHECK(ctx->HasInput("Num"), "Input", "Num", "linspace"); @@ -50,11 +51,17 @@ class LinspaceOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { + const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::proto::VarType::Type(ctx.Attr("dtype")), ctx.GetPlace()); } + + framework::OpKernelType GetKernelTypeForVar( + const std::string &var_name, const framework::Tensor &tensor, + const framework::OpKernelType &expected_kernel_type) const override { + return expected_kernel_type; + } }; class LinspaceOpMaker : public framework::OpProtoAndCheckerMaker { diff --git a/paddle/fluid/operators/merge_lod_tensor_op.cc b/paddle/fluid/operators/merge_lod_tensor_op.cc index c9b852cfc05127a4bbf00ea23a751c59dc2d109d..87d914aa79753fbdc9d859c43bbf749b3ddf95cf 100644 --- a/paddle/fluid/operators/merge_lod_tensor_op.cc +++ b/paddle/fluid/operators/merge_lod_tensor_op.cc @@ -44,8 +44,10 @@ class MergeLoDTensorOp : public framework::OperatorBase { 
scope.FindVar(Output("Out"))->GetMutable(); auto level = static_cast(Attr("level")); - PADDLE_ENFORCE(in_true.numel() || in_false.numel(), - "Input(InTrue) or Input(InFalse) should be initialized."); + PADDLE_ENFORCE_EQ( + in_true.numel() || in_false.numel(), true, + platform::errors::InvalidArgument( + "Input(InTrue) or Input(InFalse) should be initialized.")); auto &mask_dim = mask.dims(); std::unique_ptr cpu_mask{new framework::LoDTensor()}; @@ -56,7 +58,9 @@ class MergeLoDTensorOp : public framework::OperatorBase { framework::TensorCopy(mask, platform::CPUPlace(), dev_ctx, cpu_mask.get()); #else - PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option"); + PADDLE_THROW(platform::errors::PreconditionNotMet( + "Not supported GPU, Please recompile or reinstall paddle with CUDA " + "support.")); #endif } auto *mask_data = cpu_mask->data(); @@ -109,7 +113,11 @@ class MergeLoDTensorOp : public framework::OperatorBase { size_t start_offset = lod_and_offset.second.first; size_t end_offset = lod_and_offset.second.second; - PADDLE_ENFORCE_GE(end_offset, start_offset); + PADDLE_ENFORCE_GE(end_offset, start_offset, + platform::errors::InvalidArgument( + "The end offset less than start offset, end offset " + "is %d, start offset is %d.", + end_offset, start_offset)); size_t len = end_offset - start_offset; if (len == 0) { continue; @@ -189,22 +197,24 @@ class MergeLoDTensorInferShape : public framework::InferShapeBase { "merge_lod_tensor"); auto mask_dim = context->GetInputDim("Mask"); PADDLE_ENFORCE_EQ(mask_dim.size(), 2, - "If you are using IfElse OP:" - "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " - "ie.true_block():\n out_1 = ie.input(x)\n\n" - "Please ensure that the cond should be a 2-D tensor and " - "the second dim size of cond should be 1. 
" - "But now the cond's shape is [", - *mask_dim.Get(), "].\n"); + platform::errors::InvalidArgument( + "If you are using IfElse OP:" + "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " + "ie.true_block():\n out_1 = ie.input(x)\n\n" + "Please ensure that the cond is a 2-D tensor and " + "the second dim size of cond is 1. " + "But now the cond's shape is [%s].\n", + mask_dim)); if (context->IsRuntime() || mask_dim[1] > 0) { PADDLE_ENFORCE_EQ(mask_dim[1], 1, - "If you are using IfElse OP:" - "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " - "ie.true_block():\n out_1 = ie.input(x)\n\n" - "Please ensure that the cond should be a 2-D tensor " - "and the second dim size of cond should be 1. " - "But now the cond's shape is [", - *mask_dim.Get(), "].\n"); + platform::errors::InvalidArgument( + "If you are using IfElse OP:" + "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " + "ie.true_block():\n out_1 = ie.input(x)\n\n" + "Please ensure that the cond is a 2-D tensor " + "and the second dim size of cond is 1. 
" + "But now the cond's shape is [%s].\n", + mask_dim)); } context->SetOutputDim("Out", context->GetInputDim("InTrue")); diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc index 5c6c38da92808f05c90e7dad2482e7c7364a1f80..eb41d21e09218b203f887d8fd812d46dc8367c71 100644 --- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc @@ -23,46 +23,54 @@ class DecayedAdagradOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Param"), - "Input(Param) of DecayedAdagradOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Grad"), - "Input(Grad) of DecayedAdagradOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Moment"), - "Input(Moment) of DecayedAdagradOp should not be null."); - PADDLE_ENFORCE( - ctx->HasInput("LearningRate"), - "Input(LearningRate) of DecayedAdagradOp should not be null."); - PADDLE_ENFORCE( - ctx->GetInputsVarType("Param").front() == - framework::proto::VarType::LOD_TENSOR, - "The input var's type should be LoDTensor, but the received is %s", - ctx->Inputs("Param").front(), ctx->GetInputsVarType("Param").front()); - PADDLE_ENFORCE( - ctx->GetInputsVarType("Grad").front() == - framework::proto::VarType::LOD_TENSOR, - "The input var's type should be LoDTensor, but the received is %s", - ctx->Inputs("Grad").front(), ctx->GetInputsVarType("Grad").front()); - - PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), - "Output(ParamOut) of DecayedAdagradOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("MomentOut"), - "Output(MomentOut) of DecayedAdagradOp should not be null."); + OP_INOUT_CHECK(ctx->HasInput("Param"), "Input", "Param", + "DecayedAdagradOp"); + OP_INOUT_CHECK(ctx->HasInput("Grad"), "Input", "Grad", "DecayedAdagradOp"); + 
OP_INOUT_CHECK(ctx->HasInput("Moment"), "Input", "Moment", + "DecayedAdagradOp"); + OP_INOUT_CHECK(ctx->HasInput("LearningRate"), "Input", "LearningRate", + "DecayedAdagradOp"); + PADDLE_ENFORCE_EQ( + ctx->GetInputsVarType("Param").front(), + framework::proto::VarType::LOD_TENSOR, + platform::errors::InvalidArgument( + "The input var's type should be LoDTensor, but the received is %s", + ctx->Inputs("Param").front(), + ctx->GetInputsVarType("Param").front())); + PADDLE_ENFORCE_EQ( + ctx->GetInputsVarType("Grad").front(), + framework::proto::VarType::LOD_TENSOR, + platform::errors::InvalidArgument( + "The input var's type should be LoDTensor, but the received is %s", + ctx->Inputs("Grad").front(), + ctx->GetInputsVarType("Grad").front())); + + OP_INOUT_CHECK(ctx->HasOutput("ParamOut"), "Output", "ParamOut", + "DecayedAdagradOp"); + OP_INOUT_CHECK(ctx->HasOutput("MomentOut"), "Output", "MomentOut", + "DecayedAdagradOp"); auto lr_dims = ctx->GetInputDim("LearningRate"); PADDLE_ENFORCE_NE(framework::product(lr_dims), 0, - "Maybe the Input variable LearningRate has not " - "been initialized. You may need to confirm " - "if you put exe.run(startup_program) " - "after optimizer.minimize function."); + platform::errors::InvalidArgument( + "Maybe the Input variable LearningRate has not " + "been initialized. 
You may need to confirm " + "if you put exe.run(startup_program) " + "after optimizer.minimize function.")); PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, - "LearningRate should have one element"); + platform::errors::InvalidArgument( + "LearningRate should have one element")); auto param_dims = ctx->GetInputDim("Param"); - PADDLE_ENFORCE_EQ(param_dims, ctx->GetInputDim("Grad"), - "Param and Grad input of DecayedAdagradOp should have " - "the same dimension."); - PADDLE_ENFORCE_EQ(param_dims, ctx->GetInputDim("Moment"), - "Param and Moment input of DecayedAdagradOp should have " - "the same dimension."); + PADDLE_ENFORCE_EQ( + param_dims, ctx->GetInputDim("Grad"), + platform::errors::InvalidArgument( + "Param and Grad input of DecayedAdagradOp should have " + "the same dimension.")); + PADDLE_ENFORCE_EQ( + param_dims, ctx->GetInputDim("Moment"), + platform::errors::InvalidArgument( + "Param and Moment input of DecayedAdagradOp should have " + "the same dimension.")); ctx->SetOutputDim("ParamOut", param_dims); ctx->SetOutputDim("MomentOut", param_dims); diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.h b/paddle/fluid/operators/optimizers/decayed_adagrad_op.h index 279edfb015c26848d4078975a40bdca650bdc6a0..f264ebf8a32636a1e2076f8721b3c95d65f5382b 100644 --- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.h +++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.h @@ -24,17 +24,19 @@ class DecayedAdagradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { const auto* param_var = ctx.InputVar("Param"); - PADDLE_ENFORCE(param_var->IsType(), - "The Var(%s)'s type should be LoDTensor, " - "but the received is %s", - ctx.InputNames("Param").front(), - framework::ToTypeName(param_var->Type())); + PADDLE_ENFORCE_EQ(param_var->IsType(), true, + platform::errors::InvalidArgument( + "The Var(%s)'s type should be LoDTensor, " + "but the received is %s", + 
ctx.InputNames("Param").front(), + framework::ToTypeName(param_var->Type()))); const auto* grad_var = ctx.InputVar("Grad"); - PADDLE_ENFORCE(grad_var->IsType(), - "The Var(%s)'s type should be LoDTensor, " - "but the received is %s", - ctx.InputNames("Grad").front(), - framework::ToTypeName(grad_var->Type())); + PADDLE_ENFORCE_EQ(grad_var->IsType(), true, + platform::errors::InvalidArgument( + "The Var(%s)'s type should be LoDTensor, " + "but the received is %s", + ctx.InputNames("Grad").front(), + framework::ToTypeName(grad_var->Type()))); auto param_out_tensor = ctx.Output("ParamOut"); auto moment_out_tensor = ctx.Output("MomentOut"); diff --git a/paddle/fluid/operators/optimizers/lars_momentum_op.h b/paddle/fluid/operators/optimizers/lars_momentum_op.h index b579b5143ddbe6221738f9864f13fb7bea4ac509..55775bc08fb5ebc31cd231b8088a9798561fabfc 100755 --- a/paddle/fluid/operators/optimizers/lars_momentum_op.h +++ b/paddle/fluid/operators/optimizers/lars_momentum_op.h @@ -30,7 +30,12 @@ class LarsMomentumOpKernel : public framework::OpKernel { auto learning_rate = ctx.Input("LearningRate"); auto* grad_var = ctx.InputVar("Grad"); // only support dense for now. 
- PADDLE_ENFORCE_EQ(grad_var->IsType(), true); + PADDLE_ENFORCE_EQ(grad_var->IsType(), true, + platform::errors::InvalidArgument( + "The Var(%s)'s type should be LoDTensor, " + "but the received is %s", + ctx.InputNames("Grad").front(), + framework::ToTypeName(grad_var->Type()))); auto grad = ctx.Input("Grad"); param_out->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/strided_memcpy.h b/paddle/fluid/operators/strided_memcpy.h index f20bada8ab288fe74fd8ca82a73522a22b234191..142b00b4de66caaedda5c4f0723d31e3a819b8a4 100644 --- a/paddle/fluid/operators/strided_memcpy.h +++ b/paddle/fluid/operators/strided_memcpy.h @@ -60,20 +60,33 @@ inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx, auto place = ctx.GetPlace(); PADDLE_ENFORCE_EQ(src_stride_numel.size(), dst_stride_numel.size(), - "src and dst tensor should have the same dims size."); + platform::errors::InvalidArgument( + "Source and destination tensor should have the same " + "dimension size, but source tensor dimension size is " + "%u, destination tensor size is %u.", + src_stride_numel.size(), dst_stride_numel.size())); for (int64_t i = 0; i < axis; ++i) { if (i < axis) { - PADDLE_ENFORCE_EQ(src_stride_numel[i] / src_stride_numel[axis], - dst_stride_numel[i] / dst_stride_numel[axis], - "src and dst should have the same elements " - "except the specified axis."); + PADDLE_ENFORCE_EQ( + src_stride_numel[i] / src_stride_numel[axis], + dst_stride_numel[i] / dst_stride_numel[axis], + platform::errors::InvalidArgument( + "Source and destination tensor should have the same number of " + "elements except the specified axis, but the source elements " + "number is %d, destination elements number is %d.", + src_stride_numel[i] / src_stride_numel[axis], + dst_stride_numel[i] / dst_stride_numel[axis])); } else if (i == axis) { continue; } else { - PADDLE_ENFORCE_EQ(src_stride_numel[i], dst_stride_numel[i], - "src and dst should have the same elements " - "except the specified axis."); + 
PADDLE_ENFORCE_EQ( + src_stride_numel[i], dst_stride_numel[i], + platform::errors::InvalidArgument( + "Source and destination tensor should have the same number of " + "elements except the specified axis, but the source elements " + "number is %d, destination elements number is %d.", + src_stride_numel[i], dst_stride_numel[i])); } } @@ -90,7 +103,8 @@ inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx, memory::Copy(gpu_place, dst + i * dst_after, gpu_place, src + i * src_after, sizeof(T) * size, cuda_ctx.stream()); #else - PADDLE_THROW("Paddle is not compiled with GPU"); + PADDLE_THROW(platform::errors::PreconditionNotMet( + "Paddle is not compiled with GPU.")); #endif } } diff --git a/paddle/fluid/operators/var_conv_2d_op.cc b/paddle/fluid/operators/var_conv_2d_op.cc index f8a29a52d7a3d9332b9dcb8189dfd7c1df902faa..db8b2c30501bd7f291b23728a26dcd3ea27e0ec5 100644 --- a/paddle/fluid/operators/var_conv_2d_op.cc +++ b/paddle/fluid/operators/var_conv_2d_op.cc @@ -78,21 +78,35 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { platform::errors::NotFound("Col(Output) of VarConv2dOP is not found.")); auto x_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ(x_dims.size(), 2, - "The rank of X(Input) can't be less than 2."); + PADDLE_ENFORCE_EQ( + x_dims.size(), 2, + platform::errors::InvalidArgument( + "The rank of X(Input) can't be less than 2, but received rank is %u.", + x_dims.size())); auto w_dims = ctx->GetInputDim("W"); - PADDLE_ENFORCE_EQ(w_dims.size(), 2, "W should be 2-D tensor"); + PADDLE_ENFORCE_EQ( + w_dims.size(), 2, + platform::errors::InvalidArgument( + "Input W should be a 2-D tensor, but its actual dimension is %u.", + w_dims.size())); int output_channel = ctx->Attrs().Get("OutputChannel"); int input_channel = ctx->Attrs().Get("InputChannel"); int kernel_h = ctx->Attrs().Get("KernelH"); int kernel_w = ctx->Attrs().Get("KernelW"); - PADDLE_ENFORCE_EQ(w_dims[0], output_channel, - "W dim[0] should be equal to 
OutputChannel"); + PADDLE_ENFORCE_EQ( + w_dims[0], output_channel, + platform::errors::InvalidArgument( + "Input W's dimension[0] should be equal to OutputChannel, the " + "dimension[0] is %d, OutputChannel is %d.", + w_dims[0], output_channel)); PADDLE_ENFORCE_EQ( w_dims[1], input_channel * kernel_h * kernel_w, - "W dim[1] should be equal to InputChannel * StrideH * StrideW"); + platform::errors::InvalidArgument( + "Input W's dimension[1] should be equal to InputChannel * StrideH * " + "StrideW, the dimension[1] is %d, expected value is %d.", + w_dims[1], input_channel * kernel_h * kernel_w)); if (ctx->IsRuntime()) { framework::Variable* x_var = @@ -103,10 +117,14 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { platform::errors::InvalidArgument("The Input(X) Tensor of VarConv2dOP " "does not contain LoD information.")); - PADDLE_ENFORCE_GE(x_lod.size(), 1, "The Input(X)'s lod info is corrupted."); - PADDLE_ENFORCE_EQ( - x_dims[0], static_cast(x_lod[0].back()), - "The Input(X)'s lod info mismatches the actual tensor shape."); + PADDLE_ENFORCE_GE(x_lod.size(), 1, + platform::errors::InvalidArgument( + "The Input(X)'s lod info is corrupted.")); + PADDLE_ENFORCE_EQ(x_dims[0], static_cast(x_lod[0].back()), + platform::errors::InvalidArgument( + "The Input(X)'s lod info mismatches the actual " + "tensor shape, input lod is %s, tensor shape is %s.", + x_lod, x_dims)); framework::Variable* row_var = BOOST_GET(framework::Variable*, ctx->GetInputVarPtrs("ROW")[0]); diff --git a/paddle/fluid/platform/cuda_profiler.h b/paddle/fluid/platform/cuda_profiler.h index 957bdf1e698d0aedb86c5b0cb732ab545c260bcc..a9382f2c8adcb18e320ef44086a312f89c03ad09 100644 --- a/paddle/fluid/platform/cuda_profiler.h +++ b/paddle/fluid/platform/cuda_profiler.h @@ -24,7 +24,11 @@ namespace platform { void CudaProfilerInit(std::string output_file, std::string output_mode, std::string config_file) { - PADDLE_ENFORCE(output_mode == "kvp" || output_mode == "csv"); + 
PADDLE_ENFORCE(output_mode == "kvp" || output_mode == "csv", + platform::errors::InvalidArgument( + "Unsupported cuda profiler output mode, expect `kvp` or " + "`csv`, but received `%s`.", + output_mode)); cudaOutputMode_t mode = output_mode == "csv" ? cudaCSV : cudaKeyValuePair; PADDLE_ENFORCE_CUDA_SUCCESS( cudaProfilerInitialize(config_file.c_str(), output_file.c_str(), mode)); diff --git a/paddle/fluid/platform/dynload/cudnn.cc b/paddle/fluid/platform/dynload/cudnn.cc index 1166dc5e4ad93fa23ef00623de6777b78b56ea09..4c59fe5e9bae4b751d87b0d2feb1ea0bd02bcf1d 100644 --- a/paddle/fluid/platform/dynload/cudnn.cc +++ b/paddle/fluid/platform/dynload/cudnn.cc @@ -30,6 +30,10 @@ CUDNN_DNN_ROUTINE_EACH_R2(DEFINE_WRAP); CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DEFINE_WRAP); #endif +#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8 +CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8(DEFINE_WRAP); +#endif + #ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R4 CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DEFINE_WRAP); #endif @@ -54,6 +58,10 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(DEFINE_WRAP); CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DEFINE_WRAP); #endif +#ifdef CUDNN_DNN_ROUTINE_EACH_R8 +CUDNN_DNN_ROUTINE_EACH_R8(DEFINE_WRAP); +#endif + bool HasCUDNN() { std::call_once(cudnn_dso_flag, []() { cudnn_dso_handle = GetCUDNNDsoHandle(); }); diff --git a/paddle/fluid/platform/dynload/cudnn.h b/paddle/fluid/platform/dynload/cudnn.h index fba41417648ba606727d00e71f48766f47479989..dd0a2e1968501a3375ad5691b27fa0c922cf2ab4 100644 --- a/paddle/fluid/platform/dynload/cudnn.h +++ b/paddle/fluid/platform/dynload/cudnn.h @@ -134,6 +134,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) #define CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8(__macro) \ __macro(cudnnGetConvolutionBackwardFilterAlgorithm); \ __macro(cudnnGetConvolutionForwardAlgorithm); \ + __macro(cudnnGetConvolutionBackwardDataAlgorithm); \ __macro(cudnnSetRNNDescriptor); CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) #endif diff --git 
a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 489dd198876204486fc94518fbef0c806d0543d4..da9900e2b271d08394cbc5e397f31b84e3b4d156 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -649,61 +649,47 @@ void BindImperative(py::module *m_ptr) { return self.NewVarBase(tensor.place(), false); }, py::return_value_policy::copy, R"DOC( - **Notes**: - **This API is ONLY available in Dygraph mode** - Returns a new Variable, detached from the current graph. - - Returns: - ( :ref:`api_guide_Variable_en` | dtype is same as current Variable): The detached Variable. + Returns a new Tensor, detached from the current graph. + Returns: The detached Tensor. Examples: .. code-block:: python - import paddle.fluid as fluid - from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph import Linear - import numpy as np - - data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') - with fluid.dygraph.guard(): - linear = Linear(32, 64) - data = to_variable(data) - x = linear(data) - y = x.detach() + import paddle + paddle.disable_static() + linear = Linear(32, 64) + data = paddle.uniform(shape=[30, 10, 32], -1, 1) + x = linear(data) + y = x.detach() )DOC") .def("clear_gradient", &imperative::VarBase::ClearGradient, R"DOC( - **Notes**: - **1. This API is ONLY available in Dygraph mode** - - **2. Use it only Variable has gradient, normally we use this for Parameters since other temporal Variable will be deleted by Python's GC** + Only for Tensor that has gradient, normally we use this for Parameters since other temporary Tensor doesen't has gradient. - Clear (set to ``0`` ) the Gradient of Current Variable + The Gradient of current Tensor will be set to ``0`` . Returns: None Examples: .. 
code-block:: python - import paddle.fluid as fluid - import numpy as np - - x = np.ones([2, 2], np.float32) - with fluid.dygraph.guard(): - inputs2 = [] - for _ in range(10): - tmp = fluid.dygraph.base.to_variable(x) - tmp.stop_gradient=False - inputs2.append(tmp) - ret2 = fluid.layers.sums(inputs2) - loss2 = fluid.layers.reduce_sum(ret2) - loss2.backward() - print(loss2.gradient()) - loss2.clear_gradient() - print("After clear {}".format(loss2.gradient())) + import paddle + paddle.disable_static() + + inputs = [] + for _ in range(10): + tmp = paddle.ones([2, 2]) + tmp.stop_gradient=False + inputs.append(tmp) + ret = paddle.sums(inputs2) + loss = paddle.reduce_sum(ret) + loss.backward() + print("Before clear_gradient {}".format(loss.grad)) + loss.clear_gradient() + print("After clear_gradient {}".format(loss.grad)) )DOC") .def("_run_backward", [](imperative::VarBase &self, const imperative::Tracer &tracer, diff --git a/paddle/fluid/train/CMakeLists.txt b/paddle/fluid/train/CMakeLists.txt index 235d92ac4f9e88947cea04425b0916b8a0290979..d587081fbac8a27df18bdacba3d94f6adcd3b171 100644 --- a/paddle/fluid/train/CMakeLists.txt +++ b/paddle/fluid/train/CMakeLists.txt @@ -26,7 +26,7 @@ function(train_test TARGET_NAME) ARGS --dirname=${PYTHON_TESTS_DIR}/book/${TARGET_NAME}${arg}.train.model/) endif() set_tests_properties(test_train_${TARGET_NAME}${arg} - PROPERTIES DEPENDS test_${TARGET_NAME}) + PROPERTIES FIXTURES_REQUIRED test_${TARGET_NAME}_infer_model) if(NOT WIN32 AND NOT APPLE) set_tests_properties(test_train_${TARGET_NAME}${arg} PROPERTIES TIMEOUT 150) diff --git a/paddle/fluid/train/demo/demo_trainer.cc b/paddle/fluid/train/demo/demo_trainer.cc index 1087f5672459506cc7b824127cd822c0df7ba566..1ef98720f83697715c05e868177faba489fd8760 100644 --- a/paddle/fluid/train/demo/demo_trainer.cc +++ b/paddle/fluid/train/demo/demo_trainer.cc @@ -29,7 +29,9 @@ namespace train { void ReadBinaryFile(const std::string& filename, std::string* contents) { std::ifstream fin(filename, 
std::ios::in | std::ios::binary); - PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", filename); + PADDLE_ENFORCE_EQ( + fin.is_open(), true, + platform::errors::Unavailable("Failed to open file %s.", filename)); fin.seekg(0, std::ios::end); contents->clear(); contents->resize(fin.tellg()); @@ -70,7 +72,8 @@ int main() { } } - PADDLE_ENFORCE_NE(loss_name, "", "loss not found"); + PADDLE_ENFORCE_NE(loss_name, "", + platform::errors::NotFound("Loss name is not found.")); // init all parameters executor.Run(*startup_program, &scope, 0); diff --git a/paddle/fluid/train/imdb_demo/demo_trainer.cc b/paddle/fluid/train/imdb_demo/demo_trainer.cc index d45edd563f03d7a1b156d063d5e7296290d0eaba..a08069a57ca824f307b4bf8836237f573ab3c429 100644 --- a/paddle/fluid/train/imdb_demo/demo_trainer.cc +++ b/paddle/fluid/train/imdb_demo/demo_trainer.cc @@ -45,7 +45,9 @@ namespace train { void ReadBinaryFile(const std::string& filename, std::string* contents) { std::ifstream fin(filename, std::ios::in | std::ios::binary); - PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", filename); + PADDLE_ENFORCE_EQ( + fin.is_open(), true, + platform::errors::Unavailable("Failed to open file %s.", filename)); fin.seekg(0, std::ios::end); contents->clear(); contents->resize(fin.tellg()); @@ -98,7 +100,11 @@ int main(int argc, char* argv[]) { file_vec.push_back(filename); } } - PADDLE_ENFORCE_GE(file_vec.size(), 1, "At least one file to train"); + PADDLE_ENFORCE_GE( + file_vec.size(), 1, + platform::errors::InvalidArgument( + "At least one file to train, but received number of file is %d.", + file_vec.size())); paddle::framework::InitDevices(false); const auto cpu_place = paddle::platform::CPUPlace(); paddle::framework::Executor executor(cpu_place); @@ -148,7 +154,9 @@ int main(int argc, char* argv[]) { const std::vector readers = dataset_ptr->GetReaders(); PADDLE_ENFORCE_EQ(readers.size(), 1, - "readers num should be equal to thread num"); + platform::errors::InvalidArgument( + "Readers 
num(%d) should be equal to thread num(1).", + readers.size())); readers[0]->SetPlace(paddle::platform::CPUPlace()); const std::vector& input_feed_names = readers[0]->GetUseSlotAlias(); diff --git a/paddle/fluid/train/test_train_recognize_digits.cc b/paddle/fluid/train/test_train_recognize_digits.cc index 45c438e8925b4e0a88e61ad509b88cd6226773a4..e7b698e1a34e267e392d696b67b92cd2e8c23f3b 100644 --- a/paddle/fluid/train/test_train_recognize_digits.cc +++ b/paddle/fluid/train/test_train_recognize_digits.cc @@ -51,7 +51,8 @@ void Train() { } } - PADDLE_ENFORCE_NE(loss_name, "", "loss not found"); + PADDLE_ENFORCE_NE(loss_name, "", + platform::errors::NotFound("Loss name is not found.")); // prepare data auto x_var = scope.Var("img"); diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index 60e4496bc545759173b68efbf85922efe8976fa4..524c086c07925c880dfb46a70a1f930686bae867 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -26,6 +26,7 @@ wmic process where name="op_function_generator.exe" call terminate 2>NUL rem ------initialize common variable------ if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0" if not defined BRANCH set BRANCH=develop +if not defined TENSORRT_ROOT set TENSORRT_ROOT="C:/TensorRT-5.1.5.0" if not defined WITH_MKL set WITH_MKL=ON if not defined WITH_GPU set WITH_GPU=OFF if not defined WITH_AVX set WITH_AVX=ON @@ -33,9 +34,11 @@ if not defined WITH_TESTING set WITH_TESTING=ON if not defined WITH_PYTHON set WITH_PYTHON=ON if not defined ON_INFER set ON_INFER=ON if not defined WITH_INFERENCE_API_TEST set WITH_INFERENCE_API_TEST=ON +if not defined WITH_STATIC_LIB set WITH_STATIC_LIB=ON if not defined WITH_CACHE set WITH_CACHE=ON if not defined WITH_TPCACHE set WITH_TPCACHE=ON + rem -------set cache build work directory----------- if "%WITH_CACHE%"=="OFF" ( rmdir build /s/q @@ -99,6 +102,7 @@ set 
CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000 :: set maximum cache size to 20G clcache.exe -M 21474836480 + rem ------set cache third_party------ set cache_dir=%work_dir:Paddle=cache% dir %cache_dir% @@ -138,6 +142,7 @@ exit /b 1 :CASE_wincheck_mkl set WITH_MKL=ON set WITH_GPU=OFF +set MSVC_STATIC_CRT=ON call :cmake || goto cmake_error call :build || goto build_error call :test_whl_pacakage || goto test_whl_pacakage_error @@ -149,11 +154,13 @@ goto:success :CASE_wincheck_openblas set WITH_MKL=OFF set WITH_GPU=ON +set MSVC_STATIC_CRT=OFF rem Temporarily turn off WITH_INFERENCE_API_TEST on GPU due to compile hang set WITH_INFERENCE_API_TEST=OFF call :cmake || goto cmake_error call :build || goto build_error call :test_whl_pacakage || goto test_whl_pacakage_error +:: call :test_inference || goto test_inference_error goto:success rem "Other configurations are added here" @@ -172,12 +179,14 @@ set start=%start:~4,10% echo cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% ^ -DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_TOOLKIT_ROOT_DIR% ^ -DON_INFER=%ON_INFER% -DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^ --DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% +-DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% -DWITH_STATIC_LIB=%WITH_STATIC_LIB% ^ +-DTENSORRT_ROOT=%TENSORRT_ROOT% -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT% cmake .. 
-G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% ^ -DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_TOOLKIT_ROOT_DIR% ^ -DON_INFER=%ON_INFER% -DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^ --DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% +-DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% -DWITH_STATIC_LIB=%WITH_STATIC_LIB% ^ +-DTENSORRT_ROOT=%TENSORRT_ROOT% -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT% goto:eof :cmake_error @@ -282,7 +291,9 @@ dir %THIRD_PARTY_PATH:/=\%\install\mklml\lib dir %THIRD_PARTY_PATH:/=\%\install\mkldnn\bin dir %THIRD_PARTY_PATH:/=\%\install\warpctc\bin -set PATH=%THIRD_PARTY_PATH:/=\%\install\openblas\lib;%THIRD_PARTY_PATH:/=\%\install\openblas\bin;%THIRD_PARTY_PATH:/=\%\install\zlib\bin;%THIRD_PARTY_PATH:/=\%\install\mklml\lib;%THIRD_PARTY_PATH:/=\%\install\mkldnn\bin;%THIRD_PARTY_PATH:/=\%\install\warpctc\bin;%PATH% +set PATH=%THIRD_PARTY_PATH:/=\%\install\openblas\lib;%THIRD_PARTY_PATH:/=\%\install\openblas\bin;^ +%THIRD_PARTY_PATH:/=\%\install\zlib\bin;%THIRD_PARTY_PATH:/=\%\install\mklml\lib;^ +%THIRD_PARTY_PATH:/=\%\install\mkldnn\bin;%THIRD_PARTY_PATH:/=\%\install\warpctc\bin;%PATH% ctest.exe --output-on-failure -C Release -j 8 --repeat until-pass:4 after-timeout:4 goto:eof @@ -305,7 +316,7 @@ set end=%end:~4,10% call :timestamp "%start%" "%end%" "TestCases Total" cd %work_dir%\paddle\fluid\inference\api\demo_ci -%cache_dir%\tools\busybox64.exe bash run.sh %work_dir:\=/% %WITH_MKL% %WITH_GPU% %cache_dir:\=/%/inference_demo +%cache_dir%\tools\busybox64.exe bash run.sh %work_dir:\=/% %WITH_MKL% %WITH_GPU% %cache_dir:\=/%/inference_demo %TENSORRT_ROOT%/include %TENSORRT_ROOT%/lib %MSVC_STATIC_CRT% goto:eof :test_inference_error diff --git a/python/paddle/distributed/fleet/base/role_maker.py b/python/paddle/distributed/fleet/base/role_maker.py index 
81d5908ccd4f859e36ae98a157fac2d37214c271..f66f013e4dbaadd534d6859b7ba6530779c82a3b 100644 --- a/python/paddle/distributed/fleet/base/role_maker.py +++ b/python/paddle/distributed/fleet/base/role_maker.py @@ -605,7 +605,8 @@ class PaddleCloudRoleMaker(RoleMakerBase): """ if not self._role_is_generated: self._generate_role() - return len(self._get_pserver_endpoints()) + return len(self._get_pserver_endpoints( + )) if self._get_pserver_endpoints() is not None else 0 def _node_num(self): """ diff --git a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py index ae5c53b8a37c4958e58ed5b09ce7cc8194f1ff52..6dd4661f00062f55bb834bbee50daf1924a0c87a 100644 --- a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py +++ b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py @@ -220,12 +220,12 @@ class ParameterServerRuntime(RuntimeBase): else: model_dirname = None - if self.role_maker._is_heter_worker(): - self._init_worker() - executor = self._get_executor() executor.run(fluid.default_startup_program()) + if self.role_maker._is_heter_worker(): + self._init_worker() + if self.role_maker._is_heter_worker(): return diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py index 7b564b3f837c001673bdd272ba60edf31cde21fb..ac6493b1c2969a8c3319bc8d29983b0ccc3a67d9 100644 --- a/python/paddle/fluid/contrib/layers/nn.py +++ b/python/paddle/fluid/contrib/layers/nn.py @@ -45,6 +45,7 @@ from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer from paddle.fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype, convert_dtype from paddle.fluid import core +from paddle.fluid.param_attr import ParamAttr from paddle.fluid.entry_attr import ProbabilityEntry, CountFilterEntry from paddle.fluid.framework import Variable, convert_np_dtype_to_dtype_ @@ -57,7 +58,7 @@ __all__ = [ 'multiclass_nms2', 
'search_pyramid_hash', 'shuffle_batch', 'partial_concat', 'sparse_embedding', 'partial_sum', 'tdm_child', 'rank_attention', 'tdm_sampler', 'batch_fc', '_pull_box_extended_sparse', 'bilateral_slice', - 'correlation' + 'correlation', 'fused_bn_add_act' ] @@ -1625,3 +1626,191 @@ def correlation(x, }, outputs={"Output": output}) return output + + +def fused_bn_add_act(x, + y, + momentum=0.9, + epsilon=1e-05, + param_attr=None, + bias_attr=None, + moving_mean_name=None, + moving_variance_name=None, + act=None, + name=None): + """ + This Op performs batch norm on input x, and adds the result to input y. Then + it performs activation on the sum. The data format of inputs must be NHWC + `[batch, in_height, in_width, in_channels]`. + + Args: + x(Tensor): The rank of input tensor can be 2, 3, 4, 5. The data type + is float16. + y(Tensor): The rank of input tensor can be 2, 3, 4, 5. The data type + is float16. + momentum(float|Tensor, optional): The value used for the moving_mean and + moving_var computation. This should be a float number or a tensor with + shape [1] and data type as float32. The updated formula is: + :math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)` + :math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)` + Default is 0.9. + epsilon(float, optional): A value added to the denominator for + numerical stability. Default is 1e-5. + param_attr(ParamAttr, optional): The parameter attribute for Parameter `scale` + of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm + will create ParamAttr as param_attr, the name of scale can be set in ParamAttr. + If the Initializer of the param_attr is not set, the parameter is initialized + with Xavier. Default: None. + bias_attr(ParamAttr, optional): The parameter attribute for the bias of batch_norm. + If it is set to None or one attribute of ParamAttr, batch_norm + will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. 
+ If the Initializer of the bias_attr is not set, the bias is initialized zero. + Default: None. + moving_mean_name(str, optional): The name of moving_mean which store the global Mean. If it + is set to None, batch_norm will save global mean with a random name, otherwise, batch_norm + will save global mean with the string. + moving_variance_name(str, optional): The name of the moving_variance which store the global Variance. + If it is set to None, batch_norm will save global variance with a random name, otherwise, batch_norm + will save global variance with the string. + act(string, optional): Activation type, linear|relu|prelu|... + name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. + Usually name is no need to set and None by default. + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + + def build_program(main_program, startup_program): + with fluid.program_guard(main_program, startup_program): + x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') + y = fluid.layers.data(name="y", shape=[1], dtype='int64') + conv1_1 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + bias_attr=False, + data_format='NHWC') + conv1_2 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + bias_attr=False, + data_format='NHWC') + bn = fluid.layers.batch_norm( + input=conv1_1, + act=None, + data_layout='NHWC') + fused_bn_add_act = fluid.contrib.layers.fused_bn_add_act(conv1_2, bn) + prediction = fluid.layers.fc(input=fused_bn_add_act, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=prediction, label=y) + loss = fluid.layers.mean(loss) + sgd = fluid.optimizer.SGD(learning_rate=0.001) + sgd = fluid.contrib.mixed_precision.decorate( + sgd, use_dynamic_loss_scaling=True, init_loss_scaling=128.0) + sgd.minimize(loss) + + return x, y, loss + + iters = 5 + batch_size = 16 + support_gpu = 
fluid.is_compiled_with_cuda() + if support_gpu: + main_program = fluid.Program() + startup_program = fluid.Program() + place = fluid.CUDAPlace(0) + x, y, loss = build_program(main_program, startup_program) + + feeder = fluid.DataFeeder(feed_list=[x, y], place=place) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=batch_size) + exe = fluid.Executor(place) + scope = fluid.Scope() + with fluid.scope_guard(scope): + exe.run(startup_program) + for _ in range(iters): + data = next(train_reader()) + loss_v = exe.run(main_program, feed=feeder.feed(data), fetch_list=[loss]) + """ + helper = LayerHelper('fused_bn_add_act', **locals()) + + check_variable_and_dtype(x, 'input', ['float16', 'float32', 'float64'], + 'fused_bn_add_act') + check_variable_and_dtype(y, 'input', ['float16', 'float32', 'float64'], + 'fused_bn_add_act') + bn_param_dtype = core.VarDesc.VarType.FP32 + + x_shape = x.shape + channel_num = x_shape[-1] + param_shape = [channel_num] + + # create parameter + scale = helper.create_parameter( + attr=helper.param_attr, + shape=param_shape, + dtype=bn_param_dtype, + default_initializer=Constant(1.0)) + bias = helper.create_parameter( + attr=helper.bias_attr, + shape=param_shape, + dtype=bn_param_dtype, + is_bias=True) + mean = helper.create_parameter( + attr=ParamAttr( + name=moving_mean_name, initializer=Constant(0.0), trainable=False), + shape=param_shape, + dtype=bn_param_dtype) + mean.stop_gradient = True + variance = helper.create_parameter( + attr=ParamAttr( + name=moving_variance_name, + initializer=Constant(1.0), + trainable=False), + shape=param_shape, + dtype=bn_param_dtype) + variance.stop_gradient = True + + # create output + # mean and mean_out share the same memory + mean_out = mean + # variance and variance out share the same memory + variance_out = variance + saved_mean = helper.create_variable_for_type_inference( + dtype=bn_param_dtype, stop_gradient=True) + saved_variance = helper.create_variable_for_type_inference( + 
dtype=bn_param_dtype, stop_gradient=True) + reserve_space = helper.create_variable_for_type_inference( + dtype=core.VarDesc.VarType.FP16, stop_gradient=True) + batch_norm_out = helper.create_variable_for_type_inference( + core.VarDesc.VarType.FP16) + + inputs = { + "X": x, + "Z": y, + "Scale": scale, + "Bias": bias, + } + attrs = {"epsilon": epsilon, 'momentum': momentum} + + outputs = { + "Y": batch_norm_out, + "MeanOut": mean_out, + "VarianceOut": variance_out, + "SavedMean": saved_mean, + "SavedVariance": saved_variance, + "ReserveSpace": reserve_space + } + + helper.append_op( + type="fused_bn_add_activation", + inputs=inputs, + outputs=outputs, + attrs=attrs) + + return batch_norm_out diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py index 1f301b7148d005d4e3d5d272fd78f78af6dc1e6a..a9f080c514dff078b0068bce262fa177fd0b0db2 100644 --- a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py +++ b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py @@ -135,6 +135,7 @@ gray_list = { 'get_tensor_from_selected_rows', 'sign', 'cast', + 'fused_bn_add_activation', } ''' # The set of ops that don't support fp16 calculation diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py index 0b142ff33de55f36410eb9c23cb75210fc9d6321..0ff166d8dc89ac79c36343df9bc379cb171c36fd 100644 --- a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py +++ b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py @@ -69,8 +69,10 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype): ] for in_name in op.input_names: - if src_dtype == core.VarDesc.VarType.FP32 and op.type == 'batch_norm': - if in_name != 'X': + if src_dtype == core.VarDesc.VarType.FP32 and op.type in [ + 'batch_norm', 'fused_bn_add_activation' + ]: + if in_name not in {'X', 'Z'}: continue for in_var_name in op.input(in_name): in_var = 
block.var(in_var_name) @@ -102,7 +104,8 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype): op._set_attr('in_dtype', dest_dtype) if src_dtype == core.VarDesc.VarType.FP32 and dest_dtype == core.VarDesc.VarType.FP16: for out_name in op.output_names: - if op.type == 'batch_norm' and out_name != 'Y': + if op.type in ['batch_norm', 'fused_bn_add_activation' + ] and out_name != 'Y': continue for out_var_name in op.output(out_name): out_var = block.var(out_var_name) diff --git a/python/paddle/fluid/dygraph/math_op_patch.py b/python/paddle/fluid/dygraph/math_op_patch.py index 3aa7b9dfc262810686319819f717f3cfd06b5e50..68206f62860852b1124b65da0e4124f60a2a8051 100644 --- a/python/paddle/fluid/dygraph/math_op_patch.py +++ b/python/paddle/fluid/dygraph/math_op_patch.py @@ -17,8 +17,7 @@ from __future__ import print_function from .. import core from ..framework import Variable, convert_np_dtype_to_dtype_, _varbase_creator from ..layers.layer_function_generator import OpProtoHolder -from ..layers import common_methods -from . import to_variable, no_grad +from . import no_grad import numpy as np import six @@ -53,47 +52,25 @@ def monkey_patch_math_varbase(): def astype(self, dtype): """ - **Notes**: - **The variable must be a** :ref:`api_fluid_Tensor` - Cast a variable to a specified data type. + Cast a Tensor to a specified data type. Args: - - self(Variable): The source variable - - dtype: The target data type + dtype: The target data type. Returns: - Variable: Variable with new dtype + Tensor: a new Tensor with target dtype Examples: - In Static Graph Mode: - - .. code-block:: python - - import paddle.fluid as fluid - - startup_prog = fluid.Program() - main_prog = fluid.Program() - with fluid.program_guard(startup_prog, main_prog): - original_variable = fluid.data(name = "new_variable", shape=[2,2], dtype='float32') - new_variable = original_variable.astype('int64') - print("new var's dtype is: {}".format(new_variable.dtype)) - - In Dygraph Mode: - .. 
code-block:: python - import paddle.fluid as fluid + import paddle import numpy as np - x = np.ones([2, 2], np.float32) - with fluid.dygraph.guard(): - original_variable = fluid.dygraph.to_variable(x) - print("original var's dtype is: {}, numpy dtype is {}".format(original_variable.dtype, original_variable.numpy().dtype)) - new_variable = original_variable.astype('int64') - print("new var's dtype is: {}, numpy dtype is {}".format(new_variable.dtype, new_variable.numpy().dtype)) + original_tensor = paddle.ones([2, 2]) + print("original tensor's dtype is: {}".format(original_tensor.dtype)) + new_tensor = original_tensor.astype('float32') + print("new tensor's dtype is: {}".format(new_tensor.dtype)) """ if not isinstance(dtype, core.VarDesc.VarType): @@ -147,6 +124,10 @@ def monkey_patch_math_varbase(): def _ndim_(var): return len(var.shape) + @property + def _size_(var): + return np.prod(var.shape) + def _scalar_add_(var, value): return _scalar_elementwise_op_(var, 1.0, value) @@ -208,7 +189,6 @@ def monkey_patch_math_varbase(): __impl__.__doc__ = """ {0} Args: - self(Tensor): left hand Tensor other_var(Tensor|float|int): right hand Tensor Returns: @@ -217,23 +197,7 @@ def monkey_patch_math_varbase(): __impl__.__name__ = method_name return __impl__ - # Todo(zhouwei): implement dygraph template to adapt to any function, receive('op_type', 'arg_template') - # Such as _method_creator_('addmm', 'x, y, alpha=1.0, beta=1.0, name=None'). It can reduce call time. 
- def _method_creator_(op_type, arg_template=None): - def __impl__(self): - op = getattr(core.ops, op_type) - return op(self) - - __impl__.__doc__ = """ - - See paddle.{}""".format(op_type) - __impl__.__name__ = op_type - - return __impl__ - varbase_methods = [ - # Type1: From custom fun or lambda - ## b=-a ('__neg__', _neg_), ('__float__', _float_), ('__long__', _long_), @@ -244,8 +208,7 @@ def monkey_patch_math_varbase(): ('dim', lambda x: len(x.shape)), ('ndimension', lambda x: len(x.shape)), ('ndim', _ndim_), - ('size', lambda x: x.shape), - # Type2: From Template that create core.ops automatically. It's recommended. + ('size', _size_), ('__add__', _binary_creator_('__add__', 'elementwise_add', False, _scalar_add_)), ## a+b == b+a. Do not need to reverse explicitly @@ -283,31 +246,7 @@ def monkey_patch_math_varbase(): ('__le__', _binary_creator_('__le__', 'less_equal', False, None)), ('__gt__', _binary_creator_('__gt__', 'greater_than', False, None)), ('__ge__', _binary_creator_('__ge__', 'greater_equal', False, None)), - ('__array_ufunc__', None), - ('sigmoid', _method_creator_('sigmoid', 'name=None')), - ('log_sigmoid', _method_creator_('logsigmoid', 'name=None')), - ('exp', _method_creator_('exp', 'name=None')), - ('tanh', _method_creator_('tanh', 'name=None')), - ('atan', _method_creator_('atan', 'name=None')), - ('tanh_shrink', _method_creator_('tanh_shrink', 'name=None')), - ('sqrt', _method_creator_('sqrt', 'name=None')), - ('rsqrt', _method_creator_('rsqrt', 'name=None')), - ('abs', _method_creator_('abs', 'name=None')), - ('ceil', _method_creator_('ceil', 'name=None')), - ('floor', _method_creator_('floor', 'name=None')), - ('cos', _method_creator_('cos', 'name=None')), - ('acos', _method_creator_('acos', 'name=None')), - ('asin', _method_creator_('asin', 'name=None')), - ('sin', _method_creator_('sin', 'name=None')), - ('sinh', _method_creator_('sinh', 'name=None')), - ('cosh', _method_creator_('cosh', 'name=None')), - ('round', 
_method_creator_('round', 'name=None')), - ('reciprocal', _method_creator_('reciprocal', 'name=None')), - ('square', _method_creator_('square', 'name=None')), - ('softplus', _method_creator_('softplus', 'name=None')), - ('softsign', _method_creator_('softsign', 'name=None')), - # Type3: Form module 'paddle.tensor' defaultly. - # It's not a goodway, because it will increase call time. + ('__array_ufunc__', None) ] global _already_patch_varbase @@ -318,7 +257,15 @@ def monkey_patch_math_varbase(): setattr(core.VarBase, method_name, method_impl) else: import paddle.tensor - for method_name in common_methods: + # Tensor method from module paddle.tensor + tensor_methods = paddle.tensor.linalg.__all__ + \ + paddle.tensor.math.__all__ + \ + paddle.tensor.logic.__all__ + \ + paddle.tensor.manipulation.__all__ + \ + paddle.tensor.search.__all__ + \ + paddle.tensor.stat.__all__ + \ + paddle.tensor.attribute.__all__ + for method_name in tensor_methods: if hasattr(core.VarBase, method_name): continue method_impl = getattr(paddle.tensor, method_name, None) if method_impl: setattr(core.VarBase, method_name, method_impl) diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py index 236cb458be4c6a07f768761b41464e64d4d53f77..e556a98ed7504b199624deeac10ea594efa269b4 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py @@ -191,12 +191,14 @@ class FleetTranspiler(Fleet): self._communicator = Communicator( trainer_config.mode, kwargs, trainer_config.get_communicator_flags()) + self._communicator.init_with_ctx(send_ctx, recv_ctx) if not self._communicator.is_running(): self._communicator.start() else: - warnings.warn("communicator has been initialized, skip") + raise ValueError( + "Communicator can only be inited once, 
please check") def init_worker(self): """ diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py index 05deff10a2e1c914e9725c7d8697a704db6e7e42..a60c4e149f582e4f364910611d18cda5fbca4f07 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py @@ -624,6 +624,7 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False): value_dims = [] grad = None opt_idx = -1 + fuse = False for op in block.ops: opt_idx += 1 @@ -631,6 +632,9 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False): if op.type not in opt_value_map.keys(): continue + if op.type in ["sgd", "adam"]: + fuse = True + grad = main_program.global_block().vars[op.input("Grad")[0]] for value in opt_value_map[op.type]: @@ -644,7 +648,67 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False): if value_names: break - return grad, opt_idx, value_names, value_dims, acture_names + return grad, opt_idx, value_names, value_dims, acture_names, fuse + + def add_fuse_large_scale_op(block, global_block, table_name, value_names, + acture_names, grad, is_entry, opt_idx): + + op = block.ops[opt_idx] + + if op.type == "sgd": + grad = main_program.global_block().vars[op.input("Grad")[0]] + lr = main_program.global_block().vars[op.input("LearningRate")[0]] + + block._insert_op( + opt_idx, + type="lookup_sparse_table_fuse_sgd", + inputs={"Grad": grad, + "LearningRate": lr}, + attrs={ + "is_entry": is_entry, + "tablename": table_name, + "value_names": value_names + }) + + elif op.type == "adam": + grad = main_program.global_block().vars[op.input("Grad")[0]] + lr = main_program.global_block().vars[op.input("LearningRate")[0]] + beta1_pow = main_program.global_block().vars[op.input("Beta1Pow")[ + 0]] + beta2_pow = 
main_program.global_block().vars[op.input("Beta2Pow")[ + 0]] + beta1_pow_o = main_program.global_block().vars[op.output( + "Beta1PowOut")[0]] + beta2_pow_o = main_program.global_block().vars[op.output( + "Beta2PowOut")[0]] + + beta1 = op.attr('beta1') + beta2 = op.attr('beta2') + epsilon = op.attr('epsilon') + + block._insert_op( + opt_idx, + type="lookup_sparse_table_fuse_adam", + inputs={ + "Grad": grad, + "LearningRate": lr, + "Beta1Pow": beta1_pow, + "Beta2Pow": beta2_pow + }, + outputs={ + "Beta1PowOut": beta1_pow_o, + "Beta2PowOut": beta2_pow_o + }, + attrs={ + "beta1": beta1, + "beta2": beta2, + "epsilon": epsilon, + "is_entry": is_entry, + "tablename": table_name, + "value_names": value_names + }) + else: + raise ValueError("only support sgd/adam optimizer now") def add_large_scale_op(block, global_block, table_name, value_names, acture_names, grad, is_entry, opt_idx): @@ -711,24 +775,35 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False): for param, blockid in param_blockid_map.items(): opt_block = program.block(blockid) - grad, opt_idx, value_names, value_dims, acture_names = \ + grad, opt_idx, value_names, value_dims, acture_names, fuse = \ get_optimizer_values(opt_block) entry_attr = get_entry_attr(param) is_entry = False if entry_attr == "none" else True - add_large_scale_op(opt_block, - program.global_block(), param, value_names, - acture_names, grad, is_entry, opt_idx) + if fuse: + add_fuse_large_scale_op(opt_block, + program.global_block(), param, + value_names, acture_names, grad, + is_entry, opt_idx) + else: + add_large_scale_op(opt_block, + program.global_block(), param, value_names, + acture_names, grad, is_entry, opt_idx) else: large_scale_kv_metas = [] for param, blockid in param_blockid_map.items(): opt_block = main_program.block(blockid) - grad, _, value_names, value_dims, acture_names = \ + + grad, opt_idx, value_names, value_dims, acture_names, fuse = \ get_optimizer_values(opt_block) entry_attr = 
get_entry_attr(param) + if fuse: + # remove origin optimzier op + opt_block._remove_op(opt_idx) + # training/infer mode = "0" names_str = ",".join(value_names) diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py index 4543af9820e8c9326098fa254494ca1c896d3b12..3f826da3ae2beca51b639a69da4113e6d9580d6c 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py @@ -227,22 +227,6 @@ def init_from_server_pass(program, config): fetch_barrier_out = program.global_block().create_var( name=framework.generate_control_dev_var_name()) - recv_ctx = config.get_communicator_recv_context(recv_type=1) - recv_varnames = [] - - for name, ctxs in recv_ctx.items(): - recv_varnames.extend(ctxs.origin_varnames()) - - program.global_block().append_op( - type="recv", - inputs={"X": []}, - outputs={"Out": []}, - attrs={ - "recv_varnames": recv_varnames, - "trainer_id": config.get_role_id(), - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE - }) - program.global_block().append_op( type="fetch_barrier", inputs={}, diff --git a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py index 60378aa98272dae32a97b33e84fc61e71193658c..06a90b78fd2e53d065f1abbaf9e95df848f9cc52 100644 --- a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py +++ b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py @@ -164,8 +164,8 @@ def train(args): elif fleet.is_worker(): logger.info("run trainer") - fleet.init_worker() exe.run(fleet.startup_program) + fleet.init_worker() thread_num = 2 filelist = [] diff --git a/python/paddle/fluid/layers/math_op_patch.py b/python/paddle/fluid/layers/math_op_patch.py index 4595f0cf93916d71a3d0ec582af1917500d68f12..92b58a7e2ee4c76af7047a14f67e40d76be76dc0 100644 --- 
a/python/paddle/fluid/layers/math_op_patch.py +++ b/python/paddle/fluid/layers/math_op_patch.py @@ -54,29 +54,6 @@ EXPRESSION_MAP = { "__ge__": "A >= B" } -# method for Tensor from paddle.tensor -# edit it when paddle.tensor has new method about Tensor operation -common_methods = [ - 'exp', 'tanh', 'atan', 'sqrt', 'rsqrt', 'abs', 'ceil', 'floor', 'cos', - 'acos', 'asin', 'sin', 'sinh', 'cosh', 'round', 'reciprocal', 'square', - 'rank', 'matmul', 'dot', 'norm', 'transpose', 'dist', 't', 'cross', - 'cholesky', 'bmm', 'histogram', 'equal', 'greater_equal', 'greater_than', - 'is_empty', 'isfinite', 'less_equal', 'less_than', 'logical_and', - 'logical_not', 'logical_or', 'logical_xor', 'not_equal', 'reduce_all', - 'reduce_any', 'allclose', 'equal_all', 'cast', 'expand', 'expand_as', - 'tile', 'flatten', 'gather', 'gather_nd', 'reshape', 'reverse', 'scatter', - 'scatter_nd_add', 'scatter_nd', 'shard_index', 'slice', 'split', 'squeeze', - 'strided_slice', 'unique', 'unique_with_counts', 'unsqueeze', 'flip', - 'unbind', 'roll', 'cumsum', 'increment', 'log', 'pow', 'reciprocal', - 'round', 'rsqrt', 'scale', 'sign', 'stanh', 'sum', 'reduce_prod', 'max', - 'min', 'mm', 'div', 'multiply', 'add', 'logsumexp', 'log1p', 'erf', - 'addcmul', 'addmm', 'clamp', 'trace', 'kron', 'argmax', 'argmin', 'argsort', - 'has_inf', 'has_nan', 'topk', 'index_select', 'nonzero', 'sort', - 'index_sample', 'mean', 'std', 'var', 'elementwise_add', 'elementwise_div', - 'elementwise_floordiv', 'elementwise_mod', 'elementwise_pow', - 'elementwise_sub' -] - _already_patch_variable = False @@ -372,7 +349,14 @@ def monkey_patch_variable(): setattr(Variable, method_name, method_impl) else: import paddle.tensor - for method_name in common_methods: + variabel_methods = paddle.tensor.linalg.__all__ + \ + paddle.tensor.math.__all__ + \ + paddle.tensor.logic.__all__ + \ + paddle.tensor.manipulation.__all__ + \ + paddle.tensor.search.__all__ + \ + paddle.tensor.stat.__all__ + \ + paddle.tensor.attribute.__all__ 
+ for method_name in variabel_methods: if hasattr(Variable, method_name): continue method_impl = getattr(paddle.tensor, method_name, None) if method_impl: setattr(Variable, method_name, method_impl) diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index cf52f3b00fb2739d186021dc51d6aa0f506be706..2fba578ec077f2a74388d433bf3ab5b3098e81ad 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -1453,11 +1453,14 @@ def linspace(start, stop, num, dtype=None, name=None): if not isinstance(dtype, core.VarDesc.VarType): dtype = convert_np_dtype_to_dtype_(dtype) if not isinstance(start, Variable): - tensor_start = fill_constant([1], dtype, start) + with device_guard("cpu"): + tensor_start = fill_constant([1], dtype, start) if not isinstance(stop, Variable): - tensor_stop = fill_constant([1], dtype, stop) + with device_guard("cpu"): + tensor_stop = fill_constant([1], dtype, stop) if not isinstance(num, Variable): - tensor_num = fill_constant([1], 'int32', num) + with device_guard("cpu"): + tensor_num = fill_constant([1], 'int32', num) if in_dygraph_mode(): return core.ops.linspace(tensor_start, tensor_stop, tensor_num, 'dtype', dtype) diff --git a/python/paddle/fluid/tests/book/CMakeLists.txt b/python/paddle/fluid/tests/book/CMakeLists.txt index 673c965b662a022739f8d489c331f4de9455a926..96321aae566d1f910042f4e348d0be8b3e88c341 100644 --- a/python/paddle/fluid/tests/book/CMakeLists.txt +++ b/python/paddle/fluid/tests/book/CMakeLists.txt @@ -4,4 +4,5 @@ string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") # default test foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) + set_tests_properties(${src} PROPERTIES FIXTURES_SETUP ${src}_infer_model) endforeach() diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py index 8277499fcce341207fa75a74dfda0a2ccc2e3b63..5721445c414cf94379f44cab6bd01cca511938bf 100644 --- 
a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py @@ -163,8 +163,10 @@ class TestDistCTR2x2(FleetDistRunnerBase): """ exe = fluid.Executor(fluid.CPUPlace()) - fleet.init_worker() + exe.run(fluid.default_startup_program()) + fleet.init_worker() + batch_size = 4 train_reader = paddle.batch(fake_ctr_reader(), batch_size=batch_size) self.reader.decorate_sample_list_generator(train_reader) @@ -202,8 +204,8 @@ class TestDistCTR2x2(FleetDistRunnerBase): exe = fluid.Executor(fluid.CPUPlace()) - fleet.init_worker() exe.run(fluid.default_startup_program()) + fleet.init_worker() thread_num = 2 batch_size = 128 diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py index 0e3c80992771424e4216a79b991de1c62884c757..3852b225234ffacc2be749245fb1341331868272 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py @@ -60,8 +60,9 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2): device_id = int(os.getenv("FLAGS_selected_gpus", "0")) place = fluid.CUDAPlace(device_id) exe = fluid.Executor(place) - fleet.init_worker() + exe.run(fleet.startup_program) + fleet.init_worker() batch_size = 4 train_reader = paddle.batch(fake_ctr_reader(), batch_size=batch_size) @@ -104,8 +105,8 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2): place = fluid.CUDAPlace(device_id) exe = fluid.Executor(place) - fleet.init_worker() exe.run(fleet.startup_program) + fleet.init_worker() thread_num = 2 batch_size = 128 diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py index 2f938a813d8a7598e49023066759a490eab53263..470fb98d7991cf0cbffa47f6d5129b045f59ae97 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py @@ -152,8 +152,9 @@ 
class TestHeterPsCTR2x2(FleetDistHeterRunnerBase): """ exe = fluid.Executor(fluid.CPUPlace()) - fleet.init_worker() exe.run(fluid.default_startup_program()) + fleet.init_worker() + batch_size = 4 train_reader = paddle.batch(fake_ctr_reader(), batch_size=batch_size) self.reader.decorate_sample_list_generator(train_reader) @@ -176,8 +177,8 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase): exe = fluid.Executor(fluid.CPUPlace()) - fleet.init_worker() exe.run(fluid.default_startup_program()) + fleet.init_worker() thread_num = int(os.getenv("CPU_NUM", 2)) batch_size = 128 diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py index 2ea69e1b6763087bb2b278b59a8a59b4331847da..ff84848873924c52b0f7e8f5bc71ec2a266b73f1 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py @@ -222,8 +222,8 @@ class TestDistSimnetBow2x2(FleetDistRunnerBase): """ exe = fluid.Executor(fluid.CPUPlace()) - fleet.init_worker() exe.run(fluid.default_startup_program()) + fleet.init_worker() batch_size = 4 # reader train_reader = paddle.batch(fake_simnet_reader(), batch_size=batch_size) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py index 77697896b4d556da8a98c17e281b3d7a6999fd64..81530573a604205f0202d088853038bbc71b92e6 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py @@ -151,8 +151,9 @@ class TestDistCTR2x2(FleetDistRunnerBase): """ exe = fluid.Executor(fluid.CPUPlace()) - fleet.init_worker() + exe.run(fluid.default_startup_program()) + fleet.init_worker() batch_size = 4 diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells.py index 
8d2677229a03f7bdac14a93e176747ba0a5f1d6b..ab1127afa58dd93aa92688eebdf82292990f59b1 100644 --- a/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells.py +++ b/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells.py @@ -47,7 +47,7 @@ class TestSimpleRNNCell(unittest.TestCase): prev_h = np.random.randn(4, 32) y1, h1 = rnn1(x, prev_h) - y2, h2 = rnn2(paddle.to_variable(x), paddle.to_variable(prev_h)) + y2, h2 = rnn2(paddle.to_tensor(x), paddle.to_tensor(prev_h)) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) def test_with_zero_state(self): @@ -57,7 +57,7 @@ class TestSimpleRNNCell(unittest.TestCase): x = np.random.randn(4, 16) y1, h1 = rnn1(x) - y2, h2 = rnn2(paddle.to_variable(x)) + y2, h2 = rnn2(paddle.to_tensor(x)) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) def runTest(self): @@ -90,7 +90,7 @@ class TestGRUCell(unittest.TestCase): prev_h = np.random.randn(4, 32) y1, h1 = rnn1(x, prev_h) - y2, h2 = rnn2(paddle.to_variable(x), paddle.to_variable(prev_h)) + y2, h2 = rnn2(paddle.to_tensor(x), paddle.to_tensor(prev_h)) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) def test_with_zero_state(self): @@ -100,7 +100,7 @@ class TestGRUCell(unittest.TestCase): x = np.random.randn(4, 16) y1, h1 = rnn1(x) - y2, h2 = rnn2(paddle.to_variable(x)) + y2, h2 = rnn2(paddle.to_tensor(x)) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) def runTest(self): @@ -134,8 +134,8 @@ class TestLSTMCell(unittest.TestCase): y1, (h1, c1) = rnn1(x, (prev_h, prev_c)) y2, (h2, c2) = rnn2( - paddle.to_variable(x), - (paddle.to_variable(prev_h), paddle.to_variable(prev_c))) + paddle.to_tensor(x), + (paddle.to_tensor(prev_h), paddle.to_tensor(prev_c))) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(c1, c2.numpy(), atol=1e-8, rtol=1e-5) @@ -146,7 +146,7 @@ class TestLSTMCell(unittest.TestCase): x = np.random.randn(4, 16) y1, (h1, c1) = rnn1(x) - y2, (h2, c2) = 
rnn2(paddle.to_variable(x)) + y2, (h2, c2) = rnn2(paddle.to_tensor(x)) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(c1, c2.numpy(), atol=1e-8, rtol=1e-5) diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py index ef297b3bb62497073fd667238cae8a83daaa4967..7c03b51837ef6f7be8021dca55daf3b43f7d3053 100644 --- a/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py +++ b/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py @@ -53,7 +53,7 @@ class TestSimpleRNN(unittest.TestCase): prev_h = np.random.randn(2 * self.num_directions, 4, 32) y1, h1 = rnn1(x, prev_h) - y2, h2 = rnn2(paddle.to_variable(x), paddle.to_variable(prev_h)) + y2, h2 = rnn2(paddle.to_tensor(x), paddle.to_tensor(prev_h)) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) @@ -66,7 +66,7 @@ class TestSimpleRNN(unittest.TestCase): x = np.transpose(x, [1, 0, 2]) y1, h1 = rnn1(x) - y2, h2 = rnn2(paddle.to_variable(x)) + y2, h2 = rnn2(paddle.to_tensor(x)) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) @@ -81,11 +81,11 @@ class TestSimpleRNN(unittest.TestCase): y1, h1 = rnn1(x, sequence_length=sequence_length) - seq_len = paddle.to_variable(sequence_length) + seq_len = paddle.to_tensor(sequence_length) mask = sequence_mask(seq_len, dtype=paddle.get_default_dtype()) if self.time_major: mask = paddle.transpose(mask, [1, 0]) - y2, h2 = rnn2(paddle.to_variable(x), sequence_length=seq_len) + y2, h2 = rnn2(paddle.to_tensor(x), sequence_length=seq_len) y2 = paddle.multiply(y2, mask, axis=0) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) @@ -133,7 +133,7 @@ class TestGRU(unittest.TestCase): prev_h = np.random.randn(2 * self.num_directions, 4, 32) y1, h1 = rnn1(x, prev_h) - y2, h2 = rnn2(paddle.to_variable(x), 
paddle.to_variable(prev_h)) + y2, h2 = rnn2(paddle.to_tensor(x), paddle.to_tensor(prev_h)) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) @@ -146,7 +146,7 @@ class TestGRU(unittest.TestCase): x = np.transpose(x, [1, 0, 2]) y1, h1 = rnn1(x) - y2, h2 = rnn2(paddle.to_variable(x)) + y2, h2 = rnn2(paddle.to_tensor(x)) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) @@ -161,11 +161,11 @@ class TestGRU(unittest.TestCase): y1, h1 = rnn1(x, sequence_length=sequence_length) - seq_len = paddle.to_variable(sequence_length) + seq_len = paddle.to_tensor(sequence_length) mask = sequence_mask(seq_len, dtype=paddle.get_default_dtype()) if self.time_major: mask = paddle.transpose(mask, [1, 0]) - y2, h2 = rnn2(paddle.to_variable(x), sequence_length=seq_len) + y2, h2 = rnn2(paddle.to_tensor(x), sequence_length=seq_len) y2 = paddle.multiply(y2, mask, axis=0) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) @@ -209,8 +209,8 @@ class TestLSTM(unittest.TestCase): y1, (h1, c1) = rnn1(x, (prev_h, prev_c)) y2, (h2, c2) = rnn2( - paddle.to_variable(x), - (paddle.to_variable(prev_h), paddle.to_variable(prev_c))) + paddle.to_tensor(x), + (paddle.to_tensor(prev_h), paddle.to_tensor(prev_c))) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(c1, c2.numpy(), atol=1e-8, rtol=1e-5) @@ -224,7 +224,7 @@ class TestLSTM(unittest.TestCase): x = np.transpose(x, [1, 0, 2]) y1, (h1, c1) = rnn1(x) - y2, (h2, c2) = rnn2(paddle.to_variable(x)) + y2, (h2, c2) = rnn2(paddle.to_tensor(x)) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(c1, c2.numpy(), atol=1e-8, rtol=1e-5) @@ -240,11 +240,11 @@ class 
TestLSTM(unittest.TestCase): y1, (h1, c1) = rnn1(x, sequence_length=sequence_length) - seq_len = paddle.to_variable(sequence_length) + seq_len = paddle.to_tensor(sequence_length) mask = sequence_mask(seq_len, dtype=paddle.get_default_dtype()) if self.time_major: mask = paddle.transpose(mask, [1, 0]) - y2, (h2, c2) = rnn2(paddle.to_variable(x), sequence_length=seq_len) + y2, (h2, c2) = rnn2(paddle.to_tensor(x), sequence_length=seq_len) y2 = paddle.multiply(y2, mask, axis=0) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) diff --git a/python/paddle/fluid/tests/unittests/test_communicator_async.py b/python/paddle/fluid/tests/unittests/test_communicator_async.py index d032d6d75b5b3a48ea1e752190952f4c52e23b07..a86b80b2cf98829a683045ae302f72a694809138 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_async.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_async.py @@ -30,11 +30,10 @@ from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distribu class TestCommunicator(unittest.TestCase): def net(self): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y_predict = fluid.layers.fc(input=x, size=1, act=None) + x = fluid.layers.data(name='x', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = fluid.layers.square_error_cost(input=x, label=y) avg_cost = fluid.layers.mean(cost) return avg_cost diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py b/python/paddle/fluid/tests/unittests/test_communicator_geo.py index d9fc9262b311f949a1a89cd079517c5c93d0d28d..5916000fba79fc0da2ef545beac634a3edfe01df 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py @@ -83,8 +83,8 @@ class TestCommunicatorGeoEnd2End(unittest.TestCase): optimizer = fleet.distributed_optimizer(optimizer, strategy) 
optimizer.minimize(avg_cost) - fleet.init_worker() exe.run(fluid.default_startup_program()) + fleet.init_worker() train_reader = paddle.batch(self.fake_reader(), batch_size=24) feeder = fluid.DataFeeder(place=place, feed_list=[x, z, y]) diff --git a/python/paddle/fluid/tests/unittests/test_communicator_half_async.py b/python/paddle/fluid/tests/unittests/test_communicator_half_async.py index 391588780f342dc17ea821334e28f941f9ce359a..b0f55f2939dc94af603f4cc5851dbb5e6317774f 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_half_async.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_half_async.py @@ -71,8 +71,8 @@ class TestCommunicatorHalfAsyncEnd2End(unittest.TestCase): optimizer = fleet.distributed_optimizer(optimizer, strategy) optimizer.minimize(avg_cost) - fleet.init_worker() exe.run(fleet.startup_program) + fleet.init_worker() train_reader = paddle.batch(self.fake_reader(), batch_size=24) feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) diff --git a/python/paddle/fluid/tests/unittests/test_communicator_sync.py b/python/paddle/fluid/tests/unittests/test_communicator_sync.py index c0044d9d620796057cce0e3a51b2dec2878a0e17..95b209b14602676a089a667b0a720056bbe1562b 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_sync.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_sync.py @@ -27,11 +27,9 @@ import paddle.distributed.fleet as fleet class TestCommunicator(unittest.TestCase): def net(self): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y_predict = fluid.layers.fc(input=x, size=1, act=None) + x = fluid.layers.data(name='x', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') - - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = fluid.layers.square_error_cost(input=x, label=y) avg_cost = fluid.layers.mean(cost) return avg_cost diff --git a/python/paddle/fluid/tests/unittests/test_diag.py 
b/python/paddle/fluid/tests/unittests/test_diag.py index 780d57b53310bb5f385a131d4ad52dd6f5e695f0..ddf1240e4ef27775a24cee540c5f193399112270 100644 --- a/python/paddle/fluid/tests/unittests/test_diag.py +++ b/python/paddle/fluid/tests/unittests/test_diag.py @@ -119,6 +119,16 @@ class TestDiagV2API(unittest.TestCase): (n, n)) + np.diag(self.input_np3, self.offset) - np.diag( self.padding_value * np.ones(n)) + self.input_np4 = np.random.random(size=(2000, 2000)).astype(np.float32) + self.expected6 = np.diag(self.input_np4) + self.expected7 = np.diag(self.input_np4, k=1) + self.expected8 = np.diag(self.input_np4, k=-1) + + self.input_np5 = np.random.random(size=(2000)).astype(np.float32) + self.expected9 = np.diag(self.input_np5) + self.expected10 = np.diag(self.input_np5, k=1) + self.expected11 = np.diag(self.input_np5, k=-1) + def run_imperative(self): x = paddle.to_tensor(self.input_np) y = paddle.diag(x) @@ -141,10 +151,32 @@ class TestDiagV2API(unittest.TestCase): y = paddle.diag(x, padding_value=-8) self.assertTrue(np.allclose(y.numpy(), self.expected5)) + x = paddle.to_tensor(self.input_np4) + y = paddle.diag(x) + self.assertTrue(np.allclose(y.numpy(), self.expected6)) + + y = paddle.diag(x, offset=1) + self.assertTrue(np.allclose(y.numpy(), self.expected7)) + + y = paddle.diag(x, offset=-1) + self.assertTrue(np.allclose(y.numpy(), self.expected8)) + + x = paddle.to_tensor(self.input_np5) + y = paddle.diag(x) + self.assertTrue(np.allclose(y.numpy(), self.expected9)) + + y = paddle.diag(x, offset=1) + self.assertTrue(np.allclose(y.numpy(), self.expected10)) + + y = paddle.diag(x, offset=-1) + self.assertTrue(np.allclose(y.numpy(), self.expected11)) + def run_static(self, use_gpu=False): x = paddle.data(name='input', shape=[10, 10], dtype='float32') x2 = paddle.data(name='input2', shape=[100], dtype='float64') x3 = paddle.data(name='input3', shape=[100], dtype='int64') + x4 = paddle.data(name='input4', shape=[2000, 2000], dtype='float32') + x5 = 
paddle.data(name='input5', shape=[2000], dtype='float32') result0 = paddle.diag(x) result1 = paddle.diag(x, offset=1) result2 = paddle.diag(x, offset=-1) @@ -152,17 +184,28 @@ class TestDiagV2API(unittest.TestCase): result4 = paddle.diag(x2, padding_value=8) result5 = paddle.diag(x3, padding_value=8.0) result6 = paddle.diag(x3, padding_value=-8) + result7 = paddle.diag(x4) + result8 = paddle.diag(x4, offset=1) + result9 = paddle.diag(x4, offset=-1) + result10 = paddle.diag(x5) + result11 = paddle.diag(x5, offset=1) + result12 = paddle.diag(x5, offset=-1) place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - res0, res1, res2, res4, res5, res6 = exe.run( + res0, res1, res2, res4, res5, res6, res7, res8, res9, res10, res11, res12 = exe.run( feed={ "input": self.input_np, "input2": self.input_np2, - 'input3': self.input_np3 + 'input3': self.input_np3, + 'input4': self.input_np4, + 'input5': self.input_np5 }, - fetch_list=[result0, result1, result2, result4, result5, result6]) + fetch_list=[ + result0, result1, result2, result4, result5, result6, result7, + result8, result9, result10, result11, result12 + ]) self.assertTrue(np.allclose(res0, self.expected0)) self.assertTrue(np.allclose(res1, self.expected1)) @@ -171,6 +214,12 @@ class TestDiagV2API(unittest.TestCase): self.assertTrue(np.allclose(res4, self.expected3)) self.assertTrue(np.allclose(res5, self.expected4)) self.assertTrue(np.allclose(res6, self.expected5)) + self.assertTrue(np.allclose(res7, self.expected6)) + self.assertTrue(np.allclose(res8, self.expected7)) + self.assertTrue(np.allclose(res9, self.expected8)) + self.assertTrue(np.allclose(res10, self.expected9)) + self.assertTrue(np.allclose(res11, self.expected10)) + self.assertTrue(np.allclose(res12, self.expected11)) def test_cpu(self): paddle.disable_static(place=paddle.fluid.CPUPlace()) diff --git 
a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py index a82612b0ed2a6700dd157ddd6263cae2a879c274..7f55e956a94aee79dda07762e953e71807899bff 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py @@ -44,16 +44,11 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): paddle.fluid.framework.switch_startup_program(startup_program) fleet.init(role_maker.PaddleCloudRoleMaker()) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') - fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') - fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') - prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) - avg_cost = paddle.fluid.layers.mean(x=cost) + x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') + y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = paddle.fluid.layers.square_error_cost(input=x, label=y) + avg_cost = paddle.fluid.layers.mean(cost) strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = True @@ -71,7 +66,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): sends += 1 if op.type == "sgd": sgds += 1 - self.assertEqual(sends, 7) + self.assertEqual(sends, 1) self.assertEqual(sgds, 0) fleet.init_worker() @@ -89,16 +84,11 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): paddle.fluid.framework.switch_startup_program(startup_program) fleet.init(role_maker.PaddleCloudRoleMaker()) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') - 
- fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') - fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') - prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) - avg_cost = paddle.fluid.layers.mean(x=cost) + + x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') + y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = paddle.fluid.layers.square_error_cost(input=x, label=y) + avg_cost = paddle.fluid.layers.mean(cost) strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = True diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py index b05a53c88bb9154b69640df6c39305a00e3c447b..db3f2afb3668bc1831286f8d13b274895e7632fd 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py @@ -36,16 +36,11 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): def test_gradient_merge_optimizer(self): fleet.init(role_maker.PaddleCloudRoleMaker()) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') - fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') - fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') - prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) - avg_cost = paddle.fluid.layers.mean(x=cost) + x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') + y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = paddle.fluid.layers.square_error_cost(input=x, label=y) + avg_cost = paddle.fluid.layers.mean(cost) 
strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = False @@ -63,7 +58,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): sends += 1 if op.type == "sgd": sgds += 1 - self.assertEqual(sends, 6) + self.assertEqual(sends, 0) self.assertEqual(sgds, 0) fleet.init_worker() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py index 379bcaf684d53c2c72f6369e72418cdaaaf3ac84..6fe52ba9fe61ad83341ece5c29fcafa89095de82 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py @@ -70,15 +70,13 @@ class TestPSPassWithBow(unittest.TestCase): q = fluid.layers.data( name="query_ids", shape=[1], dtype="int64", lod_level=1) # embedding - q_emb = fluid.layers.embedding( + q_emb = fluid.contrib.layers.sparse_embedding( input=q, - is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.01), name="__emb__", - learning_rate=emb_lr), - is_sparse=is_sparse) + learning_rate=emb_lr)) q_emb = fluid.layers.reshape(q_emb, [-1, emb_dim]) # vsum q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum') @@ -97,15 +95,13 @@ class TestPSPassWithBow(unittest.TestCase): pt = fluid.layers.data( name="pos_title_ids", shape=[1], dtype="int64", lod_level=1) # embedding - pt_emb = fluid.layers.embedding( + pt_emb = fluid.contrib.layers.sparse_embedding( input=pt, - is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.01), name="__emb__", - learning_rate=emb_lr), - is_sparse=is_sparse) + learning_rate=emb_lr)) pt_emb = fluid.layers.reshape(pt_emb, [-1, emb_dim]) # vsum pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum') @@ -123,15 +119,13 @@ class TestPSPassWithBow(unittest.TestCase): nt = fluid.layers.data( name="neg_title_ids", shape=[1], 
dtype="int64", lod_level=1) # embedding - nt_emb = fluid.layers.embedding( + nt_emb = fluid.contrib.layers.sparse_embedding( input=nt, - is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.01), name="__emb__", - learning_rate=emb_lr), - is_sparse=is_sparse) + learning_rate=emb_lr)) nt_emb = fluid.layers.reshape(nt_emb, [-1, emb_dim]) # vsum nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum') @@ -167,7 +161,7 @@ class TestPSPassWithBow(unittest.TestCase): fleet.init(role) loss, acc, _ = self.net() - optimizer = fluid.optimizer.SGD(base_lr) + optimizer = fluid.optimizer.Adam(base_lr) strategy = StrategyFactory.create_async_strategy() optimizer = fleet.distributed_optimizer(optimizer, strategy) optimizer.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py index fd069793473648a0dff731d66c85bd3fe61997c7..c570c4d8cd01dd7e7b113b1f5f35c9887f4a4376 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py @@ -168,12 +168,13 @@ class TestPSPassWithBow(unittest.TestCase): fleet.init(role) loss, acc, _ = self.net() - optimizer = fluid.optimizer.SGD( + optimizer = fluid.optimizer.Adagrad( learning_rate=fluid.layers.exponential_decay( learning_rate=base_lr, decay_steps=500, decay_rate=0.969, staircase=True)) + strategy = StrategyFactory.create_async_strategy() optimizer = fleet.distributed_optimizer(optimizer, strategy) optimizer.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py new file mode 100644 index 0000000000000000000000000000000000000000..d5b1284e3ce316114122d5bbeb6d88cbabc3f160 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py @@ -0,0 +1,168 @@ +# Copyright (c) 2018 PaddlePaddle 
Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import paddle.fluid as fluid +import paddle.fluid.incubate.fleet.base.role_maker as role_maker +from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet +from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory + +# For Net +base_lr = 0.2 +emb_lr = base_lr * 3 +dict_dim = 1500 +emb_dim = 128 +hid_dim = 128 +margin = 0.1 +sample_rate = 1 +batch_size = 4 + + +class TestPSPassWithBow(unittest.TestCase): + def net(self): + def get_acc(cos_q_nt, cos_q_pt, batch_size): + cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) + cond = fluid.layers.cast(cond, dtype='float64') + cond_3 = fluid.layers.reduce_sum(cond) + acc = fluid.layers.elementwise_div( + cond_3, + fluid.layers.fill_constant( + shape=[1], value=batch_size * 1.0, dtype='float64'), + name="simnet_acc") + return acc + + def get_loss(cos_q_pt, cos_q_nt): + loss_op1 = fluid.layers.elementwise_sub( + fluid.layers.fill_constant_batch_size_like( + input=cos_q_pt, + shape=[-1, 1], + value=margin, + dtype='float32'), + cos_q_pt) + loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) + loss_op3 = fluid.layers.elementwise_max( + fluid.layers.fill_constant_batch_size_like( + input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32'), + loss_op2) + avg_cost = fluid.layers.mean(loss_op3) + return 
avg_cost + + is_distributed = False + is_sparse = True + + # query + q = fluid.layers.data( + name="query_ids", shape=[1], dtype="int64", lod_level=1) + # embedding + q_emb = fluid.contrib.layers.sparse_embedding( + input=q, + size=[dict_dim, emb_dim], + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.01), + name="__emb__", + learning_rate=emb_lr)) + q_emb = fluid.layers.reshape(q_emb, [-1, emb_dim]) + # vsum + q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum') + q_ss = fluid.layers.softsign(q_sum) + # fc layer after conv + q_fc = fluid.layers.fc( + input=q_ss, + size=hid_dim, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.01), + name="__q_fc__", + learning_rate=base_lr)) + # label data + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + # pt + pt = fluid.layers.data( + name="pos_title_ids", shape=[1], dtype="int64", lod_level=1) + # embedding + pt_emb = fluid.contrib.layers.sparse_embedding( + input=pt, + size=[dict_dim, emb_dim], + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.01), + name="__emb__", + learning_rate=emb_lr)) + pt_emb = fluid.layers.reshape(pt_emb, [-1, emb_dim]) + # vsum + pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum') + pt_ss = fluid.layers.softsign(pt_sum) + # fc layer + pt_fc = fluid.layers.fc( + input=pt_ss, + size=hid_dim, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.01), + name="__fc__", + learning_rate=base_lr), + bias_attr=fluid.ParamAttr(name="__fc_b__")) + # nt + nt = fluid.layers.data( + name="neg_title_ids", shape=[1], dtype="int64", lod_level=1) + # embedding + nt_emb = fluid.contrib.layers.sparse_embedding( + input=nt, + size=[dict_dim, emb_dim], + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.01), + name="__emb__", + learning_rate=emb_lr)) + nt_emb = fluid.layers.reshape(nt_emb, [-1, emb_dim]) + # vsum + nt_sum = 
fluid.layers.sequence_pool(input=nt_emb, pool_type='sum') + nt_ss = fluid.layers.softsign(nt_sum) + # fc layer + nt_fc = fluid.layers.fc( + input=nt_ss, + size=hid_dim, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.01), + name="__fc__", + learning_rate=base_lr), + bias_attr=fluid.ParamAttr(name="__fc_b__")) + cos_q_pt = fluid.layers.cos_sim(q_fc, pt_fc) + cos_q_nt = fluid.layers.cos_sim(q_fc, nt_fc) + # loss + avg_cost = get_loss(cos_q_pt, cos_q_nt) + # acc + acc = get_acc(cos_q_nt, cos_q_pt, batch_size) + return [avg_cost, acc, cos_q_pt] + + def test(self): + endpoints = [ + "127.0.0.1:36004", "127.0.0.1:36005", "127.0.0.1:36006", + "127.0.0.1:36007" + ] + + role = role_maker.UserDefinedRoleMaker( + current_id=0, + role=role_maker.Role.SERVER, + worker_num=2, + server_endpoints=endpoints) + + fleet.init(role) + loss, acc, _ = self.net() + optimizer = fluid.optimizer.Adagrad(base_lr) + strategy = StrategyFactory.create_async_strategy() + optimizer = fleet.distributed_optimizer(optimizer, strategy) + optimizer.minimize(loss) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py b/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..bca91c536ba32b05138f2860c13fdd1899a2e011 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py @@ -0,0 +1,171 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np + +import paddle.fluid as fluid +import paddle.fluid.core as core + + +class TestLookupTableFuseOp(unittest.TestCase): + def test_fuse(self): + places = [core.CPUPlace()] + # currently only support CPU + for place in places: + self.check_with_place(place) + + def check_with_place(self, place): + scope = fluid.global_scope() + scope.var("LearningRate").get_tensor().set([0.01], place) + scope.var("Ids").get_tensor().set([i for i in range(100)], place) + + init_program = fluid.Program() + + lr = init_program.global_block().create_var( + name="LearningRate", + persistable=True, + type=fluid.core.VarDesc.VarType.LOD_TENSOR, + shape=[1], + dtype="float32") + + ids = init_program.global_block().create_var( + name="Ids", + persistable=True, + type=fluid.core.VarDesc.VarType.LOD_TENSOR, + shape=[100], + dtype="int64") + + output = init_program.global_block().create_var( + name="output", + type=fluid.core.VarDesc.VarType.LOD_TENSOR, + shape=[100, 8], + dtype="float32") + + metas = [] + metas.append( + "embedding_1.block0:Param,Moment1,Moment2:8,8,8:0:embedding_1@GRAD.block0:embedding_1.block0,embedding_1_moment1_0,embedding_1_moment2_0,kSparseIDs@embedding_1.block0:uniform_random&0&-0.5&0.5,fill_constant&0.0,fill_constant&0.0:none" + ) + metas.append( + "embedding_2.block0:Param:8:0:embedding_2@GRAD.block0:embedding_2.block0,kSparseIDs@embedding_2.block0:uniform_random&0&-0.5&0.5:none" + ) + + init_program.global_block().append_op( + type="lookup_sparse_table_init", + inputs=None, + 
outputs=None, + attrs={"large_scale_metas": metas}) + + init_program.global_block().append_op( + type="lookup_sparse_table_read", + inputs={"Ids": ids}, + outputs={"Out": output}, + attrs={ + "tablename": "embedding_1.block0", + "init": True, + "value_names": ["Param"], + }) + + init_program.global_block().append_op( + type="lookup_sparse_table_read", + inputs={"Ids": ids}, + outputs={"Out": output}, + attrs={ + "tablename": "embedding_2.block0", + "init": True, + "value_names": ["Param"], + }) + + executor = fluid.Executor(place) + executor.run(init_program) + + training_program = fluid.Program() + + scope.var('Beta1Pow').get_tensor().set( + np.array([0]).astype("float32"), place) + scope.var('Beta2Pow').get_tensor().set( + np.array([0]).astype("float32"), place) + + rows = [0, 1, 2, 3, 4, 5, 6] + row_numel = 8 + w_selected_rows = scope.var('Grad').get_selected_rows() + w_selected_rows.set_height(len(rows)) + w_selected_rows.set_rows(rows) + w_array = np.ones((len(rows), row_numel)).astype("float32") + for i in range(len(rows)): + w_array[i] *= i + w_tensor = w_selected_rows.get_tensor() + w_tensor.set(w_array, place) + + lr = training_program.global_block().create_var( + name="LearningRate", + persistable=True, + type=fluid.core.VarDesc.VarType.LOD_TENSOR, + shape=[1], + dtype="float32") + + grads = training_program.global_block().create_var( + name="Grad", + persistable=True, + type=fluid.core.VarDesc.VarType.SELECTED_ROWS, + shape=[100, 8], + dtype="float32") + + beta1 = training_program.global_block().create_var( + name="Beta1Pow", + persistable=True, + type=fluid.core.VarDesc.VarType.LOD_TENSOR, + shape=[1], + dtype="float32") + + beta2 = training_program.global_block().create_var( + name="Beta2Pow", + persistable=True, + type=fluid.core.VarDesc.VarType.LOD_TENSOR, + shape=[1], + dtype="float32") + + training_program.global_block().append_op( + type="lookup_sparse_table_fuse_adam", + inputs={ + "Grad": grads, + "LearningRate": lr, + "Beta1Pow": beta1, + 
"Beta2Pow": beta2, + }, + outputs={"Beta1PowOut": beta1, + "Beta2PowOut": beta2}, + attrs={ + "is_entry": False, + "tablename": "embedding_1.block0", + "value_names": ["Param", "Moment1", "Moment2"], + }) + + training_program.global_block().append_op( + type="lookup_sparse_table_fuse_sgd", + inputs={"Grad": grads, + "LearningRate": lr}, + attrs={ + "is_entry": False, + "tablename": "embedding_2.block0", + "value_names": ["Param"], + }) + + executor.run(training_program) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fused_bn_add_act.py b/python/paddle/fluid/tests/unittests/test_fused_bn_add_act.py new file mode 100644 index 0000000000000000000000000000000000000000..1bc305cd1f4dcd3faaaf8ccbe813bdf08e966d6e --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_fused_bn_add_act.py @@ -0,0 +1,215 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest +import paddle +import paddle.fluid as fluid +from paddle.fluid import core + + +@unittest.skipIf(not core.is_compiled_with_cuda(), + "Paddle core is not compiled with CUDA") +class TestFusedBnAddActAPI(unittest.TestCase): + def setUp(self): + self.conv_param_attr1 = fluid.ParamAttr( + name='conv2d_1.weight', + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + self.conv_param_attr2 = fluid.ParamAttr( + name='conv2d_2.weight', + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + self.bn_param_attr1 = fluid.ParamAttr( + name='batch_norm_w_1', + initializer=fluid.initializer.Constant(value=1.0)) + self.bn_bias_attr1 = fluid.ParamAttr( + name='batch_norm_b_1', + initializer=fluid.initializer.Constant(value=0.0)) + self.bn_param_attr2 = fluid.ParamAttr( + name='batch_norm_w_2', + initializer=fluid.initializer.Constant(value=1.0)) + self.bn_bias_attr2 = fluid.ParamAttr( + name='batch_norm_b_2', + initializer=fluid.initializer.Constant(value=0.0)) + self.fc_param_attr = fluid.ParamAttr( + name='fc.weight', + initializer=fluid.initializer.Xavier(uniform=False)) + + def build_fused_program(self, + main_program, + startup_program, + use_cuda, + seed=1): + with fluid.program_guard(main_program, startup_program): + x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') + y = fluid.layers.data(name="y", shape=[1], dtype='int64') + conv1_1 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + param_attr=self.conv_param_attr1, + bias_attr=False, + data_format='NHWC') + conv1_2 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + param_attr=self.conv_param_attr2, + bias_attr=False, + data_format='NHWC') + bn = fluid.layers.batch_norm( + input=conv1_1, + param_attr=self.bn_param_attr1, + 
bias_attr=self.bn_bias_attr1, + act=None, + data_layout='NHWC') + fused_bn_add_act = fluid.contrib.layers.fused_bn_add_act( + conv1_2, + bn, + param_attr=self.bn_param_attr2, + bias_attr=self.bn_bias_attr2) + prediction = fluid.layers.fc(input=fused_bn_add_act, + size=10, + act='softmax', + param_attr=self.fc_param_attr) + loss = fluid.layers.cross_entropy(input=prediction, label=y) + loss = fluid.layers.mean(loss) + sgd = fluid.optimizer.SGD(learning_rate=0.001) + sgd = fluid.contrib.mixed_precision.decorate( + sgd, use_dynamic_loss_scaling=True, init_loss_scaling=128.0) + sgd.minimize(loss) + + return x, y, loss + + def build_origin_program(self, + main_program, + startup_program, + use_cuda, + seed=1): + with fluid.program_guard(main_program, startup_program): + x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') + y = fluid.layers.data(name="y", shape=[1], dtype='int64') + conv1_1 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + param_attr=self.conv_param_attr1, + bias_attr=False, + data_format='NHWC') + conv1_2 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + param_attr=self.conv_param_attr2, + bias_attr=False, + data_format='NHWC') + bn1 = fluid.layers.batch_norm( + input=conv1_1, + param_attr=self.bn_param_attr1, + bias_attr=self.bn_bias_attr1, + act=None, + data_layout='NHWC') + bn2 = fluid.layers.batch_norm( + input=conv1_2, + param_attr=self.bn_param_attr2, + bias_attr=self.bn_bias_attr2, + act=None, + data_layout='NHWC') + out = bn1 + bn2 + out = fluid.layers.relu(out) + prediction = fluid.layers.fc(input=out, + size=10, + act='softmax', + param_attr=self.fc_param_attr) + loss = fluid.layers.cross_entropy(input=prediction, label=y) + loss = fluid.layers.mean(loss) + sgd = fluid.optimizer.SGD(learning_rate=0.001) + sgd = fluid.contrib.mixed_precision.decorate( + sgd, use_dynamic_loss_scaling=True, init_loss_scaling=128.0) + 
sgd.minimize(loss) + + return x, y, loss + + def check(self, place, use_cuda): + paddle.manual_seed(1) + paddle.framework.random._manual_program_seed(1) + iters = 5 + batch_size = 16 + + # build_fused_program + main_program = fluid.Program() + startup_program = fluid.Program() + x, y, loss = self.build_fused_program(main_program, startup_program, + use_cuda) + feeder = fluid.DataFeeder(feed_list=[x, y], place=place) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=batch_size) + exe = fluid.Executor(place) + loss_vals_fused = [] + scope = fluid.Scope() + with fluid.scope_guard(scope): + exe.run(startup_program) + for _ in range(iters): + data = next(train_reader()) + loss_v = exe.run(main_program, + feed=feeder.feed(data), + fetch_list=[loss]) + loss_vals_fused.append(loss_v[0][0]) + + # build_origin_program + main_program = fluid.Program() + startup_program = fluid.Program() + x, y, loss = self.build_origin_program(main_program, startup_program, + use_cuda) + feeder = fluid.DataFeeder(feed_list=[x, y], place=place) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=batch_size) + loss_vals = [] + scope = fluid.Scope() + with fluid.scope_guard(scope): + exe.run(startup_program) + for _ in range(iters): + data = next(train_reader()) + loss_v = exe.run(main_program, + feed=feeder.feed(data), + fetch_list=[loss]) + loss_vals.append(loss_v[0][0]) + + # check loss + for i in range(iters): + self.assertAlmostEqual(loss_vals[i], loss_vals_fused[i], delta=1e-5) + + def test_fuse_bn_add_act(self): + place = fluid.CUDAPlace(0) + self.check(place, use_cuda=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py index a70862f40197c513a0cd04753553264708ee2a1c..5df04ddfc3d26492323153b8b26658db4325b7ec 100644 --- a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py +++ 
b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py @@ -19,6 +19,7 @@ import paddle import paddle.fluid as fluid import numpy as np import six +import inspect class TestMathOpPatchesVarBase(unittest.TestCase): @@ -302,21 +303,13 @@ class TestMathOpPatchesVarBase(unittest.TestCase): self.assertEqual(x.dim(), 2) self.assertEqual(x.ndimension(), 2) self.assertEqual(x.ndim, 2) - self.assertEqual(x.size(), [2, 3]) - self.assertTrue( - np.array_equal(x.sigmoid().numpy(), fluid.layers.sigmoid(x).numpy( - ))) - self.assertTrue( - np.array_equal(x.log_sigmoid().numpy(), - fluid.layers.logsigmoid(x).numpy())) + self.assertEqual(x.size, 6) + self.assertEqual(x.numel(), 6) self.assertTrue(np.array_equal(x.exp().numpy(), paddle.exp(x).numpy())) self.assertTrue( np.array_equal(x.tanh().numpy(), paddle.tanh(x).numpy())) self.assertTrue( np.array_equal(x.atan().numpy(), paddle.atan(x).numpy())) - self.assertTrue( - np.array_equal(x.tanh_shrink().numpy(), - fluid.layers.tanh_shrink(x).numpy())) self.assertTrue(np.array_equal(x.abs().numpy(), paddle.abs(x).numpy())) m = x.abs() self.assertTrue( @@ -344,12 +337,6 @@ class TestMathOpPatchesVarBase(unittest.TestCase): ))) self.assertTrue( np.array_equal(x.square().numpy(), paddle.square(x).numpy())) - self.assertTrue( - np.array_equal(x.softplus().numpy(), - fluid.layers.softplus(x).numpy())) - self.assertTrue( - np.array_equal(x.softsign().numpy(), - fluid.layers.softsign(x).numpy())) self.assertTrue( np.array_equal(x.rank().numpy(), paddle.rank(x).numpy())) self.assertTrue( @@ -422,6 +409,8 @@ class TestMathOpPatchesVarBase(unittest.TestCase): self.assertTrue(np.array_equal(x.reciprocal(), paddle.reciprocal(x))) # 2. 
Binary operation + self.assertTrue( + np.array_equal(x.divide(y).numpy(), paddle.divide(x, y).numpy())) self.assertTrue( np.array_equal( x.matmul(y, True, False).numpy(), @@ -501,6 +490,73 @@ class TestMathOpPatchesVarBase(unittest.TestCase): self.assertTrue( np.array_equal( x.logical_and(y).numpy(), paddle.logical_and(x, y).numpy())) + a = paddle.to_tensor([[1, 2], [3, 4]]) + b = paddle.to_tensor([[4, 3], [2, 1]]) + self.assertTrue( + np.array_equal( + x.where(a, b).numpy(), paddle.where(x, a, b).numpy())) + + self.assertTrue(inspect.ismethod(a.dot)) + self.assertTrue(inspect.ismethod(a.elementwise_add)) + self.assertTrue(inspect.ismethod(a.elementwise_div)) + self.assertTrue(inspect.ismethod(a.elementwise_floordiv)) + self.assertTrue(inspect.ismethod(a.elementwise_mod)) + self.assertTrue(inspect.ismethod(a.elementwise_sub)) + self.assertTrue(inspect.ismethod(a.logsumexp)) + self.assertTrue(inspect.ismethod(a.multiplex)) + self.assertTrue(inspect.ismethod(a.prod)) + self.assertTrue(inspect.ismethod(a.reduce_max)) + self.assertTrue(inspect.ismethod(a.reduce_min)) + self.assertTrue(inspect.ismethod(a.reduce_prod)) + self.assertTrue(inspect.ismethod(a.reduce_sum)) + self.assertTrue(inspect.ismethod(a.scale)) + self.assertTrue(inspect.ismethod(a.stanh)) + self.assertTrue(inspect.ismethod(a.sums)) + self.assertTrue(inspect.ismethod(a.elementwise_sum)) + self.assertTrue(inspect.ismethod(a.max)) + self.assertTrue(inspect.ismethod(a.maximum)) + self.assertTrue(inspect.ismethod(a.min)) + self.assertTrue(inspect.ismethod(a.minimum)) + self.assertTrue(inspect.ismethod(a.floor_divide)) + self.assertTrue(inspect.ismethod(a.remainder)) + self.assertTrue(inspect.ismethod(a.floor_mod)) + self.assertTrue(inspect.ismethod(a.multiply)) + self.assertTrue(inspect.ismethod(a.logsumexp)) + self.assertTrue(inspect.ismethod(a.inverse)) + self.assertTrue(inspect.ismethod(a.log1p)) + self.assertTrue(inspect.ismethod(a.erf)) + self.assertTrue(inspect.ismethod(a.addcmul)) + 
self.assertTrue(inspect.ismethod(a.addmm)) + self.assertTrue(inspect.ismethod(a.clip)) + self.assertTrue(inspect.ismethod(a.trace)) + self.assertTrue(inspect.ismethod(a.kron)) + self.assertTrue(inspect.ismethod(a.isinf)) + self.assertTrue(inspect.ismethod(a.isnan)) + self.assertTrue(inspect.ismethod(a.concat)) + self.assertTrue(inspect.ismethod(a.broadcast_to)) + self.assertTrue(inspect.ismethod(a.scatter_nd_add)) + self.assertTrue(inspect.ismethod(a.scatter_nd)) + self.assertTrue(inspect.ismethod(a.shard_index)) + self.assertTrue(inspect.ismethod(a.chunk)) + self.assertTrue(inspect.ismethod(a.stack)) + self.assertTrue(inspect.ismethod(a.strided_slice)) + self.assertTrue(inspect.ismethod(a.unsqueeze)) + self.assertTrue(inspect.ismethod(a.unstack)) + self.assertTrue(inspect.ismethod(a.argmax)) + self.assertTrue(inspect.ismethod(a.argmin)) + self.assertTrue(inspect.ismethod(a.argsort)) + self.assertTrue(inspect.ismethod(a.has_inf)) + self.assertTrue(inspect.ismethod(a.has_nan)) + self.assertTrue(inspect.ismethod(a.masked_select)) + self.assertTrue(inspect.ismethod(a.topk)) + self.assertTrue(inspect.ismethod(a.index_select)) + self.assertTrue(inspect.ismethod(a.nonzero)) + self.assertTrue(inspect.ismethod(a.sort)) + self.assertTrue(inspect.ismethod(a.index_sample)) + self.assertTrue(inspect.ismethod(a.mean)) + self.assertTrue(inspect.ismethod(a.reduce_mean)) + self.assertTrue(inspect.ismethod(a.std)) + self.assertTrue(inspect.ismethod(a.numel)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_minimum_op.py b/python/paddle/fluid/tests/unittests/test_minimum_op.py index 4c08b7386ca2c5da04c0a289872dacf68a2ea040..a0673c82c5b341e550485ebdcee4e4616693d641 100644 --- a/python/paddle/fluid/tests/unittests/test_minimum_op.py +++ b/python/paddle/fluid/tests/unittests/test_minimum_op.py @@ -61,8 +61,8 @@ class ApiMinimumTest(unittest.TestCase): def test_dynamic_api(self): paddle.disable_static() np_x = np.array([10, 10]).astype('float64') - x = 
paddle.to_variable(self.input_x) - y = paddle.to_variable(self.input_y) + x = paddle.to_tensor(self.input_x) + y = paddle.to_tensor(self.input_y) z = paddle.minimum(x, y) np_z = z.numpy() z_expected = np.array(np.minimum(self.input_x, self.input_y)) @@ -73,8 +73,8 @@ class ApiMinimumTest(unittest.TestCase): np_x = np.random.rand(5, 4, 3, 2).astype("float64") np_y = np.random.rand(4, 3).astype("float64") - x = paddle.to_variable(self.input_x) - y = paddle.to_variable(self.input_y) + x = paddle.to_tensor(self.input_x) + y = paddle.to_tensor(self.input_y) result_1 = paddle.minimum(x, y, axis=1) result_2 = paddle.minimum(x, y, axis=-2) self.assertEqual((result_1.numpy() == result_2.numpy()).all(), True) diff --git a/python/paddle/fluid/tests/unittests/test_mse_loss.py b/python/paddle/fluid/tests/unittests/test_mse_loss.py index 753d96c44114a552f4bdd299602d7f13f672efbf..e327307e955308e78f6e9640681c842060a34882 100644 --- a/python/paddle/fluid/tests/unittests/test_mse_loss.py +++ b/python/paddle/fluid/tests/unittests/test_mse_loss.py @@ -205,8 +205,7 @@ class TestNNFunctionalMseLoss(unittest.TestCase): paddle.disable_static() dy_ret = paddle.nn.functional.mse_loss( - paddle.to_variable(input_np), - paddle.to_variable(target_np), 'mean') + paddle.to_tensor(input_np), paddle.to_tensor(target_np), 'mean') dy_result = dy_ret.numpy() sub = input_np - target_np @@ -240,8 +239,7 @@ class TestNNFunctionalMseLoss(unittest.TestCase): paddle.disable_static() dy_ret = paddle.nn.functional.mse_loss( - paddle.to_variable(input_np), - paddle.to_variable(target_np), 'sum') + paddle.to_tensor(input_np), paddle.to_tensor(target_np), 'sum') dy_result = dy_ret.numpy() sub = input_np - target_np @@ -275,8 +273,7 @@ class TestNNFunctionalMseLoss(unittest.TestCase): paddle.disable_static() dy_ret = paddle.nn.functional.mse_loss( - paddle.to_variable(input_np), - paddle.to_variable(target_np), 'none') + paddle.to_tensor(input_np), paddle.to_tensor(target_np), 'none') dy_result = dy_ret.numpy() 
sub = input_np - target_np diff --git a/python/paddle/fluid/tests/unittests/test_nll_loss.py b/python/paddle/fluid/tests/unittests/test_nll_loss.py index e7154193beaf788a9d20f3c131b1df3420918266..c07bf949af39e38222b05394f65977c7027e2f13 100644 --- a/python/paddle/fluid/tests/unittests/test_nll_loss.py +++ b/python/paddle/fluid/tests/unittests/test_nll_loss.py @@ -909,8 +909,8 @@ class TestNLLLossInvalidArgs(unittest.TestCase): with fluid.dygraph.guard(): x_np = np.random.random(size=(5, )).astype(np.float64) label_np = np.random.randint(0, 10, size=(5, )).astype(np.int64) - x = paddle.to_variable(x_np) - label = paddle.to_variable(label_np) + x = paddle.to_tensor(x_np) + label = paddle.to_tensor(label_np) nll_loss = paddle.nn.loss.NLLLoss() res = nll_loss(x, label) @@ -933,8 +933,8 @@ class TestNLLLossInvalidArgs(unittest.TestCase): with fluid.dygraph.guard(): x_np = np.random.random(size=(5, 3)).astype(np.float64) label_np = np.random.randint(0, 3, size=(5, )).astype(np.int64) - x = paddle.to_variable(x_np) - label = paddle.to_variable(label_np) + x = paddle.to_tensor(x_np) + label = paddle.to_tensor(label_np) nll_loss = paddle.nn.loss.NLLLoss(reduction='') res = nll_loss(x, label) @@ -957,8 +957,8 @@ class TestNLLLossInvalidArgs(unittest.TestCase): with fluid.dygraph.guard(): x_np = np.random.random(size=(5, 3)).astype(np.float64) label_np = np.random.randint(0, 3, size=(5, )).astype(np.int64) - x = paddle.to_variable(x_np) - label = paddle.to_variable(label_np) + x = paddle.to_tensor(x_np) + label = paddle.to_tensor(label_np) res = paddle.nn.functional.nll_loss(x, label, reduction='') self.assertRaises( diff --git a/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py b/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py index 0ebe769fb9bce1aee8412ccebc216c2c85e97775..8ee3b2ac20320c3b82eb7bb81509a9a84ce959a7 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py +++ 
b/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py @@ -101,9 +101,9 @@ def create_test_case(margin, reduction): def run_dynamic_functional_api(self, place): paddle.disable_static(place) - x = paddle.to_variable(self.x_data) - y = paddle.to_variable(self.y_data) - label = paddle.to_variable(self.label_data) + x = paddle.to_tensor(self.x_data) + y = paddle.to_tensor(self.y_data) + label = paddle.to_tensor(self.label_data) result = paddle.nn.functional.margin_ranking_loss(x, y, label, margin, reduction) @@ -117,9 +117,9 @@ def create_test_case(margin, reduction): def run_dynamic_api(self, place): paddle.disable_static(place) - x = paddle.to_variable(self.x_data) - y = paddle.to_variable(self.y_data) - label = paddle.to_variable(self.label_data) + x = paddle.to_tensor(self.x_data) + y = paddle.to_tensor(self.y_data) + label = paddle.to_tensor(self.label_data) margin_rank_loss = paddle.nn.loss.MarginRankingLoss( margin=margin, reduction=reduction) result = margin_rank_loss(x, y, label) @@ -134,9 +134,9 @@ def create_test_case(margin, reduction): def run_dynamic_broadcast_api(self, place): paddle.disable_static(place) label_data = np.random.choice([-1, 1], size=[10]).astype("float64") - x = paddle.to_variable(self.x_data) - y = paddle.to_variable(self.y_data) - label = paddle.to_variable(label_data) + x = paddle.to_tensor(self.x_data) + y = paddle.to_tensor(self.y_data) + label = paddle.to_tensor(label_data) margin_rank_loss = paddle.nn.loss.MarginRankingLoss( margin=margin, reduction=reduction) result = margin_rank_loss(x, y, label) diff --git a/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py b/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py index d52a1f5d5b16ca7e0d58230a1a17624e5bff0b02..90132a0923df716e9e2a0224671006cb62c1bba0 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py +++ b/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py @@ -56,7 +56,7 @@ class TestNNSigmoidAPI(unittest.TestCase): def 
check_dynamic_api(self, place): paddle.disable_static(place) - x = paddle.to_variable(self.x) + x = paddle.to_tensor(self.x) mysigmoid = nn.Sigmoid() y = mysigmoid(x) self.assertTrue(np.allclose(y.numpy(), self.y)) @@ -94,7 +94,7 @@ class TestNNFunctionalSigmoidAPI(unittest.TestCase): def check_dynamic_api(self): paddle.disable_static() - x = paddle.to_variable(self.x) + x = paddle.to_tensor(self.x) y = functional.sigmoid(x) self.assertTrue(np.allclose(y.numpy(), self.y)) diff --git a/python/paddle/fluid/tests/unittests/test_numel_op.py b/python/paddle/fluid/tests/unittests/test_numel_op.py index 8512bc99e7451c73e5513b834fb6aa448717c646..800706e5965dffedadb61c384d946c8ed28bf826 100644 --- a/python/paddle/fluid/tests/unittests/test_numel_op.py +++ b/python/paddle/fluid/tests/unittests/test_numel_op.py @@ -76,8 +76,8 @@ class TestNumelOoAPI(unittest.TestCase): paddle.disable_static(paddle.CPUPlace()) input_1 = np.random.random([2, 1, 4, 5]).astype("int32") input_2 = np.random.random([1, 4, 5]).astype("int32") - x_1 = paddle.to_variable(input_1) - x_2 = paddle.to_variable(input_2) + x_1 = paddle.to_tensor(input_1) + x_2 = paddle.to_tensor(input_2) out_1 = paddle.numel(x_1) out_2 = paddle.numel(x_2) assert (np.array_equal(out_1.numpy().item(0), np.size(input_1))) diff --git a/python/paddle/fluid/tests/unittests/test_ones_like.py b/python/paddle/fluid/tests/unittests/test_ones_like.py index c1e6a3377710f98184e9541e287b911def89cd81..bb0d6f07bdbde18d155b66c7d014503747ebd887 100644 --- a/python/paddle/fluid/tests/unittests/test_ones_like.py +++ b/python/paddle/fluid/tests/unittests/test_ones_like.py @@ -63,7 +63,7 @@ class TestOnesLikeImpeartive(unittest.TestCase): place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( ) else fluid.CPUPlace() paddle.disable_static(place) - x = paddle.to_variable(np.ones(shape)) + x = paddle.to_tensor(np.ones(shape)) for dtype in [np.bool, np.float32, np.float64, np.int32, np.int64]: out = ones_like(x, dtype) 
self.assertEqual((out.numpy() == np.ones(shape, dtype)).all(), True) diff --git a/python/paddle/fluid/tests/unittests/test_pairwise_distance.py b/python/paddle/fluid/tests/unittests/test_pairwise_distance.py index baf0efa6ec2e7edafb8d331423a7b47155283c21..cf138e67726163d3d1c990a180fa229b88fed99f 100644 --- a/python/paddle/fluid/tests/unittests/test_pairwise_distance.py +++ b/python/paddle/fluid/tests/unittests/test_pairwise_distance.py @@ -48,8 +48,8 @@ def test_static(x_np, y_np, p=2.0, epsilon=1e-6, keepdim=False): def test_dygraph(x_np, y_np, p=2.0, epsilon=1e-6, keepdim=False): paddle.disable_static() - x = paddle.to_variable(x_np) - y = paddle.to_variable(y_np) + x = paddle.to_tensor(x_np) + y = paddle.to_tensor(y_np) dist = paddle.nn.layer.distance.PairwiseDistance( p=p, epsilon=epsilon, keepdim=keepdim) distance = dist(x, y) diff --git a/python/paddle/fluid/tests/unittests/test_sort_op.py b/python/paddle/fluid/tests/unittests/test_sort_op.py index 015b72fd1c5275f758a109451110f61b97c4a0c7..366e0c7a3fa3ee714162e6041aa0d52dbfb30746 100644 --- a/python/paddle/fluid/tests/unittests/test_sort_op.py +++ b/python/paddle/fluid/tests/unittests/test_sort_op.py @@ -72,14 +72,14 @@ class TestSortDygraph(unittest.TestCase): def test_api_0(self): paddle.disable_static(self.place) - var_x = paddle.to_variable(self.input_data) + var_x = paddle.to_tensor(self.input_data) out = paddle.sort(var_x) self.assertEqual((np.sort(self.input_data) == out.numpy()).all(), True) paddle.enable_static() def test_api_1(self): paddle.disable_static(self.place) - var_x = paddle.to_variable(self.input_data) + var_x = paddle.to_tensor(self.input_data) out = paddle.sort(var_x, axis=-1) self.assertEqual( (np.sort( diff --git a/python/paddle/fluid/tests/unittests/test_tile_op.py b/python/paddle/fluid/tests/unittests/test_tile_op.py index 5aaf31993448ab0ff0c69f648cfa84c62d3e198b..b0f065a26a006ee3553a84938fb5b6b2db7b3172 100644 --- a/python/paddle/fluid/tests/unittests/test_tile_op.py +++ 
b/python/paddle/fluid/tests/unittests/test_tile_op.py @@ -230,13 +230,13 @@ class TestTileAPI(unittest.TestCase): def test_api(self): with fluid.dygraph.guard(): np_x = np.random.random([12, 14]).astype("float32") - x = paddle.to_variable(np_x) + x = paddle.to_tensor(np_x) positive_2 = np.array([2]).astype("int32") - positive_2 = paddle.to_variable(positive_2) + positive_2 = paddle.to_tensor(positive_2) repeat_times = np.array([2, 3]).astype("int32") - repeat_times = paddle.to_variable(repeat_times) + repeat_times = paddle.to_tensor(repeat_times) out_1 = paddle.tile(x, repeat_times=[2, 3]) out_2 = paddle.tile(x, repeat_times=[positive_2, 3]) diff --git a/python/paddle/fluid/tests/unittests/test_transformer_api.py b/python/paddle/fluid/tests/unittests/test_transformer_api.py index bd76edc9d8cadf14c6cf224b7708ff4acd6efef4..7c7a71a3be1b508c850048c3945f29ef7424654c 100644 --- a/python/paddle/fluid/tests/unittests/test_transformer_api.py +++ b/python/paddle/fluid/tests/unittests/test_transformer_api.py @@ -234,23 +234,23 @@ class TestTransformer(unittest.TestCase): if cache_dict: if 'k' and 'v' in cache_dict: cache_obj = multi_head_attn.Cache( - paddle.to_variable(cache_dict['k']), - paddle.to_variable(cache_dict['v'])) + paddle.to_tensor(cache_dict['k']), + paddle.to_tensor(cache_dict['v'])) elif 'static_k' and 'static_v' in cache_dict: cache_obj = multi_head_attn.StaticCache( - paddle.to_variable(cache_dict['static_k']), - paddle.to_variable(cache_dict['static_v'])) + paddle.to_tensor(cache_dict['static_k']), + paddle.to_tensor(cache_dict['static_v'])) if attn_mask is not None: attn_output = multi_head_attn( - paddle.to_variable(query), - paddle.to_variable(key), - paddle.to_variable(value), - paddle.to_variable(attn_mask), cache_obj) + paddle.to_tensor(query), + paddle.to_tensor(key), + paddle.to_tensor(value), + paddle.to_tensor(attn_mask), cache_obj) else: attn_output = multi_head_attn( - paddle.to_variable(query), - paddle.to_variable(key), - 
paddle.to_variable(value), attn_mask, cache_obj) + paddle.to_tensor(query), + paddle.to_tensor(key), + paddle.to_tensor(value), attn_mask, cache_obj) attn_output = attn_output[0] if cache_dict else attn_output # implementation by numpy @@ -296,16 +296,16 @@ class TestTransformer(unittest.TestCase): attn_dropout, act_dropout) encoder_output = encoder_layer( - paddle.to_variable(src), - paddle.to_variable(src_mask)) # paddle.to_variable(src_mask)) + paddle.to_tensor(src), + paddle.to_tensor(src_mask)) # paddle.to_tensor(src_mask)) # 4.numpy: # paddle self attention self_attn = MultiHeadAttention( d_model, n_head, dropout=attn_dropout) attn_output = self_attn( - paddle.to_variable(src), - paddle.to_variable(src), - paddle.to_variable(src), paddle.to_variable(src_mask)).numpy() + paddle.to_tensor(src), + paddle.to_tensor(src), + paddle.to_tensor(src), paddle.to_tensor(src_mask)).numpy() src = attn_output + residual src_norm = layer_norm(src, d_model, encoder_layer.norm1) @@ -348,13 +348,13 @@ class TestTransformer(unittest.TestCase): cache_objs = None if cache: cache_objs = decoder_layer.gen_cache( - paddle.to_variable(memory)) + paddle.to_tensor(memory)) decoder_output = decoder_layer( - paddle.to_variable(tgt), - paddle.to_variable(memory), - paddle.to_variable(tgt_mask), - paddle.to_variable(memory_mask), cache_objs) + paddle.to_tensor(tgt), + paddle.to_tensor(memory), + paddle.to_tensor(tgt_mask), + paddle.to_tensor(memory_mask), cache_objs) decoder_output = decoder_output[0].numpy( ) if cache else decoder_output.numpy() @@ -365,10 +365,10 @@ class TestTransformer(unittest.TestCase): self_attn_cache = cache_objs[ 0] if cache_objs is not None else None tgt = self_attn( - paddle.to_variable(tgt), - paddle.to_variable(tgt), - paddle.to_variable(tgt), - paddle.to_variable(tgt_mask), self_attn_cache) + paddle.to_tensor(tgt), + paddle.to_tensor(tgt), + paddle.to_tensor(tgt), + paddle.to_tensor(tgt_mask), self_attn_cache) tgt = tgt[0].numpy() if cache else tgt.numpy() @@ 
-380,10 +380,10 @@ class TestTransformer(unittest.TestCase): cross_attn_cache = cache_objs[ 1] if cache_objs is not None else None tgt = cross_attn( - paddle.to_variable(tgt_norm), - paddle.to_variable(memory), - paddle.to_variable(memory), - paddle.to_variable(memory_mask), cross_attn_cache) + paddle.to_tensor(tgt_norm), + paddle.to_tensor(memory), + paddle.to_tensor(memory), + paddle.to_tensor(memory_mask), cross_attn_cache) tgt = tgt[0].numpy() if cache else tgt.numpy() # postprocess @@ -416,7 +416,7 @@ class TestTransformer(unittest.TestCase): encoder = TransformerEncoder(encoder_layer, num_layers) # src, src_mask enc_output = encoder( - paddle.to_variable(src), paddle.to_variable(src_mask)) + paddle.to_tensor(src), paddle.to_tensor(src_mask)) def test_decoder(self): batch_size, d_model, n_head, dim_feedforward, dropout, _, _, source_length, target_length = generate_basic_params( @@ -438,9 +438,9 @@ class TestTransformer(unittest.TestCase): decoder = TransformerDecoder(decoder_layer, num_layers) output = decoder( - paddle.to_variable(tgt), - paddle.to_variable(memory), - paddle.to_variable(tgt_mask), paddle.to_variable(memory_mask)) + paddle.to_tensor(tgt), + paddle.to_tensor(memory), + paddle.to_tensor(tgt_mask), paddle.to_tensor(memory_mask)) def test_transformer(self): batch_size, d_model, n_head, dim_feedforward, dropout, _, _, source_length, target_length = generate_basic_params( @@ -453,24 +453,24 @@ class TestTransformer(unittest.TestCase): n_head, dim_feedforward=dim_feedforward, dropout=dropout) - src = paddle.to_variable( + src = paddle.to_tensor( np.random.rand(batch_size, source_length, d_model).astype( "float32")) - tgt = paddle.to_variable( + tgt = paddle.to_tensor( np.random.rand(batch_size, target_length, d_model).astype( "float32")) src_mask = np.zeros((batch_size, n_head, source_length, source_length)).astype("float32") src_mask[0][0][0][0] = -np.inf - src_mask = paddle.to_variable(src_mask) + src_mask = paddle.to_tensor(src_mask) tgt_mask = 
np.zeros((batch_size, n_head, target_length, target_length)).astype("float32") tgt_mask[0][0][0][0] = -1e9 memory_mask = np.zeros((batch_size, n_head, target_length, source_length)).astype("float32") memory_mask[0][0][0][0] = -1e9 - tgt_mask, memory_mask = paddle.to_variable( - tgt_mask), paddle.to_variable(memory_mask) + tgt_mask, memory_mask = paddle.to_tensor( + tgt_mask), paddle.to_tensor(memory_mask) trans_output = transformer(src, tgt, src_mask, tgt_mask, memory_mask) diff --git a/python/paddle/fluid/tests/unittests/test_warpctc_op.py b/python/paddle/fluid/tests/unittests/test_warpctc_op.py index 6bc42f0712a1a8c9f9a0640e06042c42e7cc948f..c4155e0d8260fe1fdc4a0e49e955fc2bbff0fc89 100644 --- a/python/paddle/fluid/tests/unittests/test_warpctc_op.py +++ b/python/paddle/fluid/tests/unittests/test_warpctc_op.py @@ -424,10 +424,10 @@ class TestCTCLossAPICase(unittest.TestCase): loss_np = ctc.forward() paddle.disable_static() - softmax = paddle.to_variable(logits) - labels = paddle.to_variable(labels) - logits_length = paddle.to_variable(self.logits_length) - labels_length = paddle.to_variable(self.labels_length) + softmax = paddle.to_tensor(logits) + labels = paddle.to_tensor(labels) + logits_length = paddle.to_tensor(self.logits_length) + labels_length = paddle.to_tensor(self.labels_length) loss_pd_mean = F.ctc_loss( softmax, labels, @@ -477,10 +477,10 @@ class TestCTCLossAPICase(unittest.TestCase): loss_np = ctc.forward() paddle.disable_static() - softmax = paddle.to_variable(logits) - labels = paddle.to_variable(labels) - logits_length = paddle.to_variable(self.logits_length) - labels_length = paddle.to_variable(self.labels_length) + softmax = paddle.to_tensor(logits) + labels = paddle.to_tensor(labels) + logits_length = paddle.to_tensor(self.logits_length) + labels_length = paddle.to_tensor(self.labels_length) loss_pd = paddle.nn.CTCLoss(self.blank, 'none')( softmax, labels, logits_length, labels_length) diff --git a/python/paddle/inference/__init__.py 
b/python/paddle/inference/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c388301ec3408e436eacb2567e8e529d0bbc03bb --- /dev/null +++ b/python/paddle/inference/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ..fluid.inference import Config, DataType, PlaceType, PrecisionType, Tensor, \ + Predictor, create_predictor, get_version, get_num_bytes_of_data_type, PredictorPool diff --git a/python/paddle/reader/decorator.py b/python/paddle/reader/decorator.py index 91a2a78203cbc50fec27b4f3ae8d3541ac4ec5da..8ee4d73ea847ea116ea4401b5b05ef1b925950fe 100644 --- a/python/paddle/reader/decorator.py +++ b/python/paddle/reader/decorator.py @@ -62,6 +62,22 @@ def cache(reader): Returns: generator: a decorated reader object which yields data from cached memory. + + Examples: + .. code-block:: python + + import paddle + + def reader(): + for i in range(3): + yield i + + # All data is cached into memory + cached_reader = paddle.io.cache(reader) + + # Output: 0 1 2 + for i in cached_reader(): + print(i) """ all_data = tuple(reader()) @@ -296,12 +312,28 @@ def buffered(reader, size): buffer. Reading from the buffered data reader will proceed as long as the buffer is not empty. - :param reader: the data reader to read from. - :type reader: callable - :param size: max buffer size. - :type size: int + Args: + reader(generator): the data reader to read from. 
+ size(int): max buffer size. + + Returns: + generator: the buffered data reader. + + Examples: + .. code-block:: python - :returns: the buffered data reader. + import paddle + + def reader(): + for i in range(3): + yield i + + # Create a buffered reader, and the buffer size is 2. + buffered_reader = paddle.io.buffered(reader, 2) + + # Output: 0 1 2 + for i in buffered_reader(): + print(i) """ class EndSignal(): diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 9de407841fb461713d00f997afdf33a38a531245..dc6a04a4723bd92dbe1c76fce5b3e52981136211 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -53,7 +53,7 @@ __all__ = [ 'shard_index', 'slice', 'split', - 'chunk' + 'chunk', 'squeeze', 'stack', 'strided_slice', diff --git a/python/paddle/tests/CMakeLists.txt b/python/paddle/tests/CMakeLists.txt index e1bc65a5d15c2883e14d20c5e06c2ee3cd726ea5..6fb73b08c11b417332b064df7408e78ed390cc2f 100644 --- a/python/paddle/tests/CMakeLists.txt +++ b/python/paddle/tests/CMakeLists.txt @@ -8,6 +8,10 @@ foreach(TEST_OP ${DIST_TEST_OPS}) list(REMOVE_ITEM TEST_OPS ${TEST_OP}) endforeach() +# disable test_pretrained_model and test_vision_models +list(REMOVE_ITEM TEST_OPS test_pretrained_model) +list(REMOVE_ITEM TEST_OPS test_vision_models) + foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) endforeach() diff --git a/python/setup.py.in b/python/setup.py.in index d85a23a5edd31f77514b468731097759f47533c1..467c5cb86779b80e51794cf800226d64534e8676 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -155,6 +155,7 @@ packages=['paddle', 'paddle.distributed.fleet.utils', 'paddle.framework', 'paddle.jit', + 'paddle.inference', 'paddle.fluid', 'paddle.fluid.inference', 'paddle.fluid.dygraph', diff --git a/tools/enforce/count_enforce_by_file.sh b/tools/enforce/count_enforce_by_file.sh index 1858bd0fd17aac7273318ddbb37fc0d9c512f48d..c1e2903c092ce4124c55566679e081dbe3a03445 100644 --- 
a/tools/enforce/count_enforce_by_file.sh +++ b/tools/enforce/count_enforce_by_file.sh @@ -57,7 +57,14 @@ FILE_WHITE_LIST="\ random_crop_op.h \ elementwise_op_function.cu.h \ fused_elemwise_activation_op.cc \ - auc_op.cu" + auc_op.cu \ + unsqueeze_op.h \ + unsqueeze_op.cc \ + enforce.h \ + errors_test.cc \ + cross_entropy.cu \ + cross_entropy.h \ + unpooling.cu" function count_file_recursively(){ dir_name=$1 diff --git a/tools/wlist.json b/tools/wlist.json index 20f6a9cbaedb391995b3757612ec24f2061a8a81..5591f90da4ba807871663e56fe4e3b11bf2fbd8f 100644 --- a/tools/wlist.json +++ b/tools/wlist.json @@ -105,8 +105,6 @@ "convert_dist_to_sparse_program", "load_persistables_for_increment", "load_persistables_for_inference", - "cache", - "buffered", "xmap_readers", "Metric.reset", "Metric.update",