Commit e70b1727 authored by: Qiao Longfei

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add-async-ssa-graph-executor

@@ -42,12 +42,6 @@ repos:
     entry: bash ./tools/codestyle/pylint_pre_commit.hook
     language: system
     files: \.(py)$
-- repo: https://github.com/PaddlePaddle/pre-commit-golang
-  sha: 8337620115c25ff8333f1b1a493bd031049bd7c0
-  hooks:
-  - id: go-fmt
-    types:
-    - go
 - repo: local
   hooks:
   - id: copyright_checker
......
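For context: the retained pylint hook above is a `language: system` hook, so pre-commit filters the staged files with the `files: \.(py)$` regex and hands the matches to the `entry` command. A minimal Python sketch of that dispatch, with illustrative names (`run_pylint_hook`, `changed_files`) rather than pre-commit's real internals:

# Illustrative sketch only: approximates how pre-commit drives the retained
# pylint hook; function and variable names here are hypothetical.
import re
import subprocess

def run_pylint_hook(changed_files):
    # pre-commit selects staged files matching the hook's `files` regex...
    py_files = [f for f in changed_files if re.search(r"\.(py)$", f)]
    if py_files:
        # ...and runs the `entry` command on them (language: system).
        subprocess.check_call(
            ["bash", "./tools/codestyle/pylint_pre_commit.hook"] + py_files)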
@@ -39,8 +39,10 @@ IF(WIN32)
     SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.lib)
     SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/mklml.dll)
     SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.dll)
 ELSE()
-    SET(MKLML_VER "mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
+    #TODO(intel-huying):
+    # Now enable Erf function in mklml library temporarily, it will be updated as official version later.
+    SET(MKLML_VER "Glibc225_vsErf_mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
     SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
     SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
     SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so)
......
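The Linux branch now pulls a temporary MKLML package with the Erf function enabled; only the package name changes, and the download URL is still composed from `MKLML_VER`. A small sketch of that composition (the `TIME_VERSION` value below is an assumed placeholder; the variable is defined elsewhere in mklml.cmake):

# Sketch of how the hunk's variables compose the MKLML download URL.
TIME_VERSION = "2019.0.1.20181227"  # assumed placeholder, not from this diff
MKLML_VER = "Glibc225_vsErf_mklml_lnx_" + TIME_VERSION
MKLML_URL = "http://paddlepaddledeps.cdn.bcebos.com/" + MKLML_VER + ".tgz"
print(MKLML_URL)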
@@ -37,7 +37,7 @@ INCLUDE(GNUInstallDirs)
 INCLUDE(ExternalProject)
 SET(NGRAPH_PROJECT "extern_ngraph")
-SET(NGRAPH_GIT_TAG "20bd8bbc79ae3a81c57313846a2be7313e5d1dab")
+SET(NGRAPH_GIT_TAG "a444f7a959b7d87f2c117c9b57a4c387759e481e")
 SET(NGRAPH_SOURCES_DIR ${THIRD_PARTY_PATH}/ngraph)
 SET(NGRAPH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/ngraph)
 SET(NGRAPH_INC_DIR ${NGRAPH_INSTALL_DIR}/include)
@@ -69,7 +69,7 @@ ExternalProject_Add(
     CMAKE_ARGS -DNGRAPH_DEX_ONLY=TRUE
     CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
     CMAKE_ARGS -DMKLDNN_INCLUDE_DIR=${MKLDNN_INC_DIR}
-    CMAKE_ARGS -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/lib
+    CMAKE_ARGS -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}
     CMAKE_ARGS -DMKLML_LIB_DIR=${MKLML_INSTALL_DIR}/lib
 )
......
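The second hunk matters on multilib systems: with `INCLUDE(GNUInstallDirs)` in effect, MKL-DNN may install its libraries under `lib64` rather than `lib`, so the nGraph build must be pointed at `${CMAKE_INSTALL_LIBDIR}` instead of a hard-coded `lib`. A sketch of the difference, with `libdir` standing in for the CMake variable:

# Sketch: why ${CMAKE_INSTALL_LIBDIR} replaces the hard-coded "lib".
# "lib64" below is the assumed value GNUInstallDirs picks on some
# RPM-based distros; the install prefix is a made-up example path.
import os

def mkldnn_lib_dir(install_dir, libdir="lib"):
    return os.path.join(install_dir, libdir)

print(mkldnn_lib_dir("third_party/install/mkldnn"))           # old: always .../lib
print(mkldnn_lib_dir("third_party/install/mkldnn", "lib64"))  # new: follows the platform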
@@ -153,7 +153,11 @@ function(op_library TARGET)
   # pybind USE_OP_DEVICE_KERNEL for CUDNN
   list(LENGTH cudnn_cu_cc_srcs cudnn_cu_cc_srcs_len)
   if (WITH_GPU AND ${cudnn_cu_cc_srcs_len} GREATER 0)
+    if(${TARGET} STREQUAL "activation")
+      file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n")
+    else()
       file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
+    endif()
   endif()
   # pybind USE_OP_DEVICE_KERNEL for MIOPEN
@@ -168,6 +172,9 @@
       file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, MKLDNN);\n")
     elseif(${MKLDNN_FILE} STREQUAL "conv_mkldnn_op")
       file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);\n")
+      file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, S8);\n")
+      file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, U8);\n")
     else()
       file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MKLDNN);\n")
     endif()
......
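Restated in Python for readability, the two hunks change which registration lines op_library() appends to the pybind file (a hedged restatement; the real logic lives in the CMake above, and the helper names here are illustrative):

# Hedged Python restatement of the CMake branching above.
def cudnn_pybind_lines(target):
    # The activation op library registers its CUDNN kernel via relu only.
    if target == "activation":
        return ['USE_OP_DEVICE_KERNEL(relu, CUDNN);\n']
    return ['USE_OP_DEVICE_KERNEL(%s, CUDNN);\n' % target]

def mkldnn_conv_pybind_lines():
    # conv_mkldnn_op now also registers the INT8 (S8/U8) kernel types.
    return [
        'USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);\n',
        'USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, S8);\n',
        'USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, U8);\n',
    ]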
-paddle.fluid.Program.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.Program.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.Program.block ArgSpec(args=['self', 'index'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.Program.block (ArgSpec(args=['self', 'index'], varargs=None, keywords=None, defaults=None), ('document', 'af5346376065ff4cf6832a8ac0ae0945'))
-paddle.fluid.Program.clone ArgSpec(args=['self', 'for_test'], varargs=None, keywords=None, defaults=(False,))
+paddle.fluid.Program.clone (ArgSpec(args=['self', 'for_test'], varargs=None, keywords=None, defaults=(False,)), ('document', 'ebb7765b2962bd2be041d19720e49d0f'))
-paddle.fluid.Program.current_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.Program.current_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '5e162d3bf8dd625703463d9e4be36adb'))
-paddle.fluid.Program.global_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.Program.global_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'cfb7e05a002b2e64650778cabde7301c'))
-paddle.fluid.Program.list_vars ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.Program.list_vars (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '1c8647b14fe57c7824b1c9562394dd3c'))
-paddle.fluid.Program.parse_from_string ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.Program.parse_from_string (ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None), ('document', 'b6a7ffb239a30bf2ce58cfaca8d8b8d5'))
-paddle.fluid.Program.to_string ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,))
+paddle.fluid.Program.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', 'faec17e5a04af28e3776160e34504d15'))
-paddle.fluid.default_startup_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
+paddle.fluid.default_startup_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '99e5d53d92d82797093332719c9e3ccd'))
-paddle.fluid.default_main_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
+paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '5430f54ab4895f9f47db6bebbaf71659'))
-paddle.fluid.program_guard ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b54f403e57825a1592aece03afe3afb6'))
-paddle.fluid.name_scope ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '0ef753f5cec69fef9ae6ad8b867b33a2'))
-paddle.fluid.Executor.__init__ ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.Executor.close ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '78e512cabeda9c7f42cb7c7e88967ae7'))
-paddle.fluid.Executor.run ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False))
+paddle.fluid.Executor.run (ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)), ('document', 'aba8093edebf2d5c869b735b92811e45'))
-paddle.fluid.global_scope ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
+paddle.fluid.global_scope (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'e148d3ab1ed8edf3e928212a375959c0'))
-paddle.fluid.scope_guard ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.scope_guard (ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None), ('document', 'b94d1f6bcc29c4fb58fc0058561250c2'))
-paddle.fluid.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.DistributeTranspiler.get_pserver_program (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '292ab72977afbe58e6a3bde175452680'))
-paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '78f4949aedf317666a89ca74b3748ba8'))
-paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.DistributeTranspiler.get_startup_program (ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'd796fc0c8d51503b556fcf6dc15c4f0c'))
-paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
+paddle.fluid.DistributeTranspiler.get_trainer_program (ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)), ('document', '736330e31a7a54abccc0c7fd9119d9ff'))
-paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
+paddle.fluid.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '06ce55338dfe96311ad1078235ab3bf4'))
-paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False))
+paddle.fluid.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', 'eda17d0f1639bc6ca215cecf87f588a4'))
-paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ac4114d3df16264f1946deb3a8434a6f'))
 paddle.fluid.DistributeTranspilerConfig.__init__
-paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
+paddle.fluid.ParallelExecutor.__init__ (ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True))
+paddle.fluid.ParallelExecutor.run (ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '2cb4bd74481861345c70228a0f57620c'))
-paddle.fluid.create_lod_tensor ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.create_lod_tensor (ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None), ('document', '8e7bb21e83ff4604f5b379672e285b94'))
-paddle.fluid.create_random_int_lodtensor ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.create_random_int_lodtensor (ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None), ('document', '368f638b99f1dfe59e9b02aa6f077752'))
-paddle.fluid.DataFeedDesc.__init__ ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.DataFeedDesc.__init__ (ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.DataFeedDesc.desc ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.DataFeedDesc.desc (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '4294493e31c4bc9fc4bd48753044235f'))
-paddle.fluid.DataFeedDesc.set_batch_size ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.DataFeedDesc.set_batch_size (ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '8d9f44601e0a99dd431f14fd9250cd21'))
-paddle.fluid.DataFeedDesc.set_dense_slots ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.DataFeedDesc.set_dense_slots (ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None), ('document', 'eb894b464bbcd1b4bc8038398954f766'))
-paddle.fluid.DataFeedDesc.set_use_slots ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.DataFeedDesc.set_use_slots (ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None), ('document', '415c56600ce4e198c071cad01409a690'))
-paddle.fluid.AsyncExecutor.__init__ ArgSpec(args=['self', 'place', 'run_mode'], varargs=None, keywords=None, defaults=(None, ''))
+paddle.fluid.AsyncExecutor.__init__ (ArgSpec(args=['self', 'place', 'run_mode'], varargs=None, keywords=None, defaults=(None, '')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.AsyncExecutor.config_distributed_nodes ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.AsyncExecutor.config_distributed_nodes (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '4810dbe1870452f16b3c60b6c5fd1459'))
-paddle.fluid.AsyncExecutor.download_data ArgSpec(args=['self', 'afs_path', 'local_path', 'fs_default_name', 'ugi', 'file_cnt', 'hadoop_home', 'process_num'], varargs=None, keywords=None, defaults=('$HADOOP_HOME', 12))
+paddle.fluid.AsyncExecutor.download_data (ArgSpec(args=['self', 'afs_path', 'local_path', 'fs_default_name', 'ugi', 'file_cnt', 'hadoop_home', 'process_num'], varargs=None, keywords=None, defaults=('$HADOOP_HOME', 12)), ('document', '799a2066cc26819f1ed31f47c15ad083'))
-paddle.fluid.AsyncExecutor.get_instance ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.AsyncExecutor.get_instance (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f8688f76a2db1243c7097a60c507b182'))
-paddle.fluid.AsyncExecutor.init_model ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.AsyncExecutor.init_model (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '504f39be2007404a17e5cabea1256c7d'))
-paddle.fluid.AsyncExecutor.init_server ArgSpec(args=['self', 'dist_desc'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.AsyncExecutor.init_server (ArgSpec(args=['self', 'dist_desc'], varargs=None, keywords=None, defaults=None), ('document', 'c403ab46c5d3ef25c0f7e94ae75dcb68'))
-paddle.fluid.AsyncExecutor.init_worker ArgSpec(args=['self', 'dist_desc', 'startup_program'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.AsyncExecutor.init_worker (ArgSpec(args=['self', 'dist_desc', 'startup_program'], varargs=None, keywords=None, defaults=None), ('document', 'dcf08f4bf2f3282acf11391f5d39c536'))
-paddle.fluid.AsyncExecutor.run ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'mode', 'debug'], varargs=None, keywords=None, defaults=('', False))
+paddle.fluid.AsyncExecutor.run (ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'mode', 'debug'], varargs=None, keywords=None, defaults=('', False)), ('document', '848fc53484e8326f6325feea87fe955c'))
-paddle.fluid.AsyncExecutor.save_model ArgSpec(args=['self', 'save_path'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.AsyncExecutor.save_model (ArgSpec(args=['self', 'save_path'], varargs=None, keywords=None, defaults=None), ('document', 'c8ac0dfcb3b187aba25d03af7fea56b2'))
-paddle.fluid.AsyncExecutor.stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.AsyncExecutor.stop (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '5f23d043607bb5d55e466ec3f578e093'))
-paddle.fluid.CompiledProgram.__init__ ArgSpec(args=['self', 'program_or_graph'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.CompiledProgram.__init__ (ArgSpec(args=['self', 'program_or_graph'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.CompiledProgram.with_data_parallel ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from'], varargs=None, keywords=None, defaults=(None, None, None, None))
+paddle.fluid.CompiledProgram.with_data_parallel (ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'e1af7fd53cf868554f312779fc803864'))
-paddle.fluid.CompiledProgram.with_inference_optimize ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.CompiledProgram.with_inference_optimize (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=None), ('document', '9e5b009d850191a010e859189c127fd8'))
 paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.ExecutionStrategy) -> None
 paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.GradientScaleStrategy, arg0: int) -> None
 paddle.fluid.BuildStrategy.ReduceStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.ReduceStrategy, arg0: int) -> None
 paddle.fluid.BuildStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy) -> None
-paddle.fluid.io.save_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None))
+paddle.fluid.io.save_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'b55d6193a1d4198d45b013fc5779e1f2'))
-paddle.fluid.io.save_params ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.io.save_params (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '3a7a99abac3e1bf898871fe609354218'))
-paddle.fluid.io.save_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.io.save_persistables (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '9141bb5f32caf7975eb3fd88c8a1b2da'))
-paddle.fluid.io.load_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None))
+paddle.fluid.io.load_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '0a5308f496632ab1ec3ba1f1377e6f95'))
-paddle.fluid.io.load_params ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.io.load_params (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '41779819cef32f2246e83aebc5a002e2'))
-paddle.fluid.io.load_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.io.load_persistables (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '28df5bfe26ca7a077f91156abb0fe6d2'))
-paddle.fluid.io.save_inference_model ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True))
+paddle.fluid.io.save_inference_model (ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True)), ('document', '582d87b8df75a5a639a107db8ff86f9c'))
-paddle.fluid.io.load_inference_model ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None))
+paddle.fluid.io.load_inference_model (ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '7a5255386075dac3c75b7058254fcdcb'))
-paddle.fluid.initializer.ConstantInitializer.__init__ ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False))
+paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.initializer.UniformInitializer.__init__ ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0))
+paddle.fluid.initializer.UniformInitializer.__init__ (ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.initializer.NormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0))
+paddle.fluid.initializer.NormalInitializer.__init__ (ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.initializer.TruncatedNormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0))
+paddle.fluid.initializer.TruncatedNormalInitializer.__init__ (ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.initializer.XavierInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'fan_out', 'seed'], varargs=None, keywords=None, defaults=(True, None, None, 0))
+paddle.fluid.initializer.XavierInitializer.__init__ (ArgSpec(args=['self', 'uniform', 'fan_in', 'fan_out', 'seed'], varargs=None, keywords=None, defaults=(True, None, None, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.initializer.BilinearInitializer.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.initializer.BilinearInitializer.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'd389912dc079cbef432335a00017cec0'))
-paddle.fluid.initializer.MSRAInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'seed'], varargs=None, keywords=None, defaults=(True, None, 0))
+paddle.fluid.initializer.MSRAInitializer.__init__ (ArgSpec(args=['self', 'uniform', 'fan_in', 'seed'], varargs=None, keywords=None, defaults=(True, None, 0)), ('document', '53c757bed9345f2ad3361902531e7cf5'))
-paddle.fluid.initializer.force_init_on_cpu ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
+paddle.fluid.initializer.force_init_on_cpu (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '6d0f3e22c90d9d500d36ff57daf056ee'))
-paddle.fluid.initializer.init_on_cpu ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
+paddle.fluid.initializer.init_on_cpu (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'a6d7011ca3d8c0d454dac3a56eae0c29'))
-paddle.fluid.initializer.NumpyArrayInitializer.__init__ ArgSpec(args=['self', 'value'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.initializer.NumpyArrayInitializer.__init__ (ArgSpec(args=['self', 'value'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.layers.fc ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', 'act', 'is_test', 'name'], varargs=None, keywords=None, defaults=(1, None, None, None, False, None))
+paddle.fluid.layers.fc (ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', 'act', 'is_test', 'name'], varargs=None, keywords=None, defaults=(1, None, None, None, False, None)), ('document', '1929058262994f212620599c63aea6bd'))
-paddle.fluid.layers.embedding ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32'))
+paddle.fluid.layers.embedding (ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32')), ('document', '89c2c55a0b0656b106064048e068e77a'))
-paddle.fluid.layers.dynamic_lstm ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None))
+paddle.fluid.layers.dynamic_lstm (ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None)), ('document', 'dfbb624f85015df29e994ca6999e8ff6'))
-paddle.fluid.layers.dynamic_lstmp ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None, None, None, None, None))
+paddle.fluid.layers.dynamic_lstmp (ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None, None, None, None, None)), ('document', 'b4b608b986eb9617aa0525e1be21d32d'))
-paddle.fluid.layers.dynamic_gru ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False))
+paddle.fluid.layers.dynamic_gru (ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False)), ('document', '4ec4845fd7d991bcac822f8b0dfc101f'))
-paddle.fluid.layers.gru_unit ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False))
+paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False)), ('document', 'e0e2439f7af069b57badca18a6ba60b8'))
-paddle.fluid.layers.linear_chain_crf ArgSpec(args=['input', 'label', 'param_attr'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr'], varargs=None, keywords=None, defaults=(None,)), ('document', '7c49ef4bbf0adfd4b9a1d98e2e5f3fea'))
-paddle.fluid.layers.crf_decoding ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,)), ('document', '7642373ab65d3fc3b96d16d10fef1538'))
-paddle.fluid.layers.cos_sim ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', 'd740824aa7316b807c4b4a3c6c8c0bbe'))
-paddle.fluid.layers.cross_entropy ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100))
+paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', '025b364dafb4b7975c801eb33e7831a1'))
-paddle.fluid.layers.bpr_loss ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '30add751a0f99347a6257634c03ff254'))
-paddle.fluid.layers.square_error_cost ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.square_error_cost (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', '44b6eef4a0f2bc15f7d9745782406736'))
-paddle.fluid.layers.chunk_eval ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.chunk_eval (ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ee152a7ba3036e7b9ede9184545179b4'))
-paddle.fluid.layers.sequence_conv ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None, None))
+paddle.fluid.layers.sequence_conv (ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None, None)), ('document', 'b6543768e1afaa2ecb869709d6e9c7e2'))
-paddle.fluid.layers.conv2d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None))
+paddle.fluid.layers.conv2d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '8ca6121acd6d23cd8806a93f493c2e17'))
-paddle.fluid.layers.conv3d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None))
+paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '37042620f9bd3a2da6e5d3138b2f724b'))
-paddle.fluid.layers.sequence_pool ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,))
+paddle.fluid.layers.sequence_pool (ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,)), ('document', 'a194fb80614023f543df3949fbd0d0b8'))
-paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None))
+paddle.fluid.layers.sequence_softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '19ef6f9cdd27feac8a1ae060f19c10b4'))
-paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None))
+paddle.fluid.layers.softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', 'f19dd380864e61134ce3814e4be0de4b'))
-paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
+paddle.fluid.layers.pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', 'bbd84e855e660cd1084bb71a2fd0cdaa'))
-paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
+paddle.fluid.layers.pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', '043de7333b79ee0ac55053c14ed81625'))
-paddle.fluid.layers.adaptive_pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None))
+paddle.fluid.layers.adaptive_pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '859b887174d06f361658f69cb7c06d95'))
-paddle.fluid.layers.adaptive_pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None))
+paddle.fluid.layers.adaptive_pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '120f4323a3d7ed9c0916f15a59f0e497'))
-paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False))
+paddle.fluid.layers.batch_norm (ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)), ('document', 'c527b71b8a4c60dca8df8a745c2b598d'))
-paddle.fluid.layers.data_norm ArgSpec(args=['input', 'act', 'epsilon', 'param_attr', 'data_layout', 'in_place', 'use_mkldnn', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var'], varargs=None, keywords=None, defaults=(None, 1e-05, None, 'NCHW', False, False, None, None, None, False))
+paddle.fluid.layers.data_norm (ArgSpec(args=['input', 'act', 'epsilon', 'param_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var'], varargs=None, keywords=None, defaults=(None, 1e-05, None, 'NCHW', False, None, None, None, False)), ('document', 'e45e09e65a2658e07cad987222f0d9ab'))
-paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.beam_search_decode (ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b0b8d53821716cd50c42e09b593f3feb'))
-paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
+paddle.fluid.layers.conv2d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)), ('document', '03993955ab1e6d3044c44e6f17fc85e9'))
-paddle.fluid.layers.conv3d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
+paddle.fluid.layers.conv3d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)), ('document', 'ec113c6a3686ac94f8fccd1a7953d445'))
-paddle.fluid.layers.sequence_expand ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None))
+paddle.fluid.layers.sequence_expand (ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '79c375214fa427faac504043d162dae9'))
-paddle.fluid.layers.sequence_expand_as ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.sequence_expand_as (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '9d2611f84ab364c5da545e6a82f1770a'))
-paddle.fluid.layers.sequence_pad ArgSpec(args=['x', 'pad_value', 'maxlen', 'name'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.layers.sequence_pad (ArgSpec(args=['x', 'pad_value', 'maxlen', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6a1adf3067b20f6e4bcb354d71c19184'))
-paddle.fluid.layers.sequence_unpad ArgSpec(args=['x', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.sequence_unpad (ArgSpec(args=['x', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd12803c903c99aa36ec03aaac5f0cc5b'))
-paddle.fluid.layers.lstm_unit ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None))
+paddle.fluid.layers.lstm_unit (ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None)), ('document', '027723966f3ef0d7bc598f22287a96cc'))
-paddle.fluid.layers.reduce_sum ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
+paddle.fluid.layers.reduce_sum (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'b69998ce3ff4980fb21da0df05565f1b'))
-paddle.fluid.layers.reduce_mean ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
+paddle.fluid.layers.reduce_mean (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'd4d80dd98a1a5839f41eeb3a0f85f370'))
-paddle.fluid.layers.reduce_max ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
+paddle.fluid.layers.reduce_max (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', '66a622db727551761ce4eb73eaa7f6a4'))
-paddle.fluid.layers.reduce_min ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
+paddle.fluid.layers.reduce_min (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'd50ac552b5d131468ed466d08bb2d38c'))
-paddle.fluid.layers.reduce_prod ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
+paddle.fluid.layers.reduce_prod (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'fcd8301a0ce15f219c7a4bcd0c1e8eca'))
-paddle.fluid.layers.sequence_first_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.sequence_first_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '2b290d3d77882bfe9bb8d331cac8cdd3'))
-paddle.fluid.layers.sequence_last_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.sequence_last_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', 'c16a892f44f7fe71bfa5afc32d3f34ce'))
-paddle.fluid.layers.sequence_slice ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.sequence_slice (ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'fdcea0e8b5bc7d8d4b1b072c521014e6'))
-paddle.fluid.layers.dropout ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 'downgrade_in_infer'))
+paddle.fluid.layers.dropout (ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 'downgrade_in_infer')), ('document', 'dc7042734c6d8b8ce97321f017f01d6f'))
-paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None))
+paddle.fluid.layers.split (ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '652625345c2acb900029c78cc75f8aa6'))
-paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.ctc_greedy_decoder (ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ebbf2adbd79683dc93db03454dfa18c2'))
-paddle.fluid.layers.edit_distance ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None))
+paddle.fluid.layers.edit_distance (ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None)), ('document', '97f0262f97602644c83142789d784571'))
-paddle.fluid.layers.l2_normalize ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None))
+paddle.fluid.layers.l2_normalize (ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None)), ('document', '6e428384ce6a77207fa2c70d9f011990'))
-paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None))
+paddle.fluid.layers.matmul (ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None)), ('document', 'b4cbe1ac451005df6dad12e9ffdccca9'))
-paddle.fluid.layers.topk ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.topk (ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd3570c02f71bcd78e60b3f31dc8f5b32'))
-paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, False, False))
+paddle.fluid.layers.warpctc (ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, False, False)), ('document', 'aaba49c038ba927f0a8e45c0c9a686ab'))
-paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.sequence_reshape (ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None), ('document', 'a10ab9bf88d4a7e328882d411abb6fd1'))
-paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.transpose (ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a1feac48b843d679db82312dc85885f4'))
-paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
+paddle.fluid.layers.im2sequence (ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None)), ('document', '3ce01160ede80b1c26f776f8fef9340f'))
-paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False))
+paddle.fluid.layers.nce (ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False)), ('document', 'fddad4896dee5193e1cdf70882c2a347'))
-paddle.fluid.layers.sampled_softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'num_samples', 'num_true', 'remove_accidental_hits', 'use_customized_samples', 'customized_samples', 'customized_probabilities', 'seed'], varargs=None, keywords=None, defaults=(1, True, False, None, None, 0))
+paddle.fluid.layers.sampled_softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'num_samples', 'num_true', 'remove_accidental_hits', 'use_customized_samples', 'customized_samples', 'customized_probabilities', 'seed'], varargs=None, keywords=None, defaults=(1, True, False, None, None, 0)), ('document', '5db30b8a74e8c93687943a3e8d221da0'))
-paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False))
+paddle.fluid.layers.hsigmoid (ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False)), ('document', '80641ee6810b1cdc3fd6e14fc89ecc9d'))
-paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False))
+paddle.fluid.layers.beam_search (ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False)), ('document', 'b350b9a30a18e7efd7e1bb740eef6996'))
-paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.layers.row_conv (ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)), ('document', '17485788fffe4e2d36dc58c2ac8d174e'))
-paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.multiplex (ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None), ('document', '013795af319e2e86d3506741941078ee'))
-paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None))
+paddle.fluid.layers.layer_norm (ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)), ('document', 'de6a906950bae9f3c245cb744d22b94e'))
-paddle.fluid.layers.group_norm ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None))
+paddle.fluid.layers.group_norm (ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None)), ('document', '419c3a24a83cc89219a029cf4092788b'))
-paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, False, False))
+paddle.fluid.layers.softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, True, False)), ('document', 'bce1b75e3d95b75cacd1099655cbb3c3'))
-paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None))
+paddle.fluid.layers.smooth_l1 (ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'c6b175d253c55baf4b9c0eca9b1dda88'))
-paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.one_hot (ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None), ('document', '6148b6a555cbfb62fdcd030d8982c18c'))
-paddle.fluid.layers.autoincreased_step_counter ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1))
+paddle.fluid.layers.autoincreased_step_counter (ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1)), ('document', '3f6c828594720c9b2da89c464be94478'))
-paddle.fluid.layers.reshape ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, False, None))
+paddle.fluid.layers.reshape (ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, False, None)), ('document', '323c019f257e55ddea4a824a362de62f'))
-paddle.fluid.layers.squeeze ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.squeeze (ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '3229d06517f794e86ca3da14c38b1465'))
-paddle.fluid.layers.unsqueeze ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.unsqueeze (ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'bbd62da391b1df984a1909d069a759b2'))
-paddle.fluid.layers.lod_reset ArgSpec(args=['x', 'y', 'target_lod'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.layers.lod_reset (ArgSpec(args=['x', 'y', 'target_lod'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'f122194c562bd674f6ecdccf33785f99'))
-paddle.fluid.layers.lrn ArgSpec(args=['input', 'n', 'k', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(5, 1.0, 0.0001, 0.75, None))
+paddle.fluid.layers.lrn (ArgSpec(args=['input', 'n', 'k', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(5, 1.0, 0.0001, 0.75, None)), ('document', '0795e9940e42dcd62953514ff7e09f77'))
-paddle.fluid.layers.pad ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None))
+paddle.fluid.layers.pad (ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)), ('document', '2f28153bdd2d5ea6f7bad5867bd03eeb'))
-paddle.fluid.layers.pad_constant_like ArgSpec(args=['x', 'y', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None))
+paddle.fluid.layers.pad_constant_like (ArgSpec(args=['x', 'y', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)), ('document', 'd2e1f45fef51b2c214e3f2aa8976c46c'))
-paddle.fluid.layers.label_smooth ArgSpec(args=['label', 'prior_dist', 'epsilon', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 0.1, 'float32', None))
+paddle.fluid.layers.label_smooth (ArgSpec(args=['label', 'prior_dist', 'epsilon', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 0.1, 'float32', None)), ('document', '70c113658102a11cc5d8e3d45145737a'))
-paddle.fluid.layers.roi_pool ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0))
+paddle.fluid.layers.roi_pool (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0)), ('document', 'c317aa595deb31649083c8faa91cdb97'))
-paddle.fluid.layers.roi_align ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None))
+paddle.fluid.layers.roi_align (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None)), ('document', '12c5bbb8b38c42e623fbc47611d766e1'))
-paddle.fluid.layers.dice_loss ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,))
+paddle.fluid.layers.dice_loss (ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,)), ('document', '1ba0508d573f65feecf3564dce22aa1d'))
-paddle.fluid.layers.image_resize ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1))
+paddle.fluid.layers.image_resize (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1)), ('document', 'b3ecb819454832885c1f0f3ab9a5b938'))
-paddle.fluid.layers.image_resize_short ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',))
+paddle.fluid.layers.image_resize_short (ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',)), ('document', '06211aefc50c5a3e940d7204d859cdf7'))
-paddle.fluid.layers.resize_bilinear ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1))
+paddle.fluid.layers.resize_bilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1)), ('document', 'e4fb4ed511b2293b8f04f7e872afbfd7'))
-paddle.fluid.layers.resize_nearest ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners'], varargs=None, keywords=None, defaults=(None, None, None, None, True))
+paddle.fluid.layers.resize_nearest (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners'], varargs=None, keywords=None, defaults=(None, None, None, None, True)), ('document', '735fa9758a6d7ff3b47d7b827f961c1d'))
-paddle.fluid.layers.gather ArgSpec(args=['input', 'index'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.gather (ArgSpec(args=['input', 'index'], varargs=None, keywords=None, defaults=None), ('document', '98f1c86716b9b7f4dda83f20e2adeee2'))
-paddle.fluid.layers.scatter ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '65f8e9d8ddfd0b412f940579c4faa342'))
-paddle.fluid.layers.sequence_scatter ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.sequence_scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '15b522457dfef103f0c20ca9d397678b'))
-paddle.fluid.layers.random_crop ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.random_crop (ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c9ab9e460ef0a1823249935a30e82c66'))
paddle.fluid.layers.mean_iou ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.mean_iou (ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None), ('document', '35cbbdfa585d027bb490707c95a176b9'))
paddle.fluid.layers.relu ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.relu (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '866ffa1cc93f29e23662b526a7596537'))
paddle.fluid.layers.selu ArgSpec(args=['x', 'scale', 'alpha', 'name'], varargs=None, keywords=None, defaults=(None, None, None)) paddle.fluid.layers.selu (ArgSpec(args=['x', 'scale', 'alpha', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '9044c7fe667b76cb2d9264f2db11f417'))
paddle.fluid.layers.log ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.log (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '98247c59d1c9b40af6730001b2aea73d'))
paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None)) paddle.fluid.layers.crop (ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '883104791204d3127e24234bb630b2e7'))
paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.rank_loss (ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c542e39ac6add24a6bef6e79bf5617e2'))
paddle.fluid.layers.margin_rank_loss ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None)) paddle.fluid.layers.margin_rank_loss (ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None)), ('document', '6d19dcc19917080b7ff3e03bde451bc8'))
paddle.fluid.layers.elu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None)) paddle.fluid.layers.elu (ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', '463258ee9f8b60760eb1e26357cc9bfa'))
paddle.fluid.layers.relu6 ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None)) paddle.fluid.layers.relu6 (ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None)), ('document', '6f367339caf6c7124bc262fe1475df70'))
paddle.fluid.layers.pow ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None)) paddle.fluid.layers.pow (ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', 'a5117c1eb84aca2ac0b0abab337a4799'))
paddle.fluid.layers.stanh ArgSpec(args=['x', 'scale_a', 'scale_b', 'name'], varargs=None, keywords=None, defaults=(0.6666666666666666, 1.7159, None)) paddle.fluid.layers.stanh (ArgSpec(args=['x', 'scale_a', 'scale_b', 'name'], varargs=None, keywords=None, defaults=(0.6666666666666666, 1.7159, None)), ('document', '959936a477efc6c1447a9c8bf8ce94bb'))
paddle.fluid.layers.hard_sigmoid ArgSpec(args=['x', 'slope', 'offset', 'name'], varargs=None, keywords=None, defaults=(0.2, 0.5, None)) paddle.fluid.layers.hard_sigmoid (ArgSpec(args=['x', 'slope', 'offset', 'name'], varargs=None, keywords=None, defaults=(0.2, 0.5, None)), ('document', 'c82059b6fea1aa730f9aac911807b756'))
paddle.fluid.layers.swish ArgSpec(args=['x', 'beta', 'name'], varargs=None, keywords=None, defaults=(1.0, None)) paddle.fluid.layers.swish (ArgSpec(args=['x', 'beta', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', 'ef745e55a48763ee7b46b21a81dc7e84'))
paddle.fluid.layers.prelu ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.layers.prelu (ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'f6acef7ff7d887e49ff499fbb1dad4a9'))
paddle.fluid.layers.brelu ArgSpec(args=['x', 't_min', 't_max', 'name'], varargs=None, keywords=None, defaults=(0.0, 24.0, None)) paddle.fluid.layers.brelu (ArgSpec(args=['x', 't_min', 't_max', 'name'], varargs=None, keywords=None, defaults=(0.0, 24.0, None)), ('document', '3db337c195e156e6ef2b8b4a57113600'))
paddle.fluid.layers.leaky_relu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(0.02, None)) paddle.fluid.layers.leaky_relu (ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(0.02, None)), ('document', 'f878486c82b576938151daad0de995a0'))
paddle.fluid.layers.soft_relu ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(40.0, None)) paddle.fluid.layers.soft_relu (ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(40.0, None)), ('document', '869adce548c342d6cc1bd88a948d83c9'))
paddle.fluid.layers.flatten ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None)) paddle.fluid.layers.flatten (ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'cb295c13cb957db85cd9609269d7784d'))
paddle.fluid.layers.sequence_mask ArgSpec(args=['x', 'maxlen', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 'int64', None)) paddle.fluid.layers.sequence_mask (ArgSpec(args=['x', 'maxlen', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 'int64', None)), ('document', 'f0dd6eddd3bff015a3c05269d82fcbd8'))
paddle.fluid.layers.stack ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)) paddle.fluid.layers.stack (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)), ('document', '367cfbb642839beacb5d117e2d2b4041'))
paddle.fluid.layers.pad2d ArgSpec(args=['input', 'paddings', 'mode', 'pad_value', 'data_format', 'name'], varargs=None, keywords=None, defaults=([0, 0, 0, 0], 'constant', 0.0, 'NCHW', None)) paddle.fluid.layers.pad2d (ArgSpec(args=['input', 'paddings', 'mode', 'pad_value', 'data_format', 'name'], varargs=None, keywords=None, defaults=([0, 0, 0, 0], 'constant', 0.0, 'NCHW', None)), ('document', '7f4d46320cc077ca2e8db600c35f4030'))
paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None)) paddle.fluid.layers.unstack (ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None)), ('document', '98eb9d633116efcfc6f90c114bd44fd6'))
paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None)) paddle.fluid.layers.sequence_enumerate (ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None)), ('document', 'f6028537085dc296103bbbd85fa7763d'))
paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.expand (ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '117d3607d1ffa0571835bbaebc7857ff'))
paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.sequence_concat (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '3a1d155dd1bf6e72a0a3e3e1519591d1'))
paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None)) paddle.fluid.layers.scale (ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None)), ('document', '30190413b2fa442e7466d6cf2ce5ea07'))
paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)) paddle.fluid.layers.elementwise_add (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '6bfbe72cbadc95ac7ab88c05ed5bf9f0'))
paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)) paddle.fluid.layers.elementwise_div (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', 'cc6e6cc1cb942a152dde3ef08d5f165c'))
paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)) paddle.fluid.layers.elementwise_sub (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', 'a12abdab09c3e57af5a6e1e9f138684a'))
paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)) paddle.fluid.layers.elementwise_mul (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '422c77dbfcff355a57b5fdd4ec876daa'))
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)) paddle.fluid.layers.elementwise_max (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', 'f0bb0b2c454541cfafa761021a5cc776'))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)) paddle.fluid.layers.elementwise_min (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '8a9cdefefbccbf9f6b0991c0946a21e9'))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)) paddle.fluid.layers.elementwise_pow (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '1aea4e197c552a284f83888a3c67a32e'))
paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0)) paddle.fluid.layers.uniform_random_batch_size_like (ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0)), ('document', '129e0a3257f1d532a948eedf9d5bf671'))
paddle.fluid.layers.gaussian_random ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')) paddle.fluid.layers.gaussian_random (ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')), ('document', '389dafe36e099841b6a7fb18d11f1b4c'))
paddle.fluid.layers.sampling_id ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')) paddle.fluid.layers.sampling_id (ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')), ('document', '840fdac643d1341c1cae218d4511dbb9'))
paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32')) paddle.fluid.layers.gaussian_random_batch_size_like (ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32')), ('document', '840026b4766613c5705e06563cd103b6'))
paddle.fluid.layers.sum ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.sum (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'a418e3ccb5e2ac21bd60f5cc221d5860'))
paddle.fluid.layers.slice ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.slice (ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None), ('document', '01dbb91e7c74cb11336cd531013de51a'))
paddle.fluid.layers.shape ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.shape (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '17db0f814eb7bb5a3fac1ca6e60e16d8'))
paddle.fluid.layers.logical_and ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.layers.logical_and (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'cdcf20c494c92060d10feb9374532f42'))
paddle.fluid.layers.logical_or ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.layers.logical_or (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '0eae3f726a4afe590757552fa3ced012'))
paddle.fluid.layers.logical_xor ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.layers.logical_xor (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'b0daaa3fa4a0aa62f9b58c43d959eb25'))
paddle.fluid.layers.logical_not ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.layers.logical_not (ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'cd1c8cf31e040427d4e05711044caeb6'))
paddle.fluid.layers.clip ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.clip (ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b020b7aab59719be98a4ae229a76deba'))
paddle.fluid.layers.clip_by_norm ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.clip_by_norm (ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a1ea0bc5a926f427458c4254ca022749'))
paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.mean (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd638d915195ce86a8d7963b81110d4c8'))
paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None)) paddle.fluid.layers.mul (ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None)), ('document', 'ccd37fa6b53f074adbfb732d738c4c2d'))
paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'ignore_index', 'name', 'normalize'], varargs=None, keywords=None, defaults=(-100, None, False)) paddle.fluid.layers.sigmoid_cross_entropy_with_logits (ArgSpec(args=['x', 'label', 'ignore_index', 'name', 'normalize'], varargs=None, keywords=None, defaults=(-100, None, False)), ('document', '180c284317ea45ef89a460d8d79c0b72'))
paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.maxout (ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '891870d069a6aea746d34cc53b61690c'))
paddle.fluid.layers.space_to_depth ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.space_to_depth (ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5f207ae10589ebe38a63575ef6ff8e1e'))
paddle.fluid.layers.affine_grid ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '51def402b8910e163cbace9d0c0526ed'))
paddle.fluid.layers.sequence_reverse ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '77a6d80aa5551ca70324fc975c44507f'))
paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None)) paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None)), ('document', '2f46f1ff39a13ab00857e7b9f44b2fa7'))
paddle.fluid.layers.similarity_focus ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '70e3b5182a18b40b47ecabd7c8490a35'))
paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)) paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', '9bb77f8dc002dd2ce75d4769eaaf5007'))
paddle.fluid.layers.grid_sampler ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.grid_sampler (ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd256cba1c41a5ed92ce3f31e24a2ca6d'))
paddle.fluid.layers.log_loss ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)) paddle.fluid.layers.log_loss (ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)), ('document', '4b5a2341023afe63157a066c14254f98'))
paddle.fluid.layers.add_position_encoding ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.add_position_encoding (ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '4b9c2e8af5817937d831820874b5aa77'))
paddle.fluid.layers.bilinear_tensor_product ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None)) paddle.fluid.layers.bilinear_tensor_product (ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'aa7540a0fa73ff69a02e11b4091aab75'))
paddle.fluid.layers.merge_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.merge_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'dc63315b84f591ac79ecca0c3632027a'))
paddle.fluid.layers.get_tensor_from_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.get_tensor_from_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '7ffc849e71f31dfe29030ff94e662de6'))
paddle.fluid.layers.lstm ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1)) paddle.fluid.layers.lstm (ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1)), ('document', 'd5e6c494ac35100e2ed4d4bd9a1ed932'))
paddle.fluid.layers.shuffle_channel ArgSpec(args=['x', 'group', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.shuffle_channel (ArgSpec(args=['x', 'group', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2fa6782d43d02ae64482d21235a82949'))
paddle.fluid.layers.py_func ArgSpec(args=['func', 'x', 'out', 'backward_func', 'skip_vars_in_backward_input'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.layers.py_func (ArgSpec(args=['func', 'x', 'out', 'backward_func', 'skip_vars_in_backward_input'], varargs=None, keywords=None, defaults=(None, None)), ('document', '8404e472ac12b4a30a505d3d3a3e5fdb'))
paddle.fluid.layers.psroi_pool ArgSpec(args=['input', 'rois', 'output_channels', 'spatial_scale', 'pooled_height', 'pooled_width', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.psroi_pool (ArgSpec(args=['input', 'rois', 'output_channels', 'spatial_scale', 'pooled_height', 'pooled_width', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '1546136806fef5c08f6918544bd9151d'))
paddle.fluid.layers.teacher_student_sigmoid_loss ArgSpec(args=['input', 'label', 'soft_max_up_bound', 'soft_max_lower_bound'], varargs=None, keywords=None, defaults=(15.0, -15.0)) paddle.fluid.layers.teacher_student_sigmoid_loss (ArgSpec(args=['input', 'label', 'soft_max_up_bound', 'soft_max_lower_bound'], varargs=None, keywords=None, defaults=(15.0, -15.0)), ('document', '2f6ff96864054a31aa4bb659c6722c99'))
paddle.fluid.layers.huber_loss ArgSpec(args=['input', 'label', 'delta'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.huber_loss (ArgSpec(args=['input', 'label', 'delta'], varargs=None, keywords=None, defaults=None), ('document', '431a4301c35032166ec029f7432c80a7'))
paddle.fluid.layers.tree_conv ArgSpec(args=['nodes_vector', 'edge_set', 'output_size', 'num_filters', 'max_depth', 'act', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 2, 'tanh', None, None, None)) paddle.fluid.layers.tree_conv (ArgSpec(args=['nodes_vector', 'edge_set', 'output_size', 'num_filters', 'max_depth', 'act', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 2, 'tanh', None, None, None)), ('document', '34ea12ac9f10a65dccbc50100d12e607'))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)) paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '33bbd42027d872b3818b3d64ec52e139'))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)) paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'b1ae2e1cc0750e58726374061ea90ecc'))
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.read_file (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', 'b0a1c2fc51c27a106da28f3308c41f5e'))
paddle.fluid.layers.shuffle ArgSpec(args=['reader', 'buffer_size'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.shuffle (ArgSpec(args=['reader', 'buffer_size'], varargs=None, keywords=None, defaults=None), ('document', 'f967a73426db26f970bc70bfb03cffca'))
paddle.fluid.layers.batch ArgSpec(args=['reader', 'batch_size'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.batch (ArgSpec(args=['reader', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', 'f563d376d35e1a4c4db100fd11b381a0'))
paddle.fluid.layers.double_buffer ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.layers.double_buffer (ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '07e5b796674796eb1ef3fee9c10d24e3'))
paddle.fluid.layers.random_data_generator ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,)) paddle.fluid.layers.random_data_generator (ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,)), ('document', '9b7f0f86ec24bbc97643cadcb6499cff'))
paddle.fluid.layers.py_reader ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True)) paddle.fluid.layers.py_reader (ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '13dabc57863f62ab3141586784ee356b'))
paddle.fluid.layers.create_py_reader_by_data ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True)) paddle.fluid.layers.create_py_reader_by_data (ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True)), ('document', '350f74d93fab9adb2ac4950f1c26416b'))
paddle.fluid.layers.Preprocessor.__init__ ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.Preprocessor.__init__ (ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Preprocessor.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.Preprocessor.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Preprocessor.inputs ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.Preprocessor.inputs (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Preprocessor.outputs ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None) paddle.fluid.layers.Preprocessor.outputs (ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.load ArgSpec(args=['out', 'file_path', 'load_as_fp16'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.load (ArgSpec(args=['out', 'file_path', 'load_as_fp16'], varargs=None, keywords=None, defaults=(None,)), ('document', '9d1a4bc97bbce9fa1d4f7a4200a771ff'))
paddle.fluid.layers.create_tensor ArgSpec(args=['dtype', 'name', 'persistable'], varargs=None, keywords=None, defaults=(None, False)) paddle.fluid.layers.create_tensor (ArgSpec(args=['dtype', 'name', 'persistable'], varargs=None, keywords=None, defaults=(None, False)), ('document', 'c0c3d0194f83fff8ea99ce0820657dae'))
paddle.fluid.layers.create_parameter ArgSpec(args=['shape', 'dtype', 'name', 'attr', 'is_bias', 'default_initializer'], varargs=None, keywords=None, defaults=(None, None, False, None)) paddle.fluid.layers.create_parameter (ArgSpec(args=['shape', 'dtype', 'name', 'attr', 'is_bias', 'default_initializer'], varargs=None, keywords=None, defaults=(None, None, False, None)), ('document', 'd62b866c899bc1fedb5385f95b88e1f8'))
paddle.fluid.layers.create_global_var ArgSpec(args=['shape', 'value', 'dtype', 'persistable', 'force_cpu', 'name'], varargs=None, keywords=None, defaults=(False, False, None)) paddle.fluid.layers.create_global_var (ArgSpec(args=['shape', 'value', 'dtype', 'persistable', 'force_cpu', 'name'], varargs=None, keywords=None, defaults=(False, False, None)), ('document', 'ab914fac893607e29ac6e52bbdbea1a4'))
paddle.fluid.layers.cast ArgSpec(args=['x', 'dtype'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.cast (ArgSpec(args=['x', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '60cb8f843d625abf33f8bf12455b8f99'))
paddle.fluid.layers.tensor_array_to_tensor ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None)) paddle.fluid.layers.tensor_array_to_tensor (ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'b12717d3d4567e6119589f7f655b0cbb'))
paddle.fluid.layers.concat ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(0, None)) paddle.fluid.layers.concat (ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(0, None)), ('document', 'b19b79be4f05e85d1d6cec642c9fb535'))
paddle.fluid.layers.sums ArgSpec(args=['input', 'out'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.sums (ArgSpec(args=['input', 'out'], varargs=None, keywords=None, defaults=(None,)), ('document', '42912092418620b4be07f36af31e7816'))
paddle.fluid.layers.assign ArgSpec(args=['input', 'output'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.assign (ArgSpec(args=['input', 'output'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b690184f3537df5501e4d9d8f31152a5'))
paddle.fluid.layers.fill_constant_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'value', 'input_dim_idx', 'output_dim_idx'], varargs=None, keywords=None, defaults=(0, 0)) paddle.fluid.layers.fill_constant_batch_size_like (ArgSpec(args=['input', 'shape', 'dtype', 'value', 'input_dim_idx', 'output_dim_idx'], varargs=None, keywords=None, defaults=(0, 0)), ('document', 'd4059a2f5763036b07018d76429f9acb'))
paddle.fluid.layers.fill_constant ArgSpec(args=['shape', 'dtype', 'value', 'force_cpu', 'out'], varargs=None, keywords=None, defaults=(False, None)) paddle.fluid.layers.fill_constant (ArgSpec(args=['shape', 'dtype', 'value', 'force_cpu', 'out'], varargs=None, keywords=None, defaults=(False, None)), ('document', '1d8b14729639fa38509c79b9784740fa'))
paddle.fluid.layers.argmin ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)) paddle.fluid.layers.argmin (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)), ('document', '2778a1d34be49263a51211885599ea37'))
paddle.fluid.layers.argmax ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)) paddle.fluid.layers.argmax (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)), ('document', '04114996cfb98994ba222804a1a6109f'))
paddle.fluid.layers.argsort ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(-1, None)) paddle.fluid.layers.argsort (ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '68ec45c6fb6b93e47de9c9a0945fb98e'))
paddle.fluid.layers.ones ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)) paddle.fluid.layers.ones (ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)), ('document', 'b402489c62e668df42e7daceb63c142b'))
paddle.fluid.layers.zeros ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)) paddle.fluid.layers.zeros (ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)), ('document', 'c155e2efc56ffa5ed4658cca0272e491'))
paddle.fluid.layers.reverse ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.reverse (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None), ('document', '8ee7cb6ca639e7460e825f953b65d94d'))
paddle.fluid.layers.has_inf ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.has_inf (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '8f8c0306117ea441f20dcbbdba1f0ecc'))
paddle.fluid.layers.has_nan ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.has_nan (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '2e53e83127dbfd86e7098bdfe9a549e8'))
paddle.fluid.layers.isfinite ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.isfinite (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '0a437011c3906079fd8947ed3e52d292'))
paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None)) paddle.fluid.layers.While.__init__ (ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.While.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.While.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Switch.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.Switch.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Switch.case ArgSpec(args=['self', 'condition'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.Switch.case (ArgSpec(args=['self', 'condition'], varargs=None, keywords=None, defaults=None), ('document', 'f7c7160014c1b46cfeda9dd5808d1789'))
paddle.fluid.layers.Switch.default ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.Switch.default (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '50853ae884df03d9c36703bb46d9ef07'))
paddle.fluid.layers.increment ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True)) paddle.fluid.layers.increment (ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True)), ('document', '73bb96ec4783ec1a11e760e8851b0e77'))
paddle.fluid.layers.array_write ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.array_write (ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,)), ('document', '40b6d15f4c86b2b09df340d7778ad713'))
paddle.fluid.layers.create_array ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.create_array (ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None), ('document', '2d4f20087080ba5105b55205ad5c5b6a'))
paddle.fluid.layers.less_than ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None)) paddle.fluid.layers.less_than (ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None)), ('document', '067bbc799c66289ca8b8924c26b6673f'))
paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.equal (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '80c29b1dc64718f0116de90d1ac88a77'))
paddle.fluid.layers.array_read ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.array_read (ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None), ('document', '0275133f1dde2aed528b4d3230edf823'))
paddle.fluid.layers.array_length ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.array_length (ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None), ('document', 'ffb8b9578ec66db565b223d313aa82a2'))
paddle.fluid.layers.IfElse.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.IfElse.__init__ (ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.IfElse.false_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.IfElse.false_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.IfElse.input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.IfElse.input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.IfElse.output ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None) paddle.fluid.layers.IfElse.output (ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.IfElse.true_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.IfElse.true_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.DynamicRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.DynamicRNN.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.DynamicRNN.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.DynamicRNN.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6d3e0a5d9aa519a9773a36e1620ea9b7'))
paddle.fluid.layers.DynamicRNN.memory ArgSpec(args=['self', 'init', 'shape', 'value', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, False, 'float32')) paddle.fluid.layers.DynamicRNN.memory (ArgSpec(args=['self', 'init', 'shape', 'value', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, False, 'float32')), ('document', 'b9174d4e91505b0c8ecc193eb51e248d'))
paddle.fluid.layers.DynamicRNN.output ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None) paddle.fluid.layers.DynamicRNN.output (ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None), ('document', 'b439a176a3328de8a75bdc5c08eece4a'))
paddle.fluid.layers.DynamicRNN.static_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.DynamicRNN.static_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', 'f29ad2478b6b2ad4f413d2936a331ea0'))
paddle.fluid.layers.DynamicRNN.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.DynamicRNN.step_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '169d694d2224f62b4f3afdc3dbc19e95'))
paddle.fluid.layers.DynamicRNN.update_memory ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.DynamicRNN.update_memory (ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None), ('document', '5d83987da13b98363d6a807a52d8024f'))
paddle.fluid.layers.StaticRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.StaticRNN.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.StaticRNN.memory ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1)) paddle.fluid.layers.StaticRNN.memory (ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1)), ('document', 'c24e368e23afac1ed91a78a639d7a9c7'))
paddle.fluid.layers.StaticRNN.output ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None) paddle.fluid.layers.StaticRNN.output (ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.StaticRNN.step ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.StaticRNN.step (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.StaticRNN.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.StaticRNN.step_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.StaticRNN.step_output ArgSpec(args=['self', 'o'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.StaticRNN.step_output (ArgSpec(args=['self', 'o'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.StaticRNN.update_memory ArgSpec(args=['self', 'mem', 'var'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.StaticRNN.update_memory (ArgSpec(args=['self', 'mem', 'var'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.reorder_lod_tensor_by_rank ArgSpec(args=['x', 'rank_table'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.reorder_lod_tensor_by_rank (ArgSpec(args=['x', 'rank_table'], varargs=None, keywords=None, defaults=None), ('document', '3545f529ef04e8f6ecb76b47fa3df01a'))
paddle.fluid.layers.Print ArgSpec(args=['input', 'first_n', 'message', 'summarize', 'print_tensor_name', 'print_tensor_type', 'print_tensor_shape', 'print_tensor_lod', 'print_phase'], varargs=None, keywords=None, defaults=(-1, None, -1, True, True, True, True, 'both')) paddle.fluid.layers.Print (ArgSpec(args=['input', 'first_n', 'message', 'summarize', 'print_tensor_name', 'print_tensor_type', 'print_tensor_shape', 'print_tensor_lod', 'print_phase'], varargs=None, keywords=None, defaults=(-1, None, -1, True, True, True, True, 'both')), ('document', '5fef91b0e21c93610785f2b1f7161732'))
paddle.fluid.layers.is_empty ArgSpec(args=['x', 'cond'], varargs=None, keywords='ignored', defaults=(None,)) paddle.fluid.layers.is_empty (ArgSpec(args=['x', 'cond'], varargs=None, keywords='ignored', defaults=(None,)), ('document', 'bbe578dbb49ad13e15b014e98c22b519'))
paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.sigmoid (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '29a25ba78de79152076cacfc5443137d'))
paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.logsigmoid (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '81ccb7acafd06c7728e11581f5d342e3'))
paddle.fluid.layers.exp ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.exp (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e6b3e769413d96aab4176f96db25984b'))
paddle.fluid.layers.tanh ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.tanh (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e9d586a0b5bd05f67ee78048f9d503b6'))
paddle.fluid.layers.tanh_shrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.tanh_shrink (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '1e521554b9fdda9061ec6d306f0709b7'))
paddle.fluid.layers.softshrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.softshrink (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '9eef31597bbafa2bd49691e072296e13'))
paddle.fluid.layers.sqrt ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.sqrt (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '072a8541e0f632366bba10f67cb0db27'))
paddle.fluid.layers.abs ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.abs (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '64650ac42cf82e9920cb0b172b1d29fd'))
paddle.fluid.layers.ceil ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.ceil (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c75d67dc5fe28f68e4cfffead4f698ad'))
paddle.fluid.layers.floor ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.floor (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '647b16c5da5ef909649ae02abb434973'))
paddle.fluid.layers.cos ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.cos (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '485f2686bcc2fe37a4bd893769c8a3e2'))
paddle.fluid.layers.sin ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.sin (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '01f1766aa76eff1df30147505b59f7c4'))
paddle.fluid.layers.round ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.round (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b47f5da13913d3e56bdb1e612a73f3f2'))
paddle.fluid.layers.reciprocal ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.reciprocal (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'cc6ac2f14f03c52aaa83a59bf83b8d26'))
paddle.fluid.layers.square ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.square (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '48dfb45d773dbc30126c3a7f777de5ee'))
paddle.fluid.layers.softplus ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.softplus (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '459c5781e9d1dd88283b7c5769d7872a'))
paddle.fluid.layers.softsign ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.softsign (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '80846bcd4bd457207457a6d5411f4148'))
paddle.fluid.layers.uniform_random ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', -1.0, 1.0, 0)) paddle.fluid.layers.uniform_random (ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', -1.0, 1.0, 0)), ('document', '308b619af849caa82bbc31e897f5e641'))
paddle.fluid.layers.hard_shrink ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.hard_shrink (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c142f5884f3255e0d6075c286bbd531e'))
paddle.fluid.layers.cumsum ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None)) paddle.fluid.layers.cumsum (ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '944d7c03057f5fc88bc78acd4d82f926'))
paddle.fluid.layers.thresholded_relu ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.thresholded_relu (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '90566ea449ea4c681435546e2f70610a'))
paddle.fluid.layers.prior_box ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False)) paddle.fluid.layers.prior_box (ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False)), ('document', '14cac0ee643fa6e026ad82aeeee75bd8'))
paddle.fluid.layers.density_prior_box ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None)) paddle.fluid.layers.density_prior_box (ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None)), ('document', 'a0d762bb08de9ce93bc780aa57cd5cd9'))
paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)), ('document', 'a6ab47a2fe681e52fabb7057ddf0efdd'))
paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '3ddb9b966f193900193a95a3df77c3c1'))
paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'c0b334f917828f95056f6ebe10907b1c'))
paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0)), ('document', 'c33093a82a46e3091e789e5572588db1'))
paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '6d5028fd09d01ab82d296adc0ea95aee'))
paddle.fluid.layers.detection_map (ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral')), ('document', '1467d91b50c22cd52103b4aa1ee9d0a1'))
paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', '1dddef3eb4b3cbd4df8e03ac480dbf97'))
paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)), ('document', '23337cc57bbf5be73884b6bd0f849603'))
paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,)), ('document', '5761f9ed83654314416e24372b33bb84'))
paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True)), ('document', '87863717edeb7fe87a1268976cbc015d'))
paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', '57ab49f3f324f310b7eed322e7c1057a'))
paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', 'f73706a65468e9ca3e0bee4a31521b0a'))
paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '587845f60c5d97ffdf2dfd21da52eca1'))
paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '032d0f4b7d8f6235ee5d91e473344f0e'))
paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '0e5ac2507723a0b5adec473f9556799b'))
paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '991e934c3e09abf0edec7c9c978b4691'))
paddle.fluid.layers.box_clip (ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '397e9e02b451d99c56e20f268fa03f2e'))
paddle.fluid.layers.multiclass_nms (ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)), ('document', 'ca7d1107b6c5d2d6d8221039a220fde0'))
paddle.fluid.layers.accuracy (ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)), ('document', '9808534c12c5e739a10f73ebb0b4eafd'))
paddle.fluid.layers.auc (ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1)), ('document', 'e0e95334fce92d16c2d9db6e7caffc47'))
paddle.fluid.layers.exponential_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)), ('document', '98a5050bee8522fcea81aa795adaba51'))
paddle.fluid.layers.natural_exp_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)), ('document', '676a7bc2a218691db50bca233903d21e'))
paddle.fluid.layers.inverse_time_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)), ('document', 'd07e767d59c4a5e6c930f3e6756d3f82'))
paddle.fluid.layers.polynomial_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'end_learning_rate', 'power', 'cycle'], varargs=None, keywords=None, defaults=(0.0001, 1.0, False)), ('document', '882634f420f626642f0874481263da40'))
paddle.fluid.layers.piecewise_decay (ArgSpec(args=['boundaries', 'values'], varargs=None, keywords=None, defaults=None), ('document', 'c717d9d1d78a53c809d01b8bc56f3cae'))
paddle.fluid.layers.noam_decay (ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None), ('document', 'd9a95746353fd574be36dc28d8726c28'))
paddle.fluid.layers.append_LARS (ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None), ('document', 'd24fa1e7d62ac8a534fc6a86002f84f8'))
paddle.fluid.layers.cosine_decay (ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None), ('document', '9588c64c26ffaef3c466e404a6af9d9b'))
paddle.fluid.contrib.InitState.__init__ (ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.StateCell.__init__ (ArgSpec(args=['self', 'inputs', 'states', 'out_state', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.StateCell.compute_state (ArgSpec(args=['self', 'inputs'], varargs=None, keywords=None, defaults=None), ('document', '92973b3f222081a1d17069c683cf4a99'))
paddle.fluid.contrib.StateCell.get_input (ArgSpec(args=['self', 'input_name'], varargs=None, keywords=None, defaults=None), ('document', '6f24a007cfa184e32f01a960703bfd70'))
paddle.fluid.contrib.StateCell.get_state (ArgSpec(args=['self', 'state_name'], varargs=None, keywords=None, defaults=None), ('document', '630a4945cfe659ea4f307598fbbce5d2'))
paddle.fluid.contrib.StateCell.out_state (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '7ad681dff0393ddf13a724194e720f28'))
paddle.fluid.contrib.StateCell.set_state (ArgSpec(args=['self', 'state_name', 'state_value'], varargs=None, keywords=None, defaults=None), ('document', 'd4e0e08cd5d9d9a571cbc52d114f5ae9'))
paddle.fluid.contrib.StateCell.state_updater (ArgSpec(args=['self', 'updater'], varargs=None, keywords=None, defaults=None), ('document', 'd5afe1b7665d94fb023b15cf913ca510'))
paddle.fluid.contrib.StateCell.update_states (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'fe0b0f1338723516a35a30247899c81b'))
paddle.fluid.contrib.TrainingDecoder.__init__ (ArgSpec(args=['self', 'state_cell', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.TrainingDecoder.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '98d88fa1c989748410a12517c6a585bf'))
paddle.fluid.contrib.TrainingDecoder.output (ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None), ('document', 'f0a457dee586559036202087ce2eff69'))
paddle.fluid.contrib.TrainingDecoder.static_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', 'a024c72664fe815068423ba630b7658a'))
paddle.fluid.contrib.TrainingDecoder.step_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '4659db7a888a2495e71c1838a0483909'))
paddle.fluid.contrib.BeamSearchDecoder.__init__ (ArgSpec(args=['self', 'state_cell', 'init_ids', 'init_scores', 'target_dict_dim', 'word_dim', 'input_var_dict', 'topk_size', 'sparse_emb', 'max_len', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=({}, 50, True, 100, 1, 1, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.BeamSearchDecoder.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '98d88fa1c989748410a12517c6a585bf'))
paddle.fluid.contrib.BeamSearchDecoder.decode (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '1e47c60f080c1343ebb6ceaef89656b2'))
paddle.fluid.contrib.BeamSearchDecoder.early_stop (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3a84a7454ed6707f79b9e954d92a7575'))
paddle.fluid.contrib.BeamSearchDecoder.read_array (ArgSpec(args=['self', 'init', 'is_ids', 'is_scores'], varargs=None, keywords=None, defaults=(False, False)), ('document', 'aa89eb8fd5e4cabaf5cc1bcae14665a4'))
paddle.fluid.contrib.BeamSearchDecoder.update_array (ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None), ('document', '5754e9b3212b7c09497151516a0de5a7'))
paddle.fluid.contrib.memory_usage (ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '8fcb2f93bb743693baa8d4860a5ccc47'))
paddle.fluid.contrib.op_freq_statistic (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '4d43687113c4bf5b29d15aee2f4e4afa'))
paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000)), ('document', '14b39f1fcd5667ff556b1aad94357d1d'))
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.QuantizeTranspiler.freeze_program (ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)), ('document', '909675a1ab055c69b436a7893fcae4fd'))
paddle.fluid.contrib.QuantizeTranspiler.training_transpile (ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6dd9909f10b283ba2892a99058a72884'))
paddle.fluid.contrib.Calibrator.__init__ (ArgSpec(args=['self'], varargs='args', keywords='kwargs', defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.Calibrator.sample_data (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3b8c85ca1e2cf753cc8c90a6c6992958'))
paddle.fluid.contrib.Calibrator.save_int8_model (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.reader.ctr_reader.ctr_reader (ArgSpec(args=['feed_dict', 'file_type', 'file_format', 'dense_slot_index', 'sparse_slot_index', 'capacity', 'thread_num', 'batch_size', 'file_list', 'slots', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b2ebf3de2a6ef1af2c3b88d2db7591ab'))
paddle.fluid.contrib.build_compressor (ArgSpec(args=['place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'config'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.CompressPass.__init__ (ArgSpec(args=['self', 'place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'program_exe'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.CompressPass.add_strategy (ArgSpec(args=['self', 'strategy'], varargs=None, keywords=None, defaults=None), ('document', '3bf6010b6f47d3c86df0ec8957be95e0'))
paddle.fluid.contrib.CompressPass.apply (ArgSpec(args=['self', 'graph'], varargs=None, keywords=None, defaults=None), ('document', 'a92bf85d4b59bd4f2ac1706d7c4899a6'))
paddle.fluid.contrib.ImitationGraph.__init__ (ArgSpec(args=['self', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.ImitationGraph.all_parameters (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.__init__ (ArgSpec(args=['self', 'pruner', 'start_epoch', 'end_epoch', 'delta_rate', 'acc_loss_threshold', 'sensitivities'], varargs=None, keywords=None, defaults=(None, 0, 10, 0.2, 0.2, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_batch_begin (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_batch_end (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_compress_begin (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_compress_end (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_begin (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_end (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.MagnitudePruner.__init__ (ArgSpec(args=['self', 'threshold'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.MagnitudePruner.prune (ArgSpec(args=['self', 'param', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.RatioPruner.__init__ (ArgSpec(args=['self', 'ratios'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e7a81a325b296a9ca502ee5adb4fc85d'))
paddle.fluid.contrib.RatioPruner.prune (ArgSpec(args=['self', 'param', 'ratio'], varargs=None, keywords=None, defaults=(None,)), ('document', '358cbf2978c91028fb96a195a9884645'))
paddle.fluid.contrib.load_persistables_for_increment (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None), ('document', '11fbf7e8dd2289805de291b453a33ee7'))
paddle.fluid.contrib.load_persistables_for_inference (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None), ('document', '5b5577bb3d24070da819674255d16196'))
paddle.fluid.contrib.convert_dist_to_sparse_program (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '4efbd93876832d4d35497cdbc7a1e6d8'))
paddle.fluid.contrib.HDFSClient.__init__ (ArgSpec(args=['self', 'hadoop_home', 'configs'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.HDFSClient.delete (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', 'c3721aa2d4d9ef5a857dd47b2681c03e'))
paddle.fluid.contrib.HDFSClient.download (ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'unzip'], varargs=None, keywords=None, defaults=(False, False)), ('document', 'ca55bde92184d3fd0f9f5c963b25e634'))
paddle.fluid.contrib.HDFSClient.is_dir (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,)), ('document', '45bde1bae02605a205c8245b58b9156d'))
paddle.fluid.contrib.HDFSClient.is_exist (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,)), ('document', 'be9c94bccff7ba0c1d95883ac62b5864'))
paddle.fluid.contrib.HDFSClient.ls (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', '808acac504870c7e46594b95674f8a86'))
paddle.fluid.contrib.HDFSClient.lsr (ArgSpec(args=['self', 'hdfs_path', 'only_file', 'sort'], varargs=None, keywords=None, defaults=(True, True)), ('document', 'fae835aa3354eb6a0434c0f9ba3c2747'))
paddle.fluid.contrib.HDFSClient.make_local_dirs (ArgSpec(args=['local_path'], varargs=None, keywords=None, defaults=None), ('document', 'e76b89c8e7f019b5da576c0026fcf689'))
paddle.fluid.contrib.HDFSClient.makedirs (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', '44d9972aae390aedf40aaea731a37e4b'))
paddle.fluid.contrib.HDFSClient.rename (ArgSpec(args=['self', 'hdfs_src_path', 'hdfs_dst_path', 'overwrite'], varargs=None, keywords=None, defaults=(False,)), ('document', '0eb133644d9a9f4da45bb39261ff0955'))
paddle.fluid.contrib.HDFSClient.upload (ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'retry_times'], varargs=None, keywords=None, defaults=(False, 5)), ('document', '7d053b4bfd6dcfdd2c9dda0e0dbd9665'))
paddle.fluid.contrib.multi_download (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,)), ('document', '100927be598ed8f9eaa1f3ef1b23568a'))
paddle.fluid.contrib.multi_upload (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True)), ('document', '183f34c83d30dbe16e09e8716c41958a'))
paddle.fluid.transpiler.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '292ab72977afbe58e6a3bde175452680'))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '78f4949aedf317666a89ca74b3748ba8'))
paddle.fluid.transpiler.DistributeTranspiler.get_startup_program (ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'd796fc0c8d51503b556fcf6dc15c4f0c'))
paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program (ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)), ('document', '736330e31a7a54abccc0c7fd9119d9ff'))
paddle.fluid.transpiler.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '06ce55338dfe96311ad1078235ab3bf4'))
paddle.fluid.transpiler.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', 'eda17d0f1639bc6ca215cecf87f588a4'))
paddle.fluid.transpiler.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ac4114d3df16264f1946deb3a8434a6f'))
paddle.fluid.transpiler.HashName.__init__ (ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.HashName.dispatch (ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.HashName.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.RoundRobin.__init__ (ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.RoundRobin.dispatch (ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.RoundRobin.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.DistributeTranspilerConfig.__init__
paddle.fluid.nets.simple_img_conv_pool (ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, True)), ('document', 'e0f67f35abf27f666f81003113b90244'))
paddle.fluid.nets.sequence_conv_pool (ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type', 'bias_attr'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max', None)), ('document', '48c434dd7bb827f69d90e5135d77470f'))
paddle.fluid.nets.glu (ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,)), ('document', '08c1c57e1db6b20bf87b264cb7cf3ca8'))
paddle.fluid.nets.scaled_dot_product_attention (ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0)), ('document', '921714c9bfb351b41403418265393203'))
paddle.fluid.nets.img_conv_group (ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True)), ('document', '3802be78fbfb206dae64a2d9f8480970'))
paddle.fluid.optimizer.SGDOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.SGDOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.SGDOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.SGDOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.MomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.MomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.MomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.MomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.AdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name', 'initial_accumulator_value'], varargs=None, keywords=None, defaults=(1e-06, None, None, 0.0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.AdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.AdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.AdamOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name', 'lazy_mode'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdamOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.AdamOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.AdamOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.AdamaxOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdamaxOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.AdamaxOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.AdamaxOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.DecayedAdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.FtrlOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.0, 0.0, -0.5, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.FtrlOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.FtrlOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.FtrlOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.RMSPropOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, 0.0, False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.RMSPropOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.RMSPropOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.RMSPropOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.AdadeltaOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.AdadeltaOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.AdadeltaOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.ModelAverage.__init__ (ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window', 'regularization', 'name'], varargs=None, keywords=None, defaults=(10000, 10000, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.ModelAverage.apply (ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,)), ('document', '46234a5470590feb336346f70a3db715'))
paddle.fluid.optimizer.ModelAverage.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.ModelAverage.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.ModelAverage.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.ModelAverage.restore (ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None), ('document', '18db9c70be9c4dd466f9844457b21bfe'))
paddle.fluid.optimizer.LarsMomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'lars_coeff', 'lars_weight_decay', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.0005, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.LarsMomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.LarsMomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.LarsMomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.backward.append_backward (ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '1a79bd7d10ae54ca763ec81bca36ba24'))
paddle.fluid.regularizer.L1DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.regularizer.L2DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.LoDTensor.__init__ 1. __init__(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None 2. __init__(self: paddle.fluid.core.LoDTensor) -> None paddle.fluid.LoDTensor.__init__ 1. __init__(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None 2. __init__(self: paddle.fluid.core.LoDTensor) -> None
paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> bool paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> bool
paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core.LoDTensor) -> List[List[int]] paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core.LoDTensor) -> List[List[int]]
...@@ -482,38 +483,38 @@ paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray ...@@ -482,38 +483,38 @@ paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray
paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> None paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> None
paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None
paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None
paddle.fluid.ParamAttr.__init__ ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)) paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.WeightNormParamAttr.__init__ ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)) paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.DataFeeder.__init__ ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.DataFeeder.decorate_reader ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)) paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', '0eed2f198dc73c08a41b61edbc755753'))
paddle.fluid.DataFeeder.feed ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None) paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', '459e316301279dfd82001b46f0b8ffca'))
paddle.fluid.DataFeeder.feed_parallel ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '543863d1f9d4853758adb613b8659e85'))
paddle.fluid.clip.ErrorClipByValue.__init__ ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.clip.GradientClipByValue.__init__ ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.clip.GradientClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.clip.GradientClipByNorm.__init__ ArgSpec(args=['self', 'clip_norm'], varargs=None, keywords=None, defaults=None) paddle.fluid.clip.GradientClipByNorm.__init__ (ArgSpec(args=['self', 'clip_norm'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.clip.GradientClipByGlobalNorm.__init__ ArgSpec(args=['self', 'clip_norm', 'group_name'], varargs=None, keywords=None, defaults=('default_group',)) paddle.fluid.clip.GradientClipByGlobalNorm.__init__ (ArgSpec(args=['self', 'clip_norm', 'group_name'], varargs=None, keywords=None, defaults=('default_group',)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.profiler.cuda_profiler ArgSpec(args=['output_file', 'output_mode', 'config'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.profiler.cuda_profiler (ArgSpec(args=['output_file', 'output_mode', 'config'], varargs=None, keywords=None, defaults=(None, None)), ('document', '2e2fb1cfc469a67f19fb578a2ed6be79'))
paddle.fluid.profiler.reset_profiler ArgSpec(args=[], varargs=None, keywords=None, defaults=None) paddle.fluid.profiler.reset_profiler (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '397ce757fabbe5c622e0c3458c41fcd0'))
paddle.fluid.profiler.profiler ArgSpec(args=['state', 'sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile')) paddle.fluid.profiler.profiler (ArgSpec(args=['state', 'sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile')), ('document', 'bd3a07eeb68e384f4d2d416cb2e28d86'))
paddle.fluid.profiler.start_profiler ArgSpec(args=['state'], varargs=None, keywords=None, defaults=None) paddle.fluid.profiler.start_profiler (ArgSpec(args=['state'], varargs=None, keywords=None, defaults=None), ('document', '88da8fb6dbebaee2f7520188a09574f9'))
paddle.fluid.profiler.stop_profiler ArgSpec(args=['sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile')) paddle.fluid.profiler.stop_profiler (ArgSpec(args=['sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile')), ('document', 'a7500e39dd033f1e64f562e909333a8a'))
paddle.fluid.unique_name.generate ArgSpec(args=['key'], varargs=None, keywords=None, defaults=None) paddle.fluid.unique_name.generate (ArgSpec(args=['key'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.unique_name.switch ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.unique_name.switch (ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.unique_name.guard ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.unique_name.guard (ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.recordio_writer.convert_reader_to_recordio_file ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)) paddle.fluid.recordio_writer.convert_reader_to_recordio_file (ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)), ('document', '65c7523e86f0c50bb729b01667f36310'))
paddle.fluid.recordio_writer.convert_reader_to_recordio_files ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)) paddle.fluid.recordio_writer.convert_reader_to_recordio_files (ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)), ('document', 'bc643f0f5f1b9db57ff0d8a57d379bd7'))
paddle.fluid.Scope Scope() -> paddle.fluid.core._Scope paddle.fluid.Scope Scope() -> paddle.fluid.core._Scope
paddle.reader.map_readers ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None) paddle.reader.map_readers (ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None), ('document', '77cbadb09df588e21e5cc0819b69c87d'))
paddle.reader.buffered ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None) paddle.reader.buffered (ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None), ('document', '0d6186f109feceb99f60ec50a0a624cb'))
paddle.reader.compose ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None) paddle.reader.compose (ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None), ('document', '884291104e1c3f37f33aae44b7deeb0d'))
paddle.reader.chain ArgSpec(args=[], varargs='readers', keywords=None, defaults=None) paddle.reader.chain (ArgSpec(args=[], varargs='readers', keywords=None, defaults=None), ('document', 'd22c34e379a53901ae67a6bca7f4def4'))
paddle.reader.shuffle ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None) paddle.reader.shuffle (ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', 'e42ea6fee23ce26b23cb142cd1d6522d'))
paddle.reader.firstn ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None) paddle.reader.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'c5bb8f7dd4f917f1569a368aab5b8aad'))
paddle.reader.xmap_readers ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)) paddle.reader.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '283bc0b8a0e26ae186b8b9bee4aec560'))
paddle.reader.PipeReader.__init__ ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain')) paddle.reader.PipeReader.__init__ (ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.reader.PipeReader.get_line ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n')) paddle.reader.PipeReader.get_line (ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n')), ('document', '5f80a7ed70052f01665e4c74acccfa69'))
paddle.reader.multiprocess_reader ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)) paddle.reader.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '7d8b3a96e592107c893d5d51ce968ba0'))
paddle.reader.Fake.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.reader.Fake.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.reader.creator.np_array ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None) paddle.reader.creator.np_array (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '28d457fbc9a71efa4ac91a3be179cada'))
paddle.reader.creator.text_file ArgSpec(args=['path'], varargs=None, keywords=None, defaults=None) paddle.reader.creator.text_file (ArgSpec(args=['path'], varargs=None, keywords=None, defaults=None), ('document', '44fe286ab6175a5464d3a961a68c266a'))
paddle.reader.creator.recordio ArgSpec(args=['paths', 'buf_size'], varargs=None, keywords=None, defaults=(100,)) paddle.reader.creator.recordio (ArgSpec(args=['paths', 'buf_size'], varargs=None, keywords=None, defaults=(100,)), ('document', '11b3704ea42cfd537953387a7e58dae8'))
...@@ -13,7 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/block_desc.h"

#include <queue>
#include <unordered_set>
#include <utility>

#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"

...@@ -155,6 +159,16 @@ void BlockDesc::RemoveOp(size_t s, size_t e) {
  ops_.erase(ops_.begin() + s, ops_.begin() + e);
}
void BlockDesc::RemoveOpInternal(const OpDesc *op_desc) {
// TODO(minqiyang): make this faster
for (auto it = ops_.begin(); it != ops_.end(); ++it) {
if (it->get() == op_desc) {
ops_.erase(it);
break;
}
}
}
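The linear scan above is O(n) per removal, which is what the TODO flags. A minimal sketch of the kind of index that could make the lookup O(1), assuming a hypothetical op_pos_ map that BlockDesc would have to keep in sync on every insertion and erase (not part of this patch):

// Hypothetical side index: OpDesc* -> position in ops_.
std::unordered_map<const OpDesc *, size_t> op_pos_;

void BlockDesc::RemoveOpInternalIndexed(const OpDesc *op_desc) {
  auto it = op_pos_.find(op_desc);
  if (it == op_pos_.end()) return;
  ops_.erase(ops_.begin() + it->second);
  op_pos_.erase(it);
  // Note: entries after the erased position shift left by one, so the
  // remaining map entries would need re-indexing here (or a std::list plus
  // an iterator map could be used instead of a vector).
}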
std::vector<OpDesc *> BlockDesc::AllOps() const {
  std::vector<OpDesc *> res;
  for (const auto &op : ops_) {

...@@ -163,20 +177,6 @@ std::vector<OpDesc *> BlockDesc::AllOps() const {
  return res;
}
void BlockDesc::Clear() {
// clear all ops
ops_.clear();
// clear all vars which are not persistable
for (auto it = vars_.begin(); it != vars_.end();) {
if (it->second->Persistable()) {
++it;
} else {
vars_.erase(it++);
}
}
}
void BlockDesc::Flush() {
  for (auto &op_desc : ops_) {
    op_desc->Flush();
...

...@@ -93,12 +93,12 @@ class BlockDesc {
   */
  void RemoveOp(size_t s, size_t e);

  void RemoveOpInternal(const OpDesc *op_desc);

  void RemoveVar(const std::string &name) { vars_.erase(name); }

  std::vector<OpDesc *> AllOps() const;

  void Clear();

  size_t OpSize() const { return ops_.size(); }

  OpDesc *Op(int idx) const { return ops_.at(idx).get(); }
...
...@@ -12,7 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h"
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/details/fetch_op_handle.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"

...@@ -55,7 +57,7 @@ FeedFetchList FastThreadedSSAGraphExecutor::Run(
  std::vector<FetchOpHandle *> fetch_ops;

  for (auto &fetch_var_name : fetch_tensors) {
    for (auto &var_map : graph_->Get<details::GraphVars>(details::kGraphVars)) {
      auto it = var_map.find(fetch_var_name);
      if (it != var_map.end()) {
        fetched_vars[fetch_var_name].push_back(*it->second.rbegin());
...
...@@ -20,6 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/framework/transfer_scope_cache.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/operators/distributed/distributed.h"
...
...@@ -105,4 +105,5 @@ if (WITH_MKLDNN)
  cc_test(test_conv_bias_mkldnn_fuse_pass SRCS mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc DEPS conv_bias_mkldnn_fuse_pass naive_executor)
  cc_test(test_conv_relu_mkldnn_fuse_pass SRCS mkldnn/conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
  cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass)
  cc_test(test_mkldnn_placement_pass SRCS mkldnn/mkldnn_placement_pass_tester.cc DEPS mkldnn_placement_pass)
endif ()
...@@ -44,10 +44,14 @@ struct TestIsReachable {
  using func = std::function<bool(const std::string&, const std::string&)>;

  auto operator()(const std::unique_ptr<ir::Graph>& graph) -> func {
    auto hash = [](const Node* node) -> std::string {
      return node->Name() + std::to_string(node->id());
    };

    auto find_node = [&](const std::unique_ptr<ir::Graph>& graph,
                         const std::string& name) -> Node* {
      for (auto& node : GraphTraits::DFS(*graph)) {
        if (name == hash(&node)) {
          return &node;
        }
      }
...@@ -55,13 +59,17 @@ struct TestIsReachable {
      return nullptr;
    };

    // Map `from` and `to` to their hashed equivalents while traversing the graph.
    return [&](std::string from, std::string to) -> bool {
      if (from == to) return true;

      std::map<std::string, bool> visited;

      for (auto& node : GraphTraits::DFS(*graph)) {
        auto hashed = hash(&node);
        if (node.Name() == from) from = hashed;
        if (node.Name() == to) to = hashed;
        visited[hashed] = false;
      }

      visited[from] = true;
...@@ -72,15 +80,15 @@ struct TestIsReachable {
      while (!queue.empty()) {
        auto cur = find_node(graph, queue.front());
        queue.pop_front();
        if (cur == nullptr) return false;

        for (auto n : cur->outputs) {
          auto hashed_name = hash(n);
          if (hashed_name == to) return true;

          if (!visited[hashed_name]) {
            visited[hashed_name] = true;
            queue.push_back(hashed_name);
          }
        }
      }
...@@ -166,6 +174,28 @@ TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionAsYWithElementwiseAddRelu) {
  RunPassAndAssert(&prog, "a", "relu", 1);
}
TEST(ConvElementwiseAddMKLDNNFusePass,
ConvolutionProjectionAsYWithElementwiseAddRelu) {
auto prog = BuildProgramDesc({"a", "b", "c", "d", "e", "f"},
{"bias", "weights", "bias2", "weights2"});
SetOp(&prog, "sigmoid", {{"X", "a"}}, {"Out", "b"});
// right branch
SetOp(&prog, "conv2d",
{{"Input", "b"}, {"Bias", "bias"}, {"Filter", "weights"}},
{"Output", "c"});
// left branch
SetOp(&prog, "conv2d",
{{"Input", "a"}, {"Bias", "bias2"}, {"Filter", "weights2"}},
{"Output", "f"});
SetOp(&prog, "elementwise_add", {{"X", "f"}, {"Y", "c"}}, {"Out", "d"});
SetOp(&prog, "relu", {{"X", "d"}}, {"Out", "e"});
RunPassAndAssert(&prog, "a", "relu", 2);
}
TEST(ConvElementwiseAddMKLDNNFusePass,
     ConvolutionAsYWithElementwiseAddReluNoBias) {
  auto prog = BuildProgramDesc({"a", "b", "c", "d", "e"}, {"weights"});
...
...@@ -21,7 +21,7 @@ namespace ir {

std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  VLOG(3) << "Applies MKL-DNN placement strategy.";
  const auto& op_types_list =
      Get<std::unordered_set<std::string>>("mkldnn_enabled_op_types");
  for (const Node* n : graph->Nodes()) {
...
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h"
#include <gtest/gtest.h>
#include <boost/logic/tribool.hpp>
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs, boost::tribool use_mkldnn) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
if (!boost::indeterminate(use_mkldnn)) op->SetAttr("use_mkldnn", use_mkldnn);
if (type == "conv2d") {
op->SetAttr("name", name);
op->SetInput("Input", {inputs[0]});
op->SetInput("Filter", {inputs[1]});
op->SetInput("Bias", {inputs[2]});
} else if (type == "relu") {
op->SetInput("X", inputs);
} else if (type == "concat") {
op->SetAttr("axis", 1);
op->SetInput("X", {inputs[0], inputs[1]});
} else if (type == "pool2d") {
op->SetInput("X", {inputs[0]});
} else {
FAIL() << "Unexpected operator type.";
}
op->SetOutput("Out", {outputs[0]});
}
// operator use_mkldnn
// ---------------------------------------
// (a,b)->concat->c none
// (c,weights,bias)->conv->f none
// f->relu->g false
// g->pool->h false
// (h,weights2,bias2)->conv->k true
// k->relu->l true
ProgramDesc BuildProgramDesc() {
ProgramDesc prog;
for (auto& v :
std::vector<std::string>({"a", "b", "c", "weights", "bias", "f", "g",
"h", "weights2", "bias2", "k", "l"})) {
auto* var = prog.MutableBlock(0)->Var(v);
var->SetType(proto::VarType::SELECTED_ROWS);
if (v == "weights" || v == "bias") {
var->SetPersistable(true);
}
}
SetOp(&prog, "concat", "concat1", std::vector<std::string>({"a", "b"}),
std::vector<std::string>({"c"}), boost::indeterminate);
SetOp(&prog, "conv2d", "conv1",
std::vector<std::string>({"c", "weights", "bias"}),
std::vector<std::string>({"f"}), boost::indeterminate);
SetOp(&prog, "relu", "relu1", std::vector<std::string>({"f"}),
std::vector<std::string>({"g"}), false);
SetOp(&prog, "pool2d", "pool1", std::vector<std::string>({"g"}),
std::vector<std::string>({"h"}), false);
SetOp(&prog, "conv2d", "conv2",
std::vector<std::string>({"h", "weights2", "bias2"}),
std::vector<std::string>({"k"}), true);
SetOp(&prog, "relu", "relu2", std::vector<std::string>({"k"}),
std::vector<std::string>({"l"}), true);
return prog;
}
void MainTest(std::initializer_list<std::string> mkldnn_enabled_op_types,
unsigned expected_use_mkldnn_true_count) {
auto prog = BuildProgramDesc();
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto pass = PassRegistry::Instance().Get("mkldnn_placement_pass");
pass->Set("mkldnn_enabled_op_types",
new std::unordered_set<std::string>(mkldnn_enabled_op_types));
graph = pass->Apply(std::move(graph));
unsigned use_mkldnn_true_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
auto* op = node->Op();
if (op->HasAttr("use_mkldnn") &&
boost::get<bool>(op->GetAttr("use_mkldnn"))) {
++use_mkldnn_true_count;
}
}
}
EXPECT_EQ(use_mkldnn_true_count, expected_use_mkldnn_true_count);
}
TEST(MKLDNNPlacementPass, enable_conv_relu) {
// 1 conv (1 conv is always true) + 2 relu (1 relu is always true) + 0 pool
MainTest({"conv2d", "relu"}, 3);
}
TEST(MKLDNNPlacementPass, enable_relu_pool) {
// 1 conv (1 conv is always true) + 2 relu (1 relu is always true) + 1 pool
MainTest({"relu", "pool2d"}, 4);
}
TEST(MKLDNNPlacementPass, enable_all) {
// 1 conv (1 conv is always true) + 2 relu (1 relu is always true) + 1 pool
MainTest({}, 4);
}
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(mkldnn_placement_pass);
...@@ -290,7 +290,7 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
      "USE_OP_DEVICE_KERNEL must be in global namespace");                    \
  extern int                                                                  \
      TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE##_##customized_name(); \
  UNUSED static int use_op_kernel_##op_type##_##LIBRARY_TYPE##_##customized_name##_ = /* NOLINT */ \
      TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE##_##customized_name()

#define USE_OP_DEVICE_KERNEL(op_type, LIBRARY_TYPE) \
...
...@@ -882,7 +882,8 @@ class RuntimeInferShapeContext : public InferShapeContext {
  const RuntimeContext& ctx_;
};

static void CheckTensorNANOrInf(const std::string& op_type,
                                const std::string& name,
                                const framework::Tensor& tensor) {
  if (tensor.memory_size() == 0) {
    return;

...@@ -892,9 +893,9 @@ static void CheckTensorNANOrInf(const std::string& name,
    return;
  }
  PADDLE_ENFORCE(!framework::TensorContainsInf(tensor),
                 "Operator %s output Tensor %s contains Inf", op_type, name);
  PADDLE_ENFORCE(!framework::TensorContainsNAN(tensor),
                 "Operator %s output Tensor %s contains NAN", op_type, name);
}

void OperatorWithKernel::RuntimeInferShape(const Scope& scope,

...@@ -988,9 +989,10 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
      auto* var = exec_scope.FindVar(vname);
      if (var == nullptr) continue;
      if (var->IsType<framework::LoDTensor>()) {
        CheckTensorNANOrInf(type_, vname, var->Get<framework::LoDTensor>());
      } else if (var->IsType<framework::SelectedRows>()) {
        CheckTensorNANOrInf(type_, vname,
                            var->Get<framework::SelectedRows>().value());
      }
    }
  }
...
...@@ -24,3 +24,11 @@ limitations under the License. */
#pragma pop_macro("_XOPEN_SOURCE")
#pragma pop_macro("_POSIX_C_SOURCE")
#if !defined(PYBIND11_HIDDEN)
#ifdef _WIN32
#define PYBIND11_HIDDEN __declspec(dllexport)
#else
#define PYBIND11_HIDDEN __attribute__((visibility("hidden")))
#endif
#endif
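// PYBIND11_HIDDEN is used further below ("class PYBIND11_HIDDEN OpBase"),
// presumably to give pybind11-facing types hidden symbol visibility on
// non-Windows builds, in line with pybind11's visibility recommendations.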
...@@ -14,6 +14,8 @@
#include "paddle/fluid/framework/tensor_util.h"
#include <algorithm>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/data_type.h"
...
...@@ -18,6 +18,7 @@
#include <limits>
#include <map>
#include <random>
#include <unordered_set>
#include <utility>

#include "paddle/fluid/framework/lod_tensor.h"

...@@ -139,6 +140,8 @@ class Autograd {
          }
        }
      }

      ready_op->InvokeBackwardHooks();
    }
  }

...@@ -156,8 +159,10 @@ class Autograd {
      for (auto it : candidate->pre_ops_) {
        for (OpBase* pre_op : it.second) {
          if (!pre_op) continue;
          VLOG(5) << "op dep " << candidate->op_desc_->Type() << " trace id "
                  << candidate->trace_id_ << " <---- " << it.first << " <---- "
                  << pre_op->op_desc_->Type() << " trace id "
                  << pre_op->trace_id_;
          if (visited.find(pre_op) == visited.end()) {
            visited.insert(pre_op);
            queue.push_back(pre_op);

...@@ -211,6 +216,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
    return {};
  }

  VLOG(3) << "apply op grad: " << op_desc_->Type();

  std::vector<framework::VariableValueMap> grad_outputs;
  if (backward_id_ > 0) {
    VLOG(3) << "py_layer_grad";

...@@ -272,6 +278,22 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
  return input_vars_;
}
void OpBase::InvokeBackwardHooks() {
VLOG(3) << "call backward hooks, hooks num: " << backward_hooks_.size();
// call backward hooks
for (py::object& callable : backward_hooks_) {
callable(this);
}
}
void OpBase::RegisterBackwardHooks(const py::object& callable) {
VLOG(3) << "Register backward hooks " << trace_id_;
// TODO(minqiyang): check the callable format
backward_hooks_.push_back(callable);
}
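RegisterBackwardHooks stores Python callables on the op, and Autograd fires them via InvokeBackwardHooks once the op's gradients have been applied. A minimal usage sketch, assuming an embedded interpreter and existing pybind11 bindings for OpBase (hypothetical usage, not part of this patch):

namespace py = pybind11;

// Assumes OpBase is exposed to Python, so callable(this) can cast the
// OpBase* argument when the hook is invoked.
py::object hook = py::eval("lambda op: print('one op finished backward')");
op_base->RegisterBackwardHooks(hook);
// Later, Autograd calls op_base->InvokeBackwardHooks(), which invokes every
// stored callable in registration order.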
void VarBase::RunBackward() {
  if (!pre_op_) return;
...
...@@ -123,22 +123,32 @@ class VarBase {
 private:
  VarBase(framework::Variable* var, VarBase* grad, bool stop_gradient)
      : name_(),
        var_desc_(nullptr),
        var_(var),
        grads_(grad),
        block_(nullptr),
        persistable_(false),
        stop_gradient_(stop_gradient),
        pre_op_(nullptr),
        pre_op_out_name_(),
        pre_op_out_idx_(-1) {}
 public:
  virtual ~VarBase() {
    // TODO(minqiyang): remove var desc from block desc
    if (var_) {
      delete var_;
      var_ = nullptr;
    }

    if (grads_) {
      delete grads_;
      grads_ = nullptr;
    }

    pre_op_ = nullptr;
    pre_op_out_idx_ = -1;
  }
  inline OpBase* PreOp() const { return pre_op_; }

...@@ -151,6 +161,14 @@ class VarBase {
  void RunBackward();
inline void ResetPreOp(OpBase* op) {
if (op == pre_op_) {
// clear pre_op info when op equals to var's pre_op
pre_op_ = nullptr;
pre_op_out_idx_ = -1;
}
}
  void TrackPreOp(OpBase* pre_op, const std::string& pre_op_out_name,
                  int pre_op_out_idx, bool pre_op_stop_gradient) {
    pre_op_ = pre_op;

...@@ -184,11 +202,15 @@ class VarBase {
    return string::Sprintf("%s@IGrad", var_desc_->Name());
  }

  std::string name_;
  framework::VarDesc* var_desc_;

  framework::Variable* var_;
  VarBase* grads_;

  framework::BlockDesc* block_;
  bool persistable_;
 private:
  bool stop_gradient_;
  OpBase* pre_op_;

...@@ -199,15 +221,27 @@
/* The wrapper for OpDesc which holds an OpDesc and an OpDesc of its
 * gradient. This object should be managed totally by the Python interpreter.
 */
class PYBIND11_HIDDEN OpBase {
 public:
  OpBase()
      : op_desc_(nullptr),
        forward_id_(-1),
        backward_id_(-1),
        trace_id_(-1),
        place_(platform::CPUPlace()),
        backward_hooks_() {}
  virtual ~OpBase() {
// TODO(minqiyang): remove op_desc from block_desc in tracer
//
// reset all output vars' pre op
for (auto iter : output_vars_) {
for (VarBase* var : iter.second) {
var->ResetPreOp(this);
}
}
// release resource
    for (framework::OpDesc* desc : grad_op_descs_) {
      delete desc;
    }

...@@ -215,6 +249,10 @@ class OpBase {
  std::map<std::string, std::vector<VarBase*>> ApplyGrad();
void RegisterBackwardHooks(const py::object& callable);
void InvokeBackwardHooks();
  // One of `op_desc_` or `forward_id_` is set, not both.
  // For pure python PyLayer, use `forward_id_`, otherwise, use op_desc_.
  framework::OpDesc* op_desc_;

...@@ -225,6 +263,7 @@
  // Note: each fwd op corresponds to a vector of bwd ops.
  std::vector<framework::OpDesc*> grad_op_descs_;
  int backward_id_;
int trace_id_;
  platform::Place place_;

...@@ -239,6 +278,8 @@
  std::vector<framework::VariableValueMap> grad_output_vars_;

  framework::BlockDesc* block_;
std::vector<py::object> backward_hooks_;
};

class Layer {
...
...@@ -14,15 +14,32 @@
#include "paddle/fluid/imperative/tracer.h"

#include <memory>
#include <set>
#include <unordered_map>
#include <unordered_set>

#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#ifdef WITH_GPERFTOOLS
#include "gperftools/profiler.h"
#endif
DEFINE_string(
    tracer_profile_fname, "",
    "Profiler filename for the imperative tracer, generated by gperftools. "
    "Only valid when compiled with WITH_PROFILER=ON. Empty to disable.");
namespace paddle {
namespace imperative {
static std::once_flag gTracerProfileOnce;
#ifdef WITH_GPERFTOOLS
static bool gTracerProfilerStarted = false;
#endif
void CreateGradOp(const framework::OpDesc& op_desc,
                  const std::unordered_set<std::string>& no_grad_set,
                  const std::vector<framework::BlockDesc*>& grad_sub_block,

...@@ -68,15 +85,36 @@ platform::Place GetExpectedPlace(platform::Place place, VarBasePtrMap inputs) {
  return result;
}
Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {
if (!FLAGS_tracer_profile_fname.empty()) {
std::call_once(gTracerProfileOnce, [] {
#ifdef WITH_GPERFTOOLS
ProfilerStart(FLAGS_tracer_profile_fname.c_str());
gTracerProfilerStarted = true;
#else
LOG(WARNING) << "Paddle is not compiled with gperftools. "
"FLAGS_tracer_profile_fname will be ignored";
#endif
});
}
}
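// Usage note (assuming a build with WITH_PROFILER=ON so gperftools is
// linked in): start the process with
//   --tracer_profile_fname=/tmp/imperative_tracer.prof
// and inspect the dump with gperftools' pprof tool; with the flag left
// empty (the default), no profiler is started.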
std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
                                    const VarBasePtrMap& outputs,
                                    framework::BlockDesc* block,
                                    const platform::Place expected_place,
                                    const bool stop_gradient) {
#ifdef WITH_GPERFTOOLS
if (gTracerProfilerStarted) {
ProfilerFlush();
}
#endif
  std::map<std::string, VarBase*> vars;

  framework::OpDesc* op_desc = op->op_desc_;
  VLOG(3) << "tracer tracing " << op_desc->Type() << " trace id "
          << op->trace_id_;
  op_desc->InferShape(*block);
  op_desc->InferVarType(block);

...@@ -99,11 +137,13 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
      if (inp->PreOp() && !inp->IsStopGradient()) {
        op->pre_ops_[it.first].push_back(inp->PreOp());
        op->pre_ops_out_idx_[it.first].push_back(inp->PreOpOutIdx());
        VLOG(3) << "add pre op " << inp->PreOp()->op_desc_->Type();
      } else {
        op->pre_ops_[it.first].push_back(nullptr);
      }
      VLOG(3) << "input vname " << inp->var_desc_->Name() << " "
              << inp->var_->IsInitialized() << " stop_gradient "
              << inp->IsStopGradient();
    }
  }

...@@ -155,6 +195,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
  op->grad_input_vars_.resize(op->grad_op_descs_.size());
  op->grad_output_vars_.resize(op->grad_op_descs_.size());

  for (size_t i = 0; i < op->grad_op_descs_.size(); ++i) {
    framework::OpDesc* grad_op_desc = op->grad_op_descs_[i];
    for (auto it : grad_op_desc->Inputs()) {

...@@ -167,7 +208,6 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
        PADDLE_ENFORCE(fwd_var_it != vars.end());
        // Forward inputs or outputs.
        grad_in_vars.push_back(fwd_var_it->second->var_);
      } else {
        VarBase* var = vars[var_it->second];
        if (!var->grads_->var_->IsInitialized()) {

...@@ -177,6 +217,8 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
          // Douts.
          grad_in_vars.push_back(var->grads_->var_);
        }

        vars_saved_for_backward.insert(it.first);
      }
    }
...
...@@ -40,7 +40,7 @@ platform::Place GetExpectedPlace(platform::Place place, VarBasePtrMap inputs);

class Tracer {
 public:
  explicit Tracer(framework::BlockDesc* root_block);

  virtual ~Tracer() {}
...
...@@ -16,6 +16,7 @@ add_subdirectory(utils)
if (TENSORRT_FOUND)
  add_subdirectory(tensorrt)
endif()
# add_subdirectory(anakin)

get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)
...
cc_library(anakin_engine SRCS engine.cc)
target_link_libraries(anakin_engine anakin anakin_saber_common)
cc_test(test_anakin_engine SRCS test_anakin_engine.cc DEPS anakin_engine)
add_subdirectory(convert)
cc_library(anakin_op_converter SRCS fc.cc registrar.cc DEPS anakin_engine framework_proto scope)
cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op)
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/fc.h"
#include <algorithm>
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::Precision;
using anakin::saber::NV;
using anakin::saber::X86;
using anakin::saber::Shape;
using anakin::PBlock;
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
void FcOpConverter::operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto x_name = op_desc.Input("X").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto *y_v = scope.FindVar(op_desc.Input("Y").front());
PADDLE_ENFORCE_NOT_NULL(y_v);
auto *y_t = y_v->GetMutable<framework::LoDTensor>();
auto input_name = op_desc.Input("X").front();
auto output_name = op_desc.Output("Out").front();
auto weight_shape = framework::vectorize2int(y_t->dims());
engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
engine_->AddOpAttr(op_name, "bias_term", false);
engine_->AddOpAttr(op_name, "axis", 1);
int out_dim = weight_shape[1];
engine_->AddOpAttr(op_name, "out_dim", out_dim);
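  // Pad the 2-D Fluid weight shape [in, out] with two trailing 1s so the
  // 4-D Shape constructed below becomes {in, out, 1, 1}, the layout the
  // Anakin weight block appears to expect.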
weight_shape.push_back(1);
weight_shape.push_back(1);
Shape anakin_shape(weight_shape);
framework::LoDTensor weight_tensor;
weight_tensor.Resize(y_t->dims());
TensorCopySync((*y_t), platform::CPUPlace(), &weight_tensor);
auto *weight1 =
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
std::copy_n(weight_tensor.data<float>(), weight_tensor.numel(), cpu_data);
weight1->d_tensor().set_shape(anakin_shape);
weight1->d_tensor().copy_from(weight1->h_tensor());
engine_->AddOpAttr(op_name, "weight_1", *weight1);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
class FcOpConverter : public AnakinOpConverter {
public:
FcOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope,
bool test_mode) override;
virtual ~FcOpConverter() {}
private:
};
static Registrar<FcOpConverter> register_fc_op_converter("fc");
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "framework/core/types.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/anakin/convert/registrar.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "saber/saber_types.h"
namespace paddle {
namespace inference {
namespace anakin {
using AnakinNvEngine =
AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>;
class AnakinOpConverter {
public:
AnakinOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope, bool test_mode) {}
void ConvertOp(const framework::proto::OpDesc &op,
const std::unordered_set<std::string> &parameters,
const framework::Scope &scope, AnakinNvEngine *engine,
bool test_mode = false) {
framework::OpDesc op_desc(op, nullptr);
std::string op_type = op_desc.Type();
std::shared_ptr<AnakinOpConverter> it{nullptr};
if (op_type == "mul") {
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL);
std::string Y = op_desc.Input("Y")[0];
std::cout << Y << parameters.count(Y) << std::endl;
if (parameters.count(Y)) {
it = OpRegister::instance()->Get("fc");
}
}
if (!it) {
it = OpRegister::instance()->Get(op_type);
}
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", op_type);
it->SetEngine(engine);
(*it)(op, scope, test_mode);
}
void ConvertBlock(const framework::proto::BlockDesc &block,
const std::unordered_set<std::string> &parameters,
const framework::Scope &scope, AnakinNvEngine *engine) {
std::unique_lock<std::mutex> lock(mutex_);
for (auto i = 0; i < block.ops_size(); i++) {
auto &op = block.ops(i);
ConvertOp(op, parameters, scope, engine);
}
}
void SetEngine(AnakinNvEngine *engine) { engine_ = engine; }
virtual ~AnakinOpConverter() {}
protected:
bool test_mode_;
AnakinNvEngine *engine_{nullptr};
private:
std::unordered_map<std::string, AnakinOpConverter *> converters_;
framework::Scope *scope_{nullptr};
std::mutex mutex_;
};
} // namespace anakin
} // namespace inference
} // namespace paddle
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
struct anakin_##op_type__##_converter \
: public ::paddle::framework::Registrar { \
anakin_##op_type__##_converter() { \
::paddle::inference:: \
Registry<paddle::inference::anakin::AnakinOpConverter>::Register< \
::paddle::inference::anakin::Converter__>(#op_type__); \
} \
}; \
anakin_##op_type__##_converter anakin_##op_type__##_converter__; \
int TouchConverterRegister_anakin_##op_type__() { \
anakin_##op_type__##_converter__.Touch(); \
return 0; \
}
#define USE_ANAKIN_CONVERTER(op_type__) \
extern int TouchConverterRegister_anakin_##op_type__(); \
static int use_op_converter_anakin_##op_type__ __attribute__((unused)) = \
TouchConverterRegister_anakin_##op_type__();
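A minimal usage sketch for the two macros above, mirroring the fc converter defined earlier (the file placement is an assumption):

// In the converter's .cc file, to register it under the op type "fc":
REGISTER_ANAKIN_OP_CONVERTER(fc, FcOpConverter);

// In any translation unit that must force the registration to be linked in:
USE_ANAKIN_CONVERTER(fc);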
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/registrar.h"
namespace paddle {
namespace inference {
namespace anakin {
std::shared_ptr<AnakinOpConverter> OpRegister::Get(const std::string &name) {
auto it = registry_.find(name);
if (it == registry_.end()) return nullptr;
return it->second();
}
OpRegister *OpRegister::instance() {
static OpRegister factory;
return &factory;
}
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
namespace paddle {
namespace inference {
namespace anakin {
class AnakinOpConverter;
class OpRegister {
public:
OpRegister() = default;
std::shared_ptr<AnakinOpConverter> Get(const std::string &name);
static OpRegister *instance();
void OpRegisterFn(const std::string &name,
std::function<std::shared_ptr<AnakinOpConverter>()> fn) {
registry_[name] = fn;
}
private:
using RegisterFnType = std::function<std::shared_ptr<AnakinOpConverter>()>;
std::map<std::string, std::function<std::shared_ptr<AnakinOpConverter>()>>
registry_;
};
template <typename T, typename... Args>
class Registrar {
public:
Registrar(const std::string &name, Args... args) {
std::shared_ptr<AnakinOpConverter> converter =
std::make_shared<T>(std::move(args)...);
OpRegister::instance()->OpRegisterFn(name,
[converter]() { return converter; });
}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
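A short usage sketch tying OpRegister and Registrar together, matching how the fc converter above is wired up:

// At namespace scope (as in fc.h): construct one Registrar so the converter
// is registered during static initialization.
static Registrar<FcOpConverter> register_fc_op_converter("fc");

// At lookup time (as in op_converter.h): Get returns the registered
// converter, or nullptr for a name that was never registered.
std::shared_ptr<AnakinOpConverter> conv = OpRegister::instance()->Get("fc");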
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/fc.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
TEST(fc_op, test) {
auto fc_converter = OpRegister::instance()->Get("fc");
ASSERT_TRUE(fc_converter != nullptr);
// Registrar<FcOpConverter> register_fc("fc");
// auto fc = std::make_shared<FcOpConverter>();
std::unordered_set<std::string> parameters({"mul_y"});
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
validator.DeclInputVar("mul_x", {1, 1, 1, 1});
validator.DeclParamVar("mul_y", {1, 2});
validator.DeclOutputVar("mul_out", {1, 1, 1, 2});
// Prepare Op description
framework::OpDesc desc;
desc.SetType("mul");
desc.SetInput("X", {"mul_x"});
desc.SetInput("Y", {"mul_y"});
desc.SetOutput("Out", {"mul_out"});
int num_flatten_dims = 3;
desc.SetAttr("x_num_col_dims", num_flatten_dims);
validator.SetOp(*desc.Proto());
validator.Execute(10);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(mul);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <map>
#include <memory>
#include <random>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/enforce.h"
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::Precision;
using anakin::saber::NV;
using anakin::saber::X86;
using anakin::saber::Shape;
using anakin::PBlock;
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
/*
* Get a random float value in [low, high).
*/
static float random(float low, float high) {
static std::random_device rd;
static std::mt19937 mt(rd());
std::uniform_real_distribution<double> dist(low, high);
return static_cast<float>(dist(mt));
}
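// Fill a LoDTensor with uniform random values: the data is generated into a
// CPU staging tensor first and then copied synchronously to the target place,
// since device memory cannot be written directly from host code.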
void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
const platform::DeviceContext& ctx) {
auto dims = tensor->dims();
size_t num_elements = analysis::AccuDims(dims, dims.size());
PADDLE_ENFORCE_GT(num_elements, 0);
platform::CPUPlace cpu_place;
framework::LoDTensor temp_tensor;
temp_tensor.Resize(dims);
auto* temp_data = temp_tensor.mutable_data<float>(cpu_place);
for (size_t i = 0; i < num_elements; i++) {
*(temp_data + i) = random(0., 1.);
}
TensorCopySync(temp_tensor, place, tensor);
}
/*
* Helps to validate the correctness of a Fluid Op against the corresponding
* Anakin layer.
*/
class AnakinConvertValidation {
using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>;
public:
AnakinConvertValidation() = delete;
AnakinConvertValidation(const std::unordered_set<std::string>& parameters,
const framework::Scope& scope)
: parameters_(parameters), scope_(scope), place_(0) {
PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
engine_.reset(new AnakinEngine<NV, Precision::FP32>(true));
}
// Declare a Variable as input with random initialization.
void DeclInputVar(const std::string& name,
const std::vector<int> tensor_dims) {
DeclVar(name, tensor_dims);
// should declare the anakin input here.
}
void DeclParamVar(const std::string& name, const std::vector<int> dim_vec) {
DeclVar(name, dim_vec);
}
void DeclOutputVar(const std::string& name, const std::vector<int> dim_vec) {
DeclVar(name, dim_vec);
// should declare anakin output here.
}
void DeclVar(const std::string& name, const std::vector<int> dim_vec) {
platform::CUDADeviceContext ctx(place_);
auto* x = scope_.Var(name);
auto* x_tensor = x->GetMutable<framework::LoDTensor>();
x_tensor->Resize(framework::make_ddim(dim_vec));
RandomizeTensor(x_tensor, place_, ctx);
}
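// SetOp builds the reference Fluid operator and feeds the same OpDesc through
// the Anakin converter, then freezes the graph, declares the input shapes,
// optimizes and initializes the engine, mirroring the deployment flow.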
void SetOp(const framework::proto::OpDesc& desc) {
op_ = framework::OpRegistry::CreateOp(desc);
op_desc_.reset(new framework::OpDesc(desc, nullptr));
// should init anakin engine here.
Singleton<AnakinOpConverter>::Global().ConvertOp(
desc, parameters_, scope_, engine_.get(), true /*test_mode*/);
engine_->Freeze();
for (const auto& input : op_desc_->InputArgumentNames()) {
if (parameters_.count(input)) continue;
auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(scope_,
input);
auto t_shape = framework::vectorize2int(t.dims());
engine_->SetInputShape(input, t_shape);
}
engine_->Optimize();
engine_->InitGraph();
}
// We use the set 'neglected_output' here because for some ops, such as
// batch norm, some outputs specified in the op desc are only used during
// training, so we should ignore those outputs during inference.
void Execute(int batch_size,
std::unordered_set<std::string> neglected_output = {}) {
// Execute Fluid Op
platform::CUDADeviceContext ctx(place_);
op_->Run(scope_, place_);
std::map<std::string, framework::LoDTensor*> inputs;
for (const auto& input : op_desc_->InputArgumentNames()) {
if (parameters_.count(input)) continue;
auto* var = scope_.FindVar(input);
auto tensor = var->GetMutable<framework::LoDTensor>();
inputs.insert({input, tensor});
}
std::map<std::string, framework::LoDTensor*> outputs;
std::vector<std::vector<float>> fluid_outputs;
for (const auto& output : op_desc_->OutputArgumentNames()) {
if (neglected_output.count(output)) continue;
std::vector<float> fluid_out;
auto* var = scope_.FindVar(output);
auto tensor = var->GetMutable<framework::LoDTensor>();
framework::TensorToVector(*tensor, ctx, &fluid_out);
fluid_outputs.push_back(fluid_out);
outputs.insert({output, tensor});
}
engine_->Execute(inputs, outputs);
int i_output = 0;
for (const auto& output : op_desc_->OutputArgumentNames()) {
if (neglected_output.count(output)) continue;
std::vector<float> anakin_out;
auto* var = scope_.FindVar(output);
auto tensor = var->GetMutable<framework::LoDTensor>();
framework::TensorToVector(*tensor, ctx, &anakin_out);
size_t anakin_out_size = anakin_out.size();
auto fluid_out = fluid_outputs[i_output++];
for (size_t i = 0; i < anakin_out_size; i++) {
LOG(INFO) << "Output[" << i << "]: anakin[" << anakin_out[i] << "], "
<< "fluid[" << fluid_out[i] << "]";
}
}
}
framework::Scope& scope() { return scope_; }
private:
std::unique_ptr<AnakinNvEngineT> engine_{nullptr};
cudaStream_t stream_;
std::unique_ptr<framework::OperatorBase> op_;
std::unique_ptr<framework::OpDesc> op_desc_;
const std::unordered_set<std::string>& parameters_;
framework::Scope& scope_;
platform::CUDAPlace place_;
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/engine.h"
#include <algorithm>
#include <cstring>
#include <map>
#include <utility>
#include "paddle/fluid/framework/ddim.h"
using anakin::Precision;
using anakin::OpRunType;
using paddle::framework::LoDTensor;
template <typename T, Precision P, OpRunType O>
using AnakinNetT = anakin::Net<T, P, O>;
template <typename T, Precision P>
using AnakinGraphT = anakin::graph::Graph<T, P>;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(bool need_summary)
: graph_(new AnakinGraphT<TargetT, PrecisionType>()),
net_(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary)) {}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
AnakinEngine<TargetT, PrecisionType, RunType>::~AnakinEngine() {}
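// SetInputShape records the shape as an "input_shape" attribute on the graph
// node of the given input, which Anakin uses when the graph is later
// optimized and the net is initialized.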
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::SetInputShape(
const std::string &name, std::vector<int> shape) {
graph_->AddOpAttr<::anakin::PTuple<int>>(name, "input_shape",
std::move(shape));
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::InitGraph() {
net_->init(*graph_);
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::AddOp(
const std::string &name, const std::string &type,
const std::vector<std::string> &inputs,
const std::vector<std::string> &outputs) {
PADDLE_ENFORCE(graph_->AddOp(name, type, inputs, outputs),
"Failed to add operation %s to the anakin graph.", name);
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
const std::map<std::string, framework::LoDTensor *> &inputs,
const std::map<std::string, framework::LoDTensor *> &outputs) {
for (const auto &input : inputs) {
auto *tensor = input.second;
auto *data = tensor->data<float>();
auto shape = framework::vectorize2int(tensor->dims());
::anakin::saber::Shape anakin_shape(shape);
auto *anakin_input = net_->get_in(input.first);
::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
anakin_shape);
anakin_input->share_from(tmp_anakin_tensor);
}
for (const auto &output : outputs) {
auto *tensor = output.second;
auto *data = tensor->data<float>();
auto shape = framework::vectorize2int(tensor->dims());
::anakin::saber::Shape anakin_shape(shape);
auto *anakin_output = net_->get_out(output.first);
::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
anakin_shape);
anakin_output->share_from(tmp_anakin_tensor);
}
net_->prediction();
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Freeze() {
PADDLE_ENFORCE(graph_->Freeze(), "Failed to freeze the anakin subgraph.");
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Optimize() {
PADDLE_ENFORCE(graph_->Optimize(), "Failed to optimize the anakin graph.");
}
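// Note: Clone() below duplicates only the executable net; the newly
// constructed engine keeps its own fresh graph, so clones should be made
// from an engine that has already been built.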
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
std::unique_ptr<AnakinEngine<TargetT, PrecisionType, RunType>>
AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {
auto *engine = new AnakinEngine();
engine->net_ = net_->Clone();
return std::unique_ptr<AnakinEngine>(engine);
}
template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>;
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "framework/core/net/net.h"
#include "framework/core/types.h"
#include "framework/graph/graph.h"
#include "saber/saber_types.h"
namespace anakin {
template <typename, Precision, OpRunType>
class Net;
namespace graph {
template <typename, Precision>
class Graph;
} // namespace graph
} // namespace anakin
namespace paddle {
namespace inference {
namespace anakin {
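// Typical lifecycle (exercised in test_anakin_engine.cc below): build the
// graph with AddOp/AddOpAttr, then Freeze(), SetInputShape(), Optimize() and
// InitGraph(), and finally Execute() with LoDTensor inputs/outputs.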
template <typename TargetT, ::anakin::Precision PrecisionType,
::anakin::OpRunType RunType = ::anakin::OpRunType::ASYNC>
class AnakinEngine {
public:
explicit AnakinEngine(bool need_summary = false);
~AnakinEngine();
void InitGraph();
void SetInputShape(const std::string &name, std::vector<int> shape);
void AddOp(const std::string &name, const std::string &type,
const std::vector<std::string> &inputs,
const std::vector<std::string> &outputs);
template <typename T>
void AddOpAttr(const std::string &op_name, const std::string &attr_name,
const T &attr_value) {
PADDLE_ENFORCE(graph_->AddOpAttr(op_name, attr_name, attr_value),
"Failed to add the operation's attribute.");
}
std::unique_ptr<AnakinEngine> Clone();
void Freeze();
void Optimize();
void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
const std::map<std::string, framework::LoDTensor *> &outputs);
private:
using NetT = ::anakin::Net<TargetT, PrecisionType, RunType>;
using GraphT = ::anakin::graph::Graph<TargetT, PrecisionType>;
std::unique_ptr<GraphT> graph_;
std::unique_ptr<NetT> net_;
};
} // namespace anakin
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <map>
#include "framework/core/net/net.h"
#include "framework/graph/graph.h"
#include "framework/graph/graph_global_mem.h"
#include "paddle/fluid/inference/anakin/engine.h"
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::Precision;
using anakin::saber::NV;
using anakin::saber::X86;
using anakin::saber::Shape;
using anakin::PBlock;
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
class TestAnakinEngine : public ::testing::Test {
protected:
void SetUp() override;
void TearDown() override {}
protected:
using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>;
std::unique_ptr<AnakinNvEngineT> engine_{nullptr};
};
void TestAnakinEngine::SetUp() {
engine_.reset(new AnakinEngine<NV, Precision::FP32>(true));
}
TEST_F(TestAnakinEngine, Execute) {
engine_->AddOp("op1", "Dense", {"x"}, {"y"});
engine_->AddOpAttr("op1", "out_dim", 2);
engine_->AddOpAttr("op1", "bias_term", false);
engine_->AddOpAttr("op1", "axis", 1);
std::vector<int> shape = {1, 1, 1, 2};
Shape tmp_shape(shape);
auto *weight1 =
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(tmp_shape);
float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
cpu_data[0] = 2.;
cpu_data[1] = 2.;  // set both output-channel weights; [1] was uninitialized
weight1->d_tensor().set_shape(tmp_shape);
weight1->d_tensor().copy_from(weight1->h_tensor());
engine_->AddOpAttr("op1", "weight_1", *weight1);
engine_->Freeze();
engine_->SetInputShape("x", {1, 1, 1, 1});
engine_->Optimize();
engine_->InitGraph();
framework::LoDTensor x;
framework::LoDTensor y;
x.Resize({1, 1, 1, 1});
y.Resize({1, 1, 1, 2});
auto *x_data = x.mutable_data<float>(platform::CUDAPlace());
float x_data_cpu[] = {1.};
cudaMemcpy(x_data, x_data_cpu, sizeof(float), cudaMemcpyHostToDevice);
std::map<std::string, framework::LoDTensor *> inputs = {{"x", &x}};
auto *y_data = y.mutable_data<float>(platform::CUDAPlace());
std::map<std::string, framework::LoDTensor *> outputs = {{"y", &y}};
engine_->Execute(inputs, outputs);
auto *y_data_gpu = y_data;
float y_data_cpu[2];
cudaMemcpy(y_data_cpu, y_data_gpu, sizeof(float) * 2, cudaMemcpyDeviceToHost);
LOG(INFO) << "output value: " << y_data_cpu[0] << ", " << y_data_cpu[1];
}
} // namespace anakin
} // namespace inference
} // namespace paddle
@@ -13,7 +13,9 @@
 // limitations under the License.
 #include "paddle/fluid/inference/api/paddle_pass_builder.h"
+#ifdef PADDLE_WITH_CUDA
+#include <cudnn.h>
+#endif
 #include <glog/logging.h>
 namespace paddle {
...
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/cudnn_desc.h"
namespace paddle {
namespace operators {
using framework::Tensor;
using platform::ActivationDescriptor;
using platform::TensorDescriptor;
template <typename Functor>
class CudnnActivationKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const framework::Tensor* X = nullptr;
framework::Tensor* Out = nullptr;
ExtractActivationTensor(context, &X, &Out);
ActivationDescriptor act_desc;
TensorDescriptor x_desc, out_desc;
x_desc.set(detail::Ref(X));
out_desc.set(detail::Ref(Out));
}
};
} // namespace operators
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/cudnn_desc.h"
namespace paddle {
namespace operators {
using framework::Tensor;
using platform::ActivationDescriptor;
using platform::TensorDescriptor;
using platform::CUDADeviceContext;
template <typename T>
struct CudnnActivationFunctor {
using ELEMENT_TYPE = T;
CudnnActivationFunctor(const CUDADeviceContext& ctx, const T& c,
const cudnnActivationMode_t& m)
: ctx_(ctx), coef_(c), mode_(m) {}
void operator()(const Tensor& x, Tensor* out) {
ActivationDescriptor act_desc;
act_desc.set(mode_, coef_);
TensorDescriptor x_desc, out_desc;
x_desc.set(x);
out_desc.set(detail::Ref(out));
PADDLE_ENFORCE(platform::dynload::cudnnActivationForward(
ctx_.cudnn_handle(), act_desc.desc(),
platform::CudnnDataType<T>::kOne(), x_desc.desc(), x.data<T>(),
platform::CudnnDataType<T>::kZero(), out_desc.desc(),
out->mutable_data<T>(ctx_.GetPlace())));
}
const CUDADeviceContext& ctx_;
const T coef_;
const cudnnActivationMode_t mode_;
};
template <typename T>
struct CudnnActivationGradFunctor {
using ELEMENT_TYPE = T;
CudnnActivationGradFunctor(const CUDADeviceContext& ctx, const T& c,
const cudnnActivationMode_t& m)
: ctx_(ctx), coef_(c), mode_(m) {}
void operator()(const Tensor& x, const Tensor& out, const Tensor& dout,
Tensor* dx) {
ActivationDescriptor act_desc;
act_desc.set(mode_, coef_);
TensorDescriptor x_desc, out_desc, dout_desc, dx_desc;
x_desc.set(x);
out_desc.set(out);
dout_desc.set(dout);
dx_desc.set(detail::Ref(dx));
PADDLE_ENFORCE(platform::dynload::cudnnActivationBackward(
ctx_.cudnn_handle(), act_desc.desc(),
platform::CudnnDataType<T>::kOne(), out_desc.desc(), out.data<T>(),
dout_desc.desc(), dout.data<T>(), x_desc.desc(), x.data<T>(),
platform::CudnnDataType<T>::kZero(), dx_desc.desc(),
dx->mutable_data<T>(ctx_.GetPlace())));
}
const CUDADeviceContext& ctx_;
const T coef_;
const cudnnActivationMode_t mode_;
};
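// The concrete functors below only select a cudnnActivationMode_t and a
// coefficient; cuDNN interprets the coefficient as the clipping threshold for
// CUDNN_ACTIVATION_CLIPPED_RELU and ignores it for RELU, SIGMOID and TANH.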
template <typename T>
struct CudnnReluFunctor : public CudnnActivationFunctor<T> {
explicit CudnnReluFunctor(const CUDADeviceContext& ctx)
: CudnnActivationFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_RELU) {}
};
template <typename T>
struct CudnnReluGradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnReluGradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_RELU) {}
};
template <typename T>
struct CudnnRelu6Functor : public CudnnActivationFunctor<T> {
explicit CudnnRelu6Functor(const CUDADeviceContext& ctx)
: CudnnActivationFunctor<T>(ctx, 6.0, CUDNN_ACTIVATION_CLIPPED_RELU) {}
};
template <typename T>
struct CudnnRelu6GradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnRelu6GradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 6.0, CUDNN_ACTIVATION_CLIPPED_RELU) {}
};
template <typename T>
struct CudnnSigmoidFunctor : public CudnnActivationFunctor<T> {
explicit CudnnSigmoidFunctor(const CUDADeviceContext& ctx)
: CudnnActivationFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_SIGMOID) {}
};
template <typename T>
struct CudnnSigmoidGradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnSigmoidGradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_SIGMOID) {}
};
template <typename T>
struct CudnnTanhFunctor : public CudnnActivationFunctor<T> {
explicit CudnnTanhFunctor(const CUDADeviceContext& ctx)
: CudnnActivationFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_TANH) {}
};
template <typename T>
struct CudnnTanhGradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnTanhGradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_TANH) {}
};
template <typename Functor>
class CudnnActivationKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
using T = typename Functor::ELEMENT_TYPE;
void Compute(const framework::ExecutionContext& context) const override {
const framework::Tensor* X = nullptr;
framework::Tensor* Out = nullptr;
ExtractActivationTensor(context, &X, &Out);
Out->mutable_data<T>(context.GetPlace());
auto& dev_ctx = context.template device_context<CUDADeviceContext>();
Functor functor(dev_ctx);
functor(detail::Ref(X), Out);
}
};
template <typename Functor>
class CudnnActivationGradKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
using T = typename Functor::ELEMENT_TYPE;
void Compute(const framework::ExecutionContext& context) const override {
const framework::Tensor *X, *Out, *dOut;
X = Out = dOut = nullptr;
framework::Tensor* dX = nullptr;
ExtractActivationGradTensor(context, &X, &Out, &dOut, &dX);
dX->mutable_data<T>(context.GetPlace());
auto& dev_ctx = context.template device_context<CUDADeviceContext>();
Functor functor(dev_ctx);
functor(detail::Ref(X), detail::Ref(Out), detail::Ref(dOut), dX);
}
};
} // namespace operators
} // namespace paddle
namespace plat = paddle::platform;
namespace ops = paddle::operators;
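// Each FOR_EACH_CUDNN_OP_FUNCTOR entry expands to CUDNN-library kernel
// registrations, in float and double, for the forward op and its _grad op.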
#define FOR_EACH_CUDNN_OP_FUNCTOR(__macro) \
__macro(relu, CudnnReluFunctor, CudnnReluGradFunctor); \
__macro(relu6, CudnnRelu6Functor, CudnnRelu6GradFunctor); \
__macro(sigmoid, CudnnSigmoidFunctor, CudnnSigmoidGradFunctor); \
__macro(tanh, CudnnTanhFunctor, CudnnTanhGradFunctor)
#define REGISTER_ACTIVATION_CUDNN_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_KERNEL(act_type, CUDNN, plat::CUDAPlace, \
ops::CudnnActivationKernel<ops::functor<float>>, \
ops::CudnnActivationKernel<ops::functor<double>>); \
REGISTER_OP_KERNEL( \
act_type##_grad, CUDNN, plat::CUDAPlace, \
ops::CudnnActivationGradKernel<ops::grad_functor<float>>, \
ops::CudnnActivationGradKernel<ops::grad_functor<double>>);
FOR_EACH_CUDNN_OP_FUNCTOR(REGISTER_ACTIVATION_CUDNN_KERNEL);
@@ -16,29 +16,36 @@ limitations under the License. */
 #include <string>
 #include "paddle/fluid/operators/mkldnn/mkldnn_activation_op.h"
 #include "paddle/fluid/platform/port.h"
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/fluid/platform/cudnn_helper.h"
+#endif
 namespace paddle {
 namespace operators {
 using paddle::framework::Tensor;
 #define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT)                  \
   class OP_NAME##OpMaker                                                   \
       : public ::paddle::framework::OpProtoAndCheckerMaker {               \
    public:                                                                 \
    void Make() override {                                                  \
      AddInput("X", "Input of " #OP_NAME " operator");                      \
      AddOutput("Out", "Output of " #OP_NAME " operator");                  \
      AddAttr<bool>("use_mkldnn",                                           \
                    "(bool, default false) Only used in mkldnn kernel")     \
          .SetDefault(false);                                               \
+     AddAttr<bool>("use_cudnn",                                            \
+                   "(bool, default false) Only used in cudnn kernel; "     \
+                   "cudnn must be installed")                              \
+         .SetDefault(false);                                               \
      AddAttr<bool>(                                                        \
          "is_test",                                                        \
          "(bool, default false) Set to true for inference only, false "    \
          "for training. Some layers may run faster when this is true.")    \
          .SetDefault(false);                                               \
      AddComment(OP_COMMENT);                                               \
    }                                                                       \
  }
 #define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME, KERNEL_TYPE) \
@@ -67,6 +74,12 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx,
                                       const std::string& name) {
   framework::LibraryType library{framework::LibraryType::kPlain};
   framework::DataLayout layout = framework::DataLayout::kAnyLayout;
+#ifdef PADDLE_WITH_CUDA
+  auto it1 = oper.Attrs().find("use_cudnn");
+  if (it1 != oper.Attrs().end() && platform::CanCUDNNBeUsed(ctx)) {
+    library = framework::LibraryType::kCUDNN;
+  }
+#endif
 #ifdef PADDLE_WITH_MKLDNN
   auto it = oper.Attrs().find("use_mkldnn");
   if (library == framework::LibraryType::kPlain && it != oper.Attrs().end() &&
...
@@ -11,6 +11,7 @@ limitations under the License. */
 #pragma once
 #include <glog/logging.h>
+#include <algorithm>
 #include <string>
 #include <unordered_set>
 #include <utility>
@@ -24,6 +25,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/detail/safe_ref.h"
+#include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/platform/float16.h"
 #ifdef PADDLE_WITH_MKLDNN
@@ -41,53 +43,115 @@ static std::unordered_set<std::string> InplaceOpSet = {
     "floor", "reciprocal", "relu6", "soft_relu", "hard_sigmoid",
 };
+static bool IsInplace(const std::string& op) {
+  bool inplace = InplaceOpSet.count(op);
+  // For a *_grad op, strip the "_grad" suffix and look up the base op.
+  const std::string kGradSuffix = "_grad";
+  if (!inplace && op.size() > kGradSuffix.size() &&
+      op.compare(op.size() - kGradSuffix.size(), kGradSuffix.size(),
+                 kGradSuffix) == 0) {
+    inplace = InplaceOpSet.count(op.substr(0, op.size() - kGradSuffix.size()));
+  }
+  return inplace;
+}
 /* The following operator can be used to process SelectedRows, because the
  * output of those operator for zero is zero too.
  */
 static std::unordered_set<std::string> CanBeUsedBySelectedRows = {
     "abs", "abs_grad", "square", "square_grad", "sqrt", "sqrt_grad"};
-static bool IsInplace(std::string op) { return InplaceOpSet.count(op); }
-template <typename DeviceContext, typename Functor>
-class ActivationKernel
-    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
- public:
-  using T = typename Functor::ELEMENT_TYPE;
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto x_var = context.InputVar("X");
-    auto out_var = context.OutputVar("Out");
-    PADDLE_ENFORCE(x_var != nullptr,
-                   "Cannot get input Variable X, variable name = %s",
-                   context.op().Input("X"));
-    PADDLE_ENFORCE(out_var != nullptr,
-                   "Cannot get output Variable Out, variable name = %s",
-                   context.op().Output("Out"));
-    framework::Tensor X, *Out;
-    if (CanBeUsedBySelectedRows.count(context.op().Type())) {
-      X = detail::Ref(
-          paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var),
-          "Cannot get input Tensor X, variable name = %s",
-          context.op().Input("X"));
-      Out = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
-          out_var);
-    } else {
-      X = detail::Ref(context.Input<framework::Tensor>("X"),
-                      "Cannot get input Tensor X, variable name = %s",
-                      context.op().Input("X"));
-      Out = context.Output<framework::Tensor>("Out");
-    }
-    PADDLE_ENFORCE(Out != nullptr,
-                   "Cannot get output tensor Out, variable name = %s",
-                   context.op().Output("Out"));
+inline void ExtractActivationTensor(const framework::ExecutionContext& context,
+                                    const framework::Tensor** X,
+                                    framework::Tensor** Out) {
+  auto x_var = context.InputVar("X");
+  auto out_var = context.OutputVar("Out");
+  PADDLE_ENFORCE(x_var != nullptr,
+                 "Cannot get input Variable X, variable name = %s",
+                 context.op().Input("X"));
+  PADDLE_ENFORCE(out_var != nullptr,
+                 "Cannot get output Variable Out, variable name = %s",
+                 context.op().Output("Out"));
+  if (CanBeUsedBySelectedRows.count(context.op().Type())) {
+    *X = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var);
+    *Out = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
+        out_var);
+  } else {
+    *X = context.Input<framework::Tensor>("X");
+    *Out = context.Output<framework::Tensor>("Out");
+  }
+  PADDLE_ENFORCE(*Out != nullptr,
+                 "Cannot get output tensor Out, variable name = %s",
+                 context.op().Output("Out"));
+}
+inline void ExtractActivationGradTensor(
+    const framework::ExecutionContext& context, const framework::Tensor** X,
+    const framework::Tensor** Out, const framework::Tensor** dOut,
+    framework::Tensor** dX) {
+  auto out_var = context.InputVar("Out");
+  auto out_grad_var = context.InputVar(framework::GradVarName("Out"));
+  auto x_grad_var = context.OutputVar(framework::GradVarName("X"));
+  PADDLE_ENFORCE(out_var != nullptr,
+                 "Cannot get input Variable Out, variable name = %s",
+                 context.op().Input("Out"));
+  PADDLE_ENFORCE(out_grad_var != nullptr,
+                 "Cannot get input Variable %s, variable name = %s",
+                 framework::GradVarName("Out"),
+                 context.op().Input(framework::GradVarName("Out")));
+  PADDLE_ENFORCE(x_grad_var != nullptr,
+                 "Cannot get output Variable %s, variable name = %s",
+                 framework::GradVarName("X"),
+                 context.op().Output(framework::GradVarName("X")));
+  if (CanBeUsedBySelectedRows.count(context.op().Type())) {
+    *Out = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var);
+    *dOut = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(
+        *out_grad_var);
+    *dX = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
+        x_grad_var);
+  } else {
+    *Out = context.Input<framework::Tensor>("Out");
+    *dOut = context.Input<framework::Tensor>(framework::GradVarName("Out"));
+    *dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
+  }
+  PADDLE_ENFORCE(*dX != nullptr,
+                 "Cannot get output tensor %s, variable name = %s",
+                 framework::GradVarName("X"),
+                 context.op().Output(framework::GradVarName("X")));
+  bool inplace = IsInplace(context.op().Type());
+  if (!inplace) {
+    auto x_var = context.InputVar("X");
+    PADDLE_ENFORCE(x_var != nullptr,
+                   "Cannot get input tensor X, variable name = %s",
+                   context.op().Input("X"));
+    if (CanBeUsedBySelectedRows.count(context.op().Type())) {
+      *X = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var);
+    } else {
+      *X = context.Input<framework::Tensor>("X");
+    }
+  } else {
+    VLOG(10) << " Inplace activation of Op : " << context.op().Type();
+    *X = *dX;
+  }
+}
+template <typename DeviceContext, typename Functor>
+class ActivationKernel
+    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
+ public:
+  using T = typename Functor::ELEMENT_TYPE;
+  void Compute(const framework::ExecutionContext& context) const override {
+    const framework::Tensor* X = nullptr;
+    framework::Tensor* Out = nullptr;
+    ExtractActivationTensor(context, &X, &Out);
     Out->mutable_data<T>(context.GetPlace());
-    auto x = framework::EigenVector<T>::Flatten(X);
-    auto out = framework::EigenVector<T>::Flatten(*Out);
+    auto x = framework::EigenVector<T>::Flatten(detail::Ref(X));
+    auto out = framework::EigenVector<T>::Flatten(detail::Ref(Out));
     auto* place =
         context.template device_context<DeviceContext>().eigen_device();
     Functor functor;
@@ -106,55 +170,15 @@ class ActivationGradKernel
  public:
   using T = typename Functor::ELEMENT_TYPE;
   void Compute(const framework::ExecutionContext& context) const override {
-    auto out_var = context.InputVar("Out");
-    auto out_grad_var = context.InputVar(framework::GradVarName("Out"));
-    auto x_grad_var = context.OutputVar(framework::GradVarName("X"));
-    PADDLE_ENFORCE(out_var != nullptr,
-                   "Cannot get input Variable Out, variable name = %s",
-                   context.op().Input("Out"));
-    PADDLE_ENFORCE(out_grad_var != nullptr,
-                   "Cannot get input Variable %s, variable name = %s",
-                   framework::GradVarName("Out"),
-                   context.op().Input(framework::GradVarName("Out")));
-    PADDLE_ENFORCE(x_grad_var != nullptr,
-                   "Cannot get output Variable %s, variable name = %s",
-                   framework::GradVarName("X"),
-                   context.op().Output(framework::GradVarName("X")));
-    framework::Tensor Out, dOut, *dX;
-    if (CanBeUsedBySelectedRows.count(context.op().Type())) {
-      Out = detail::Ref(
-          paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var),
-          "Cannot get input Tensor Out, variable name = %s",
-          context.op().Input("Out"));
-      dOut =
-          detail::Ref(paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(
-                          *out_grad_var),
-                      "Cannot get input Tensor %s, variable name = %s",
-                      framework::GradVarName("Out"),
-                      context.op().Input(framework::GradVarName("Out")));
-      dX = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
-          x_grad_var);
-    } else {
-      Out = detail::Ref(context.Input<framework::Tensor>("Out"),
-                        "Cannot get input Tensor Out, variable name = %s",
-                        context.op().Input("Out"));
-      dOut = detail::Ref(
-          context.Input<framework::Tensor>(framework::GradVarName("Out")),
-          "Cannot get input Tensor %s, variable name = %s",
-          framework::GradVarName("Out"),
-          context.op().Input(framework::GradVarName("Out")));
-      dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
-    }
-    PADDLE_ENFORCE(dX != nullptr,
-                   "Cannot get output tensor %s, variable name = %s",
-                   framework::GradVarName("X"),
-                   context.op().Output(framework::GradVarName("X")));
+    const framework::Tensor *X, *Out, *dOut;
+    framework::Tensor* dX = nullptr;
+    X = Out = dOut = nullptr;
+    ExtractActivationGradTensor(context, &X, &Out, &dOut, &dX);
     dX->mutable_data<T>(context.GetPlace());
-    auto dout = framework::EigenVector<T>::Flatten(dOut);
-    auto out = framework::EigenVector<T>::Flatten(Out);
-    auto dx = framework::EigenVector<T>::Flatten(*dX);
+    auto dout = framework::EigenVector<T>::Flatten(detail::Ref(dOut));
+    auto out = framework::EigenVector<T>::Flatten(detail::Ref(Out));
+    auto dx = framework::EigenVector<T>::Flatten(detail::Ref(dX));
+    auto x = framework::EigenVector<T>::Flatten(detail::Ref(X));
    auto* place =
        context.template device_context<DeviceContext>().eigen_device();
    Functor functor;
@@ -162,27 +186,7 @@ class ActivationGradKernel
     for (auto& attr : attrs) {
       *attr.second = context.Attr<float>(attr.first);
     }
-    bool inplace = functor.Inplace();
-    if (!inplace) {
-      auto x_var = context.InputVar("X");
-      PADDLE_ENFORCE(x_var != nullptr,
-                     "Cannot get input tensor X, variable name = %s",
-                     context.op().Input("X"));
-      framework::Tensor X;
-      if (CanBeUsedBySelectedRows.count(context.op().Type())) {
-        X = detail::Ref(
-            paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var));
-      } else {
-        X = detail::Ref(context.Input<framework::Tensor>("X"));
-      }
-      auto x = framework::EigenVector<T>::Flatten(X);
-      functor(*place, x, out, dout, dx);
-    } else {
-      VLOG(10) << " Inplace activation ";
-      auto x = framework::EigenVector<T>::Flatten(*dX);
-      functor(*place, x, out, dout, dx);
-    }
+    functor(*place, x, out, dout, dx);
   }
 };
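With IsInplace() now resolved centrally inside ExtractActivationGradTensor, the per-functor Inplace() overrides have become dead code, and the following hunks delete them one by one: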
@@ -214,7 +218,6 @@ struct SigmoidFunctor : public BaseActivationFunctor<T> {
 template <typename T>
 struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
-  bool Inplace() const { return IsInplace("sigmoid"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -269,7 +272,6 @@ struct ExpFunctor : public BaseActivationFunctor<T> {
 template <typename T>
 struct ExpGradFunctor : public BaseActivationFunctor<T> {
-  bool Inplace() const { return IsInplace("exp"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -288,7 +290,6 @@ struct ReluFunctor : public BaseActivationFunctor<T> {
 template <typename T>
 struct ReluGradFunctor : public BaseActivationFunctor<T> {
-  bool Inplace() const { return IsInplace("relu"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -301,8 +302,28 @@ template <typename T>
 struct GeluFunctor : public BaseActivationFunctor<T> {
   template <typename Device, typename X, typename Out>
   void operator()(Device d, X x, Out out) const {
+// Because the execution or device context cannot be delivered here, keep the
+// macro so this MKLML fast path is compiled out for NVCC builds.
+#if defined(PADDLE_WITH_MKLML) && !defined(_WIN32) && !defined(__APPLE__) && \
+    !defined(__OSX__) && !defined(PADDLE_WITH_CUDA)
+    auto x_data = x.data();
+    auto out_data = out.data();
+    int n = std::min(x.size(), out.size());
+    std::memset(out_data, 0, n * sizeof(T));
+    math::CBlas<T>::AXPY(n, static_cast<T>(M_SQRT1_2), x_data, 1, out_data, 1);
+    math::CBlas<T>::VMERF(n, out_data, out_data, VML_LA);
+    for (int i = 0; i < n; i++) {
+      out_data[i] += static_cast<T>(1);
+    }
+    math::CBlas<T>::VMUL(n, x_data, out_data, out_data);
+    for (int i = 0; i < n; i++) {
+      out_data[i] *= static_cast<T>(0.5);
+    }
+#else
     auto temp = (x * static_cast<T>(M_SQRT1_2)).erf();
     out.device(d) = x * static_cast<T>(0.5) * (static_cast<T>(1) + temp);
+#endif
   }
 };
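Both branches above evaluate the same formula, gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2))): the MKLML branch accumulates x * M_SQRT1_2 into the output buffer with AXPY, applies the vectorized error function VMERF in place, and finishes the 0.5 * x * (1 + erf(...)) scaling with VMUL and two scalar loops.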
@@ -331,7 +352,6 @@ struct TanhFunctor : public BaseActivationFunctor<T> {
 template <typename T>
 struct TanhGradFunctor : public BaseActivationFunctor<T> {
-  bool Inplace() const { return IsInplace("tanh"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -437,7 +457,6 @@ struct SqrtFunctor : public BaseActivationFunctor<T> {
 template <typename T>
 struct SqrtGradFunctor : public BaseActivationFunctor<T> {
-  bool Inplace() const { return IsInplace("sqrt"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -456,7 +475,6 @@ struct CeilFunctor : public BaseActivationFunctor<T> {
 template <typename T>
 struct ZeroGradFunctor : public BaseActivationFunctor<T> {
-  bool Inplace() const { return IsInplace("ceil"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -573,7 +591,6 @@ struct ReciprocalFunctor : public BaseActivationFunctor<T> {
 template <typename T>
 struct ReciprocalGradFunctor : public BaseActivationFunctor<T> {
-  bool Inplace() const { return IsInplace("reciprocal"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -673,7 +690,6 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
   typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
     return {{"threshold", &threshold}};
   }
-  bool Inplace() const { return IsInplace("relu6"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -755,7 +771,6 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
   typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
     return {{"threshold", &threshold}};
   }
-  bool Inplace() const { return IsInplace("soft_relu"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -936,7 +951,6 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor<T> {
   typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
     return {{"slope", &slope}, {"offset", &offset}};
   }
-  bool Inplace() { return IsInplace("hard_sigmoid"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
...
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
namespace operators {
static framework::proto::VarType::Type kDefaultDtype =
framework::proto::VarType::Type::VarType_Type_BOOL;
template <typename DeviceContext, typename T>
class AllocContinuousSpaceKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
auto &in_var_names = context.Inputs("Input");
auto &out_var_names = context.Outputs("Output");
auto &in_vars = context.MultiInputVar("Input");
auto out_vars = context.MultiOutputVar("Output");
PADDLE_ENFORCE_GT(in_var_names.size(), static_cast<size_t>(0));
PADDLE_ENFORCE_EQ(in_var_names.size(), out_var_names.size());
for (size_t i = 0; i < in_var_names.size(); ++i) {
// Only support LoDTensor
PADDLE_ENFORCE_NOT_NULL(in_vars[i], "%s should not be nullptr.",
in_var_names[i]);
PADDLE_ENFORCE_NOT_NULL(out_vars[i], "%s should not be nullptr.",
out_var_names[i]);
PADDLE_ENFORCE(in_vars[i]->IsType<framework::LoDTensor>());
PADDLE_ENFORCE(out_vars[i]->IsType<framework::LoDTensor>());
}
auto in_tensors = context.MultiInput<framework::LoDTensor>("Input");
if (context.Attr<bool>("check_name")) {
for (size_t i = 0; i < in_var_names.size(); ++i) {
PADDLE_ENFORCE_EQ(in_var_names[i], out_var_names[i]);
}
} else {
// Init the output as input
for (size_t i = 0; i < in_tensors.size(); ++i) {
out_vars[i]->GetMutable<framework::LoDTensor>()->Resize(
in_tensors[i]->dims());
}
}
auto &dev_ctx = context.template device_context<DeviceContext>();
// Get numel and dtype
size_t numel = 0;
auto dtype = kDefaultDtype;
GetMemSizeAndDtype(in_tensors, in_var_names, &numel, &dtype);
// Alloc the continuous space
auto fused_tensor = context.Output<framework::LoDTensor>("FusedOutput");
fused_tensor->Resize(framework::make_ddim({static_cast<int64_t>(numel)}))
.mutable_data(context.GetPlace(), dtype);
// Init the continuous space
auto out_tensors = context.MultiOutput<framework::LoDTensor>("Output");
int64_t offset = 0;
if (context.Attr<bool>("copy_data")) {
for (size_t i = 0; i < in_var_names.size(); ++i) {
int64_t len = out_tensors[i]->numel();
auto sub_tensor = fused_tensor->Slice(offset, offset + len);
offset += len;
framework::TensorCopy(*out_tensors[i], context.GetPlace(), dev_ctx,
&sub_tensor);
}
} else if (context.Attr<bool>("set_constant")) {
math::SetConstant<DeviceContext, T> set_constant;
set_constant(dev_ctx, fused_tensor,
static_cast<T>(context.Attr<float>("constant")));
}
// Make the outputs point to the continuous space.
offset = 0;
for (size_t i = 0; i < out_tensors.size(); ++i) {
int64_t len = out_tensors[i]->numel();
auto dim = out_tensors[i]->dims();
out_tensors[i]
->ShareDataWith(fused_tensor->Slice(offset, offset + len))
.Resize(dim);
offset += len;
VLOG(10) << "alloc_space_for_vars: output(" << out_var_names[i]
<< ") ,dim:(" << dim << ")"
<< " Address: " << out_tensors[i]->data<void>();
}
}
void GetMemSizeAndDtype(
const std::vector<const framework::LoDTensor *> &lod_tensors,
const std::vector<std::string> &var_names, size_t *numel,
framework::proto::VarType::Type *dtype) const {
PADDLE_ENFORCE_EQ(lod_tensors.size(), var_names.size());
*numel = 0;
for (size_t i = 0; i < var_names.size(); ++i) {
PADDLE_ENFORCE(lod_tensors[i]->IsInitialized(), "%s is not initialized.",
var_names[i]);
auto p_dtype = lod_tensors[i]->type();
if (*dtype == kDefaultDtype) {
PADDLE_ENFORCE_NE(p_dtype, kDefaultDtype, "%s's type should not be %s.",
var_names[i], kDefaultDtype);
*dtype = p_dtype;
}
PADDLE_ENFORCE_EQ(p_dtype, *dtype, "The dtypes of the input vars are not equal.");
auto size = lod_tensors[i]->numel();
PADDLE_ENFORCE_GT(size, 0);
VLOG(10) << "alloc_space_for_vars: input(" << var_names[i] << ") ,dim:("
<< lod_tensors[i]->dims() << ")";
*numel += size;
}
}
};
class AllocContinuousSpaceOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {}
};
class AllocContinuousSpaceOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Input",
"(vector<LoDTensor>) The input tensors of"
" alloc_continuous_space operator.")
.AsDuplicable();
AddOutput("Output",
"(vector<LoDTensor>) The output "
"tensors of alloc_continuous_space operator. And the address "
"of output tensors are continuous, they are sliced from the "
"tensor of FusedOutput.")
.AsDuplicable();
AddOutput("FusedOutput",
"(LoDTensor) The output tensor "
"of alloc_continuous_space operator. And the tensors of"
" Output is sliced from the tensor of FusedOutput.");
AddAttr<bool>("copy_data", "Whether to copy the Input value to Output.")
.SetDefault(false);
AddAttr<bool>("set_constant",
"Whether to set the Output with a constant value.")
.SetDefault(false);
AddAttr<float>("constant",
"If set_constant is true, the constant value will be used "
"to set the Output.")
.SetDefault(0.0);
AddAttr<bool>("check_name",
"Whether to check the name of Input and Output to ensure "
"they are the same separately.")
.SetDefault(false);
AddComment(R"DOC(
AllocContinuousSpace Operator.
alloc_continuous_space is used to make the addresses of the Output tensors
continuous according to the Input. This op allocates one big tensor whose
dtype is the same as that of the input tensors and whose size is the sum of
the input tensors' numels, so the dim of the big tensor is {sum(numel)}. The
big tensor is stored in FusedOutput, and the tensors of Output are sliced
from FusedOutput.
Note that the dtypes of all Input tensors must be the same, and the dims of
each Input and its corresponding Output must be equal.
The tensors of Input and Output can be the same or different, and
alloc_continuous_space allows copying the values of Input to Output, or
setting the Output with a constant value.
)DOC");
}
};
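// Worked example: with two float inputs of shapes {2, 3} and {4}, numel is
// 6 + 4 = 10, so FusedOutput becomes a 10-element float tensor; Output[0]
// then aliases elements [0, 6) of FusedOutput reshaped to {2, 3}, and
// Output[1] aliases elements [6, 10) reshaped to {4}.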
} // namespace operators
} // namespace paddle
REGISTER_OPERATOR(alloc_continuous_space,
paddle::operators::AllocContinuousSpaceOp,
paddle::operators::AllocContinuousSpaceOpMaker);
namespace ops = paddle::operators;
REGISTER_OP_CPU_KERNEL(
alloc_continuous_space,
ops::AllocContinuousSpaceKernel<paddle::platform::CPUDeviceContext, int>,
ops::AllocContinuousSpaceKernel<paddle::platform::CPUDeviceContext, float>,
ops::AllocContinuousSpaceKernel<paddle::platform::CPUDeviceContext,
double>);
#ifdef PADDLE_WITH_CUDA
REGISTER_OP_CUDA_KERNEL(
alloc_continuous_space,
ops::AllocContinuousSpaceKernel<paddle::platform::CUDADeviceContext, int>,
ops::AllocContinuousSpaceKernel<paddle::platform::CUDADeviceContext, float>,
ops::AllocContinuousSpaceKernel<paddle::platform::CUDADeviceContext,
double>);
#endif
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/benchmark/op_tester.h"
+#include <fstream>
 #include "gflags/gflags.h"
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/op_info.h"
@@ -28,6 +29,7 @@ namespace operators {
 namespace benchmark {
 DEFINE_string(op_config_list, "", "Path of op config file.");
+DEFINE_int32(specified_config_id, -1, "Test the specified op config.");
 void OpTester::Init(const std::string &filename) {
   Init(OpTesterConfig(filename));
@@ -147,7 +149,7 @@ void OpTester::CreateInputVarDesc() {
     var->SetShape(input->dims);
     op_desc_.SetInput(name, {var_name});
-    inputs_.push_back(var_name);
+    input_lods_[var_name] = input->lod;
   }
 }
@@ -162,7 +164,6 @@ void OpTester::CreateOutputVarDesc() {
     var->SetDataType(framework::proto::VarType::FP32);
     op_desc_.SetOutput(name, {var_name});
-    outputs_.push_back(var_name);
   }
 }
@@ -218,16 +219,26 @@ void OpTester::CreateVariables(framework::Scope *scope) {
     }
   }
-  // Allocate memory for input tensor
-  for (auto &name : inputs_) {
-    VLOG(3) << "Allocate memory for tensor " << name;
-    auto &var_desc = vars_[name];
+  for (auto &item : input_lods_) {
+    // Allocate memory for input tensor
+    auto &var_name = item.first;
+    VLOG(3) << "Allocate memory for tensor " << var_name;
+    auto &var_desc = vars_[var_name];
     std::vector<int64_t> shape = var_desc->GetShape();
-    auto *var = scope->Var(name);
+    auto *var = scope->Var(var_name);
     auto *tensor = var->GetMutable<framework::LoDTensor>();
     SetupTensor<float>(tensor, shape, static_cast<float>(0.0),
                        static_cast<float>(1.0));
+    VLOG(3) << "Set lod for tensor " << var_name;
+    std::vector<std::vector<size_t>> &lod_vec = item.second;
+    framework::LoD lod;
+    for (size_t i = 0; i < lod_vec.size(); ++i) {
+      lod.push_back(lod_vec[i]);
+    }
+    tensor->set_lod(lod);
   }
 }
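For reference, input->lod follows the LoDTensor convention: each level is a monotonically increasing offset vector, so a single-level LoD such as {{0, 2, 5}} marks rows [0, 2) and [2, 5) of a 5-row tensor as two sequences.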
@@ -282,10 +293,32 @@ std::string OpTester::DebugString() {
 }
 TEST(op_tester, base) {
-  OpTester tester;
   if (!FLAGS_op_config_list.empty()) {
-    tester.Init(FLAGS_op_config_list);
+    std::ifstream fin(FLAGS_op_config_list, std::ios::in | std::ios::binary);
+    PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot open file %s",
+                   FLAGS_op_config_list.c_str());
+    std::vector<OpTesterConfig> op_configs;
+    while (!fin.eof()) {
+      OpTesterConfig config;
+      bool result = config.Init(fin);
+      if (result) {
+        op_configs.push_back(config);
+      }
+    }
+    if (FLAGS_specified_config_id >= 0 &&
+        FLAGS_specified_config_id < static_cast<int>(op_configs.size())) {
+      OpTester tester;
+      tester.Init(op_configs[FLAGS_specified_config_id]);
+      tester.Run();
+    } else {
+      for (size_t i = 0; i < op_configs.size(); ++i) {
+        OpTester tester;
+        tester.Init(op_configs[i]);
+        tester.Run();
+      }
+    }
   } else {
+    OpTester tester;
     OpTesterConfig config;
     config.op_type = "elementwise_add";
     config.inputs.resize(2);
@@ -294,8 +327,8 @@ TEST(op_tester, base) {
     config.inputs[1].name = "Y";
     config.inputs[1].dims = {64, 1};
     tester.Init(config);
+    tester.Run();
   }
-  tester.Run();
 }
 } // namespace benchmark
...
@@ -57,8 +57,7 @@ class OpTester {
   std::string type_;
   framework::OpDesc op_desc_;
   std::unordered_map<std::string, std::unique_ptr<framework::VarDesc>> vars_;
-  std::vector<std::string> inputs_;
-  std::vector<std::string> outputs_;
+  std::unordered_map<std::string, std::vector<std::vector<size_t>>> input_lods_;
   std::unique_ptr<framework::OperatorBase> op_;
   platform::Place place_;
   std::unique_ptr<framework::Scope> scope_;
...
...@@ -33,21 +33,64 @@ static bool EndWith(const std::string& str, const std::string& substr) { ...@@ -33,21 +33,64 @@ static bool EndWith(const std::string& str, const std::string& substr) {
return str.rfind(substr) == (str.length() - substr.length()); return str.rfind(substr) == (str.length() - substr.length());
} }
static void EraseEndSep(std::string* str) { static void EraseEndSep(std::string* str,
std::string substr = kSepBetweenItems; std::string substr = kSepBetweenItems) {
if (EndWith(*str, substr)) { if (EndWith(*str, substr)) {
str->erase(str->length() - substr.length(), str->length()); str->erase(str->length() - substr.length(), str->length());
} }
} }
static std::vector<int64_t> ParseDims(std::string dims_str) { void OpInputConfig::ParseDims(std::istream& is) {
std::vector<int64_t> dims; std::string dims_str;
is >> dims_str;
dims.clear();
std::string token; std::string token;
std::istringstream token_stream(dims_str); std::istringstream token_stream(dims_str);
while (std::getline(token_stream, token, 'x')) { while (std::getline(token_stream, token, 'x')) {
dims.push_back(std::stoi(token)); dims.push_back(std::stoi(token));
} }
return dims; }
void OpInputConfig::ParseLoD(std::istream& is) {
std::string lod_str;
std::string start_sep =
std::string(kStartSeparator) + std::string(kStartSeparator);
std::string end_sep = std::string(kEndSeparator) + std::string(kEndSeparator);
std::string sep;
is >> sep;
if (StartWith(sep, start_sep)) {
lod_str += sep;
while (!EndWith(sep, end_sep)) {
is >> sep;
lod_str += sep;
}
}
EraseEndSep(&lod_str);
PADDLE_ENFORCE_GE(lod_str.length(), 4U);
VLOG(4) << "lod: " << lod_str << ", length: " << lod_str.length();
// Parse the lod_str
lod.clear();
for (size_t i = 1; i < lod_str.length() - 1;) {
if (lod_str[i] == '{') {
std::vector<size_t> level;
while (lod_str[i] != '}') {
++i;
std::string number;
while (lod_str[i] >= '0' && lod_str[i] <= '9') {
number += lod_str[i];
++i;
}
level.push_back(atoi(number.c_str()));
}
lod.push_back(level);
} else if (lod_str[i] == '}') {
++i;
}
}
} }
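To make ParseLoD concrete: assuming kStartSeparator and kEndSeparator render as "{" and "}" (inferred from the doubled "{{" prefix the code matches), a single whitespace-free token such as

    lod: {{0,1,2}{0,2,4,6}}

parses into lod = {{0, 1, 2}, {0, 2, 4, 6}}, i.e. two LoD levels. Digits inside a group need a non-digit separator such as a comma, and groups must abut, because the token-joining loop above concatenates whitespace-separated pieces without reinserting the spaces.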
OpInputConfig::OpInputConfig(std::istream& is) { OpInputConfig::OpInputConfig(std::istream& is) {
...@@ -60,9 +103,9 @@ OpInputConfig::OpInputConfig(std::istream& is) { ...@@ -60,9 +103,9 @@ OpInputConfig::OpInputConfig(std::istream& is) {
is >> name; is >> name;
EraseEndSep(&name); EraseEndSep(&name);
} else if (sep == "dims" || sep == "dims:") { } else if (sep == "dims" || sep == "dims:") {
std::string dims_str; ParseDims(is);
is >> dims_str; } else if (sep == "lod" || sep == "lod:") {
dims = ParseDims(dims_str); ParseLoD(is);
} }
} }
} }
...@@ -76,7 +119,7 @@ OpTesterConfig::OpTesterConfig(const std::string& filename) { ...@@ -76,7 +119,7 @@ OpTesterConfig::OpTesterConfig(const std::string& filename) {
Init(fin); Init(fin);
} }
void OpTesterConfig::Init(std::istream& is) { bool OpTesterConfig::Init(std::istream& is) {
std::string sep; std::string sep;
is >> sep; is >> sep;
if (sep == kStartSeparator) { if (sep == kStartSeparator) {
...@@ -95,9 +138,40 @@ void OpTesterConfig::Init(std::istream& is) { ...@@ -95,9 +138,40 @@ void OpTesterConfig::Init(std::istream& is) {
} else if (sep == "input" || sep == "input:") { } else if (sep == "input" || sep == "input:") {
OpInputConfig input_config(is); OpInputConfig input_config(is);
inputs.push_back(input_config); inputs.push_back(input_config);
} else if (sep == "attrs" || sep == "attrs:") {
ParseAttrs(is);
} else {
if (sep != kEndSeparator) {
return false;
}
} }
} }
} else {
return false;
}
return true;
}
bool OpTesterConfig::ParseAttrs(std::istream& is) {
std::string sep;
is >> sep;
if (sep == kStartSeparator) {
while (true) {
std::string key;
is >> key;
if (key == kEndSeparator) {
break;
}
std::string value;
is >> value;
EraseEndSep(&key, ":");
EraseEndSep(&value);
attrs[key] = value;
}
} }
return true;
} }
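For reference, ParseAttrs consumes key/value token pairs between the start and end separators, trimming a trailing ":" from each key and a trailing item separator from each value. Under the same brace assumption as above, a hypothetical attrs section could read

    attrs {
      use_mkldnn: 0
      axis: 1
    }

where the attribute names are purely illustrative; values are kept as strings in the attrs map.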
const OpInputConfig* OpTesterConfig::GetInput(const std::string& name) { const OpInputConfig* OpTesterConfig::GetInput(const std::string& name) {
......
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <istream> #include <istream>
#include <string> #include <string>
#include <unordered_map>
#include <vector> #include <vector>
namespace paddle { namespace paddle {
...@@ -26,19 +27,27 @@ struct OpInputConfig { ...@@ -26,19 +27,27 @@ struct OpInputConfig {
OpInputConfig() {} OpInputConfig() {}
explicit OpInputConfig(std::istream& is); explicit OpInputConfig(std::istream& is);
void ParseDims(std::istream& is);
void ParseLoD(std::istream& is);
std::string name; std::string name;
std::vector<int64_t> dims; std::vector<int64_t> dims;
std::vector<std::vector<size_t>> lod;
}; };
struct OpTesterConfig { struct OpTesterConfig {
OpTesterConfig() {} OpTesterConfig() {}
explicit OpTesterConfig(const std::string& filename); explicit OpTesterConfig(const std::string& filename);
void Init(std::istream& is);
bool Init(std::istream& is);
bool ParseAttrs(std::istream& is);
const OpInputConfig* GetInput(const std::string& name); const OpInputConfig* GetInput(const std::string& name);
std::string op_type; std::string op_type;
std::vector<OpInputConfig> inputs; std::vector<OpInputConfig> inputs;
std::unordered_map<std::string, std::string> attrs;
int device_id{-1}; // CPU: -1 int device_id{-1}; // CPU: -1
int repeat{1}; int repeat{1};
int profile{0}; int profile{0};
......
...@@ -81,6 +81,7 @@ framework::OpKernelType ConvOp::GetExpectedKernelType( ...@@ -81,6 +81,7 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
framework::OpKernelType::kDefaultCustomizedTypeValue; framework::OpKernelType::kDefaultCustomizedTypeValue;
framework::LibraryType library{framework::LibraryType::kPlain}; framework::LibraryType library{framework::LibraryType::kPlain};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
auto input_data_type = ctx.Input<Tensor>("Input")->type();
std::string data_format = ctx.Attr<std::string>("data_format"); std::string data_format = ctx.Attr<std::string>("data_format");
framework::DataLayout layout = framework::StringToDataLayout(data_format); framework::DataLayout layout = framework::StringToDataLayout(data_format);
...@@ -94,11 +95,14 @@ framework::OpKernelType ConvOp::GetExpectedKernelType( ...@@ -94,11 +95,14 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
platform::CanMKLDNNBeUsed(ctx)) { platform::CanMKLDNNBeUsed(ctx)) {
library = framework::LibraryType::kMKLDNN; library = framework::LibraryType::kMKLDNN;
layout = framework::DataLayout::kMKLDNN; layout = framework::DataLayout::kMKLDNN;
customized_type_value = kConvMKLDNNFP32; customized_type_value =
(input_data_type == framework::DataTypeTrait<int8_t>::DataType ||
input_data_type == framework::DataTypeTrait<uint8_t>::DataType)
? kConvMKLDNNINT8
: kConvMKLDNNFP32;
} }
#endif #endif
auto input_data_type = ctx.Input<Tensor>("Input")->type();
if (input_data_type != framework::proto::VarType::INT8 && if (input_data_type != framework::proto::VarType::INT8 &&
input_data_type != framework::proto::VarType::UINT8) { input_data_type != framework::proto::VarType::UINT8) {
auto filter_data_type = ctx.Input<Tensor>("Filter")->type(); auto filter_data_type = ctx.Input<Tensor>("Filter")->type();
......
...@@ -32,14 +32,23 @@ class CrossEntropyOp : public framework::OperatorWithKernel { ...@@ -32,14 +32,23 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
int rank = x_dims.size(); int rank = x_dims.size();
PADDLE_ENFORCE_EQ(rank, label_dims.size(), PADDLE_ENFORCE_EQ(rank, label_dims.size(),
"Input(X) and Input(Label) shall have the same rank."); "Input(X) and Input(Label) shall have the same rank.");
PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), bool check = true;
framework::slice_ddim(label_dims, 0, rank - 1), if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
"Input(X) and Input(Label) shall have the same shape " framework::product(label_dims) <= 0)) {
"except the last dimension."); check = false;
}
if (check) {
PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
framework::slice_ddim(label_dims, 0, rank - 1),
"Input(X) and Input(Label) shall have the same shape "
"except the last dimension.");
}
if (ctx->Attrs().Get<bool>("soft_label")) { if (ctx->Attrs().Get<bool>("soft_label")) {
PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1], if (check) {
"If Attr(soft_label) == true, the last dimension of " PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
"Input(X) and Input(Label) should be equal."); "If Attr(soft_label) == true, the last dimension of "
"Input(X) and Input(Label) should be equal.");
}
} else { } else {
PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL, PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL,
"If Attr(softLabel) == false, the last dimension of " "If Attr(softLabel) == false, the last dimension of "
...@@ -82,20 +91,32 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel { ...@@ -82,20 +91,32 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
"Input(Y@Grad) and Input(X) should have the same rank."); "Input(Y@Grad) and Input(X) should have the same rank.");
PADDLE_ENFORCE_EQ(label_dims.size(), rank, PADDLE_ENFORCE_EQ(label_dims.size(), rank,
"Input(Label) and Input(X) should have the same rank."); "Input(Label) and Input(X) should have the same rank.");
PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
framework::slice_ddim(label_dims, 0, rank - 1), bool check = true;
"The Input(X) and Input(Label) should have the same " if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
"shape except the last dimension."); framework::product(label_dims) <= 0)) {
PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), check = false;
framework::slice_ddim(dy_dims, 0, rank - 1), }
"The Input(X) and Input(Y@Grad) should have the same "
"shape except the last dimension."); if (check) {
PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
framework::slice_ddim(label_dims, 0, rank - 1),
"The Input(X) and Input(Label) should have the same "
"shape except the last dimension.");
PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
framework::slice_ddim(dy_dims, 0, rank - 1),
"The Input(X) and Input(Y@Grad) should have the same "
"shape except the last dimension.");
}
PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1, PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1,
"The last dimension of Input(Y@Grad) should be 1."); "The last dimension of Input(Y@Grad) should be 1.");
if (ctx->Attrs().Get<bool>("soft_label")) { if (ctx->Attrs().Get<bool>("soft_label")) {
PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1], if (check) {
"When Attr(soft_label) == true, the last dimension of " PADDLE_ENFORCE_EQ(
"Input(X) and Input(Label) should be equal."); x_dims[rank - 1], label_dims[rank - 1],
"When Attr(soft_label) == true, the last dimension of "
"Input(X) and Input(Label) should be equal.");
}
} else { } else {
PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1, PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1,
"When Attr(soft_label) == false, the last dimension of " "When Attr(soft_label) == false, the last dimension of "
......
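The check guard above exists because at compile time a shape may carry -1 for a not-yet-inferred dimension, which makes framework::product(dims) non-positive; the rank-wise slice comparison is then deferred to run time. A hypothetical shape pair illustrating this:

    // x_dims = [-1, 10], label_dims = [-1, 1]
    // product(x_dims) = -10 <= 0 at compile time -> check = false,
    // so the slice_ddim equality is only enforced once -1 is resolved.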
...@@ -140,9 +140,6 @@ class DataNormOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -140,9 +140,6 @@ class DataNormOpMaker : public framework::OpProtoAndCheckerMaker {
"Scales of the history data batch, " "Scales of the history data batch, "
"will apply to output when training") "will apply to output when training")
.AsIntermediate(); .AsIntermediate();
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC( AddComment(R"DOC(
Data Normalization. Data Normalization.
......
...@@ -172,6 +172,10 @@ class PriorBoxOpKernel : public framework::OpKernel<T> { ...@@ -172,6 +172,10 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
framework::make_ddim({1, static_cast<int>(variances.size())}), framework::make_ddim({1, static_cast<int>(variances.size())}),
ctx.GetPlace()); ctx.GetPlace());
auto var_et = framework::EigenTensor<T, 2>::From(var_t); auto var_et = framework::EigenTensor<T, 2>::From(var_t);
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for
#endif
for (size_t i = 0; i < variances.size(); ++i) { for (size_t i = 0; i < variances.size(); ++i) {
var_et(0, i) = variances[i]; var_et(0, i) = variances[i];
} }
...@@ -181,8 +185,15 @@ class PriorBoxOpKernel : public framework::OpKernel<T> { ...@@ -181,8 +185,15 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
vars->Resize({box_num, static_cast<int>(variances.size())}); vars->Resize({box_num, static_cast<int>(variances.size())});
auto e_vars = framework::EigenMatrix<T, Eigen::RowMajor>::From(*vars); auto e_vars = framework::EigenMatrix<T, Eigen::RowMajor>::From(*vars);
e_vars = var_et.broadcast(Eigen::DSizes<int, 2>(box_num, 1));
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for collapse(2)
#endif
for (int i = 0; i < box_num; ++i) {
for (int j = 0; j < static_cast<int>(variances.size()); ++j) {
e_vars(i, j) = variances[j];
}
}
vars->Resize(var_dim); vars->Resize(var_dim);
} }
}; // namespace operators }; // namespace operators
......
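The collapse(2) clause fuses the two perfectly nested loops into a single parallel iteration space of box_num * variances.size() assignments; a sketch of the equivalent flattened loop:

    // int64_t vsize = static_cast<int64_t>(variances.size());
    // for (int64_t t = 0; t < box_num * vsize; ++t) {
    //   e_vars(t / vsize, t % vsize) = variances[t % vsize];
    // }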
...@@ -77,8 +77,7 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> { ...@@ -77,8 +77,7 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
} else { } else {
functor.RunMidWise(n, pre, post); functor.RunMidWise(n, pre, post);
} }
z->set_layout(DataLayout::kMKLDNN); z->set_mkldnn_prim_desc(x->get_mkldnn_prim_desc());
z->set_format(x->format());
} else { } else {
PADDLE_ENFORCE(x->layout() == DataLayout::kMKLDNN && PADDLE_ENFORCE(x->layout() == DataLayout::kMKLDNN &&
x->format() != memory::format::format_undef, x->format() != memory::format::format_undef,
...@@ -116,7 +115,8 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> { ...@@ -116,7 +115,8 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_pd); auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_pd);
// create mkldnn memory for dst // create mkldnn memory for dst
memory dst_memory = memory(sum_pd.dst_primitive_desc(), z_data); auto dst_mem_pd = sum_pd.dst_primitive_desc();
memory dst_memory = memory(dst_mem_pd, z_data);
std::vector<primitive::at> inputs; std::vector<primitive::at> inputs;
inputs.push_back(srcs[0]); inputs.push_back(srcs[0]);
...@@ -129,9 +129,7 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> { ...@@ -129,9 +129,7 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
pipeline.push_back(sum_prim); pipeline.push_back(sum_prim);
stream(stream::kind::eager).submit(pipeline).wait(); stream(stream::kind::eager).submit(pipeline).wait();
z->set_layout(DataLayout::kMKLDNN); z->set_mkldnn_prim_desc(dst_mem_pd);
z->set_format(
(memory::format)dst_memory.get_primitive_desc().desc().data.format);
} }
} }
}; };
...@@ -152,24 +150,19 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> { ...@@ -152,24 +150,19 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
auto* out = dout; auto* out = dout;
auto *x = dout, *y = dout; auto *x = dout, *y = dout;
auto set_mkldnn_format = [](Tensor* in, const Tensor* out) {
in->set_layout(DataLayout::kMKLDNN);
in->set_format(out->format());
};
if (dx != nullptr && dy != nullptr && dx->dims() == dy->dims()) { if (dx != nullptr && dy != nullptr && dx->dims() == dy->dims()) {
if (dx->dims() == dy->dims()) { if (dx->dims() == dy->dims()) {
auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, T>(ctx); auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, T>(ctx);
if (dx) { if (dx) {
blas.VCOPY(dout->numel(), dout->data<T>(), blas.VCOPY(dout->numel(), dout->data<T>(),
dx->mutable_data<T>(ctx.GetPlace())); dx->mutable_data<T>(ctx.GetPlace()));
set_mkldnn_format(dx, dout); dx->set_mkldnn_prim_desc(dout->get_mkldnn_prim_desc());
} }
if (dy) { if (dy) {
blas.VCOPY(dout->numel(), dout->data<T>(), blas.VCOPY(dout->numel(), dout->data<T>(),
dy->mutable_data<T>(ctx.GetPlace())); dy->mutable_data<T>(ctx.GetPlace()));
set_mkldnn_format(dy, dout); dy->set_mkldnn_prim_desc(dout->get_mkldnn_prim_desc());
} }
} }
} else { } else {
......
...@@ -31,7 +31,7 @@ template <typename T> ...@@ -31,7 +31,7 @@ template <typename T>
struct FindAbsMaxFunctor<platform::CPUDeviceContext, T> { struct FindAbsMaxFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext& ctx, const T* in, void operator()(const platform::CPUDeviceContext& ctx, const T* in,
const int num, T* out) { const int num, T* out) {
*out = *(std::max_element(in + 0, in + num, Compare<T>())); *out = std::abs(*(std::max_element(in + 0, in + num, Compare<T>())));
} }
}; };
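The added std::abs matters because Compare<T> (defined earlier in this file) presumably orders elements by absolute value, so the element max_element returns can itself be negative. A minimal sketch of the intended semantics:

    // Hypothetical stand-in mirroring the comparator used above:
    // template <typename T>
    // struct Compare {
    //   bool operator()(const T& a, const T& b) const {
    //     return std::abs(a) < std::abs(b);
    //   }
    // };
    // With in = {-3.5f, 2.0f}, max_element picks -3.5f and std::abs
    // yields 3.5f, the scale the fake-quantize pass expects.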
...@@ -46,10 +46,8 @@ struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> { ...@@ -46,10 +46,8 @@ struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> {
platform::Transform<platform::CPUDeviceContext> trans; platform::Transform<platform::CPUDeviceContext> trans;
trans(ctx, in.data<T>(), in.data<T>() + in.numel(), trans(ctx, in.data<T>(), in.data<T>() + in.numel(),
out->mutable_data<T>(ctx.GetPlace()), ClipFunctor<T>(-s, s)); out->mutable_data<T>(ctx.GetPlace()), ClipFunctor<T>(-s, s));
auto in_e = framework::EigenVector<T>::Flatten(in);
auto out_e = framework::EigenVector<T>::Flatten(*out); auto out_e = framework::EigenVector<T>::Flatten(*out);
out_e.device(*ctx.eigen_device()) = (bin_cnt / s * out_e).round();
out_e.device(*ctx.eigen_device()) = (bin_cnt / s * in_e).round();
} }
}; };
......
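The fix quantizes the clipped values already written to out instead of the raw input. A scalar sketch of the whole functor, assuming s is the scale and bin_cnt the number of positive levels (e.g. 127 for int8):

    // float ClipAndFakeQuant(float x, float s, int bin_cnt) {
    //   float clipped = std::min(s, std::max(-s, x));  // ClipFunctor(-s, s)
    //   return std::round(bin_cnt / s * clipped);      // quantized level
    // }
    // e.g. x = 0.9f, s = 1.0f, bin_cnt = 127 -> round(114.3f) = 114.0f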
...@@ -23,6 +23,9 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel { ...@@ -23,6 +23,9 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
if (ctx->IsRuntime()) {
return;
}
PADDLE_ENFORCE(ctx->HasInput("W"), PADDLE_ENFORCE(ctx->HasInput("W"),
"Input W of FusedEmbeddingSeqPoolOp should not be null."); "Input W of FusedEmbeddingSeqPoolOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Ids"), PADDLE_ENFORCE(ctx->HasInput("Ids"),
...@@ -42,36 +45,15 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel { ...@@ -42,36 +45,15 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel {
// we only support sum now // we only support sum now
PADDLE_ENFORCE_EQ(combiner, "sum"); PADDLE_ENFORCE_EQ(combiner, "sum");
int64_t last_dim = table_dims[1]; int64_t last_dim = FusedEmbeddingSeqPoolLastDim(table_dims, ids_dims);
for (int i = 1; i != ids_dims.size(); ++i) { // in compile time, the lod level of ids must be 1
last_dim *= ids_dims[i]; framework::VarDesc* ids_desc =
} boost::get<framework::VarDesc*>(ctx->GetInputVarPtrs("Ids")[0]);
PADDLE_ENFORCE_EQ(ids_desc->GetLoDLevel(), 1);
if (ctx->IsRuntime()) {
framework::Variable* ids_var =
boost::get<framework::Variable*>(ctx->GetInputVarPtrs("Ids")[0]);
const auto& ids_lod = ids_var->Get<LoDTensor>().lod();
// in run time, the LoD of ids must be 1 // in compile time, the shape from Ids -> output
PADDLE_ENFORCE(ids_lod.size(), 1u, // should be [-1, 1] -> [-1, embedding_size]
"The LoD level of Input(Ids) must be 1"); ctx->SetOutputDim("Out", framework::make_ddim({-1, last_dim}));
PADDLE_ENFORCE_GE(ids_lod[0].size(), 1u, "The LoD could NOT be empty");
int64_t batch_size = ids_lod[0].size() - 1;
// in run time, the shape from Ids -> output
// should be [seq_length, 1] -> [batch_size, embedding_size]
ctx->SetOutputDim("Out", framework::make_ddim({batch_size, last_dim}));
} else {
// in compile time, the lod level of ids must be 1
framework::VarDesc* ids_desc =
boost::get<framework::VarDesc*>(ctx->GetInputVarPtrs("Ids")[0]);
PADDLE_ENFORCE_EQ(ids_desc->GetLoDLevel(), 1);
// in compile time, the shape from Ids -> output
// should be [-1, 1] -> [-1, embedding_size]
ctx->SetOutputDim("Out", framework::make_ddim({-1, last_dim}));
}
} }
protected: protected:
......
...@@ -61,6 +61,15 @@ struct EmbeddingVSumFunctor { ...@@ -61,6 +61,15 @@ struct EmbeddingVSumFunctor {
} }
}; };
inline int FusedEmbeddingSeqPoolLastDim(const framework::DDim &table_dims,
const framework::DDim &ids_dims) {
int64_t last_dim = table_dims[1];
for (int i = 1; i != ids_dims.size(); ++i) {
last_dim *= ids_dims[i];
}
return last_dim;
}
template <typename T> template <typename T>
class FusedEmbeddingSeqPoolKernel : public framework::OpKernel<T> { class FusedEmbeddingSeqPoolKernel : public framework::OpKernel<T> {
public: public:
...@@ -70,6 +79,17 @@ class FusedEmbeddingSeqPoolKernel : public framework::OpKernel<T> { ...@@ -70,6 +79,17 @@ class FusedEmbeddingSeqPoolKernel : public framework::OpKernel<T> {
const LoDTensor *table_var = context.Input<LoDTensor>("W"); const LoDTensor *table_var = context.Input<LoDTensor>("W");
const std::string &combiner_type = context.Attr<std::string>("combiner"); const std::string &combiner_type = context.Attr<std::string>("combiner");
int64_t last_dim =
FusedEmbeddingSeqPoolLastDim(table_var->dims(), ids_t->dims());
const auto &ids_lod = ids_t->lod();
// at run time, the LoD level of ids must be 1
PADDLE_ENFORCE_EQ(ids_lod.size(), 1u, "The LoD level of Input(Ids) must be 1");
PADDLE_ENFORCE_GE(ids_lod[0].size(), 1u, "The LoD could NOT be empty");
int64_t batch_size = ids_lod[0].size() - 1;
// in run time, the shape from Ids -> output
// should be [seq_length, 1] -> [batch_size, embedding_size]
output_t->Resize({batch_size, last_dim});
if (combiner_type == "sum") { if (combiner_type == "sum") {
EmbeddingVSumFunctor<T> functor; EmbeddingVSumFunctor<T> functor;
functor(context, table_var, ids_t, output_t); functor(context, table_var, ids_t, output_t);
......
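A hypothetical shape walk-through of the run-time resize above:

    // W:   [10000, 64]                 -> table_dims[1] = 64
    // Ids: [5, 1] with lod {{0, 3, 5}} -> last_dim = 64 * 1 = 64,
    //                                     batch_size = 2 (two sequences)
    // Out: resized to [2, 64], one sum-pooled embedding row per sequence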
...@@ -14,7 +14,6 @@ limitations under the License. */ ...@@ -14,7 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/hash_op.h" #include "paddle/fluid/operators/hash_op.h"
#include <string> #include <string>
#include <vector>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -27,6 +26,9 @@ class HashOp : public framework::OperatorWithKernel { ...@@ -27,6 +26,9 @@ class HashOp : public framework::OperatorWithKernel {
: OperatorWithKernel(type, inputs, outputs, attrs) {} : OperatorWithKernel(type, inputs, outputs, attrs) {}
void InferShape(framework::InferShapeContext *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
if (ctx->IsRuntime()) {
return;
}
PADDLE_ENFORCE(ctx->HasInput("X"), PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of HashOp should not be null."); "Input(X) of HashOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
...@@ -36,15 +38,8 @@ class HashOp : public framework::OperatorWithKernel { ...@@ -36,15 +38,8 @@ class HashOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(dims.size(), 2UL, PADDLE_ENFORCE_EQ(dims.size(), 2UL,
"The input of hash_op's dimensions must be 2"); "The input of hash_op's dimensions must be 2");
std::vector<int64_t> out_dims; std::vector<int64_t> out_dims;
out_dims.reserve(dims.size() + 1);
// copy all dims except the last one
for (int i = 0u; i != dims.size() - 1; ++i) {
out_dims.emplace_back(dims[i]);
}
int num_hash = ctx->Attrs().Get<int>("num_hash"); int num_hash = ctx->Attrs().Get<int>("num_hash");
out_dims.emplace_back(num_hash); HashOutputSize(dims, out_dims, num_hash);
// keep the last dim to 1
out_dims.emplace_back(1);
ctx->SetOutputDim("Out", framework::make_ddim(out_dims)); ctx->SetOutputDim("Out", framework::make_ddim(out_dims));
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
...@@ -71,4 +66,4 @@ $$Out = scale * X$$ ...@@ -71,4 +66,4 @@ $$Out = scale * X$$
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(hash, ops::HashOp, ops::HashOpMaker); REGISTER_OP_WITHOUT_GRADIENT(hash, ops::HashOp, ops::HashOpMaker);
REGISTER_OP_CPU_KERNEL(hash, ops::HashKerel<int>, ops::HashKerel<int64_t>); REGISTER_OP_CPU_KERNEL(hash, ops::HashKernel<int>, ops::HashKernel<int64_t>);
...@@ -17,21 +17,34 @@ limitations under the License. */ ...@@ -17,21 +17,34 @@ limitations under the License. */
extern "C" { extern "C" {
#include <xxhash.h> #include <xxhash.h>
} }
#include <vector>
#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
// template <typename DeviceContext, typename T>
inline void HashOutputSize(const framework::DDim& in_dims,
std::vector<int64_t>& out_dims, // NOLINT
int num_hash) {
out_dims.reserve(in_dims.size() + 1);
// copy all dims except the last one
for (int i = 0u; i != in_dims.size() - 1; ++i) {
out_dims.emplace_back(in_dims[i]);
}
out_dims.emplace_back(num_hash);
// keep the last dim to 1
out_dims.emplace_back(1);
}
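A worked example of the reshape rule:

    // Example (hypothetical sizes):
    //   in_dims = [6, 4], num_hash = 2  ->  out_dims = [6, 2, 1]
    // Every dim except the last is copied, the last is replaced by
    // num_hash hash slots, and a trailing 1 is kept.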
template <typename T> template <typename T>
class HashKerel : public framework::OpKernel<T> { class HashKernel : public framework::OpKernel<T> {
public: public:
virtual void Compute(const framework::ExecutionContext& context) const { virtual void Compute(const framework::ExecutionContext& context) const {
auto* out_t = context.Output<framework::LoDTensor>("Out"); auto* out_t = context.Output<framework::LoDTensor>("Out");
auto* in_t = context.Input<framework::LoDTensor>("X"); auto* in_t = context.Input<framework::LoDTensor>("X");
int mod_by = context.Attr<int>("mod_by"); int mod_by = context.Attr<int>("mod_by");
int num_hash = context.Attr<int>("num_hash"); int num_hash = context.Attr<int>("num_hash");
auto* output = out_t->mutable_data<T>(context.GetPlace());
auto in_dims = in_t->dims(); auto in_dims = in_t->dims();
auto in_lod = in_t->lod(); auto in_lod = in_t->lod();
...@@ -39,6 +52,11 @@ class HashKerel : public framework::OpKernel<T> { ...@@ -39,6 +52,11 @@ class HashKerel : public framework::OpKernel<T> {
static_cast<uint64_t>(in_dims[0]), in_lod[0].back(), static_cast<uint64_t>(in_dims[0]), in_lod[0].back(),
"The actual input data's size mismatched with LoD information."); "The actual input data's size mismatched with LoD information.");
std::vector<int64_t> out_dims;
HashOutputSize(in_dims, out_dims, num_hash);
out_t->Resize(framework::make_ddim(out_dims));
auto* output = out_t->mutable_data<T>(context.GetPlace());
auto seq_length = in_dims[0]; auto seq_length = in_dims[0];
auto last_dim = in_dims[in_dims.size() - 1]; auto last_dim = in_dims[in_dims.size() - 1];
auto* input = in_t->data<T>(); auto* input = in_t->data<T>();
...@@ -49,6 +67,7 @@ class HashKerel : public framework::OpKernel<T> { ...@@ -49,6 +67,7 @@ class HashKerel : public framework::OpKernel<T> {
} }
input += last_dim; input += last_dim;
} }
out_t->set_lod(in_t->lod());
} }
}; };
......
...@@ -84,13 +84,13 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -84,13 +84,13 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault("bilinear"); .SetDefault("bilinear");
AddAttr<bool>( AddAttr<bool>(
"align_corners", "align_corners",
"an optinal bool. Defaults to True. " "an optional bool. Defaults to True. "
"If True, the centers of 4 corner pixels of the input and output " "If True, the centers of 4 corner pixels of the input and output "
"tensors are aligned, preserving the values at the corner pixels, " "tensors are aligned, preserving the values at the corner pixels, "
"if Flase, are not aligned") "If False, are not aligned")
.SetDefault(true); .SetDefault(true);
AddAttr<int>("align_mode", AddAttr<int>("align_mode",
"(int, default \'1\'), optional for bilinear interpolation" "(int, default \'1\'), optional for bilinear interpolation, "
"can be \'0\' for src_idx = scale*(dst_indx+0.5)-0.5 , " "can be \'0\' for src_idx = scale*(dst_indx+0.5)-0.5 , "
"can be \'1\' for src_idx = scale*dst_index .") "can be \'1\' for src_idx = scale*dst_index .")
.SetDefault(1); .SetDefault(1);
......
...@@ -34,9 +34,8 @@ class IsEmptyOp : public framework::OperatorWithKernel { ...@@ -34,9 +34,8 @@ class IsEmptyOp : public framework::OperatorWithKernel {
framework::OpKernelType GetExpectedKernelType( framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override { const framework::ExecutionContext &ctx) const override {
framework::OpKernelType kt = framework::OpKernelType( auto *x = ctx.Input<framework::LoDTensor>("X");
ctx.Input<framework::LoDTensor>("X")->type(), platform::CPUPlace()); return framework::OpKernelType(x->type(), x->place());
return kt;
} }
}; };
...@@ -58,7 +57,6 @@ It will just return product(tensor.ddims()) > 0; ...@@ -58,7 +57,6 @@ It will just return product(tensor.ddims()) > 0;
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(is_empty, ops::IsEmptyOp, ops::IsEmptyOpMaker, REGISTER_OPERATOR(is_empty, ops::IsEmptyOp, ops::IsEmptyOpMaker,
paddle::framework::EmptyGradOpMaker); paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/is_empty_op.h"
#include "paddle/fluid/framework/op_registry.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
is_empty, ops::IsEmptyOpKernel<paddle::platform::CUDADeviceContext, float>,
ops::IsEmptyOpKernel<paddle::platform::CUDADeviceContext, double>,
ops::IsEmptyOpKernel<paddle::platform::CUDADeviceContext, int>,
ops::IsEmptyOpKernel<paddle::platform::CUDADeviceContext, int64_t>);
...@@ -28,6 +28,9 @@ class IsEmptyOpKernel : public framework::OpKernel<T> { ...@@ -28,6 +28,9 @@ class IsEmptyOpKernel : public framework::OpKernel<T> {
// get output // get output
auto* output_tensor = context.Output<framework::LoDTensor>("Out"); auto* output_tensor = context.Output<framework::LoDTensor>("Out");
// Note: is_empty is always executed on CPU and the output data should
// always be allocated for CPUPlace. We register a CUDA kernel for this op
// to avoid the unnecessary data transform.
output_tensor->mutable_data<bool>(platform::CPUPlace())[0] = output_tensor->mutable_data<bool>(platform::CPUPlace())[0] =
framework::product(input_tensor->dims()) == 0; framework::product(input_tensor->dims()) == 0;
} }
......
...@@ -332,6 +332,45 @@ void BenchEmbSeqPoolKernel() { ...@@ -332,6 +332,45 @@ void BenchEmbSeqPoolKernel() {
} }
} }
template <jit::KernelType KT, typename T, typename PlaceType>
void BenchSgdKernel() {
const T lr = 0.1;
auto UnDuplicatedRandomVec = [](int n, const int64_t lower,
const int64_t upper) -> std::vector<int64_t> {
PADDLE_ENFORCE_LE(static_cast<size_t>(upper - lower), n - 1);
PADDLE_ENFORCE_GT(n, 0);
std::vector<int64_t> all, out;
for (int i = 0; i < n; ++i) {
all.push_back(i);
}
std::random_shuffle(all.begin(), all.end());
out.insert(out.begin(), all.begin(), all.begin() + n);
return out;
};
for (int param_h : {1, 1000}) {
for (int grad_w : {1, 2, 8, 16, 30, 256}) {
// only benchmark inplace
Tensor param;
param.Resize({param_h, grad_w});
T* param_data = param.mutable_data<T>(PlaceType());
RandomVec<T>(param_h * grad_w, param_data, -2.f, 2.f);
for (int rows_size = 1; rows_size <= std::min(param_h, 10); ++rows_size) {
Tensor grad;
grad.Resize({rows_size, grad_w});
std::vector<int64_t> rows =
UnDuplicatedRandomVec(rows_size, 0, rows_size - 1);
RandomVec<T>(rows_size * grad_w, grad.mutable_data<T>(PlaceType()),
-2.f, 2.f);
const T* grad_data = grad.data<T>();
const int64_t* rows_data = rows.data();
jit::sgd_attr_t attr(param_h, grad_w, rows_size, grad_w, rows_size);
BenchAllImpls<KT, jit::SgdTuples<T>, PlaceType>(
attr, &lr, param_data, grad_data, rows_data, param_data, &attr);
}
}
}
}
template <jit::KernelType KT, typename T, typename PlaceType> template <jit::KernelType KT, typename T, typename PlaceType>
void BenchMatMulKernel() { void BenchMatMulKernel() {
for (int m : {1, 2, 3, 4}) { for (int m : {1, 2, 3, 4}) {
...@@ -477,6 +516,9 @@ BENCH_FP32_CPU(kEmbSeqPool) { ...@@ -477,6 +516,9 @@ BENCH_FP32_CPU(kEmbSeqPool) {
BenchEmbSeqPoolKernel<jit::kEmbSeqPool, T, CPUPlace>(); BenchEmbSeqPoolKernel<jit::kEmbSeqPool, T, CPUPlace>();
} }
// sgd function
BENCH_FP32_CPU(kSgd) { BenchSgdKernel<jit::kSgd, T, CPUPlace>(); }
// matmul // matmul
BENCH_FP32_CPU(kMatMul) { BenchMatMulKernel<jit::kMatMul, T, CPUPlace>(); } BENCH_FP32_CPU(kMatMul) { BenchMatMulKernel<jit::kMatMul, T, CPUPlace>(); }
......
...@@ -32,3 +32,4 @@ USE_JITKERNEL_GEN(kSeqPool) ...@@ -32,3 +32,4 @@ USE_JITKERNEL_GEN(kSeqPool)
USE_JITKERNEL_GEN(kHMax) USE_JITKERNEL_GEN(kHMax)
USE_JITKERNEL_GEN(kHSum) USE_JITKERNEL_GEN(kHSum)
USE_JITKERNEL_GEN(kEmbSeqPool) USE_JITKERNEL_GEN(kEmbSeqPool)
USE_JITKERNEL_GEN(kSgd)
...@@ -31,7 +31,8 @@ namespace gen { ...@@ -31,7 +31,8 @@ namespace gen {
// Application Binary Interface // Application Binary Interface
constexpr Xbyak::Operand::Code abi_param1(Xbyak::Operand::RDI), constexpr Xbyak::Operand::Code abi_param1(Xbyak::Operand::RDI),
abi_param2(Xbyak::Operand::RSI), abi_param3(Xbyak::Operand::RDX), abi_param2(Xbyak::Operand::RSI), abi_param3(Xbyak::Operand::RDX),
abi_param4(Xbyak::Operand::RCX); abi_param4(Xbyak::Operand::RCX), abi_param5(Xbyak::Operand::R8),
abi_param6(Xbyak::Operand::R9);
constexpr Xbyak::Operand::Code g_abi_regs[] = { constexpr Xbyak::Operand::Code g_abi_regs[] = {
Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::R12, Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::R12,
......
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#include "paddle/fluid/operators/jit/gen/sgd.h"
#include <stddef.h> // offsetof
#include <vector>
#include "paddle/fluid/operators/jit/registry.h"
#include "paddle/fluid/platform/cpu_info.h"
namespace paddle {
namespace operators {
namespace jit {
namespace gen {
void SgdJitCode::genCode() {
preCode();
constexpr int block = YMM_FLOAT_BLOCK;
constexpr int max_num_regs = 7;
const int num_block = w_ / block;
const int num_groups = num_block / max_num_regs;
const size_t block_size = sizeof(float) * block;
const size_t width_size = w_ * sizeof(float);
std::vector<int> groups(num_groups, max_num_regs);
int rest_num_regs = num_block % max_num_regs;
if (rest_num_regs > 0) {
groups.push_back(rest_num_regs);
}
vbroadcastss(ymm_lr, ptr[param_lr]);
// protect rdx
mov(reg_ptr_grad_i, param_grad);
mov(reg_ptr_rows_i, param_rows);
mov(reg_rows_size_in_byte,
qword[param_attr + offsetof(sgd_attr_t, selected_rows_size)]);
mov(rax, sizeof(int64_t));
mul(reg_rows_size_in_byte);
mov(reg_rows_size_in_byte, rax);
add(reg_rows_size_in_byte, reg_ptr_rows_i);
Label l_next_row;
L(l_next_row);
{
mov(reg_row, qword[reg_ptr_rows_i]);
mov(rax, width_size);
mul(reg_row);
mov(reg_row, rax);
mov(reg_ptr_param_i, param_param);
mov(reg_ptr_out_i, param_out);
add(reg_ptr_param_i, reg_row);
add(reg_ptr_out_i, reg_row);
size_t w_offset = 0;
for (int num_regs : groups) {
// load grad
size_t inner_offset = w_offset;
for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
vmovups(ymm_t(reg_i), ptr[reg_ptr_grad_i + inner_offset]);
inner_offset += block_size;
}
// load param
inner_offset = w_offset;
for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
vmovups(ymm_t(reg_i + num_regs), ptr[reg_ptr_param_i + inner_offset]);
inner_offset += block_size;
}
// compute out = param - lr * grad
for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
vmulps(ymm_t(reg_i), ymm_t(reg_i), ymm_lr);
vsubps(ymm_t(reg_i + num_regs), ymm_t(reg_i + num_regs), ymm_t(reg_i));
}
// save out
inner_offset = w_offset;
for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
vmovups(ptr[reg_ptr_out_i + inner_offset], ymm_t(reg_i + num_regs));
inner_offset += block_size;
}
w_offset += (block_size * num_regs);
}
add(reg_ptr_grad_i, width_size);
add(reg_ptr_rows_i, sizeof(int64_t));
cmp(reg_ptr_rows_i, reg_rows_size_in_byte);
jl(l_next_row, T_NEAR);
}
postCode();
}
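In C terms, the emitted machine code performs roughly the following (a sketch only; the real code keeps everything in YMM registers and byte offsets):

    // for (int64_t i = 0; i < attr.selected_rows_size; ++i) {
    //   const float* g = grad + i * w_;         // reg_ptr_grad_i
    //   const float* p = param + rows[i] * w_;  // reg_ptr_param_i
    //   float* o = out + rows[i] * w_;          // reg_ptr_out_i
    //   for (int j = 0; j < w_; j += 8)         // one YMM = 8 floats
    //     o[j..j+7] = p[j..j+7] - lr * g[j..j+7];
    // }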
class SgdCreator : public JitCodeCreator<sgd_attr_t> {
public:
bool UseMe(const sgd_attr_t& attr) const override {
return platform::MayIUse(platform::avx) &&
attr.grad_width % YMM_FLOAT_BLOCK == 0;
}
size_t CodeSize(const sgd_attr_t& attr) const override {
return 96 + (attr.grad_width / YMM_FLOAT_BLOCK) * 32 * 8;
}
std::unique_ptr<GenBase> CreateJitCode(
const sgd_attr_t& attr) const override {
PADDLE_ENFORCE_EQ(attr.param_width, attr.grad_width);
PADDLE_ENFORCE_LE(attr.selected_rows_size, attr.grad_height);
PADDLE_ENFORCE_GE(attr.selected_rows_size, 0);
return make_unique<SgdJitCode>(attr, CodeSize(attr));
}
};
} // namespace gen
} // namespace jit
} // namespace operators
} // namespace paddle
namespace gen = paddle::operators::jit::gen;
REGISTER_JITKERNEL_GEN(kSgd, gen::SgdCreator);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#pragma once
#include <string>
#include "glog/logging.h"
#include "paddle/fluid/operators/jit/gen/jitcode.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace operators {
namespace jit {
namespace gen {
class SgdJitCode : public JitCode {
public:
explicit SgdJitCode(const sgd_attr_t& attr, size_t code_size = 256 * 1024,
void* code_ptr = nullptr)
: JitCode(code_size, code_ptr), w_(attr.grad_width) {
this->genCode();
}
DECLARE_JIT_CODE(SgdJitCode);
void genCode() override;
private:
int w_;
reg64_t param_lr{abi_param1};
reg64_t param_param{abi_param2};
reg64_t param_grad{abi_param3};
reg64_t param_rows{abi_param4};
reg64_t param_out{abi_param5};
reg64_t param_attr{abi_param6};
ymm_t ymm_lr = ymm_t(15);
reg64_t reg_ptr_grad_i{r10};
reg64_t reg_ptr_rows_i{r11};
reg64_t reg_rows_size_in_byte{r12};
reg64_t reg_row{r13};
reg64_t reg_ptr_param_i{r14};
reg64_t reg_ptr_out_i{r15};
};
} // namespace gen
} // namespace jit
} // namespace operators
} // namespace paddle
...@@ -55,6 +55,7 @@ const char* to_string(KernelType kt) { ...@@ -55,6 +55,7 @@ const char* to_string(KernelType kt) {
ONE_CASE(kHSum); ONE_CASE(kHSum);
ONE_CASE(kSoftmax); ONE_CASE(kSoftmax);
ONE_CASE(kEmbSeqPool); ONE_CASE(kEmbSeqPool);
ONE_CASE(kSgd);
default: default:
PADDLE_THROW("Not support type: %d, or forget to add it.", kt); PADDLE_THROW("Not support type: %d, or forget to add it.", kt);
return "NOT JITKernel"; return "NOT JITKernel";
......
...@@ -181,6 +181,14 @@ inline std::ostream& operator<<(std::ostream& os, ...@@ -181,6 +181,14 @@ inline std::ostream& operator<<(std::ostream& os,
return os; return os;
} }
inline std::ostream& operator<<(std::ostream& os, const sgd_attr_t& attr) {
os << "param_height[" << attr.param_height << "],param_width["
<< attr.param_width << "],grad_height[" << attr.grad_height
<< "],grad_width[" << attr.grad_width << "],selected_rows_size["
<< attr.selected_rows_size << "]";
return os;
}
inline std::ostream& operator<<(std::ostream& os, const matmul_attr_t& attr) { inline std::ostream& operator<<(std::ostream& os, const matmul_attr_t& attr) {
os << "M[" << attr.m << "],N[" << attr.n << "],K[" << attr.k << "]"; os << "M[" << attr.m << "],N[" << attr.n << "],K[" << attr.k << "]";
return os; return os;
......
...@@ -46,6 +46,7 @@ typedef enum { ...@@ -46,6 +46,7 @@ typedef enum {
kVMul, kVMul,
kVRelu, kVRelu,
kVScal, kVScal,
kSgd,
kVSigmoid, kVSigmoid,
kVSquare, kVSquare,
kVSub, kVSub,
...@@ -173,6 +174,28 @@ struct EmbSeqPoolTuples { ...@@ -173,6 +174,28 @@ struct EmbSeqPoolTuples {
const emb_seq_pool_attr_t*); const emb_seq_pool_attr_t*);
}; };
typedef struct sgd_attr_s {
int64_t param_height, param_width;
int64_t grad_height, grad_width;
int64_t selected_rows_size;
sgd_attr_s() = default;
explicit sgd_attr_s(int64_t param_h, int64_t param_w, int64_t grad_h,
int64_t grad_w, int64_t selected_rows_sz)
: param_height(param_h),
param_width(param_w),
grad_height(grad_h),
grad_width(grad_w),
selected_rows_size(selected_rows_sz) {}
} sgd_attr_t;
template <typename T>
struct SgdTuples {
typedef T data_type;
typedef sgd_attr_t attr_type;
typedef void (*func_type)(const T*, const T*, const T*, const int64_t*, T*,
const sgd_attr_t*);
};
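A minimal call-site sketch for this signature, following the jit::Get pattern used elsewhere in this directory (shapes and names hypothetical):

    // jit::sgd_attr_t attr(param_h, w, rows_size, w, rows_size);
    // auto sgd = jit::Get<jit::kSgd, jit::SgdTuples<float>,
    //                     platform::CPUPlace>(attr);
    // float lr = 0.1f;
    // sgd(&lr, param_data, grad_data, rows_data, out_data, &attr);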
typedef struct matmul_attr_s { typedef struct matmul_attr_s {
int m, n, k; int m, n, k;
void* packed_weight{nullptr}; void* packed_weight{nullptr};
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
* limitations under the License. */ * limitations under the License. */
#include "paddle/fluid/operators/jit/kernel_key.h" #include "paddle/fluid/operators/jit/kernel_key.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -23,14 +24,30 @@ size_t JitCodeKey<int>(const int& d) { ...@@ -23,14 +24,30 @@ size_t JitCodeKey<int>(const int& d) {
return d; return d;
} }
// TODO(TJ): refine and benchmark JitCodeKey generation
constexpr int act_type_shift = 3; // support 2^3 act types constexpr int act_type_shift = 3; // support 2^3 act types
static inline int act_type_convert(KernelType type) {
if (type == kVIdentity) {
return 0;
} else if (type == kVExp) {
return 1;
} else if (type == kVRelu) {
return 2;
} else if (type == kVSigmoid) {
return 3;
} else if (type == kVTanh) {
return 4;
}
PADDLE_THROW("Unsupported act type %d", type);
return 0;
}
template <> template <>
size_t JitCodeKey<lstm_attr_t>(const lstm_attr_t& attr) { size_t JitCodeKey<lstm_attr_t>(const lstm_attr_t& attr) {
size_t key = attr.d; size_t key = attr.d;
int gate_key = static_cast<int>(attr.act_gate) << 1; int gate_key = act_type_convert(attr.act_gate) << 1;
int cand_key = static_cast<int>(attr.act_cand) << (1 + act_type_shift); int cand_key = act_type_convert(attr.act_cand) << (1 + act_type_shift);
int cell_key = static_cast<int>(attr.act_cell) << (1 + act_type_shift * 2); int cell_key = act_type_convert(attr.act_cell) << (1 + act_type_shift * 2);
return (key << (1 + act_type_shift * 3)) + gate_key + cand_key + cell_key + return (key << (1 + act_type_shift * 3)) + gate_key + cand_key + cell_key +
attr.use_peephole; attr.use_peephole;
} }
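The resulting key packs the LSTM attributes into disjoint bit fields (with act_type_shift = 3):

    // bit  0     : use_peephole
    // bits 1..3  : act_gate  (act_type_convert value)
    // bits 4..6  : act_cand
    // bits 7..9  : act_cell
    // bits 10... : d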
...@@ -38,8 +55,8 @@ size_t JitCodeKey<lstm_attr_t>(const lstm_attr_t& attr) { ...@@ -38,8 +55,8 @@ size_t JitCodeKey<lstm_attr_t>(const lstm_attr_t& attr) {
template <> template <>
size_t JitCodeKey<gru_attr_t>(const gru_attr_t& attr) { size_t JitCodeKey<gru_attr_t>(const gru_attr_t& attr) {
size_t key = attr.d; size_t key = attr.d;
return (key << (act_type_shift * 2)) + static_cast<int>(attr.act_gate) + return (key << (act_type_shift * 2)) + act_type_convert(attr.act_gate) +
(static_cast<int>(attr.act_cand) << act_type_shift); (act_type_convert(attr.act_cand) << act_type_shift);
} }
template <> template <>
...@@ -61,6 +78,11 @@ size_t JitCodeKey<emb_seq_pool_attr_t>(const emb_seq_pool_attr_t& attr) { ...@@ -61,6 +78,11 @@ size_t JitCodeKey<emb_seq_pool_attr_t>(const emb_seq_pool_attr_t& attr) {
return attr.table_width; return attr.table_width;
} }
template <>
size_t JitCodeKey<sgd_attr_t>(const sgd_attr_t& attr) {
return attr.grad_width;
}
} // namespace jit } // namespace jit
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -14,3 +14,4 @@ USE_JITKERNEL_MORE(kVTanh, mkl) ...@@ -14,3 +14,4 @@ USE_JITKERNEL_MORE(kVTanh, mkl)
USE_JITKERNEL_MORE(kSeqPool, mkl) USE_JITKERNEL_MORE(kSeqPool, mkl)
USE_JITKERNEL_MORE(kSoftmax, mkl) USE_JITKERNEL_MORE(kSoftmax, mkl)
USE_JITKERNEL_MORE(kEmbSeqPool, mkl) USE_JITKERNEL_MORE(kEmbSeqPool, mkl)
USE_JITKERNEL_MORE(kSgd, mkl)
...@@ -184,6 +184,16 @@ bool EmbSeqPoolKernel<double>::UseMe(const emb_seq_pool_attr_t& attr) const { ...@@ -184,6 +184,16 @@ bool EmbSeqPoolKernel<double>::UseMe(const emb_seq_pool_attr_t& attr) const {
return true; return true;
} }
template <>
bool SgdKernel<float>::UseMe(const sgd_attr_t& attr) const {
return true;
}
template <>
bool SgdKernel<double>::UseMe(const sgd_attr_t& attr) const {
return true;
}
template <> template <>
bool MatMulKernel<float>::UseMe(const matmul_attr_t& attr) const { bool MatMulKernel<float>::UseMe(const matmul_attr_t& attr) const {
return platform::MayIUse(platform::avx); return platform::MayIUse(platform::avx);
...@@ -239,5 +249,6 @@ REGISTER_MKL_KERNEL(kVTanh, VTanh); ...@@ -239,5 +249,6 @@ REGISTER_MKL_KERNEL(kVTanh, VTanh);
REGISTER_MKL_KERNEL(kSeqPool, SeqPool); REGISTER_MKL_KERNEL(kSeqPool, SeqPool);
REGISTER_MKL_KERNEL(kEmbSeqPool, EmbSeqPool); REGISTER_MKL_KERNEL(kEmbSeqPool, EmbSeqPool);
REGISTER_MKL_KERNEL(kSoftmax, Softmax); REGISTER_MKL_KERNEL(kSoftmax, Softmax);
REGISTER_MKL_KERNEL(kSgd, Sgd);
#undef REGISTER_MKL_KERNEL #undef REGISTER_MKL_KERNEL
...@@ -142,6 +142,32 @@ void Softmax(const T* x, T* y, int n, int bs) { ...@@ -142,6 +142,32 @@ void Softmax(const T* x, T* y, int n, int bs) {
} }
} }
template <typename T>
void Sgd(const T* lr, const T* param, const T* grad, const int64_t* rows,
T* out, const sgd_attr_t* attr) {
PADDLE_ENFORCE_EQ(attr->param_width, attr->grad_width);
PADDLE_ENFORCE_LE(attr->selected_rows_size, attr->grad_height);
T scalar = -lr[0];
int width = attr->grad_width;
if (out == param) {
for (int64_t i = 0; i < attr->selected_rows_size; ++i) {
auto h_idx = rows[i];
PADDLE_ENFORCE_LT(h_idx, attr->param_height);
PADDLE_ENFORCE_GE(h_idx, 0);
VAXPY(scalar, grad + i * width, out + h_idx * width, width);
}
} else {
for (int64_t i = 0; i < attr->selected_rows_size; ++i) {
auto h_idx = rows[i];
PADDLE_ENFORCE_LT(h_idx, attr->param_height);
PADDLE_ENFORCE_GE(h_idx, 0);
VScal(&scalar, grad + i * width, out + h_idx * width, width);
VAdd(param + h_idx * width, out + h_idx * width, out + h_idx * width,
width);
}
}
}
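Both branches compute the same update; the split only avoids an extra copy when the kernel runs in place:

    // in place  (out == param): out += (-lr) * grad               // one VAXPY per row
    // out of place            : out = (-lr) * grad; out += param  // VScal then VAdd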
#define DECLARE_MKL_KERNEL(name, tuples) \ #define DECLARE_MKL_KERNEL(name, tuples) \
template <typename T> \ template <typename T> \
class name##Kernel : public KernelMore<tuples<T>> { \ class name##Kernel : public KernelMore<tuples<T>> { \
...@@ -173,6 +199,8 @@ DECLARE_MKL_KERNEL(EmbSeqPool, EmbSeqPoolTuples); ...@@ -173,6 +199,8 @@ DECLARE_MKL_KERNEL(EmbSeqPool, EmbSeqPoolTuples);
DECLARE_MKL_KERNEL(Softmax, SoftmaxTuples); DECLARE_MKL_KERNEL(Softmax, SoftmaxTuples);
DECLARE_MKL_KERNEL(Sgd, SgdTuples);
#undef DECLARE_MKL_KERNEL #undef DECLARE_MKL_KERNEL
} // namespace mkl } // namespace mkl
......
...@@ -33,3 +33,4 @@ USE_JITKERNEL_REFER(kHSum) ...@@ -33,3 +33,4 @@ USE_JITKERNEL_REFER(kHSum)
USE_JITKERNEL_REFER(kHMax) USE_JITKERNEL_REFER(kHMax)
USE_JITKERNEL_REFER(kSoftmax) USE_JITKERNEL_REFER(kSoftmax)
USE_JITKERNEL_REFER(kEmbSeqPool) USE_JITKERNEL_REFER(kEmbSeqPool)
USE_JITKERNEL_REFER(kSgd)
...@@ -59,4 +59,6 @@ REGISTER_REFER_KERNEL(kSoftmax, Softmax); ...@@ -59,4 +59,6 @@ REGISTER_REFER_KERNEL(kSoftmax, Softmax);
REGISTER_REFER_KERNEL(kEmbSeqPool, EmbSeqPool); REGISTER_REFER_KERNEL(kEmbSeqPool, EmbSeqPool);
REGISTER_REFER_KERNEL(kSgd, Sgd);
#undef REGISTER_REFER_KERNEL #undef REGISTER_REFER_KERNEL
...@@ -446,6 +446,36 @@ void EmbSeqPool(const T* table, const int64_t* idx, T* out, ...@@ -446,6 +446,36 @@ void EmbSeqPool(const T* table, const int64_t* idx, T* out,
} }
} }
// SGD algorithm:
// lr is a pointer to the learning rate scalar
// param is an input matrix with shape (param_h, param_w)
// grad is an input matrix with shape (grad_h, grad_w); here grad_w == param_w
// selected_rows is a vector<int64_t> of size selected_rows_size (<= grad_h)
// out is an output matrix with shape (param_h, param_w)
//
// support both regular and sparse grad
// regular SGD: out[:] = param[:] - lr[0] * grad[:];
// sparse SGD: out[rows[i]][:] = param[rows[i]][:] - lr[0] * grad[i][:]
//
// Note: when using sparse SGD and out != param, the rows of out that are
// not selected are left unchanged and may therefore be uninitialized
template <typename T>
void Sgd(const T* lr, const T* param, const T* grad, const int64_t* rows,
T* out, const sgd_attr_t* attr) {
PADDLE_ENFORCE_EQ(attr->param_width, attr->grad_width);
PADDLE_ENFORCE_LE(attr->selected_rows_size, attr->grad_height);
for (int64_t i = 0; i < attr->selected_rows_size; ++i) {
auto h_idx = rows[i];
PADDLE_ENFORCE_LT(h_idx, attr->param_height);
PADDLE_ENFORCE_GE(h_idx, 0);
for (int64_t j = 0; j < attr->grad_width; ++j) {
out[h_idx * attr->grad_width + j] =
param[h_idx * attr->grad_width + j] -
lr[0] * grad[i * attr->grad_width + j];
}
}
}
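A worked example with hypothetical values: param is 4 x 2, selected_rows_size = 1, rows = {2}, lr[0] = 0.1:

    // param row 2 = [1.0, 2.0], grad row 0 = [0.5, -0.5]
    // out   row 2 = [1.0 - 0.1 * 0.5, 2.0 - 0.1 * (-0.5)] = [0.95, 2.05]
    // rows of out that are not selected are not written here (see the
    // note above for the out != param case)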
#define DECLARE_REFER_KERNEL(name, tuples) \ #define DECLARE_REFER_KERNEL(name, tuples) \
template <typename T> \ template <typename T> \
class name##Kernel : public ReferKernel<tuples<T>> { \ class name##Kernel : public ReferKernel<tuples<T>> { \
...@@ -496,6 +526,8 @@ DECLARE_REFER_KERNEL(Softmax, SoftmaxTuples); ...@@ -496,6 +526,8 @@ DECLARE_REFER_KERNEL(Softmax, SoftmaxTuples);
DECLARE_REFER_KERNEL(EmbSeqPool, EmbSeqPoolTuples); DECLARE_REFER_KERNEL(EmbSeqPool, EmbSeqPoolTuples);
DECLARE_REFER_KERNEL(Sgd, SgdTuples);
#undef DECLARE_REFER_KERNEL #undef DECLARE_REFER_KERNEL
} // namespace refer } // namespace refer
......
...@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <algorithm>
#include <random> #include <random>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -36,14 +37,14 @@ void RandomVec(const int n, T* a, const T lower = static_cast<T>(-20.f), ...@@ -36,14 +37,14 @@ void RandomVec(const int n, T* a, const T lower = static_cast<T>(-20.f),
} }
template <typename T> template <typename T>
void ExpectEQ(const T* target, const T* refer, int n) { void ExpectEQ(const T* target, const T* refer, size_t n) {
if (std::is_floating_point<T>::value) { if (std::is_floating_point<T>::value) {
for (int i = 0; i < n; ++i) { for (size_t i = 0; i < n; ++i) {
EXPECT_NEAR(target[i], refer[i], FLAGS_acc); EXPECT_NEAR(target[i], refer[i], FLAGS_acc) << " at index : " << i;
} }
} else { } else {
for (int i = 0; i < n; ++i) { for (size_t i = 0; i < n; ++i) {
EXPECT_EQ(target[i], refer[i]); EXPECT_EQ(target[i], refer[i]) << " at index : " << i;
} }
} }
} }
...@@ -296,6 +297,45 @@ struct TestFuncWithRefer<jit::EmbSeqPoolTuples<T>, std::vector<T>, ...@@ -296,6 +297,45 @@ struct TestFuncWithRefer<jit::EmbSeqPoolTuples<T>, std::vector<T>,
} }
}; };
template <typename T>
struct TestFuncWithRefer<jit::SgdTuples<T>, T, std::vector<T>, std::vector<T>,
std::vector<int64_t>, std::vector<T>,
typename jit::SgdTuples<T>::attr_type> {
void operator()(const typename jit::SgdTuples<T>::func_type tgt, const T lr,
const std::vector<T>& param, const std::vector<T>& grad,
const std::vector<int64_t>& rows, const std::vector<T>& oref,
const typename jit::SgdTuples<T>::attr_type& attr) {
EXPECT_TRUE(tgt != nullptr);
EXPECT_EQ(param.size(),
static_cast<size_t>(attr.param_height * attr.param_width));
EXPECT_EQ(grad.size(),
static_cast<size_t>(attr.grad_height * attr.grad_width));
EXPECT_EQ(rows.size(), static_cast<size_t>(attr.selected_rows_size));
EXPECT_EQ(param.size(), oref.size());
const T* param_data = param.data();
const T* grad_data = grad.data();
const int64_t* rows_data = rows.data();
const T* oref_data = oref.data();
std::vector<T> out(oref.size());
T* o_data = out.data();
tgt(&lr, param_data, grad_data, rows_data, o_data, &attr);
// only the selected rows should be equal
for (size_t i = 0; i < rows.size(); ++i) {
ExpectEQ<T>(o_data + rows[i] * attr.grad_width,
oref_data + rows[i] * attr.grad_width, attr.grad_width);
}
// inplace
std::copy(param.begin(), param.end(), out.begin());
tgt(&lr, o_data, grad_data, rows_data, o_data, &attr);
for (size_t i = 0; i < rows.size(); ++i) {
ExpectEQ<T>(o_data + rows[i] * attr.grad_width,
oref_data + rows[i] * attr.grad_width, attr.grad_width);
}
}
};
template <typename T> template <typename T>
struct TestFuncWithRefer<jit::MatMulTuples<T>, std::vector<T>, std::vector<T>, struct TestFuncWithRefer<jit::MatMulTuples<T>, std::vector<T>, std::vector<T>,
std::vector<T>, std::vector<T>,
...@@ -407,7 +447,7 @@ void TestAllImpls(const typename KernelTuples::attr_type& attr, Args... args) { ...@@ -407,7 +447,7 @@ void TestAllImpls(const typename KernelTuples::attr_type& attr, Args... args) {
} }
template <jit::KernelType KT, typename T, typename PlaceType> template <jit::KernelType KT, typename T, typename PlaceType>
void TestXYZNKernel() { void TestKernelXYZNTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
for (int d : TestSizes()) { for (int d : TestSizes()) {
auto ref = jit::GetRefer<KT, jit::XYZNTuples<T>>(); auto ref = jit::GetRefer<KT, jit::XYZNTuples<T>>();
...@@ -440,7 +480,7 @@ void TestXYZNKernel() { ...@@ -440,7 +480,7 @@ void TestXYZNKernel() {
} }
template <jit::KernelType KT, typename T, typename PlaceType> template <jit::KernelType KT, typename T, typename PlaceType>
void TestAXYNKernel() { void TestKernelAXYNTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
for (int d : TestSizes()) { for (int d : TestSizes()) {
auto ref = jit::GetRefer<KT, jit::AXYNTuples<T>>(); auto ref = jit::GetRefer<KT, jit::AXYNTuples<T>>();
...@@ -466,7 +506,7 @@ void TestAXYNKernel() { ...@@ -466,7 +506,7 @@ void TestAXYNKernel() {
} }
template <jit::KernelType KT, typename T, typename PlaceType> template <jit::KernelType KT, typename T, typename PlaceType>
void TestXRNKernel() { void TestKernelXRNTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
auto last_acc = FLAGS_acc; auto last_acc = FLAGS_acc;
FLAGS_acc = 1e-4; FLAGS_acc = 1e-4;
...@@ -484,7 +524,7 @@ void TestXRNKernel() { ...@@ -484,7 +524,7 @@ void TestXRNKernel() {
} }
template <jit::KernelType KT, typename T, typename PlaceType> template <jit::KernelType KT, typename T, typename PlaceType>
void TestXYNKernel() { void TestKernelXYNTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
for (int d : TestSizes()) { for (int d : TestSizes()) {
auto ref = jit::GetRefer<KT, jit::XYNTuples<T>>(); auto ref = jit::GetRefer<KT, jit::XYNTuples<T>>();
...@@ -509,10 +549,12 @@ void TestXYNKernel() { ...@@ -509,10 +549,12 @@ void TestXYNKernel() {
} }
template <jit::KernelType KT, typename T, typename PlaceType> template <jit::KernelType KT, typename T, typename PlaceType>
void TestLSTMKernel() { void TestKernelLSTMTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
std::vector<std::string> all_acts = {"sigmoid", "tanh", "relu", "identity"}; std::vector<std::string> all_acts = {"sigmoid", "tanh", "relu", "identity"};
for (int d : TestSizes()) { auto test_sizes = TestSizes();
test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
for (int d : test_sizes) {
for (bool use_peephole : {true, false}) { for (bool use_peephole : {true, false}) {
for (auto& act_gate : all_acts) { for (auto& act_gate : all_acts) {
for (auto& act_cand : all_acts) { for (auto& act_cand : all_acts) {
...@@ -559,10 +601,12 @@ void TestLSTMKernel() { ...@@ -559,10 +601,12 @@ void TestLSTMKernel() {
} }
template <jit::KernelType KT, typename T, typename PlaceType> template <jit::KernelType KT, typename T, typename PlaceType>
void TestGRUKernel() { void TestKernelGRUTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
std::vector<std::string> all_acts = {"sigmoid", "tanh", "relu", "identity"}; std::vector<std::string> all_acts = {"sigmoid", "tanh", "relu", "identity"};
for (int d : TestSizes()) { auto test_sizes = TestSizes();
test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
for (int d : test_sizes) {
for (auto& act_gate : all_acts) { for (auto& act_gate : all_acts) {
for (auto& act_cand : all_acts) { for (auto& act_cand : all_acts) {
const jit::gru_attr_t attr(d, jit::to_kerneltype(act_gate), const jit::gru_attr_t attr(d, jit::to_kerneltype(act_gate),
...@@ -593,14 +637,16 @@ void TestGRUKernel() { ...@@ -593,14 +637,16 @@ void TestGRUKernel() {
} }
template <jit::KernelType KT, typename T, typename PlaceType> template <jit::KernelType KT, typename T, typename PlaceType>
void TestSeqPoolKernel() { void TestKernelSeqPoolTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
std::vector<jit::SeqPoolType> pool_types = { std::vector<jit::SeqPoolType> pool_types = {
jit::SeqPoolType::kSum, jit::SeqPoolType::kAvg, jit::SeqPoolType::kSqrt}; jit::SeqPoolType::kSum, jit::SeqPoolType::kAvg, jit::SeqPoolType::kSqrt};
auto test_sizes = TestSizes();
test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
for (auto type : pool_types) { for (auto type : pool_types) {
for (int w : TestSizes()) { for (int w : test_sizes) {
jit::seq_pool_attr_t attr(w, type); jit::seq_pool_attr_t attr(w, type);
for (int h : TestSizes()) { for (int h : test_sizes) {
attr.h = h; attr.h = h;
auto ref = jit::GetRefer<KT, jit::SeqPoolTuples<T>>(); auto ref = jit::GetRefer<KT, jit::SeqPoolTuples<T>>();
EXPECT_TRUE(ref != nullptr); EXPECT_TRUE(ref != nullptr);
...@@ -618,11 +664,11 @@ void TestSeqPoolKernel() { ...@@ -618,11 +664,11 @@ void TestSeqPoolKernel() {
} }
template <jit::KernelType KT, typename T, typename PlaceType> template <jit::KernelType KT, typename T, typename PlaceType>
void TestMatMulKernel() { void TestKernelMatMulTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
auto last_acc = FLAGS_acc; auto last_acc = FLAGS_acc;
  // export MKL_CBWR=AVX forces MKL to use the AVX code path
  // export KMP_DETERMINISTIC_REDUCTION=yes makes MKL reductions deterministic
FLAGS_acc = 1e-3; FLAGS_acc = 1e-3;
for (int m : {1, 2, 3, 4}) { for (int m : {1, 2, 3, 4}) {
for (int n : {1, 2, 3, 4}) { for (int n : {1, 2, 3, 4}) {
...@@ -646,7 +692,7 @@ void TestMatMulKernel() { ...@@ -646,7 +692,7 @@ void TestMatMulKernel() {
} }
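The two comments above describe environment variables; if the same pinning is wanted from code, it has to happen before the first MKL call. A hedged sketch using POSIX setenv (the variable names are the ones quoted above; the function name is ours):

#include <cstdlib>

// Illustrative only: pin MKL to the AVX code path and ask for deterministic
// reductions, mirroring the exported variables mentioned above. Must run
// before any MKL routine is invoked in the process.
void PinMklDeterminism() {
  setenv("MKL_CBWR", "AVX", /*overwrite=*/1);
  setenv("KMP_DETERMINISTIC_REDUCTION", "yes", /*overwrite=*/1);
}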
template <jit::KernelType KT, typename T, typename PlaceType> template <jit::KernelType KT, typename T, typename PlaceType>
void TestSoftmaxKernel() { void TestKernelSoftmaxTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
for (int bs : {1, 2, 10}) { for (int bs : {1, 2, 10}) {
for (int n : TestSizes()) { for (int n : TestSizes()) {
...@@ -671,12 +717,14 @@ void TestSoftmaxKernel() { ...@@ -671,12 +717,14 @@ void TestSoftmaxKernel() {
} }
template <jit::KernelType KT, typename T, typename PlaceType> template <jit::KernelType KT, typename T, typename PlaceType>
void TestEmbSeqPoolKernel() { void TestKernelEmbSeqPoolTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
int64_t tbl_h = 1e4; int64_t tbl_h = 1e4;
std::vector<jit::SeqPoolType> pool_types = { std::vector<jit::SeqPoolType> pool_types = {
jit::SeqPoolType::kSum}; // only support sum yet jit::SeqPoolType::kSum}; // only support sum yet
for (int tbl_w : TestSizes()) { auto test_sizes = TestSizes();
test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
for (int tbl_w : test_sizes) {
std::vector<T> table(tbl_h * tbl_w); std::vector<T> table(tbl_h * tbl_w);
RandomVec<T>(tbl_h * tbl_w, table.data(), -2.f, 2.f); RandomVec<T>(tbl_h * tbl_w, table.data(), -2.f, 2.f);
const T* table_data = table.data(); const T* table_data = table.data();
...@@ -705,7 +753,61 @@ void TestEmbSeqPoolKernel() { ...@@ -705,7 +753,61 @@ void TestEmbSeqPoolKernel() {
} }
template <jit::KernelType KT, typename T, typename PlaceType> template <jit::KernelType KT, typename T, typename PlaceType>
void TestKernelSgdTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
const T lr = 0.1;
auto UnDuplicatedRandomVec = [](int n, const int64_t lower,
const int64_t upper) -> std::vector<int64_t> {
PADDLE_ENFORCE_LE(static_cast<size_t>(upper - lower), n - 1);
PADDLE_ENFORCE_GT(n, 0);
std::vector<int64_t> all, out;
for (int i = 0; i < n; ++i) {
all.push_back(i);
}
std::random_shuffle(all.begin(), all.end());
out.insert(out.begin(), all.begin(), all.begin() + n);
return out;
};
for (int param_h : {1, 10}) {
for (int grad_w : TestSizes()) {
std::vector<T> param(param_h * grad_w);
std::vector<T> param_out(param_h * grad_w);
RandomVec<T>(param_h * grad_w, param.data(), -2.f, 2.f);
const T* param_data = param.data();
T* out_data = param_out.data();
for (int rows_size = 1; rows_size <= param_h; ++rows_size) {
std::vector<T> grad(rows_size * grad_w);
std::vector<int64_t> rows =
UnDuplicatedRandomVec(rows_size, 0, rows_size - 1);
RandomVec<T>(rows_size * grad_w, grad.data(), -2.f, 2.f);
const int64_t* rows_data = rows.data();
const T* grad_data = grad.data();
auto ref = jit::GetRefer<KT, jit::SgdTuples<T>>();
EXPECT_TRUE(ref != nullptr);
jit::sgd_attr_t attr(param_h, grad_w, rows_size, grad_w, rows_size);
ref(&lr, param_data, grad_data, rows_data, out_data, &attr);
// inplace test
std::vector<T> inp(param.size());
std::copy(param.begin(), param.end(), inp.begin());
T* inp_data = inp.data();
ref(&lr, inp_data, grad_data, rows_data, inp_data, &attr);
// only the selected rows should be equal
for (int i = 0; i < rows_size; ++i) {
ExpectEQ<T>(inp_data + rows[i] * grad_w, out_data + rows[i] * grad_w,
grad_w);
}
TestAllImpls<KT, jit::SgdTuples<T>, PlaceType, T, std::vector<T>,
std::vector<T>, std::vector<int64_t>, std::vector<T>>(
attr, lr, param, grad, rows, param_out, attr);
}
}
}
}
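One caveat in the helper above: std::random_shuffle was deprecated in C++14 and removed in C++17. A sketch of the same index generator on the C++11 <random> facilities (the function name is ours):

#include <algorithm>
#include <cstdint>
#include <random>
#include <vector>

// Returns the integers [0, n) in random order; a drop-in sketch for the
// UnDuplicatedRandomVec lambda above without std::random_shuffle.
std::vector<int64_t> ShuffledIndices(int n) {
  std::vector<int64_t> all(n);
  for (int i = 0; i < n; ++i) all[i] = i;
  std::mt19937 rng(std::random_device{}());  // non-deterministic seed
  std::shuffle(all.begin(), all.end(), rng);
  return all;
}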
template <jit::KernelType KT, typename T, typename PlaceType>
void TestKernelNCHW16CMulNCTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
const int n = 3, c = 16 * 4, h = 10, w = 10; const int n = 3, c = 16 * 4, h = 10, w = 10;
auto ref = jit::GetRefer<KT, jit::NCHW16CMulNCTuples<T>>(); auto ref = jit::GetRefer<KT, jit::NCHW16CMulNCTuples<T>>();
...@@ -758,7 +860,7 @@ void TestNCHW16CMulNCKernel() { ...@@ -758,7 +860,7 @@ void TestNCHW16CMulNCKernel() {
} }
template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType> template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType>
void TestLayerNormKernel() { void TestKernelLayerNormTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
const T epsilon = 9.99999975e-06; const T epsilon = 9.99999975e-06;
for (int n : {1, 2, 10}) { for (int n : {1, 2, 10}) {
...@@ -797,11 +899,13 @@ void TestLayerNormKernel() { ...@@ -797,11 +899,13 @@ void TestLayerNormKernel() {
} }
template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType> template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType>
void TestCRFDecodingKernel() { void TestKernelCRFDecodingTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT); VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
constexpr int state_trans_base_idx = 2; constexpr int state_trans_base_idx = 2;
auto test_sizes = TestSizes();
test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
for (int seq_len : {1, 11, 17, 50}) { for (int seq_len : {1, 11, 17, 50}) {
for (int tag_num : TestSizes()) { for (int tag_num : test_sizes) {
auto ref = jit::GetRefer<KT, jit::CRFDecodingTuples<T>>(); auto ref = jit::GetRefer<KT, jit::CRFDecodingTuples<T>>();
EXPECT_TRUE(ref != nullptr); EXPECT_TRUE(ref != nullptr);
int x_sz = seq_len * tag_num; int x_sz = seq_len * tag_num;
...@@ -822,143 +926,76 @@ void TestCRFDecodingKernel() { ...@@ -822,143 +926,76 @@ void TestCRFDecodingKernel() {
} }
} }
#define TEST_CPU_KERNEL(test_tuple, kernel_type)                          \
  TEST(JITKernel, kernel_type) {                                          \
    TestKernel##test_tuple<jit::kernel_type, float, CPUPlace>();          \
    TestKernel##test_tuple<jit::kernel_type, double, CPUPlace>();         \
  }

TEST_CPU_KERNEL(XYZNTuples, kVMul);
TEST_CPU_KERNEL(XYZNTuples, kVAdd);
TEST_CPU_KERNEL(XYZNTuples, kVAddRelu);
TEST_CPU_KERNEL(XYZNTuples, kVSub);

TEST_CPU_KERNEL(AXYNTuples, kVScal);
TEST_CPU_KERNEL(AXYNTuples, kVAddBias);

TEST_CPU_KERNEL(XRNTuples, kHMax);
TEST_CPU_KERNEL(XRNTuples, kHSum);

TEST_CPU_KERNEL(XYNTuples, kVRelu);
TEST_CPU_KERNEL(XYNTuples, kVIdentity);
TEST_CPU_KERNEL(XYNTuples, kVSquare);
TEST_CPU_KERNEL(XYNTuples, kVExp);
TEST_CPU_KERNEL(XYNTuples, kVSigmoid);
TEST_CPU_KERNEL(XYNTuples, kVTanh);

TEST_CPU_KERNEL(LSTMTuples, kLSTMCtHt);
TEST_CPU_KERNEL(LSTMTuples, kLSTMC1H1);

TEST_CPU_KERNEL(GRUTuples, kGRUH1);
TEST_CPU_KERNEL(GRUTuples, kGRUHtPart1);
TEST_CPU_KERNEL(GRUTuples, kGRUHtPart2);

TEST_CPU_KERNEL(NCHW16CMulNCTuples, kNCHW16CMulNC);

TEST_CPU_KERNEL(SeqPoolTuples, kSeqPool);
TEST_CPU_KERNEL(MatMulTuples, kMatMul);
TEST_CPU_KERNEL(SoftmaxTuples, kSoftmax);
TEST_CPU_KERNEL(EmbSeqPoolTuples, kEmbSeqPool);
TEST_CPU_KERNEL(SgdTuples, kSgd);
TEST_CPU_KERNEL(LayerNormTuples, kLayerNorm);
TEST_CPU_KERNEL(CRFDecodingTuples, kCRFDecoding);

TEST(JITKernel_key, lstm) {
  jit::lstm_attr_t attr1(8, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh);
  jit::lstm_attr_t attr2(9, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh);
  jit::lstm_attr_t attr3(9, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh);
  jit::lstm_attr_t attr4(9, jit::kVRelu, jit::kVSigmoid, jit::kVTanh);

  auto key1 = jit::JitCodeKey<jit::lstm_attr_t>(attr1);
  auto key2 = jit::JitCodeKey<jit::lstm_attr_t>(attr2);
  auto key3 = jit::JitCodeKey<jit::lstm_attr_t>(attr3);
  auto key4 = jit::JitCodeKey<jit::lstm_attr_t>(attr4);

  EXPECT_TRUE(key1 != key2);
  EXPECT_TRUE(key2 == key3);
  EXPECT_TRUE(key3 != key4);
}

TEST(JITKernel_key, gru) {
  jit::gru_attr_t attr1(8, jit::kVSigmoid, jit::kVTanh);
  jit::gru_attr_t attr2(9, jit::kVSigmoid, jit::kVTanh);
  jit::gru_attr_t attr3(9, jit::kVSigmoid, jit::kVTanh);
  jit::gru_attr_t attr4(9, jit::kVSigmoid, jit::kVIdentity);

  auto key1 = jit::JitCodeKey<jit::gru_attr_t>(attr1);
  auto key2 = jit::JitCodeKey<jit::gru_attr_t>(attr2);
  auto key3 = jit::JitCodeKey<jit::gru_attr_t>(attr3);
  auto key4 = jit::JitCodeKey<jit::gru_attr_t>(attr4);

  EXPECT_TRUE(key1 != key2);
  EXPECT_TRUE(key2 == key3);
  EXPECT_TRUE(key3 != key4);
}
// TODO(TJ): add more tests for key and pool
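The two key tests only pin down an equivalence contract: attributes that select the same generated code must hash to the same key, and any field that changes the code must change the key. A minimal sketch of a key with that property, assuming the usual pack-the-fields encoding (this is a hypothetical stand-in, not Paddle's actual JitCodeKey):

#include <cstdint>

// Hypothetical stand-in for JitCodeKey<lstm_attr_t>: packs every field that
// selects a distinct code path into one integer, so attr2 == attr3 above
// collide while attr1 and attr4 do not.
struct LstmAttrSketch {
  int d;                             // hidden size
  int act_gate, act_cand, act_cell;  // activation kind enums
  bool use_peephole;
};

int64_t JitCodeKeySketch(const LstmAttrSketch& attr) {
  int64_t key = attr.d;
  key = (key << 4) | attr.act_gate;
  key = (key << 4) | attr.act_cand;
  key = (key << 4) | attr.act_cell;
  key = (key << 1) | static_cast<int64_t>(attr.use_peephole);
  return key;
}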
...@@ -119,6 +119,18 @@ __device__ __forceinline__ int SelectTopBeam( ...@@ -119,6 +119,18 @@ __device__ __forceinline__ int SelectTopBeam(
__syncthreads(); __syncthreads();
} }
if ((num_used_threads & 0x1) != 0) {
    // If num_used_threads is an odd number, merge the local top_beam of
    // thread 0 and thread num_used_threads - 1
if (tid_of_seq == 0) {
int index_in_sh = (num_used_threads - 1 + tid) * beam_size;
for (int i = 0; i < beam_size; i++) {
Insert(top_beam_local, top_beam[index_in_sh], beam_size);
index_in_sh++;
}
}
}
num_used_threads = num_used_threads >> 1; num_used_threads = num_used_threads >> 1;
if (tid_of_seq < num_used_threads) { if (tid_of_seq < num_used_threads) {
int index_in_sh = (num_used_threads + tid) * beam_size; int index_in_sh = (num_used_threads + tid) * beam_size;
......
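The added branch covers the corner of the halving reduction where an odd leftover worker would otherwise be dropped. The control flow is easier to see in a scalar host-side analogue; plain summation stands in for the device-side beam Insert (a sketch, not the kernel):

#include <vector>

// Host-side analogue of the loop above: before each halving step, an odd
// leftover partial is folded into slot 0 so no partial result is lost.
void HalvingReduceSketch(std::vector<int>* partials) {
  int active = static_cast<int>(partials->size());
  while (active > 1) {
    if ((active & 1) != 0) (*partials)[0] += (*partials)[active - 1];
    active >>= 1;
    for (int i = 0; i < active; ++i) (*partials)[i] += (*partials)[active + i];
  }
}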
...@@ -184,6 +184,9 @@ class Blas { ...@@ -184,6 +184,9 @@ class Blas {
template <typename T> template <typename T>
void VINV(int n, const T* a, T* y) const; void VINV(int n, const T* a, T* y) const;
template <typename T>
void VMERF(int n, const T* a, T* y, int64_t mode) const;
private: private:
const DeviceContext& context_; const DeviceContext& context_;
}; };
...@@ -290,6 +293,11 @@ class BlasT : private Blas<DeviceContext> { ...@@ -290,6 +293,11 @@ class BlasT : private Blas<DeviceContext> {
Base()->template VINV<T>(args...); Base()->template VINV<T>(args...);
} }
template <typename... ARGS>
void VMERF(ARGS... args) const {
Base()->template VMERF<T>(args...);
}
private: private:
const Blas<DeviceContext>* Base() const { const Blas<DeviceContext>* Base() const {
return static_cast<const Blas<DeviceContext>*>(this); return static_cast<const Blas<DeviceContext>*>(this);
......
...@@ -123,6 +123,11 @@ struct CBlas<float> { ...@@ -123,6 +123,11 @@ struct CBlas<float> {
static void VINV(ARGS... args) { static void VINV(ARGS... args) {
platform::dynload::vsInv(args...); platform::dynload::vsInv(args...);
} }
template <typename... ARGS>
static void VMERF(ARGS... args) {
platform::dynload::vmsErf(args...);
}
}; };
template <> template <>
...@@ -223,6 +228,11 @@ struct CBlas<double> { ...@@ -223,6 +228,11 @@ struct CBlas<double> {
static void VINV(ARGS... args) { static void VINV(ARGS... args) {
platform::dynload::vdInv(args...); platform::dynload::vdInv(args...);
} }
template <typename... ARGS>
static void VMERF(ARGS... args) {
platform::dynload::vmdErf(args...);
}
}; };
#else #else
...@@ -625,6 +635,19 @@ void Blas<DeviceContext>::VINV(int n, const T *a, T *y) const { ...@@ -625,6 +635,19 @@ void Blas<DeviceContext>::VINV(int n, const T *a, T *y) const {
#endif #endif
} }
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::VMERF(int n, const T *a, T *y,
int64_t mode) const {
#ifdef PADDLE_WITH_MKLML
CBlas<T>::VMERF(n, a, y, mode);
#else
for (int i = 0; i < n; ++i) {
y[i] = std::erf(a[i]);
}
#endif
}
} // namespace math } // namespace math
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
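A usage sketch for the new entry point: through the BlasT facade the call is one line, and the mode flag only matters on the MKLML path (real callers would pass an MKL VML accuracy constant such as VML_LA; the scalar fallback above ignores it). Function and variable names here are illustrative:

// Illustrative: y[i] = erf(x[i]) via the new VMERF hook. 'mode' is forwarded
// to MKL's vm?Erf when PADDLE_WITH_MKLML is set and ignored by the std::erf
// fallback; 0 is a placeholder for a VML_* constant.
template <typename DeviceContext>
void ErfSketch(const DeviceContext& dev_ctx, int n, const float* x, float* y) {
  auto blas = paddle::operators::math::GetBlas<DeviceContext, float>(dev_ctx);
  const int64_t mode = 0;
  blas.VMERF(n, x, y, mode);
}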
...@@ -96,8 +96,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx, ...@@ -96,8 +96,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
std::vector<int> src_tz = framework::vectorize2int(x->dims()); std::vector<int> src_tz = framework::vectorize2int(x->dims());
auto src_format = auto src_format = x->format();
src_tz.size() == 2 ? mkldnn::memory::format::nc : x->format();
const std::string key = gethash(src_tz, algorithm); const std::string key = gethash(src_tz, algorithm);
const std::string key_src_data = const std::string key_src_data =
...@@ -127,10 +126,8 @@ void eltwise_forward(const framework::ExecutionContext &ctx, ...@@ -127,10 +126,8 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
if (p_fwd == nullptr) { if (p_fwd == nullptr) {
// create mkldnn memory for input X // create mkldnn memory for input X
auto src_md = platform::MKLDNNMemDesc(
src_tz, platform::MKLDNNGetDataType<T>(), src_format);
auto src_memory = std::shared_ptr<memory>( auto src_memory = std::shared_ptr<memory>(
new memory({src_md, mkldnn_engine}, to_void_cast(x_data))); new memory(x->get_mkldnn_prim_desc(), to_void_cast(x_data)));
// save src_memory to be referred in backward path // save src_memory to be referred in backward path
dev_ctx.SetBlob(key_src_mem, src_memory); dev_ctx.SetBlob(key_src_mem, src_memory);
...@@ -177,8 +174,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx, ...@@ -177,8 +174,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
pipeline.push_back(*p_fwd); pipeline.push_back(*p_fwd);
stream(stream::kind::eager).submit(pipeline).wait(); stream(stream::kind::eager).submit(pipeline).wait();
y->set_layout(DataLayout::kMKLDNN); y->set_mkldnn_prim_desc(dst_memory->get_primitive_desc());
y->set_format(GetMKLDNNFormat(*dst_memory));
} }
template <typename T> template <typename T>
...@@ -196,9 +192,6 @@ void eltwise_grad(const framework::ExecutionContext &ctx, ...@@ -196,9 +192,6 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
std::vector<int> diff_dst_tz = framework::vectorize2int(diff_y->dims()); std::vector<int> diff_dst_tz = framework::vectorize2int(diff_y->dims());
auto diff_y_format =
diff_dst_tz.size() == 2 ? mkldnn::memory::format::nc : diff_y->format();
const std::string key = gethash(diff_dst_tz, algorithm); const std::string key = gethash(diff_dst_tz, algorithm);
const std::string key_src_data = const std::string key_src_data =
key + ctx.op().Input("Out") + "@eltwise_fwd_src_data"; key + ctx.op().Input("Out") + "@eltwise_fwd_src_data";
...@@ -210,8 +203,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx, ...@@ -210,8 +203,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
key + std::to_string(*p_src_layout) + "@eltwise_fwd_src_mem"; key + std::to_string(*p_src_layout) + "@eltwise_fwd_src_mem";
const std::string key_fwd_pd = const std::string key_fwd_pd =
key + std::to_string(*p_src_layout) + "@eltwise_fwd_pd"; key + std::to_string(*p_src_layout) + "@eltwise_fwd_pd";
const std::string key_with_layouts = const std::string key_with_layouts = key + std::to_string(*p_src_layout) +
key + std::to_string(*p_src_layout) + "-" + std::to_string(diff_y_format); "-" + std::to_string(diff_y->format());
const std::string key_diff_src_mem = const std::string key_diff_src_mem =
key_with_layouts + "@eltwise_diff_src_mem"; key_with_layouts + "@eltwise_diff_src_mem";
const std::string key_diff_dst_mem = const std::string key_diff_dst_mem =
...@@ -234,10 +227,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx, ...@@ -234,10 +227,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
if (p_grad == nullptr) { if (p_grad == nullptr) {
// create mkldnn memory for input diff_y // create mkldnn memory for input diff_y
auto diff_dst_md = platform::MKLDNNMemDesc(
diff_dst_tz, platform::MKLDNNGetDataType<T>(), diff_y_format);
auto diff_dst_memory = std::shared_ptr<memory>( auto diff_dst_memory = std::shared_ptr<memory>(
new memory({diff_dst_md, mkldnn_engine}, to_void_cast(diff_y_data))); new memory(diff_y->get_mkldnn_prim_desc(), to_void_cast(diff_y_data)));
dev_ctx.SetBlob(key_diff_dst_mem, diff_dst_memory); dev_ctx.SetBlob(key_diff_dst_mem, diff_dst_memory);
// retrieve eltwise primitive desc from device context // retrieve eltwise primitive desc from device context
...@@ -281,8 +272,7 @@ void eltwise_grad(const framework::ExecutionContext &ctx, ...@@ -281,8 +272,7 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
pipeline.push_back(*p_grad); pipeline.push_back(*p_grad);
stream(stream::kind::eager).submit(pipeline).wait(); stream(stream::kind::eager).submit(pipeline).wait();
diff_x->set_layout(DataLayout::kMKLDNN); diff_x->set_mkldnn_prim_desc(diff_src_memory->get_primitive_desc());
diff_x->set_format(GetMKLDNNFormat(*diff_src_memory));
} }
template <typename T, mkldnn::algorithm algorithm> template <typename T, mkldnn::algorithm algorithm>
......
...@@ -206,17 +206,14 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -206,17 +206,14 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
if (fuse_with_relu) flags |= mkldnn::fuse_bn_relu; if (fuse_with_relu) flags |= mkldnn::fuse_bn_relu;
// create mkldnn memory from input x tensor // create mkldnn memory from input x tensor
mkldnn::memory::format input_format =
platform::MKLDNNFormatForSize(src_tz.size(), x->format());
// keys for backward pass // keys for backward pass
const std::string key = BatchNormMKLDNNHandler::GetHash( const std::string key = BatchNormMKLDNNHandler::GetHash(
src_tz, epsilon, flags, global_stats, input_format, src_tz, epsilon, flags, global_stats, x->format(),
ctx.op().Output("SavedMean")); ctx.op().Output("SavedMean"));
const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd"; const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd";
auto user_src_md = platform::MKLDNNMemDesc( auto user_src_md = x->get_mkldnn_prim_desc().desc();
{src_tz}, platform::MKLDNNGetDataType<T>(), input_format);
// create primitive descriptor for batch norm forward // create primitive descriptor for batch norm forward
using bn_fwd_types = bn_type_traits<mkldnn::batch_normalization_forward>; using bn_fwd_types = bn_type_traits<mkldnn::batch_normalization_forward>;
...@@ -230,8 +227,8 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -230,8 +227,8 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
BatchNormMKLDNNHandler handler(batch_norm_fwd_pd, dev_ctx, mkldnn_engine, BatchNormMKLDNNHandler handler(batch_norm_fwd_pd, dev_ctx, mkldnn_engine,
key); key);
auto src_memory = auto src_memory = handler.AcquireSrcMemory(x->get_mkldnn_prim_desc(),
handler.AcquireSrcMemory(user_src_md, to_void_cast(x_data)); to_void_cast(x_data));
// create mkldnn memory for weights (scale/shift) // create mkldnn memory for weights (scale/shift)
auto scaleshift_memory = auto scaleshift_memory =
...@@ -265,8 +262,7 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -265,8 +262,7 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
variance_memory, false); variance_memory, false);
} }
y->set_layout(DataLayout::kMKLDNN); y->set_mkldnn_prim_desc(dst_memory->get_primitive_desc());
y->set_format(platform::GetMKLDNNFormat(*dst_memory));
std::vector<mkldnn::primitive> pipeline; std::vector<mkldnn::primitive> pipeline;
pipeline.push_back(*batch_norm_p); pipeline.push_back(*batch_norm_p);
...@@ -336,9 +332,6 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -336,9 +332,6 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
using bn_bwd_types = bn_type_traits<mkldnn::batch_normalization_backward>; using bn_bwd_types = bn_type_traits<mkldnn::batch_normalization_backward>;
mkldnn::memory::format dst_format =
platform::MKLDNNFormatForSize(src_tz.size(), diff_y->format());
mkldnn::memory::format input_format = mkldnn::memory::format input_format =
platform::MKLDNNFormatForSize(src_tz.size(), x->format()); platform::MKLDNNFormatForSize(src_tz.size(), x->format());
...@@ -346,14 +339,14 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -346,14 +339,14 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
// keys from forward pass // keys from forward pass
const std::string key = BatchNormMKLDNNHandler::GetHash( const std::string key = BatchNormMKLDNNHandler::GetHash(
src_tz, epsilon, flags, false, input_format, src_tz, epsilon, flags, false, x->format(),
ctx.op().Input("SavedMean")); ctx.op().Input("SavedMean"));
const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd"; const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd";
// keys for primitives reuse // keys for primitives reuse
const std::string key_with_hash = const std::string key_with_hash =
key + BatchNormMKLDNNHandler::GetHash(src_tz, epsilon, flags, false, key + BatchNormMKLDNNHandler::GetHash(src_tz, epsilon, flags, false,
input_format); x->format());
const std::string key_batch_norm_bwd_p = const std::string key_batch_norm_bwd_p =
key_with_hash + "@batch_norm_bwd_p"; key_with_hash + "@batch_norm_bwd_p";
const std::string key_batch_norm_src_mem_p = const std::string key_batch_norm_src_mem_p =
...@@ -373,9 +366,8 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -373,9 +366,8 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
primitive reorder_diff_dst; primitive reorder_diff_dst;
bool is_diff_dst_reordered = false; bool is_diff_dst_reordered = false;
auto user_diff_dst_memory = memory( auto user_diff_dst_memory =
{{{diff_dst_tz}, memory::data_type::f32, dst_format}, mkldnn_engine}, memory(diff_y->get_mkldnn_prim_desc(), to_void_cast(diff_y_data));
to_void_cast(diff_y_data));
// MKLDNN requires a single piece of memory for scale and shift/bias data // MKLDNN requires a single piece of memory for scale and shift/bias data
const size_t scaleshift_size = 2 * ic; const size_t scaleshift_size = 2 * ic;
...@@ -459,10 +451,7 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -459,10 +451,7 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
dev_ctx.SetBlob(key_batch_norm_diff_dst_mem_p, diff_dst_memory); dev_ctx.SetBlob(key_batch_norm_diff_dst_mem_p, diff_dst_memory);
// set layout/format of output tensors // set layout/format of output tensors
diff_x->set_layout(DataLayout::kMKLDNN); diff_x->set_mkldnn_prim_desc(diff_src_memory->get_primitive_desc());
diff_x->set_format((memory::format)diff_src_memory->get_primitive_desc()
.desc()
.data.format);
} else { } else {
// primitives already exist // primitives already exist
UpdateMemoryData(dev_ctx, key_batch_norm_src_mem_p, to_void_cast(x_data)); UpdateMemoryData(dev_ctx, key_batch_norm_src_mem_p, to_void_cast(x_data));
...@@ -487,10 +476,7 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -487,10 +476,7 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
} }
// set layout/format of output tensors // set layout/format of output tensors
diff_x->set_layout(DataLayout::kMKLDNN); diff_x->set_mkldnn_prim_desc(diff_src_memory->get_primitive_desc());
diff_x->set_format((memory::format)diff_src_memory->get_primitive_desc()
.desc()
.data.format);
} }
// execute optional reorder and batch_norm backward primitive // execute optional reorder and batch_norm backward primitive
......
...@@ -47,11 +47,6 @@ static memory::primitive_desc CreateMemPrimDesc(const Tensor& input, ...@@ -47,11 +47,6 @@ static memory::primitive_desc CreateMemPrimDesc(const Tensor& input,
return mem_prim_desc; return mem_prim_desc;
} }
static mkldnn::memory::format GetDstMemFormat(
const concat::primitive_desc& concat_pd) {
return (memory::format)concat_pd.dst_primitive_desc().desc().data.format;
}
static platform::CPUPlace GetCpuPlace( static platform::CPUPlace GetCpuPlace(
const paddle::framework::ExecutionContext& ctx) { const paddle::framework::ExecutionContext& ctx) {
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
...@@ -139,8 +134,7 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -139,8 +134,7 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto concat = prim_creator.CreateConcatPrimitive(concat_pd, output, place); auto concat = prim_creator.CreateConcatPrimitive(concat_pd, output, place);
stream(stream::kind::eager).submit({concat}).wait(); stream(stream::kind::eager).submit({concat}).wait();
output->set_layout(DataLayout::kMKLDNN); output->set_mkldnn_prim_desc(concat_pd.dst_primitive_desc());
output->set_format(GetDstMemFormat(concat_pd));
} }
}; };
} // namespace operators } // namespace operators
......
...@@ -282,8 +282,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -282,8 +282,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
pipeline.push_back(*conv_p); pipeline.push_back(*conv_p);
stream(stream::kind::eager).submit(pipeline).wait(); stream(stream::kind::eager).submit(pipeline).wait();
auto dst_mpd = dst_memory_p->get_primitive_desc(); output->set_mkldnn_prim_desc(dst_memory_p->get_primitive_desc());
output->set_mkldnn_prim_desc(dst_mpd);
} }
void ComputeINT8(const paddle::framework::ExecutionContext& ctx) const { void ComputeINT8(const paddle::framework::ExecutionContext& ctx) const {
const bool is_test = ctx.Attr<bool>("is_test"); const bool is_test = ctx.Attr<bool>("is_test");
...@@ -972,8 +971,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -972,8 +971,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
pipeline.push_back(*conv_bwd_data_p); pipeline.push_back(*conv_bwd_data_p);
input_grad->set_layout(DataLayout::kMKLDNN); input_grad->set_mkldnn_prim_desc(diff_src_memory_p->get_primitive_desc());
input_grad->set_format(GetMKLDNNFormat(*diff_src_memory_p));
} }
stream(stream::kind::eager).submit(pipeline).wait(); stream(stream::kind::eager).submit(pipeline).wait();
} }
...@@ -991,12 +989,12 @@ REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, ...@@ -991,12 +989,12 @@ REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN,
REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN,
::paddle::platform::CPUPlace, U8, ::paddle::platform::CPUPlace, U8,
ops::kConvMKLDNNFP32, ops::kConvMKLDNNINT8,
ops::ConvMKLDNNOpKernel<uint8_t, float>); ops::ConvMKLDNNOpKernel<uint8_t, float>);
REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN,
::paddle::platform::CPUPlace, S8, ::paddle::platform::CPUPlace, S8,
ops::kConvMKLDNNFP32, ops::kConvMKLDNNINT8,
ops::ConvMKLDNNOpKernel<int8_t, float>); ops::ConvMKLDNNOpKernel<int8_t, float>);
REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d_grad, MKLDNN, REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d_grad, MKLDNN,
......
...@@ -221,8 +221,7 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -221,8 +221,7 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
pipeline.push_back(*conv_p); pipeline.push_back(*conv_p);
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait(); mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
output->set_layout(DataLayout::kMKLDNN); output->set_mkldnn_prim_desc(dst_memory_p->get_primitive_desc());
output->set_format(platform::GetMKLDNNFormat(*dst_memory_p));
} }
private: private:
......
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/dequantize_op.h" #include "paddle/fluid/operators/dequantize_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -30,6 +31,18 @@ using framework::DataLayout; ...@@ -30,6 +31,18 @@ using framework::DataLayout;
using mkldnn::stream; using mkldnn::stream;
using platform::GetMKLDNNFormat; using platform::GetMKLDNNFormat;
std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
const mkldnn::memory::data_type& src_dt,
const std::vector<int>& src_tz, const float scale_data) {
std::string key;
key.reserve(platform::MKLDNNHandler::MaxKeyLength);
platform::MKLDNNHandler::AppendKey(&key, std::to_string(src_dt));
platform::MKLDNNHandler::AppendKeyDims(&key, src_tz);
platform::MKLDNNHandler::AppendKey(&key, std::to_string(scale_data));
platform::MKLDNNHandler::AppendKey(&key, ctx.op().Output("Output"));
return key;
}
template <typename T> template <typename T>
class DeQuantOpKernel : public framework::OpKernel<T> { class DeQuantOpKernel : public framework::OpKernel<T> {
public: public:
...@@ -51,31 +64,55 @@ class DeQuantOpKernel : public framework::OpKernel<T> { ...@@ -51,31 +64,55 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
mkldnn::memory::data_type src_dt = mkldnn::memory::data_type src_dt =
paddle::framework::ToMKLDNNDataType(input->type()); paddle::framework::ToMKLDNNDataType(input->type());
mkldnn::memory::format src_fmt = input->format(); mkldnn::memory::format src_fmt = input->format();
std::string key = CreateKey(ctx, src_dt, src_tz, reorder_scale[0]);
const std::string key_prim = key + "@reorder_p";
const std::string key_src_mem = key + "@src_mem";
const std::string key_dst_mem = key + "@dst_mem";
std::shared_ptr<mkldnn::memory> src_memory;
std::shared_ptr<mkldnn::memory> dst_memory;
std::shared_ptr<reorder> reorder_p;
reorder_p = std::static_pointer_cast<reorder>(dev_ctx.GetBlob(key_prim));
if (reorder_p == nullptr) {
mkldnn::primitive_attr attri;
int mask = 0;
attri.set_output_scales(mask, reorder_scale);
auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
src_memory =
std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
std::shared_ptr<primitive::at> src_memory_p =
std::shared_ptr<primitive::at>(new primitive::at(*src_memory));
auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::f32,
memory::format::nchw);
auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine);
dst_memory = std::make_shared<mkldnn::memory>(
dst_pd, to_void_cast<float>(output_data));
auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
new reorder::primitive_desc(src_pd, dst_pd, attri));
reorder_p = std::shared_ptr<reorder>(
new reorder(*reorder_pd, *src_memory_p, *dst_memory));
dev_ctx.SetBlob(key_prim, reorder_p);
dev_ctx.SetBlob(key_src_mem, src_memory);
dev_ctx.SetBlob(key_dst_mem, dst_memory);
} else {
src_memory = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(key_src_mem));
src_memory->set_data_handle(to_void_cast<T>(input_data));
dst_memory = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(key_dst_mem));
dst_memory->set_data_handle(output->mutable_data<float>(ctx.GetPlace()));
}
    pipeline.push_back(*reorder_p);
    stream(stream::kind::eager).submit(pipeline).wait();

    output->set_format(GetMKLDNNFormat(*dst_memory));
} }
}; };
......
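The dequantize kernel now follows the same create-or-reuse discipline as the other MKL-DNN kernels: build the reorder once under a shape/scale-derived blob key, then only re-point the data handles on later calls. The pattern, condensed into a generic sketch (DevCtx stands in for MKLDNNDeviceContext):

#include <memory>
#include <string>

// Condensed sketch of the blob-cache pattern above: the first call with a
// given key constructs and stores the object; later calls reuse it.
template <typename Prim, typename DevCtx, typename MakeFn>
std::shared_ptr<Prim> GetOrCreateBlob(DevCtx* dev_ctx, const std::string& key,
                                      MakeFn make) {
  auto prim = std::static_pointer_cast<Prim>(dev_ctx->GetBlob(key));
  if (prim == nullptr) {
    prim = make();               // first execution: build the primitive
    dev_ctx->SetBlob(key, prim);
  }
  return prim;                   // cached on every later execution
}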
...@@ -81,10 +81,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -81,10 +81,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto e_mid = framework::EigenTensor<T, 4>::From(*mid); auto e_mid = framework::EigenTensor<T, 4>::From(*mid);
e_mid = e_mid.constant(k); e_mid = e_mid.constant(k);
auto dims = paddle::framework::vectorize2int(x->dims()); auto src_md = x->get_mkldnn_prim_desc().desc();
auto src_md = paddle::platform::MKLDNNMemDesc(
dims, mkldnn::memory::data_type::f32, x->format());
auto forward_desc = mkldnn::lrn_forward::desc{mkldnn::prop_kind::forward, auto forward_desc = mkldnn::lrn_forward::desc{mkldnn::prop_kind::forward,
mkldnn::lrn_across_channels, mkldnn::lrn_across_channels,
...@@ -94,7 +91,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -94,7 +91,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
beta, beta,
k}; k};
auto src_memory_pd = mkldnn::memory::primitive_desc{src_md, mkldnn_engine}; auto src_memory_pd = x->get_mkldnn_prim_desc();
if (!is_test) { if (!is_test) {
const std::string key = ctx.op().Output("Out"); const std::string key = ctx.op().Output("Out");
...@@ -111,16 +108,15 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -111,16 +108,15 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
src_memory->set_data_handle( src_memory->set_data_handle(
static_cast<void*>(const_cast<T*>(input_data))); static_cast<void*>(const_cast<T*>(input_data)));
auto dst_memory = mkldnn::memory(forward_pd->dst_primitive_desc(), auto dst_memory_pd = forward_pd->dst_primitive_desc();
static_cast<void*>(output_data)); auto dst_memory =
mkldnn::memory(dst_memory_pd, static_cast<void*>(output_data));
auto workspace_memory = insert_to_context<mkldnn::memory>( auto workspace_memory = insert_to_context<mkldnn::memory>(
key_workspace_memory, dev_ctx, key_workspace_memory, dev_ctx,
forward_pd->workspace_primitive_desc()); forward_pd->workspace_primitive_desc());
run_primitive(*forward_pd, *src_memory, *workspace_memory, dst_memory); run_primitive(*forward_pd, *src_memory, *workspace_memory, dst_memory);
out->set_mkldnn_prim_desc(dst_memory_pd);
out->set_layout(framework::DataLayout::kMKLDNN);
out->set_format(platform::GetMKLDNNFormat(dst_memory));
} else { } else {
auto forward_pd = auto forward_pd =
mkldnn::lrn_forward::primitive_desc{forward_desc, mkldnn_engine}; mkldnn::lrn_forward::primitive_desc{forward_desc, mkldnn_engine};
...@@ -128,13 +124,12 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -128,13 +124,12 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
src_memory_pd, static_cast<void*>(const_cast<T*>(input_data))}; src_memory_pd, static_cast<void*>(const_cast<T*>(input_data))};
auto workspace_memory = auto workspace_memory =
mkldnn::memory{forward_pd.workspace_primitive_desc()}; mkldnn::memory{forward_pd.workspace_primitive_desc()};
auto dst_memory_pd = forward_pd.dst_primitive_desc();
auto dst_memory = mkldnn::memory(forward_pd.dst_primitive_desc(), auto dst_memory = mkldnn::memory(forward_pd.dst_primitive_desc(),
static_cast<void*>(output_data)); static_cast<void*>(output_data));
run_primitive(forward_pd, src_memory, workspace_memory, dst_memory); run_primitive(forward_pd, src_memory, workspace_memory, dst_memory);
out->set_mkldnn_prim_desc(dst_memory_pd);
out->set_layout(framework::DataLayout::kMKLDNN);
out->set_format(platform::GetMKLDNNFormat(dst_memory));
} }
} }
}; };
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/operators/pool_op.h" #include "paddle/fluid/operators/pool_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -29,23 +30,23 @@ using mkldnn::stream; ...@@ -29,23 +30,23 @@ using mkldnn::stream;
using platform::to_void_cast; using platform::to_void_cast;
// Generate keys for storing/retrieving primitives for this operator
std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
                      const memory::dims& input_dims,
                      const std::string& pooling_type,
                      const std::vector<int>& ksize,
                      const std::vector<int>& strides,
                      const std::vector<int>& paddings,
                      const memory::data_type& dt, const std::string& suffix) {
  std::string key;
  key.reserve(platform::MKLDNNHandler::MaxKeyLength);
  platform::MKLDNNHandler::AppendKeyDims(&key, input_dims);
  platform::MKLDNNHandler::AppendKey(&key, pooling_type);
  platform::MKLDNNHandler::AppendKeyVec(&key, ksize);
  platform::MKLDNNHandler::AppendKeyVec(&key, strides);
  platform::MKLDNNHandler::AppendKeyVec(&key, paddings);
  platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt));
  platform::MKLDNNHandler::AppendKey(&key, suffix);
  return key;
}
static inline int ComputeCeiledOutput(int input_size, int kernel_size, static inline int ComputeCeiledOutput(int input_size, int kernel_size,
...@@ -114,8 +115,8 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -114,8 +115,8 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
mkldnn::memory::data_type dt = mkldnn::memory::data_type dt =
paddle::framework::ToMKLDNNDataType(input->type()); paddle::framework::ToMKLDNNDataType(input->type());
const std::string key = gethash(src_tz, pooling_type, ksize, strides, const std::string key = CreateKey(ctx, src_tz, pooling_type, ksize, strides,
paddings, dt, ctx.op().Output("Out")); paddings, dt, ctx.op().Output("Out"));
const std::string key_pool_p = key + "@pool_p"; const std::string key_pool_p = key + "@pool_p";
const std::string key_pool_pd = key + "@pool_pd"; const std::string key_pool_pd = key + "@pool_pd";
const std::string key_pool_src_mem_p = key + "@pool_src_mem_p"; const std::string key_pool_src_mem_p = key + "@pool_src_mem_p";
...@@ -294,8 +295,8 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -294,8 +295,8 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
// Get a unique name from "argument" name of "Out" variable // Get a unique name from "argument" name of "Out" variable
// This name will be used as key when referring info from device context // This name will be used as key when referring info from device context
const std::string key = const std::string key =
gethash(diff_src_tz, pooling_type, ksize, strides, paddings, CreateKey(ctx, diff_src_tz, pooling_type, ksize, strides, paddings,
memory::data_type::f32, ctx.op().Input("Out")); memory::data_type::f32, ctx.op().Input("Out"));
const std::string key_pool_bwd_p = key + "@pool_bwd_p"; const std::string key_pool_bwd_p = key + "@pool_bwd_p";
const std::string key_pool_diff_src_mem_p = key + "@pool_diff_src_mem_p"; const std::string key_pool_diff_src_mem_p = key + "@pool_diff_src_mem_p";
const std::string key_pool_diff_dst_mem_p = key + "@pool_diff_dst_mem_p"; const std::string key_pool_diff_dst_mem_p = key + "@pool_diff_dst_mem_p";
......
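For intuition, the helper-built key is just the appended fields in a fixed order, so two pooling configurations collide exactly when every field matches. A hedged illustration of the layout (separators and exact encoding are an implementation detail of the AppendKey helpers):

#include <sstream>
#include <string>
#include <vector>

// Illustrative reimplementation of the key layout, not the real helpers:
// dims, pooling type, ksize, strides, paddings, data type, then the suffix.
std::string PoolKeySketch(const std::vector<int>& dims,
                          const std::string& pooling_type,
                          const std::vector<int>& ksize,
                          const std::vector<int>& strides,
                          const std::vector<int>& paddings, int dt,
                          const std::string& suffix) {
  std::ostringstream key;
  for (int d : dims) key << d << '-';
  key << pooling_type << '-';
  for (int k : ksize) key << k << '-';
  for (int s : strides) key << s << '-';
  for (int p : paddings) key << p << '-';
  key << dt << '-' << suffix;
  return key.str();
}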
...@@ -30,6 +30,18 @@ using framework::DataLayout; ...@@ -30,6 +30,18 @@ using framework::DataLayout;
using mkldnn::stream; using mkldnn::stream;
using platform::GetMKLDNNFormat; using platform::GetMKLDNNFormat;
std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
const std::vector<int>& src_tz, const float scale_data,
const bool is_negative) {
std::string key;
key.reserve(platform::MKLDNNHandler::MaxKeyLength);
platform::MKLDNNHandler::AppendKeyDims(&key, src_tz);
platform::MKLDNNHandler::AppendKey(&key, std::to_string(scale_data));
platform::MKLDNNHandler::AppendKey(&key, std::to_string(is_negative));
platform::MKLDNNHandler::AppendKey(&key, ctx.op().Output("Output"));
return key;
}
template <typename T> template <typename T>
class QuantOpKernel : public framework::OpKernel<T> { class QuantOpKernel : public framework::OpKernel<T> {
public: public:
...@@ -47,32 +59,61 @@ class QuantOpKernel : public framework::OpKernel<T> { ...@@ -47,32 +59,61 @@ class QuantOpKernel : public framework::OpKernel<T> {
const T* input_data = input->data<T>(); const T* input_data = input->data<T>();
    bool is_negative = ctx.Attr<bool>("is_negative_input");
    std::string key = CreateKey(ctx, src_tz, scale_data, is_negative);
    const std::string key_prim = key + "@reorder_p";
    const std::string key_src_mem = key + "@src_mem";
    const std::string key_dst_mem = key + "@dst_mem";

    std::shared_ptr<mkldnn::memory> src_memory;
    std::shared_ptr<mkldnn::memory> dst_memory;
    std::shared_ptr<reorder> reorder_p;
    reorder_p = std::static_pointer_cast<reorder>(dev_ctx.GetBlob(key_prim));

    if (reorder_p == nullptr) {
      mkldnn::primitive_attr attri;
      int mask = 0;
      attri.set_output_scales(mask, {scale_data});

      auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32,
                                            input->format());
      auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
      src_memory = std::make_shared<mkldnn::memory>(
          src_pd, to_void_cast<T>(input_data));
      std::shared_ptr<primitive::at> src_memory_p =
          std::shared_ptr<primitive::at>(new primitive::at(*src_memory));

      std::shared_ptr<mkldnn::memory::primitive_desc> dst_pd;
      if (is_negative) {
        platform::ConvMKLDNNHandler::SetDstMemory<int8_t>(
            ctx, output, dst_tz, engine, dst_pd, dst_memory);
      } else {
        platform::ConvMKLDNNHandler::SetDstMemory<uint8_t>(
            ctx, output, dst_tz, engine, dst_pd, dst_memory);
      }
      auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
          new reorder::primitive_desc(src_pd, *dst_pd, attri));
      reorder_p = std::shared_ptr<reorder>(
          new reorder(*reorder_pd, *src_memory_p, *dst_memory));

      dev_ctx.SetBlob(key_prim, reorder_p);
      dev_ctx.SetBlob(key_src_mem, src_memory);
      dev_ctx.SetBlob(key_dst_mem, dst_memory);
    } else {
      src_memory = std::static_pointer_cast<mkldnn::memory>(
          dev_ctx.GetBlob(key_src_mem));
      src_memory->set_data_handle(to_void_cast<T>(input_data));

      dst_memory = std::static_pointer_cast<mkldnn::memory>(
          dev_ctx.GetBlob(key_dst_mem));
      auto place = ctx.GetPlace();
      if (is_negative) {
        dst_memory->set_data_handle(output->mutable_data<int8_t>(place));
      } else {
        dst_memory->set_data_handle(output->mutable_data<uint8_t>(place));
      }
    }
pipeline.push_back(*reorder_p); pipeline.push_back(*reorder_p);
stream(stream::kind::eager).submit(pipeline).wait(); stream(stream::kind::eager).submit(pipeline).wait();
output->set_layout(DataLayout::kMKLDNN); output->set_layout(DataLayout::kMKLDNN);
......
...@@ -158,6 +158,14 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> { ...@@ -158,6 +158,14 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
auto softmax_p = auto softmax_p =
handler.AcquireSoftmax(softmax_dst_memory_p, softmax_src_memory_p); handler.AcquireSoftmax(softmax_dst_memory_p, softmax_src_memory_p);
// We cannot use softmax_dst_memory_p to get prim desc as
// it contains flattened dims (2D) while output tensor can
// have 2,3,4+ dims
auto output_mem_pd = paddle::platform::create_prim_desc_from_dims(
paddle::framework::vectorize2int(output->dims()),
mkldnn::memory::format::blocked);
output->set_mkldnn_prim_desc(output_mem_pd);
std::vector<primitive> pipeline{ std::vector<primitive> pipeline{
*(static_cast<softmax_forward::primitive*>(softmax_p.get()))}; *(static_cast<softmax_forward::primitive*>(softmax_p.get()))};
stream(stream::kind::eager).submit(pipeline).wait(); stream(stream::kind::eager).submit(pipeline).wait();
......
...@@ -79,15 +79,6 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -79,15 +79,6 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
memory::format input_format = input0.format(); memory::format input_format = input0.format();
if (src_tz.size() == 1 && (input_format == memory::format::nchw ||
input_format == memory::format::nhwc)) {
input_format = memory::format::x;
}
if (src_tz.size() == 2 && (input_format == memory::format::nchw ||
input_format == memory::format::nhwc)) {
input_format = memory::format::nc;
}
for (int i = 0; i < N; i++) { for (int i = 0; i < N; i++) {
PADDLE_ENFORCE(in_vars[i]->IsType<LoDTensor>(), PADDLE_ENFORCE(in_vars[i]->IsType<LoDTensor>(),
"all inputs must be all LoDTensors"); "all inputs must be all LoDTensors");
...@@ -115,12 +106,12 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -115,12 +106,12 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
memory::desc(dst_tz, memory::data_type::f32, memory::format::any); memory::desc(dst_tz, memory::data_type::f32, memory::format::any);
auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd); auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd);
auto dst_mem_pd = sum_pd.dst_primitive_desc();
std::shared_ptr<memory> dst_mem; std::shared_ptr<memory> dst_mem;
if (in_place) { if (in_place) {
dst_mem.reset(new memory(sum_pd.dst_primitive_desc())); dst_mem.reset(new memory(dst_mem_pd));
} else { } else {
dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data)); dst_mem.reset(new memory(dst_mem_pd, output_data));
} }
std::vector<mkldnn::primitive::at> inputs; std::vector<mkldnn::primitive::at> inputs;
for (size_t i = 0; i < srcs_mem.size(); ++i) { for (size_t i = 0; i < srcs_mem.size(); ++i) {
...@@ -145,107 +136,11 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -145,107 +136,11 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
if (in_place) pipeline.push_back(reorder_prim); if (in_place) pipeline.push_back(reorder_prim);
stream(stream::kind::eager).submit(pipeline).wait(); stream(stream::kind::eager).submit(pipeline).wait();
      output->set_mkldnn_prim_desc(dst_mem_pd);
    } else {  // Fallback to naive version
      // TODO(@mozga-intel) Add MKLDNN SelectedRows & LoDTensorArray support
      SumKernel<CPUDeviceContext, T> reference_kernel;
      reference_kernel.Compute(ctx);
    }
} }
}; };
......
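The net effect of the sum_mkldnn_op change above: the kernel keeps only the LoDTensor fast path and routes everything else to the reference CPU kernel instead of duplicating its SelectedRows and LoDTensorArray logic. A minimal sketch of the resulting dispatch (names as in the diff; the primitive setup is elided, so this is illustrative rather than runnable in isolation):

if (out_var->IsType<framework::LoDTensor>()) {
  // Fast path: build and run the mkldnn::sum primitive pipeline, then
  // record the destination memory primitive descriptor on the output.
  stream(stream::kind::eager).submit(pipeline).wait();
  output->set_mkldnn_prim_desc(dst_mem_pd);
} else {  // Fallback to naive version
  // SelectedRows / LoDTensorArray outputs are delegated to the reference
  // CPU kernel rather than re-implemented here.
  SumKernel<CPUDeviceContext, T> reference_kernel;
  reference_kernel.Compute(ctx);
}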
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include <algorithm> #include <algorithm>
#include <functional> #include <functional>
#include <memory>
#include <vector> #include <vector>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
......
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <algorithm> #include <algorithm>
#include <map> #include <map>
#include <memory>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
......
...@@ -16,7 +16,10 @@ limitations under the License. */ ...@@ -16,7 +16,10 @@ limitations under the License. */
#include <algorithm> #include <algorithm>
#include <map> #include <map>
#include <memory>
#include <string> #include <string>
#include <unordered_set>
#include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/block_desc.h"
...@@ -483,7 +486,8 @@ void NgraphEngine::Run(const framework::Scope& scope, ...@@ -483,7 +486,8 @@ void NgraphEngine::Run(const framework::Scope& scope,
} }
} }
backend_->call(backend_->compile(ngraph_function_), t_out, t_in); auto handle = backend_->compile(ngraph_function_);
handle->call_with_validate(t_out, t_in);
} // NgraphEngine::Run } // NgraphEngine::Run
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include <vector> #include <vector>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
......
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
......
/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
#include "paddle/fluid/platform/ngraph_helper.h"
namespace paddle {
namespace operators {
namespace ngraphs {
void BuildAdamNode(
const std::shared_ptr<framework::OperatorBase>& op,
std::shared_ptr<
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
ngb_node_map) {
auto op_attrs = framework::AttrReader(op->Attrs());
auto beta1pow = platform::GetInputNode(op, "Beta1Pow", ngb_node_map);
auto beta2pow = platform::GetInputNode(op, "Beta2Pow", ngb_node_map);
auto grad = platform::GetInputNode(op, "Grad", ngb_node_map);
auto learning_rate = platform::GetInputNode(op, "LearningRate", ngb_node_map);
auto moment1 = platform::GetInputNode(op, "Moment1", ngb_node_map);
auto moment2 = platform::GetInputNode(op, "Moment2", ngb_node_map);
auto param = platform::GetInputNode(op, "Param", ngb_node_map);
auto epsilon = op_attrs.Get<float>("epsilon");
auto beta2 = op_attrs.Get<float>("beta2");
auto beta1 = op_attrs.Get<float>("beta1");
auto moment1_shape = moment1->get_shape();
auto grad_shape = grad->get_shape();
auto moment1out = std::make_shared<ngraph::op::Add>(
ElementwiseScalar<ngraph::op::Multiply>(beta1, moment1),
ElementwiseScalar<ngraph::op::Multiply>(1. - beta1, grad));
auto grad_square = std::make_shared<ngraph::op::Multiply>(grad, grad);
auto moment2out = std::make_shared<ngraph::op::Add>(
ElementwiseScalar<ngraph::op::Multiply>(beta2, moment2),
ElementwiseScalar<ngraph::op::Multiply>(1. - beta2, grad_square));
auto node_sqrt = std::make_shared<ngraph::op::Sqrt>(
ElementwiseScalar<ngraph::op::Subtract>(1., beta2pow));
auto lr = std::make_shared<ngraph::op::Divide>(
node_sqrt, ElementwiseScalar<ngraph::op::Subtract>(1., beta1pow));
auto updated_lr = std::make_shared<ngraph::op::Multiply>(learning_rate, lr);
auto moment2_sqrt = std::make_shared<ngraph::op::Sqrt>(moment2out);
auto param_grad = std::make_shared<ngraph::op::Divide>(
moment1out, ElementwiseScalar<ngraph::op::Add>(epsilon, moment2_sqrt));
auto delta = ElementwiseScalar<ngraph::op::Multiply>(updated_lr, param_grad);
auto param_out = std::make_shared<ngraph::op::Subtract>(param, delta);
platform::SetOutputNode(op, "Moment1Out", moment1out, ngb_node_map);
platform::SetOutputNode(op, "Moment2Out", moment2out, ngb_node_map);
platform::SetOutputNode(op, "ParamOut", param_out, ngb_node_map);
}
} // namespace ngraphs
} // namespace operators
} // namespace paddle
REGISTER_NG_OP(adam, BuildAdamNode);
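Read off the graph built above, BuildAdamNode encodes the standard Adam update; note that epsilon is added outside the square root, matching the `epsilon + sqrt(moment2out)` term. With g the gradient, \beta_1^t and \beta_2^t the Beta1Pow/Beta2Pow inputs, and \eta the learning rate:

m_1' = \beta_1 m_1 + (1 - \beta_1)\, g
m_2' = \beta_2 m_2 + (1 - \beta_2)\, g^2
\eta' = \eta \, \frac{\sqrt{1 - \beta_2^t}}{1 - \beta_1^t}
\theta' = \theta - \eta' \, \frac{m_1'}{\epsilon + \sqrt{m_2'}}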
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include <vector> #include <vector>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
......
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
#include "paddle/fluid/platform/ngraph_helper.h" #include "paddle/fluid/platform/ngraph_helper.h"
......
/*Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
#include "paddle/fluid/platform/ngraph_helper.h"
namespace paddle {
namespace operators {
namespace ngraphs {
void BuildConcatNode(
const std::shared_ptr<framework::OperatorBase>& op,
std::shared_ptr<
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
ngb_node_map) {
std::vector<std::shared_ptr<ngraph::Node>> args;
for (auto& var_name_item : op->Inputs()) {
for (auto& var_name : var_name_item.second) {
auto& node0 = ngb_node_map->at(var_name);
args.push_back(node0);
}
}
auto op_attrs = framework::AttrReader(op->Attrs());
const size_t axis = op_attrs.Get<int>("axis");
auto out = std::make_shared<ngraph::op::Concat>(args, axis);
platform::SetOutputNode(op, "Out", out, ngb_node_map);
}
} // namespace ngraphs
} // namespace operators
} // namespace paddle
REGISTER_NG_OP(concat, BuildConcatNode);
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include <vector> #include <vector>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
......
...@@ -15,7 +15,9 @@ limitations under the License. */ ...@@ -15,7 +15,9 @@ limitations under the License. */
#pragma once #pragma once
#include <functional> #include <functional>
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
......
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include <vector> #include <vector>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
......
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include <vector> #include <vector>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
......
...@@ -15,7 +15,9 @@ limitations under the License. */ ...@@ -15,7 +15,9 @@ limitations under the License. */
#pragma once #pragma once
#include <functional> #include <functional>
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h" #include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h"
......
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include <vector> #include <vector>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
......
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
#include "paddle/fluid/platform/ngraph_helper.h" #include "paddle/fluid/platform/ngraph_helper.h"
......
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include <vector> #include <vector>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
......
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h" #include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
......
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include <vector> #include <vector>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h" #include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h"
......
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include <vector> #include <vector>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
#include "paddle/fluid/platform/ngraph_helper.h" #include "paddle/fluid/platform/ngraph_helper.h"
namespace paddle { namespace paddle {
...@@ -53,3 +54,5 @@ void BuildSumNode( ...@@ -53,3 +54,5 @@ void BuildSumNode(
} // namespace ngraphs } // namespace ngraphs
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
REGISTER_NG_OP(sum, BuildSumNode);
...@@ -14,7 +14,9 @@ limitations under the License. */ ...@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp" #include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h" #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
#include "paddle/fluid/platform/ngraph_helper.h" #include "paddle/fluid/platform/ngraph_helper.h"
......
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/jit/kernels.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -32,53 +33,57 @@ class SGDOpKernel : public framework::OpKernel<T> { ...@@ -32,53 +33,57 @@ class SGDOpKernel : public framework::OpKernel<T> {
if (param_var->IsType<framework::LoDTensor>()) { if (param_var->IsType<framework::LoDTensor>()) {
const auto *param = ctx.Input<framework::Tensor>("Param"); const auto *param = ctx.Input<framework::Tensor>("Param");
auto *param_out = ctx.Output<framework::Tensor>("ParamOut"); auto *param_out = ctx.Output<framework::Tensor>("ParamOut");
// Actually, all tensors are LoDTensor except SelectedRows. // Actually, all tensors are LoDTensor except SelectedRows.
if (grad_var->IsType<framework::LoDTensor>()) { if (grad_var->IsType<framework::LoDTensor>()) {
param_out->mutable_data<T>(ctx.GetPlace());
const auto *grad = ctx.Input<framework::Tensor>("Grad"); const auto *grad = ctx.Input<framework::Tensor>("Grad");
auto sz = param_out->numel();
auto p = framework::EigenVector<T>::Flatten(*param); PADDLE_ENFORCE_EQ(param->numel(), sz);
auto g = framework::EigenVector<T>::Flatten(*grad); PADDLE_ENFORCE_EQ(grad->numel(), sz);
auto o = framework::EigenVector<T>::Flatten(*param_out);
auto *lr = learning_rate->data<T>(); jit::sgd_attr_t attr(1, sz, 1, sz, 1);
const T *lr = learning_rate->data<T>();
o = p - lr[0] * g; const T *param_data = param->data<T>();
const T *grad_data = grad->data<T>();
int64_t rows_idx = 0;
T *out_data = param_out->mutable_data<T>(ctx.GetPlace());
auto sgd =
jit::Get<jit::kSgd, jit::SgdTuples<T>, platform::CPUPlace>(attr);
sgd(lr, param_data, grad_data, &rows_idx, out_data, &attr);
} else if (grad_var->IsType<framework::SelectedRows>()) { } else if (grad_var->IsType<framework::SelectedRows>()) {
// TODO(qijun): In Sparse SGD operator, in-place update is enforced. // TODO(qijun): In Sparse SGD operator, in-place update is enforced.
// This manual optimization makes it difficult to track data dependencies. // This manual optimization makes it difficult to track data dependencies.
// It's better to find a more elegant solution. // It's better to find a more elegant solution.
PADDLE_ENFORCE_EQ(param, param_out); PADDLE_ENFORCE_EQ(param, param_out);
const auto *grad = ctx.Input<framework::SelectedRows>("Grad"); const auto *grad = ctx.Input<framework::SelectedRows>("Grad");
auto &grad_rows = grad->rows();
// for distributed training, a sparse var may be empty, // for distributed training, a sparse var may be empty,
// just skip updating. // just skip updating.
if (grad->rows().size() == 0) { if (grad_rows.size() == 0) {
return; return;
} }
auto grad_height = grad->height();
auto out_dims = param_out->dims(); auto out_dims = param_out->dims();
PADDLE_ENFORCE_EQ(grad_height, out_dims[0]); PADDLE_ENFORCE_EQ(grad->height(), out_dims[0]);
auto &grad_value = grad->value(); auto &grad_value = grad->value();
auto &grad_rows = grad->rows(); const T *param_data = param->data<T>();
const T *grad_data = grad_value.data<T>();
size_t grad_row_numel = grad_value.numel() / grad_rows.size(); const T *lr = learning_rate->data<T>();
PADDLE_ENFORCE_EQ(static_cast<int64_t>(grad_row_numel), const int64_t *rows_data = grad_rows.data();
param_out->numel() / grad_height); T *out_data = param_out->mutable_data<T>(ctx.GetPlace());
auto *grad_data = grad_value.data<T>(); jit::sgd_attr_t attr;
auto *out_data = param_out->data<T>(); attr.param_height = out_dims[0];
auto *lr = learning_rate->data<T>(); attr.param_width = param_out->numel() / attr.param_height;
for (size_t i = 0; i < grad_rows.size(); i++) { attr.grad_height = grad_rows.size(); // note: it is not grad->height()
PADDLE_ENFORCE(grad_rows[i] < grad_height, attr.grad_width = grad_value.numel() / attr.grad_height;
"Input rows index should less than height"); attr.selected_rows_size = grad_rows.size();
for (size_t j = 0; j < grad_row_numel; j++) { PADDLE_ENFORCE_EQ(attr.grad_width, attr.param_width);
out_data[grad_rows[i] * grad_row_numel + j] -=
lr[0] * grad_data[i * grad_row_numel + j]; auto sgd =
} jit::Get<jit::kSgd, jit::SgdTuples<T>, platform::CPUPlace>(attr);
} sgd(lr, param_data, grad_data, rows_data, out_data, &attr);
} else { } else {
PADDLE_THROW("Unsupported Variable Type of Grad"); PADDLE_THROW("Unsupported Variable Type of Grad");
} }
......
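For reference, the jit::kSgd call above has the same semantics as the loop this diff removes: every selected row of the gradient is scaled by the learning rate and subtracted from the corresponding parameter row. The dense branch reuses the same primitive by describing the whole tensor as a single row (attr(1, sz, 1, sz, 1) with a single row index of 0). A self-contained sketch of those semantics (the function name is illustrative, not part of the JIT API):

#include <cstdint>

// Reference semantics of the SGD JIT kernel on SelectedRows gradients.
// rows[i] names the parameter row that grad row i updates; in the
// in-place case, param and out alias the same buffer.
template <typename T>
void SgdSelectedRowsRef(const T* lr, const T* param, const T* grad,
                        const int64_t* rows, T* out,
                        int64_t selected_rows_size, int64_t row_width) {
  for (int64_t i = 0; i < selected_rows_size; ++i) {
    const int64_t r = rows[i];
    for (int64_t j = 0; j < row_width; ++j) {
      out[r * row_width + j] =
          param[r * row_width + j] - lr[0] * grad[i * row_width + j];
    }
  }
}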
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/operators/reader/buffered_reader.h" #include "paddle/fluid/operators/reader/buffered_reader.h"
#include <memory>
#include <vector> #include <vector>
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
......
...@@ -22,6 +22,9 @@ class SequenceEnumerateOp : public framework::OperatorWithKernel { ...@@ -22,6 +22,9 @@ class SequenceEnumerateOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
if (ctx->IsRuntime()) {
return;
}
PADDLE_ENFORCE( PADDLE_ENFORCE(
ctx->HasInput("X"), ctx->HasInput("X"),
"Input(X) of SequecceEnumerate operator should not be null."); "Input(X) of SequecceEnumerate operator should not be null.");
...@@ -33,9 +36,9 @@ class SequenceEnumerateOp : public framework::OperatorWithKernel { ...@@ -33,9 +36,9 @@ class SequenceEnumerateOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
x_dims.size(), 2, x_dims.size(), 2,
"Input(X) of SequenceEnumerate operator's rank should be 2."); "Input(X) of SequenceEnumerate operator's rank should be 2.");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(x_dims[1], 1,
x_dims[1], 1, "Input(X) of SequenceEnumerate operator's 2nd "
"Input(X) of SequenceEnumerate operator's 2nd dimension should be 1."); "dimension should be 1.");
const auto win_size = ctx->Attrs().Get<int>("win_size"); const auto win_size = ctx->Attrs().Get<int>("win_size");
ctx->SetOutputDim("Out", {x_dims[0], win_size}); ctx->SetOutputDim("Out", {x_dims[0], win_size});
......
...@@ -65,6 +65,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel<T> { ...@@ -65,6 +65,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel<T> {
auto lod0 = in_lod[0]; auto lod0 = in_lod[0];
auto in_len = in->numel(); auto in_len = in->numel();
auto in_data = in->data<T>(); auto in_data = in->data<T>();
out->Resize({in_dims[0], win_size});
auto out_data = out->mutable_data<T>(context.GetPlace()); auto out_data = out->mutable_data<T>(context.GetPlace());
// Copy LoD to GPU // Copy LoD to GPU
const size_t* dev_in_lod_ptr = lod0.CUDAData(context.GetPlace()); const size_t* dev_in_lod_ptr = lod0.CUDAData(context.GetPlace());
...@@ -72,6 +73,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel<T> { ...@@ -72,6 +73,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel<T> {
CalcOutPut<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1, CalcOutPut<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1,
PADDLE_CUDA_NUM_THREADS, 0, stream>>>( PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
in_data, dev_in_lod_ptr, lod0.size(), win_size, pad_value, out_data); in_data, dev_in_lod_ptr, lod0.size(), win_size, pad_value, out_data);
out->set_lod(in->lod());
} }
}; };
......
...@@ -39,6 +39,7 @@ class SequenceEnumerateKernel : public framework::OpKernel<T> { ...@@ -39,6 +39,7 @@ class SequenceEnumerateKernel : public framework::OpKernel<T> {
// Generate enumerate sequence set // Generate enumerate sequence set
auto lod0 = in_lod[0]; auto lod0 = in_lod[0];
auto in_data = in->data<T>(); auto in_data = in->data<T>();
out->Resize({in_dims[0], win_size});
auto out_data = out->mutable_data<T>(context.GetPlace()); auto out_data = out->mutable_data<T>(context.GetPlace());
for (size_t i = 0; i < lod0.size() - 1; ++i) { for (size_t i = 0; i < lod0.size() - 1; ++i) {
for (size_t idx = lod0[i]; idx < lod0[i + 1]; ++idx) { for (size_t idx = lod0[i]; idx < lod0[i + 1]; ++idx) {
...@@ -49,6 +50,7 @@ class SequenceEnumerateKernel : public framework::OpKernel<T> { ...@@ -49,6 +50,7 @@ class SequenceEnumerateKernel : public framework::OpKernel<T> {
} }
} }
} }
out->set_lod(in->lod());
} }
}; };
......
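Taken together, the three sequence_enumerate changes move output metadata from compile time to run time: InferShape now returns early when IsRuntime() is true, so each kernel resizes the output and propagates the LoD itself. The shared pattern, as a sketch (names as in the diff):

// The kernel, not InferShape, now owns the runtime output metadata:
// one enumerated window of width win_size per input step, with the
// LoD copied straight from the input.
out->Resize({in->dims()[0], win_size});
T* out_data = out->mutable_data<T>(context.GetPlace());
// ... fill out_data with the enumerated windows ...
out->set_lod(in->lod());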
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <algorithm> #include <algorithm>
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/operators/sequence_ops/sequence_expand_op.h" #include "paddle/fluid/operators/sequence_ops/sequence_expand_op.h"
#include "paddle/fluid/platform/cuda_primitives.h" #include "paddle/fluid/platform/cuda_primitives.h"
...@@ -88,6 +89,49 @@ void GetOutputOffset(const framework::Vector<size_t>& x_lod, ...@@ -88,6 +89,49 @@ void GetOutputOffset(const framework::Vector<size_t>& x_lod,
} }
} }
template <typename T>
static int ExpandByMemoryCopy(const platform::CUDADeviceContext& context,
const LoDTensor& x, LoDTensor* out,
const framework::Vector<size_t>& x_lod,
const framework::Vector<size_t>& ref_lod,
bool do_copy) {
auto out_data = out->data<T>();
auto x_data = x.data<T>();
auto& gpu_place = boost::get<platform::CUDAPlace>(context.GetPlace());
int x_item_length = x.numel() / x.dims()[0];
int out_offset = 0;
int num_copys = 0;
for (size_t i = 1; i < ref_lod.size(); ++i) {
int repeat_num = ref_lod[i] - ref_lod[i - 1];
int x_start = x_lod[i - 1];
int x_end = x_lod[i];
int x_seq_len = x_end - x_start;
if (repeat_num > 0) {
if (do_copy) {
int out_start = out_offset;
if (out->lod().size() == 1) {
out_start = out->lod()[0][out_offset];
}
for (int j = 0; j < repeat_num; j++) {
for (int k = 0; k < x_seq_len; k++) {
memory::Copy(
gpu_place,
out_data + (out_start + j * x_seq_len + k) * x_item_length,
gpu_place, x_data + (x_start + k) * x_item_length,
sizeof(T) * x_item_length, context.stream());
}
}
} else {
num_copys += repeat_num * x_seq_len;
}
}
out_offset += repeat_num;
}
return num_copys;
}
template <typename T> template <typename T>
struct SequenceExpandFunctor<platform::CUDADeviceContext, T> { struct SequenceExpandFunctor<platform::CUDADeviceContext, T> {
void operator()( void operator()(
...@@ -95,22 +139,40 @@ struct SequenceExpandFunctor<platform::CUDADeviceContext, T> { ...@@ -95,22 +139,40 @@ struct SequenceExpandFunctor<platform::CUDADeviceContext, T> {
const framework::Vector<size_t>& x_lod, /*expand source lod*/ const framework::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/ const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
LoDTensor* out) { LoDTensor* out) {
int x_item_length = x.numel() / x.dims()[0]; int num_copys =
framework::Vector<size_t> out_offset(x_lod.size()); ExpandByMemoryCopy<T>(context, x, out, x_lod, ref_lod, false);
GetOutputOffset(x_lod, ref_lod, &out_offset); // Sometimes direct copies will be faster; this may need deeper analysis.
if (num_copys < 5) {
int thread_x = std::min(32, std::max(static_cast<int>(ref_lod.size()), 16)); ExpandByMemoryCopy<T>(context, x, out, x_lod, ref_lod, true);
int thread_y = 16; } else {
int thread_z = 1024 / thread_x / thread_y; int x_item_length = x.numel() / x.dims()[0];
int block_x = static_cast<int>(ref_lod.size()); size_t x_lod_size = x_lod.size();
dim3 block_size(thread_x, thread_y, thread_z); framework::Vector<size_t> out_offset(x_lod_size * 2 + ref_lod.size());
dim3 grid_size(block_x, 1); GetOutputOffset(x_lod, ref_lod, &out_offset);
for (size_t i = 0; i < x_lod_size; ++i) {
out_offset[x_lod_size + i] = x_lod[i];
}
for (size_t i = 0; i < ref_lod.size(); ++i) {
out_offset[2 * x_lod_size + i] = ref_lod[i];
}
sequence_expand_kernel<<<grid_size, block_size, 0, context.stream()>>>( const size_t* out_offset_data = out_offset.CUDAData(context.GetPlace());
x.data<T>(), x_lod.CUDAData(context.GetPlace()), const size_t* x_lod_data = out_offset_data + x_lod_size;
ref_lod.CUDAData(context.GetPlace()), const size_t* ref_lod_data = out_offset_data + 2 * x_lod_size;
out_offset.CUDAData(context.GetPlace()), x_lod.size(), x_item_length,
out->mutable_data<T>(context.GetPlace())); int thread_x =
std::min(32, std::max(static_cast<int>(ref_lod.size()), 16));
int thread_y = 16;
int thread_z = 1024 / thread_x / thread_y;
int block_x = static_cast<int>(ref_lod.size());
dim3 block_size(thread_x, thread_y, thread_z);
dim3 grid_size(block_x, 1);
sequence_expand_kernel<<<grid_size, block_size, 0, context.stream()>>>(
x.data<T>(), x_lod_data, ref_lod_data, out_offset_data, x_lod_size,
x_item_length, out->mutable_data<T>(context.GetPlace()));
}
} }
}; };
......
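Two things happen in the rewritten functor above. First, a dry run of ExpandByMemoryCopy (do_copy = false) counts the copies that would be issued, and only small workloads (fewer than 5 copies) take the memcpy path, per the comment. Second, the three size_t vectors the kernel needs are packed into one framework::Vector, so a single CUDAData() migration replaces three separate host-to-device transfers. The packed layout:

// out_offset has size 2 * x_lod_size + ref_lod.size(), laid out as:
//   [0, x_lod_size)               per-sequence output offsets
//   [x_lod_size, 2 * x_lod_size)  x_lod, the expand-source lod
//   [2 * x_lod_size, end)         ref_lod, the expand-reference lod
// One migration yields all three device pointers as offsets into it:
const size_t* out_offset_data = out_offset.CUDAData(context.GetPlace());
const size_t* x_lod_data = out_offset_data + x_lod_size;
const size_t* ref_lod_data = out_offset_data + 2 * x_lod_size;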
...@@ -46,10 +46,10 @@ class SoftmaxWithCrossEntropyOpMaker ...@@ -46,10 +46,10 @@ class SoftmaxWithCrossEntropyOpMaker
.SetDefault(false); .SetDefault(false);
AddAttr<bool>( AddAttr<bool>(
"numeric_stable_mode", "numeric_stable_mode",
"(bool, default: false), A flag to indicate whether to use more " "(bool, default: true), A flag to indicate whether to use more "
"numerically stable algorithm. This flag is only valid when " "numerically stable algorithm. This flag is only valid when "
"soft_label is false and GPU is used.") "soft_label is false and GPU is used.")
.SetDefault(false); .SetDefault(true);
AddAttr<int>( AddAttr<int>(
"ignore_index", "ignore_index",
"(int, default -100), Specifies a target value that is ignored and" "(int, default -100), Specifies a target value that is ignored and"
......
...@@ -117,11 +117,11 @@ class TeacherStudentSigmoidLossOpMaker ...@@ -117,11 +117,11 @@ class TeacherStudentSigmoidLossOpMaker
"[N x 1]. The teacher student sigmoid loss."); "[N x 1]. The teacher student sigmoid loss.");
AddAttr<float>( AddAttr<float>(
"soft_max_up_bound", "soft_max_up_bound",
"fp32, if input > soft_max_up_bound, will be bound, default 15.0") "fp32, if input > soft_max_up_bound, input will be bound, default 15.0")
.SetDefault(15.0); .SetDefault(15.0);
AddAttr<float>( AddAttr<float>("soft_max_lower_bound",
"soft_max_lower_bound", "fp32, if input < soft_max_lower_bound, input will be "
"fp32, if input < soft_max_lower_bound, will be bound, default -15.0") "bound, default -15.0")
.SetDefault(-15.0); .SetDefault(-15.0);
AddComment(R"DOC( AddComment(R"DOC(
TeacherStudentSigmoidLoss Operator. TeacherStudentSigmoidLoss Operator.
...@@ -134,7 +134,7 @@ we add another label(z') to original. ...@@ -134,7 +134,7 @@ we add another label(z') to original.
label = {-2, -1, [0, 2]} label = {-2, -1, [0, 2]}
when z' does not exist, clk = 0 : label = -2; when z' does not exist, clk = 0 : label = -2;
when z' does not exist, clk = 1 : label = -1; when z' does not exist, clk = 1 : label = -1;
when z' exists, clk = 0 : label = 0 + z'; when z' exists, clk = 0 : label = 0 + z';
when z' exists, clk = 1 : label = 1 + z'; when z' exists, clk = 1 : label = 1 + z';
)DOC"); )DOC");
......
...@@ -82,6 +82,7 @@ nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_ ...@@ -82,6 +82,7 @@ nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_
cc_test(init_test SRCS init_test.cc DEPS device_context) cc_test(init_test SRCS init_test.cc DEPS device_context)
nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
nv_test(cudnn_desc_test SRCS cudnn_desc_test.cc DEPS dynload_cuda)
nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context) nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context)
cc_library(timer SRCS timer.cc) cc_library(timer SRCS timer.cc)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <functional>
#include <iostream>
#include <iterator>
#include <memory>
#include <numeric>
#include <string>
#include <vector>
#include "paddle/fluid/platform/cudnn_helper.h"
namespace paddle {
namespace platform {
using framework::Tensor;
template <typename T>
cudnnDataType_t ToCudnnDataType(const T& t) {
auto type = framework::ToDataType(t);
return ToCudnnDataType(type);
}
template <>
cudnnDataType_t ToCudnnDataType(const framework::proto::VarType::Type& t) {
cudnnDataType_t type = CUDNN_DATA_FLOAT;
switch (t) {
case framework::proto::VarType::FP16:
type = CUDNN_DATA_HALF;
break;
case framework::proto::VarType::FP32:
type = CUDNN_DATA_FLOAT;
break;
case framework::proto::VarType::FP64:
type = CUDNN_DATA_DOUBLE;
break;
default:
break;
}
return type;
}
class ActivationDescriptor {
public:
using T = cudnnActivationStruct;
struct Deleter {
void operator()(T* t) {
if (t != nullptr) {
PADDLE_ENFORCE(dynload::cudnnDestroyActivationDescriptor(t));
t = nullptr;
}
}
};
ActivationDescriptor() {
T* raw_ptr;
PADDLE_ENFORCE(dynload::cudnnCreateActivationDescriptor(&raw_ptr));
desc_.reset(raw_ptr);
}
template <typename T>
void set(cudnnActivationMode_t mode, const T& coef) {
CUDNN_ENFORCE(dynload::cudnnSetActivationDescriptor(
desc_.get(), mode, CUDNN_NOT_PROPAGATE_NAN, static_cast<double>(coef)));
}
T* desc() { return desc_.get(); }
T* desc() const { return desc_.get(); }
private:
std::unique_ptr<T, Deleter> desc_;
};
class TensorDescriptor {
public:
using T = cudnnTensorStruct;
struct Deleter {
void operator()(T* t) {
if (t != nullptr) {
PADDLE_ENFORCE(dynload::cudnnDestroyTensorDescriptor(t));
t = nullptr;
}
}
};
TensorDescriptor() {
T* raw_ptr;
PADDLE_ENFORCE(dynload::cudnnCreateTensorDescriptor(&raw_ptr));
desc_.reset(raw_ptr);
}
T* desc() { return desc_.get(); }
T* desc() const { return desc_.get(); }
void set(const Tensor& tensor, const int groups = 1) {
auto dims = framework::vectorize2int(tensor.dims());
std::vector<int> strides(dims.size());
strides[dims.size() - 1] = 1;
for (int i = dims.size() - 2; i >= 0; i--) {
strides[i] = dims[i + 1] * strides[i + 1];
}
std::vector<int> dims_with_group(dims.begin(), dims.end());
if (groups > 1) {
dims_with_group[1] = dims_with_group[1] / groups;
}
PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor(
desc_.get(), ToCudnnDataType(tensor.type()), dims_with_group.size(),
dims_with_group.data(), strides.data()));
}
private:
std::unique_ptr<T, Deleter> desc_;
};
} // namespace platform
} // namespace paddle
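TensorDescriptor::set derives packed row-major strides from the tensor's dims before calling cudnnSetTensorNdDescriptor; e.g. shape {2, 3, 4} yields strides {12, 4, 1}. A self-contained check of that stride computation:

#include <cassert>
#include <vector>

int main() {
  std::vector<int> dims = {2, 3, 4};
  std::vector<int> strides(dims.size());
  strides[dims.size() - 1] = 1;
  for (int i = static_cast<int>(dims.size()) - 2; i >= 0; i--) {
    strides[i] = dims[i + 1] * strides[i + 1];  // packed row-major layout
  }
  assert(strides[0] == 12 && strides[1] == 4 && strides[2] == 1);
  return 0;
}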
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/cudnn_desc.h"
#include <gtest/gtest.h>
namespace paddle {
namespace platform {
TEST(TensorDescriptor, Empty) {
ActivationDescriptor a;
TensorDescriptor t;
TensorDescriptor t1;
TensorDescriptor *t11 = new TensorDescriptor();
delete t11;
std::unique_ptr<TensorDescriptor> tt(new TensorDescriptor());
}
TEST(TensorDescriptor, Normal) {
framework::Tensor tt;
tt.Resize({2, 3, 4});
tt.mutable_data<float>(platform::CPUPlace());
TensorDescriptor desc;
desc.set(tt);
EXPECT_TRUE(desc.desc() != nullptr);
}
} // namespace platform
} // namespace paddle
...@@ -99,6 +99,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name); ...@@ -99,6 +99,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
__macro(cudnnDestroy); \ __macro(cudnnDestroy); \
__macro(cudnnSetStream); \ __macro(cudnnSetStream); \
__macro(cudnnActivationForward); \ __macro(cudnnActivationForward); \
__macro(cudnnActivationBackward); \
__macro(cudnnConvolutionForward); \ __macro(cudnnConvolutionForward); \
__macro(cudnnConvolutionBackwardBias); \ __macro(cudnnConvolutionBackwardBias); \
__macro(cudnnGetConvolutionForwardWorkspaceSize); \ __macro(cudnnGetConvolutionForwardWorkspaceSize); \
......
...@@ -86,6 +86,8 @@ extern void* mklml_dso_handle; ...@@ -86,6 +86,8 @@ extern void* mklml_dso_handle;
__macro(vdPowx); \ __macro(vdPowx); \
__macro(vsInv); \ __macro(vsInv); \
__macro(vdInv); \ __macro(vdInv); \
__macro(vmsErf); \
__macro(vmdErf); \
__macro(MKL_Set_Num_Threads) __macro(MKL_Set_Num_Threads)
MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP); MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
......
...@@ -14,6 +14,9 @@ limitations under the License. */ ...@@ -14,6 +14,9 @@ limitations under the License. */
#pragma once #pragma once
#include <string> #include <string>
#ifdef PADDLE_WITH_CUDA
#include <cuda_runtime.h>
#endif
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
...@@ -271,7 +271,6 @@ class MKLDNNHandler { ...@@ -271,7 +271,6 @@ class MKLDNNHandler {
AppendKey(key, suffix); AppendKey(key, suffix);
} }
protected:
static void AppendKeyDims(std::string* key, static void AppendKeyDims(std::string* key,
const mkldnn::memory::dims& dims) { const mkldnn::memory::dims& dims) {
for (unsigned int i = 0; i < dims.size(); i++) { for (unsigned int i = 0; i < dims.size(); i++) {
...@@ -289,6 +288,7 @@ class MKLDNNHandler { ...@@ -289,6 +288,7 @@ class MKLDNNHandler {
key->append(s); key->append(s);
} }
protected:
static std::string dims2str(const mkldnn::memory::dims& operand_dims) { static std::string dims2str(const mkldnn::memory::dims& operand_dims) {
std::string dstr = ""; std::string dstr = "";
for (size_t i = 0; i < operand_dims.size(); ++i) { for (size_t i = 0; i < operand_dims.size(); ++i) {
...@@ -302,6 +302,9 @@ class MKLDNNHandler { ...@@ -302,6 +302,9 @@ class MKLDNNHandler {
mkldnn::engine engine_; mkldnn::engine engine_;
std::string key_; std::string key_;
bool is_reusing_; bool is_reusing_;
public:
static constexpr int MaxKeyLength = 256;
}; };
class TransposeMKLDNNHandler : public MKLDNNHandler { class TransposeMKLDNNHandler : public MKLDNNHandler {
......
...@@ -33,7 +33,7 @@ class Layer : public imperative::Layer { ...@@ -33,7 +33,7 @@ class Layer : public imperative::Layer {
} }
}; };
class PyOpBase : public imperative::OpBase { class PYBIND11_HIDDEN PyOpBase : public imperative::OpBase {
public: public:
using imperative::OpBase::OpBase; // Inherit constructors using imperative::OpBase::OpBase; // Inherit constructors
}; };
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "paddle/fluid/pybind/ir.h" #include "paddle/fluid/pybind/ir.h"
#include <algorithm> #include <algorithm>
#include <memory>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
...@@ -116,7 +117,7 @@ void BindNode(py::module *m) { ...@@ -116,7 +117,7 @@ void BindNode(py::module *m) {
.def("is_var", &Node::IsVar) .def("is_var", &Node::IsVar)
.def("is_ctrl_var", &Node::IsCtrlVar) .def("is_ctrl_var", &Node::IsCtrlVar)
.def("clear_inputs", [](Node &self) { self.inputs.clear(); }) .def("clear_inputs", [](Node &self) { self.inputs.clear(); })
.def("inputs_remove", .def("remove_input",
[](Node &self, int node_id) { [](Node &self, int node_id) {
auto pos = std::find_if( auto pos = std::find_if(
self.inputs.begin(), self.inputs.end(), self.inputs.begin(), self.inputs.end(),
...@@ -125,7 +126,7 @@ void BindNode(py::module *m) { ...@@ -125,7 +126,7 @@ void BindNode(py::module *m) {
self.inputs.erase(pos); self.inputs.erase(pos);
} }
}) })
.def("inputs_remove", .def("remove_input",
[](Node &self, Node &node) { [](Node &self, Node &node) {
auto pos = auto pos =
std::find(self.inputs.begin(), self.inputs.end(), &node); std::find(self.inputs.begin(), self.inputs.end(), &node);
...@@ -133,10 +134,10 @@ void BindNode(py::module *m) { ...@@ -133,10 +134,10 @@ void BindNode(py::module *m) {
self.inputs.erase(pos); self.inputs.erase(pos);
} }
}) })
.def("inputs_append", .def("append_input",
[](Node &self, Node &node) { self.inputs.push_back(&node); }) [](Node &self, Node &node) { self.inputs.push_back(&node); })
.def("clear_outputs", [](Node &self) { self.outputs.clear(); }) .def("clear_outputs", [](Node &self) { self.outputs.clear(); })
.def("outputs_remove", .def("remove_output",
[](Node &self, int node_id) { [](Node &self, int node_id) {
auto pos = std::find_if( auto pos = std::find_if(
self.outputs.begin(), self.outputs.end(), self.outputs.begin(), self.outputs.end(),
...@@ -145,7 +146,7 @@ void BindNode(py::module *m) { ...@@ -145,7 +146,7 @@ void BindNode(py::module *m) {
self.outputs.erase(pos); self.outputs.erase(pos);
} }
}) })
.def("outputs_remove", .def("remove_output",
[](Node &self, Node &node) { [](Node &self, Node &node) {
auto pos = auto pos =
std::find(self.outputs.begin(), self.outputs.end(), &node); std::find(self.outputs.begin(), self.outputs.end(), &node);
...@@ -153,7 +154,7 @@ void BindNode(py::module *m) { ...@@ -153,7 +154,7 @@ void BindNode(py::module *m) {
self.outputs.erase(pos); self.outputs.erase(pos);
} }
}) })
.def("outputs_append", .def("append_output",
[](Node &self, Node &node) { self.outputs.push_back(&node); }) [](Node &self, Node &node) { self.outputs.push_back(&node); })
.def_readwrite("inputs", &Node::inputs) .def_readwrite("inputs", &Node::inputs)
.def_readwrite("outputs", &Node::outputs); .def_readwrite("outputs", &Node::outputs);
......
...@@ -189,8 +189,6 @@ void BindBlockDesc(pybind11::module *m) { ...@@ -189,8 +189,6 @@ void BindBlockDesc(pybind11::module *m) {
return self.HasVar(name); return self.HasVar(name);
}, },
pybind11::return_value_policy::reference) pybind11::return_value_policy::reference)
.def("_clear_block", [](pd::BlockDesc &self) { return self.Clear(); },
pybind11::return_value_policy::reference)
.def("_rename_var", .def("_rename_var",
[](pd::BlockDesc &self, const pybind11::bytes &byte_name, [](pd::BlockDesc &self, const pybind11::bytes &byte_name,
const pybind11::bytes &byte_name_new) { const pybind11::bytes &byte_name_new) {
......
...@@ -86,6 +86,14 @@ bool IsCompiledWithCUDA() { ...@@ -86,6 +86,14 @@ bool IsCompiledWithCUDA() {
#endif #endif
} }
bool IsCompiledWithMKLDNN() {
#ifndef PADDLE_WITH_MKLDNN
return false;
#else
return true;
#endif
}
bool IsCompiledWithBrpc() { bool IsCompiledWithBrpc() {
#ifndef PADDLE_WITH_DISTRIBUTE #ifndef PADDLE_WITH_DISTRIBUTE
return false; return false;
...@@ -169,6 +177,23 @@ PYBIND11_MODULE(core, m) { ...@@ -169,6 +177,23 @@ PYBIND11_MODULE(core, m) {
py::return_value_policy::take_ownership) py::return_value_policy::take_ownership)
.def("value", [](const imperative::VarBase &self) { return self.var_; }, .def("value", [](const imperative::VarBase &self) { return self.var_; },
py::return_value_policy::reference) py::return_value_policy::reference)
.def_property("name",
[](const imperative::VarBase &self) { return self.name_; },
[](imperative::VarBase &self, const std::string &name) {
self.name_ = name;
})
.def_property("block",
[](const imperative::VarBase &self) { return self.block_; },
[](imperative::VarBase &self, framework::BlockDesc *block) {
self.block_ = block;
},
py::return_value_policy::reference)
.def_property(
"persistable",
[](const imperative::VarBase &self) { return self.persistable_; },
[](imperative::VarBase &self, const bool persistable) {
self.persistable_ = persistable;
})
.def_property( .def_property(
"desc", "desc",
[](const imperative::VarBase &self) { return self.var_desc_; }, [](const imperative::VarBase &self) { return self.var_desc_; },
...@@ -185,6 +210,10 @@ PYBIND11_MODULE(core, m) { ...@@ -185,6 +210,10 @@ PYBIND11_MODULE(core, m) {
py::class_<imperative::OpBase, PyOpBase>(m, "OpBase", R"DOC()DOC") py::class_<imperative::OpBase, PyOpBase>(m, "OpBase", R"DOC()DOC")
.def(py::init<>()) .def(py::init<>())
.def("register_backward_hooks",
[](imperative::OpBase &self, const py::object &callable) {
self.RegisterBackwardHooks(callable);
})
.def_property( .def_property(
"desc", [](const imperative::OpBase &self) { return self.op_desc_; }, "desc", [](const imperative::OpBase &self) { return self.op_desc_; },
[](imperative::OpBase &self, framework::OpDesc *op_desc) { [](imperative::OpBase &self, framework::OpDesc *op_desc) {
...@@ -193,6 +222,16 @@ PYBIND11_MODULE(core, m) { ...@@ -193,6 +222,16 @@ PYBIND11_MODULE(core, m) {
} }
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def_property("_trace_id",
[](const imperative::OpBase &self) {
pybind11::gil_scoped_release release;
return self.trace_id_;
},
[](imperative::OpBase &self, int trace_id) {
pybind11::gil_scoped_release release;
self.trace_id_ = trace_id;
},
py::return_value_policy::reference)
.def_property( .def_property(
"forward_id", "forward_id",
[](const imperative::OpBase &self) { return self.forward_id_; }, [](const imperative::OpBase &self) { return self.forward_id_; },
...@@ -405,11 +444,11 @@ PYBIND11_MODULE(core, m) { ...@@ -405,11 +444,11 @@ PYBIND11_MODULE(core, m) {
Set LoD of the LoDTensor according to recursive sequence length. Set LoD of the LoDTensor according to recursive sequence length.
For example, if recursive_sequence_lengths=[[2, 3]], meaning that For example, if recursive_sequence_lengths=[[2, 3]], meaning that
there are two sequences with length 2 and 3 respectively, the there are two sequences with length 2 and 3 respectively, the
corresponding lod would be [[0, 2, 2+3]], i.e., [[0, 2, 5]]. corresponding lod would be [[0, 2, 2+3]], i.e., [[0, 2, 5]].
Args: Args:
recursive_sequence_lengths (List[List[int]]): sequence lengths. recursive_sequence_lengths (List[List[int]]): sequence lengths.
)DOC") )DOC")
.def("lod", .def("lod",
[](LoDTensor &self) -> std::vector<std::vector<size_t>> { [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
...@@ -440,7 +479,7 @@ PYBIND11_MODULE(core, m) { ...@@ -440,7 +479,7 @@ PYBIND11_MODULE(core, m) {
Return the sequence length of the LoDTensor corresponding to LoD. Return the sequence length of the LoDTensor corresponding to LoD.
Returns: Returns:
out (List[List[int]]): the sequence lengths. out (List[List[int]]): the sequence lengths.
)DOC") )DOC")
.def("has_valid_recursive_sequence_lengths", .def("has_valid_recursive_sequence_lengths",
[](LoDTensor &self) -> bool { [](LoDTensor &self) -> bool {
...@@ -592,29 +631,29 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -592,29 +631,29 @@ All parameter, weight, gradient are variables in Paddle.
}, },
py::arg("name"), py::arg("name"),
R"DOC( R"DOC(
Find or create variable named :code:`name` in the current scope. Find or create variable named :code:`name` in the current scope.
If the variable named :code:`name` does not exist in the If the variable named :code:`name` does not exist in the
current scope, the variable would be created. Otherwise, current scope, the variable would be created. Otherwise,
return the existing variable. return the existing variable.
Args: Args:
name (str): the variable name. name (str): the variable name.
Returns: Returns:
out (core.Variable): the found or created variable. out (core.Variable): the found or created variable.
)DOC", )DOC",
py::return_value_policy::reference) py::return_value_policy::reference)
.def("find_var", &Scope::FindVar, py::arg("name"), .def("find_var", &Scope::FindVar, py::arg("name"),
R"DOC( R"DOC(
Find variable named :code:`name` in the current scope or Find variable named :code:`name` in the current scope or
its parent scope. Return None if not found. its parent scope. Return None if not found.
Args: Args:
name (str): the variable name. name (str): the variable name.
Returns: Returns:
out (core.Variable|None): the found variable or None. out (core.Variable|None): the found variable or None.
)DOC", )DOC",
py::return_value_policy::reference) py::return_value_policy::reference)
.def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); }, .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); },
...@@ -638,7 +677,7 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -638,7 +677,7 @@ All parameter, weight, gradient are variables in Paddle.
}, },
R"DOC( R"DOC(
Create a new scope. Create a new scope.
Returns: Returns:
out (core._Scope): the created scope. out (core._Scope): the created scope.
)DOC", )DOC",
...@@ -849,6 +888,7 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -849,6 +888,7 @@ All parameter, weight, gradient are variables in Paddle.
[](bool init_p2p) { framework::InitDevices(init_p2p); }); [](bool init_p2p) { framework::InitDevices(init_p2p); });
m.def("is_compiled_with_cuda", IsCompiledWithCUDA); m.def("is_compiled_with_cuda", IsCompiledWithCUDA);
m.def("is_compiled_with_mkldnn", IsCompiledWithMKLDNN);
m.def("is_compiled_with_brpc", IsCompiledWithBrpc); m.def("is_compiled_with_brpc", IsCompiledWithBrpc);
m.def("is_compiled_with_dist", IsCompiledWithDIST); m.def("is_compiled_with_dist", IsCompiledWithDIST);
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
......
(This file's diff is too large to display; view the blob instead.)
...@@ -259,6 +259,7 @@ function check_style() { ...@@ -259,6 +259,7 @@ function check_style() {
eval "$(GIMME_GO_VERSION=1.8.3 gimme)" eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
fi fi
pip install cpplint
# set up go environment for running gometalinter # set up go environment for running gometalinter
mkdir -p $GOPATH/src/github.com/PaddlePaddle/ mkdir -p $GOPATH/src/github.com/PaddlePaddle/
ln -sf ${PADDLE_ROOT} $GOPATH/src/github.com/PaddlePaddle/Paddle ln -sf ${PADDLE_ROOT} $GOPATH/src/github.com/PaddlePaddle/Paddle
...@@ -414,10 +415,11 @@ function assert_api_not_changed() { ...@@ -414,10 +415,11 @@ function assert_api_not_changed() {
source .env/bin/activate source .env/bin/activate
pip install ${PADDLE_ROOT}/build/python/dist/*whl pip install ${PADDLE_ROOT}/build/python/dist/*whl
python ${PADDLE_ROOT}/tools/print_signatures.py paddle.fluid,paddle.reader > new.spec python ${PADDLE_ROOT}/tools/print_signatures.py paddle.fluid,paddle.reader > new.spec
if [ "$1" == "cp35-cp35m" ] || [ "$1" == "cp36-cp36m" ] || [ "$1" == "cp37-cp37m" ]; then if [ "$1" == "cp35-cp35m" ] || [ "$1" == "cp36-cp36m" ] || [ "$1" == "cp37-cp37m" ]; then
# Use sed to keep the python2 and python3 spec the same # Use sed to keep the python2 and python3 spec the same
sed -i 's/arg0: str/arg0: unicode/g' new.spec sed -i 's/arg0: str/arg0: unicode/g' new.spec
sed -i "s/\(.*Transpiler.*\).__init__ ArgSpec(args=\['self'].*/\1.__init__ /g" new.spec sed -i "s/\(.*Transpiler.*\).__init__ (ArgSpec(args=\['self'].*/\1.__init__ /g" new.spec
fi fi
# ComposeNotAligned has significant difference between py2 and py3 # ComposeNotAligned has significant difference between py2 and py3
sed -i '/.*ComposeNotAligned.*/d' new.spec sed -i '/.*ComposeNotAligned.*/d' new.spec
...@@ -431,8 +433,8 @@ function assert_api_spec_approvals() { ...@@ -431,8 +433,8 @@ function assert_api_spec_approvals() {
BRANCH="develop" BRANCH="develop"
fi fi
API_FILES=("cmake/external" API_FILES=("paddle/fluid/API.spec"
"paddle/fluid/API.spec" "python/paddle/fluid/parallel_executor.py"
"paddle/fluid/framework/operator.h" "paddle/fluid/framework/operator.h"
"paddle/fluid/framework/tensor.h" "paddle/fluid/framework/tensor.h"
"paddle/fluid/framework/lod_tensor.h" "paddle/fluid/framework/lod_tensor.h"
...@@ -451,12 +453,21 @@ function assert_api_spec_approvals() { ...@@ -451,12 +453,21 @@ function assert_api_spec_approvals() {
echo "checking ${API_FILE} change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}" echo "checking ${API_FILE} change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}"
if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then
# NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable. # NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable.
APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \ if [ "$API_FILE" == "paddle/fluid/API.spec" ];then
python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803` APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 2887803 35982308`
else
APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803`
fi
echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}" echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
if [ "${APPROVALS}" == "FALSE" ]; then if [ "${APPROVALS}" == "FALSE" ]; then
if [ "$API_FILE" == "paddle/fluid/API.spec" ];then
echo "You must have panyx0718 and shanyi15 approval for the api change! ${API_FILE}"
else
echo "You must have panyx0718 approval for the api change! ${API_FILE}" echo "You must have panyx0718 approval for the api change! ${API_FILE}"
exit 1 fi
exit 1
fi fi
fi fi
done done
...@@ -471,19 +482,6 @@ function assert_api_spec_approvals() { ...@@ -471,19 +482,6 @@ function assert_api_spec_approvals() {
exit 1 exit 1
fi fi
fi fi
pip install ${PADDLE_ROOT}/build/opt/paddle/share/wheels/*.whl
CHECK_DOCK_MD5=`python ${PADDLE_ROOT}/tools/check_doc_approval.py`
if [ "True" != ${CHECK_DOCK_MD5} ]; then
APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 35982308`
echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
if [ "${APPROVALS}" == "FALSE" ]; then
echo "You must have shanyi15 approval for the api doc change! "
exit 1
fi
echo ${CHECK_DOCK_MD5} >/root/.cache/doc_md5.txt
fi
} }
......
...@@ -125,14 +125,13 @@ def __bootstrap__(): ...@@ -125,14 +125,13 @@ def __bootstrap__():
os.environ['OMP_NUM_THREADS'] = str(num_threads) os.environ['OMP_NUM_THREADS'] = str(num_threads)
sysstr = platform.system() sysstr = platform.system()
read_env_flags = [ read_env_flags = [
'check_nan_inf', 'benchmark', 'eager_delete_scope', 'use_mkldnn', 'check_nan_inf', 'benchmark', 'eager_delete_scope', 'use_ngraph',
'use_ngraph', 'initial_cpu_memory_in_mb', 'init_allocated_mem', 'initial_cpu_memory_in_mb', 'init_allocated_mem', 'free_idle_memory',
'free_idle_memory', 'paddle_num_threads', "dist_threadpool_size", 'paddle_num_threads', "dist_threadpool_size", 'eager_delete_tensor_gb',
'eager_delete_tensor_gb', 'fast_eager_deletion_mode', 'fast_eager_deletion_mode', 'allocator_strategy',
'allocator_strategy', 'reader_queue_speed_test_mode', 'reader_queue_speed_test_mode', 'print_sub_graph_dir',
'print_sub_graph_dir', 'pe_profile_fname', 'warpctc_dir', 'pe_profile_fname', 'warpctc_dir', 'inner_op_parallelism',
'inner_op_parallelism', 'enable_parallel_graph', 'enable_parallel_graph', 'multiple_of_cupti_buffer_size'
'multiple_of_cupti_buffer_size'
] ]
if 'Darwin' not in sysstr: if 'Darwin' not in sysstr:
read_env_flags.append('use_pinned_memory') read_env_flags.append('use_pinned_memory')
...@@ -140,6 +139,9 @@ def __bootstrap__(): ...@@ -140,6 +139,9 @@ def __bootstrap__():
if os.name != 'nt': if os.name != 'nt':
read_env_flags.append('cpu_deterministic') read_env_flags.append('cpu_deterministic')
if core.is_compiled_with_mkldnn():
read_env_flags.append('use_mkldnn')
if core.is_compiled_with_dist(): if core.is_compiled_with_dist():
read_env_flags.append('rpc_deadline') read_env_flags.append('rpc_deadline')
read_env_flags.append('rpc_server_profile_path') read_env_flags.append('rpc_server_profile_path')
......
@@ -17,7 +17,6 @@ import os
 import six
 import sys
 from .. import compat as cpt
-from . import framework
 from . import core
 from . import framework
@@ -36,6 +35,30 @@ def _place_obj(place):
     return p
 
 
+def _is_pserver_mode(main_program):
+    main = main_program if main_program \
+        else default_main_program()
+    for op in main.global_block().ops:
+        if op.type in ["send", "recv"]:
+            return True
+    return False
+
+
+def get_available_places(use_cuda):
+    if use_cuda:
+        gpus_env = os.getenv("FLAGS_selected_gpus")
+        if gpus_env:
+            gpus = [int(s) for s in gpus_env.split(",")]
+        else:
+            gpus = [i for i in six.moves.range(core.get_cuda_device_count())]
+        places = [core.CUDAPlace(i) for i in gpus]
+    else:
+        cpu_num = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
+        places = [core.CPUPlace() for _ in six.moves.range(cpu_num)]
+    assert places, "no place for execution"
+    return places
+
+
 class CompiledProgram(object):
     """
     Compiles to Graph for execution.
@@ -127,8 +150,7 @@ class CompiledProgram(object):
             self._exec_strategy = ExecutionStrategy()
         if self._build_strategy is None:
             self._build_strategy = BuildStrategy()
-        self._build_strategy.is_distribution = framework.is_pserver_mode(
-            self._program)
+        self._build_strategy.is_distribution = _is_pserver_mode(self._program)
         return self
 
     def with_inference_optimize(self, config):
@@ -153,9 +175,9 @@ class CompiledProgram(object):
     def _with_distributed(self):
         raise NotImplementedError()
 
-    def _compile_data_parallel(self):
+    def _compile_data_parallel(self, use_cuda=False, scope=None):
         if self._share_vars_from:
-            if self._scope:
+            if scope:
                 sys.stderr.write("share_vars_from is set, scope is ignored.\n")
             if not self._share_vars_from._is_data_parallel:
                 raise ValueError("share_vars_from is not data parallel. Cannot "
@@ -166,23 +188,11 @@ class CompiledProgram(object):
                                  "var to share.")
             self._local_scopes = self._share_vars_from._executor.local_scopes()
         else:
+            assert scope is not None, ""
             self._local_scopes = []
 
-        self._exec_strategy.use_cuda = isinstance(self._place, core.CUDAPlace)
-        if self._exec_strategy.use_cuda:
-            gpus_env = os.getenv("FLAGS_selected_gpus")
-            if gpus_env:
-                gpus = [int(s) for s in gpus_env.split(",")]
-            else:
-                gpus = [
-                    i for i in six.moves.range(core.get_cuda_device_count())
-                ]
-            self._places = [core.CUDAPlace(i) for i in gpus]
-        else:
-            cpu_num = int(
-                os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
-            self._places = [core.CPUPlace() for _ in six.moves.range(cpu_num)]
-        assert self._places, "no place for execution"
+        self._exec_strategy.use_cuda = use_cuda
+        self._places = get_available_places(self._exec_strategy.use_cuda)
 
         if self._exec_strategy.num_threads == 0:
             if self._exec_strategy.use_cuda:
@@ -197,9 +207,11 @@ class CompiledProgram(object):
         # FIXME(dzhwinter): enable_inplace should be after memory_optimize
         # if turn on python memory optimize, turn off the inplace_pass.
         if self._build_strategy.memory_optimize is None:
-            self._build_strategy.memory_optimize = False if self._program and self._program._is_mem_optimized else True
+            self._build_strategy.memory_optimize = False \
+                if self._program and self._program._is_mem_optimized else True
         if self._build_strategy.enable_inplace is None:
-            self._build_strategy.enable_inplace = False if self._program and self._program._is_mem_optimized else True
+            self._build_strategy.enable_inplace = False \
+                if self._program and self._program._is_mem_optimized else True
 
         # TODO(wuyi): trainer endpoings should be passed in through
         # build_strategy, not program.xxx.
@@ -221,12 +233,12 @@ class CompiledProgram(object):
 
         places = list(map(_place_obj, self._places))
 
-        return core.ParallelExecutor(
-            places,
-            set(self._persistable_vars),
-            cpt.to_text(self._loss_name)
-            if self._loss_name else six.u(''), self._scope, self._local_scopes,
-            self._exec_strategy, self._build_strategy, self._graph)
+        return core.ParallelExecutor(places,
+                                     set(self._persistable_vars),
+                                     cpt.to_text(self._loss_name)
+                                     if self._loss_name else six.u(''), scope,
+                                     self._local_scopes, self._exec_strategy,
+                                     self._build_strategy, self._graph)
 
     def _compile_inference(self):
         return core.create_paddle_predictor(self._infer_config)
@@ -253,7 +265,9 @@ class CompiledProgram(object):
         self._scope = scope
         self._place = place
         if self._is_data_parallel:
-            self._executor = self._compile_data_parallel()
+            self._executor = self._compile_data_parallel(
+                use_cuda=isinstance(self._place, core.CUDAPlace),
+                scope=self._scope)
         elif self._is_inference:
             self._executor = self._compile_inference()
         else:
...
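For reference, a minimal runnable sketch of the refactored flow (CPU-only, Paddle 1.3-era API assumed): the Executor resolves scope and place, and `_compile()` now forwards `use_cuda` and `scope` into `_compile_data_parallel()`, which in turn uses the new module-level `get_available_places()`:

import os

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.compiler as compiler

os.environ['CPU_NUM'] = '2'  # read by get_available_places(use_cuda=False)

place = fluid.CPUPlace()
exe = fluid.Executor(place)

x = fluid.layers.data(name='x', shape=[1], dtype='float32')
loss = fluid.layers.mean(x)
exe.run(fluid.default_startup_program())

compiled = compiler.CompiledProgram(
    fluid.default_main_program()).with_data_parallel(loss_name=loss.name)
# exe.run() triggers compiled._compile(scope, place), which now calls
# _compile_data_parallel(use_cuda=isinstance(place, core.CUDAPlace),
#                        scope=scope) instead of reading self._place/_scope.
out, = exe.run(compiled,
               feed={'x': np.ones((4, 1), dtype='float32')},
               fetch_list=[loss.name])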
@@ -17,7 +17,9 @@ import numpy as np
 import six
 from ..... import compat as cpt
 from .... import core
+from .... import Executor
 from ....framework import IrGraph
+from ....framework import IrNode
 from ....framework import Program
 from ....initializer import Constant
 from .... import unique_name
@@ -31,7 +33,7 @@ __all__ = [
 class QuantizationTransformPass(object):
     def __init__(self,
                  scope=None,
-                 program_exe=None,
+                 place=None,
                  weight_bits=8,
                  activation_bits=8,
                  activation_quantize_type='abs_max',
@@ -45,7 +47,7 @@ class QuantizationTransformPass(object):
             scope(fluid.Scope): When activation use 'range_abs_max' as the quantize
             type, this pass will create some new parameters. The scope is used to
             initialize these new parameters.
-            program_exe(fluid.Executor): program_exe is used to initialize new
+            place(fluid.CPUPlace|fluid.CUDAPlace): place is used to initialize new
             parameters described above.
             weight_bits (int): quantization bit number for weights,
             the bias is not quantized.
@@ -71,13 +73,13 @@ class QuantizationTransformPass(object):
             from paddle.fluid import core
 
             graph = IrGraph(core.Graph(program.desc), for_test=False)
-            exe = fluid.Executor(fluid.CPUPlace())
+            place = fluid.CPUPlace()
             transform_pass = QuantizationTransformPass(fluid.global_scope(),
-            exe)
+            place)
             transform_pass.apply(graph)
         """
         self._scope = scope
-        self._program_exe = program_exe
+        self._place = place
         self._weight_bits = weight_bits
         self._activation_bits = activation_bits
@@ -118,7 +120,7 @@ class QuantizationTransformPass(object):
         self._is_test = graph.is_test()
         # marked the variable which has been dequantized.
         dequantized_vars = collections.OrderedDict()
-        persistable_vars = [p.name() for p in graph.all_persistable_vars()]
+        persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
 
         def _transform_forward(graph, op):
             for var_node in op.inputs:
@@ -149,7 +151,7 @@ class QuantizationTransformPass(object):
         if not self._is_test:
             self._create_global_step(graph)
-        ops = graph.all_ops()
+        ops = graph.all_op_nodes()
         # The process of _transform_forward and _transform_backward is needed in two for loops.
         # The loop for transforming the forward graph:
         for op in ops:
@@ -163,8 +165,8 @@ class QuantizationTransformPass(object):
         if len(self._need_initialized) > 0:
             assert self._scope is not None, \
             'The scope cannot be set None when activation_quantize_type equals to range_abs_max.'
-            assert self._program_exe is not None, \
-            'The program_exe cannot be set None when activation_quantize_type equals to range_abs_max.'
+            assert self._place is not None, \
+            'The place cannot be set None when activation_quantize_type equals to range_abs_max.'
             init_program = Program()
             for var_desc, initializer in six.iteritems(self._need_initialized):
                 var = init_program.global_block().create_var(
@@ -175,7 +177,8 @@ class QuantizationTransformPass(object):
                     lod_level=var_desc.lod_level(),
                     persistable=var_desc.persistable())
                 initializer(var, init_program.global_block())
-            self._program_exe.run(program=init_program, scope=self._scope)
+            exe = Executor(self._place)
+            exe.run(program=init_program, scope=self._scope)
 
         return graph
@@ -183,11 +186,11 @@ class QuantizationTransformPass(object):
         if self._weight_quantize_type == 'range_abs_max' or \
                 self._activation_quantize_type == 'range_abs_max':
             counter_name = cpt.to_text('@STEP_COUNTER@')
-            for node in graph.all_vars():
+            for node in graph.all_var_nodes():
                 if node.name() == counter_name:
                     self._global_step = node
             if self._global_step is None:
-                global_step_in = graph.create_param_node(
+                global_step_in = graph.create_persistable_node(
                     name=counter_name,
                     var_type=core.VarDesc.VarType.LOD_TENSOR,
                     shape=[1],
@@ -228,14 +231,14 @@ class QuantizationTransformPass(object):
         quant_var_node = graph.create_var_node(
             name=self._quantized_var_name(var_node.name()),
-            var_type=var_node.var().type(),
-            shape=var_node.var().shape(),
-            var_dtype=var_node.var().dtype())
+            var_type=var_node.type(),
+            shape=var_node.shape(),
+            var_dtype=var_node.dtype())
         scale_var_node = graph.create_var_node(
             name=self._quantized_scale_name(var_node.name()),
-            var_type=var_node.var().type(),
-            shape=var_node.var().shape(),
-            var_dtype=var_node.var().dtype())
+            var_type=var_node.type(),
+            shape=var_node.shape(),
+            var_dtype=var_node.dtype())
         quant_op_node = graph.create_op_node(
             op_type='fake_quantize_abs_max',
             attrs={
@@ -258,15 +261,15 @@ class QuantizationTransformPass(object):
         quant_var_node = graph.create_var_node(
             name=self._quantized_var_name(var_node.name()),
-            var_type=var_node.var().type(),
-            shape=var_node.var().shape(),
-            var_dtype=var_node.var().dtype())
-        scale_in_node = graph.create_param_node(
+            var_type=var_node.type(),
+            shape=var_node.shape(),
+            var_dtype=var_node.dtype())
+        scale_in_node = graph.create_persistable_node(
             name=self._quantized_scale_name(var_node.name()),
             var_type=core.VarDesc.VarType.LOD_TENSOR,
             shape=[1],
-            var_dtype=var_node.var().dtype())
+            var_dtype=var_node.dtype())
         self._need_initialized[scale_in_node.var()] = Constant(value=0.001)
 
         scale_out_node = graph.create_var_node_from_desc(scale_in_node.var())
@@ -275,11 +278,11 @@ class QuantizationTransformPass(object):
         if not self._is_test:
             # The name of scales_var_node maybe 'scales_0', 'scales_1', etc.
-            scales_node = graph.create_param_node(
+            scales_node = graph.create_persistable_node(
                 name=unique_name.generate('scales'),
                 var_type=core.VarDesc.VarType.LOD_TENSOR,
                 shape=[self._window_size],
-                var_dtype=var_node.var().dtype())
+                var_dtype=var_node.dtype())
             self._need_initialized[scales_node.var()] = Constant(value=0)
             inputs['Iter'] = self._global_step
             outputs['OutScales'] = scales_node
@@ -314,9 +317,9 @@ class QuantizationTransformPass(object):
         dequant_var_node = graph.create_var_node(
             name=self._dequantized_var_name(var_node.name()),
-            var_type=var_node.var().type(),
-            shape=var_node.var().shape(),
-            var_dtype=var_node.var().dtype())
+            var_type=var_node.type(),
+            shape=var_node.shape(),
+            var_dtype=var_node.dtype())
         max_range = (1 << (quant_bits - 1)) - 1
         dequant_op_node = graph.create_op_node(
             op_type='fake_dequantize_max_abs',
@@ -400,22 +403,22 @@ class QuantizationFreezePass(object):
         Args:
             graph(IrGraph): the applied graph.
         """
-        persistable_vars = [p.name() for p in graph.all_persistable_vars()]
-        ops = graph.all_ops()
+        persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
+        ops = graph.all_op_nodes()
         for op_node in ops:
             op_name = op_node.name()
             if op_name in self._fake_quant_op_names:
-                input_arg_name = op_node.op().input('X')[0]
+                input_arg_name = op_node.input('X')[0]
                 if input_arg_name in persistable_vars:
                     if self._weight_quantize_type == 'abs_max':
                         param = self._load_var(input_arg_name)
                         scale_v = np.max(np.abs(param))
                     else:
-                        scale_v = self._load_var(op_node.op().output('OutScale')
-                                                 [0])[0]
+                        scale_v = self._load_var(
+                            op_node.output('OutScale')[0])[0]
                     self._var_scale_map[input_arg_name] = scale_v
                 else:
-                    scale_v = graph.var_node(op_node.op().output('OutScale')[0])
+                    scale_v = graph.var_node(op_node.output('OutScale')[0])
                     self._var_scale_map[input_arg_name] = scale_v
                 if input_arg_name in persistable_vars:
                     self._remove_fake_quant_and_dequant_op(graph, op_node)
@@ -425,13 +428,13 @@ class QuantizationFreezePass(object):
                                                  self._weight_bits)
                     self._restore_var(input_arg_name, quantized_param_v)
 
-        ops = graph.all_ops()
+        ops = graph.all_op_nodes()
        for op_node in ops:
            op_name = op_node.name()
            if op_name in self._fake_dequant_op_names:
                self._remove_fake_quant_and_dequant_op(graph, op_node)
 
-        ops = graph.all_ops()
+        ops = graph.all_op_nodes()
        for op_node in ops:
            op_name = op_node.name()
            if op_name in self._quantizable_ops:
@@ -451,8 +454,8 @@ class QuantizationFreezePass(object):
         return graph
 
     def _remove_fake_quant_and_dequant_op(self, graph, op_node):
-        k = op_node.op().output('Out')[0]
-        v = op_node.op().input('X')[0]
+        k = op_node.output('Out')[0]
+        v = op_node.input('X')[0]
         if v not in self._op_input_rename_map:
             self._op_input_rename_map[k] = v
         else:
@@ -462,7 +465,7 @@ class QuantizationFreezePass(object):
     def _insert_post_dequant_op(self, graph, op_node):
         max_range = None
         scale_var_node = None
-        persistable_vars = [p.name() for p in graph.all_persistable_vars()]
+        persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
         for var_node in op_node.inputs:
             name = var_node.name()
             if name in self._op_input_rename_map:
@@ -480,7 +483,7 @@ class QuantizationFreezePass(object):
                     original_var_name)
                 max_range = param_range * act_range / scale_v
             else:
-                assert isinstance(scale_v, core.Node)
+                assert isinstance(scale_v, IrNode)
                 scale_var_node = self._var_scale_map[original_var_name]
 
         if len(op_node.outputs) != 1:
@@ -490,9 +493,9 @@ class QuantizationFreezePass(object):
         output_var_node = op_node.outputs[0]
         dequant_var_node = graph.create_var_node(
             name=self._dequantized_var_name(output_var_node.name()),
-            var_type=output_var_node.var().type(),
-            shape=output_var_node.var().shape(),
-            var_dtype=output_var_node.var().dtype())
+            var_type=output_var_node.type(),
+            shape=output_var_node.shape(),
+            var_dtype=output_var_node.dtype())
         dequant_op_node = graph.create_op_node(
             op_type='fake_dequantize_max_abs',
             attrs={
@@ -517,14 +520,19 @@ class QuantizationFreezePass(object):
     def _remove_unused_var_nodes(self, graph):
         all_used_vars = set()
-        ops = graph.all_ops()
+        ops = graph.all_op_nodes()
         for op_node in ops:
             for input_node in op_node.inputs:
                 all_used_vars.add(input_node)
             for output_node in op_node.outputs:
                 all_used_vars.add(output_node)
 
-        all_unused_vars = graph.all_vars() - all_used_vars
+        all_used_vars = {n.node for n in all_used_vars}
+        all_unused_vars = {
+            n
+            for n in filter(lambda node: node.node not in all_used_vars,
+                            graph.all_var_nodes())
+        }
         graph.safe_remove_nodes(all_unused_vars)
 
     def _original_var_name(self, var_name):
@@ -583,8 +591,8 @@ class ConvertToInt8Pass(object):
         Args:
             graph(IrGraph): the applied graph.
         """
-        persistable_vars = [p.name() for p in graph.all_persistable_vars()]
-        ops = graph.all_ops()
+        persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
+        ops = graph.all_op_nodes()
         input_map = {}
         for op_node in ops:
             op_name = op_node.name()
@@ -605,10 +613,10 @@ class ConvertToInt8Pass(object):
     def _convert_to_int8(self, graph, var_node):
         int8_var_node_name = var_node.name() + ".int8"
-        int8_var_node = graph.create_param_node(
+        int8_var_node = graph.create_persistable_node(
             name=cpt.to_text(int8_var_node_name),
-            var_type=var_node.var().type(),
-            shape=var_node.var().shape(),
+            var_type=var_node.type(),
+            shape=var_node.shape(),
             var_dtype=core.VarDesc.VarType.INT8)
         array = self._load_var(var_node.name())
         self._scope.var(int8_var_node_name)
@@ -624,14 +632,19 @@ class ConvertToInt8Pass(object):
     def _remove_unused_var_nodes(self, graph):
         all_used_vars = set()
-        ops = graph.all_ops()
+        ops = graph.all_op_nodes()
         for op_node in ops:
             for input_node in op_node.inputs:
                 all_used_vars.add(input_node)
             for output_node in op_node.outputs:
                 all_used_vars.add(output_node)
 
-        all_unused_vars = graph.all_vars() - all_used_vars
+        all_used_vars = {n.node for n in all_used_vars}
+        all_unused_vars = {
+            n
+            for n in filter(lambda node: node.node not in all_used_vars,
+                            graph.all_var_nodes())
+        }
         graph.safe_remove_nodes(all_unused_vars)
@@ -655,11 +668,11 @@ class TransformForMobilePass(object):
         Args:
             graph(IrGraph): the graph will be transformed.
         """
-        ops = graph.all_ops()
+        ops = graph.all_op_nodes()
         for op_node in ops:
             name = op_node.name()
             if name in self._fake_quant_op_names:
-                op_node.op().set_type('quantize')
+                op_node.set_type('quantize')
                 quant_node = graph.create_op_node_from_desc(op_node.op())
                 for input_node in op_node.inputs:
                     graph.link_to(input_node, quant_node)
@@ -667,7 +680,7 @@ class TransformForMobilePass(object):
                     graph.link_to(quant_node, output_node)
                 graph.safe_remove_nodes(op_node)
             if name in self._fake_dequant_op_names:
-                op_node.op().set_type('dequantize')
+                op_node.set_type('dequantize')
                 dequant_node = graph.create_op_node_from_desc(op_node.op())
                 for input_node in op_node.inputs:
                     graph.link_to(input_node, dequant_node)
...
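Taken together, the pass now receives a `place` and constructs its own `Executor` internally when it has to initialize the `range_abs_max` state. A minimal usage sketch under that new contract (module path and a previously built default program assumed):

import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass

# assume a network has been built into fluid.default_main_program()
place = fluid.CPUPlace()                  # was: exe = fluid.Executor(...)
graph = IrGraph(core.Graph(fluid.default_main_program().desc), for_test=False)
transform_pass = QuantizationTransformPass(
    scope=fluid.global_scope(),
    place=place,                          # was: program_exe=exe
    activation_quantize_type='range_abs_max')
transform_pass.apply(graph)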
@@ -61,16 +61,16 @@ class TestGraph(unittest.TestCase):
             opt.minimize(loss)
         graph = IrGraph(core.Graph(main.desc), for_test=False)
         marked_nodes = set()
-        for op in graph.all_ops():
+        for op in graph.all_op_nodes():
             if op.name().find('conv2d') > -1:
                 marked_nodes.add(op)
         graph.draw('.', 'residual', marked_nodes)
         self.assertFalse(graph.has_circle())
         self.assertEqual(graph.graph_num(), 1)
         nodes = graph.topology_sort()
-        self.assertEqual(len(nodes), len(graph.all_ops()))
+        self.assertEqual(len(nodes), len(graph.all_op_nodes()))
         nodes_map = graph.build_adjacency_list()
-        self.assertEqual(len(nodes_map), len(graph.all_ops()))
+        self.assertEqual(len(nodes_map), len(graph.all_op_nodes()))
         nodes_num = len(graph.all_nodes())
         graph.safe_remove_nodes(marked_nodes)
         self.assertEqual(len(graph.all_nodes()), nodes_num - len(marked_nodes))
...
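The renamed accessors the test now exercises return wrapper objects rather than raw `core.Node`s. A small sketch of the graph-level queries, assuming a trivially built program:

import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.framework import IrGraph

x = fluid.layers.data(name='x', shape=[1], dtype='float32')
y = fluid.layers.fc(input=x, size=1)

graph = IrGraph(core.Graph(fluid.default_main_program().desc), for_test=False)
# topology_sort() visits exactly the operator nodes.
assert len(graph.topology_sort()) == len(graph.all_op_nodes())
print({op.name() for op in graph.all_op_nodes()})   # IrOpNode wrappers
print(len(graph.all_var_nodes()), len(graph.all_persistable_nodes()))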
@@ -130,15 +130,16 @@ class TestQuantizationTransformPass(unittest.TestCase):
             loss = linear_fc(3)
             opt = fluid.optimizer.Adam(learning_rate=0.001)
             opt.minimize(loss)
-        exe = fluid.Executor(fluid.CPUPlace())
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
         graph = IrGraph(core.Graph(main.desc), for_test=False)
         transform_pass = QuantizationTransformPass(
             scope=fluid.global_scope(),
-            program_exe=exe,
+            place=place,
             activation_quantize_type=quant_type)
         transform_pass.apply(graph)
         marked_nodes = set()
-        for op in graph.all_ops():
+        for op in graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 marked_nodes.add(op)
         graph.draw('.', 'quantize_fc_' + quant_type, marked_nodes)
@@ -146,7 +147,7 @@ class TestQuantizationTransformPass(unittest.TestCase):
         self.check_program(transform_pass, program)
         val_graph = IrGraph(core.Graph(program.desc), for_test=False)
         val_marked_nodes = set()
-        for op in val_graph.all_ops():
+        for op in val_graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 val_marked_nodes.add(op)
         val_graph.draw('.', 'val_fc_' + quant_type, val_marked_nodes)
@@ -166,15 +167,16 @@ class TestQuantizationTransformPass(unittest.TestCase):
             loss = residual_block(2)
             opt = fluid.optimizer.Adam(learning_rate=0.001)
             opt.minimize(loss)
-        exe = fluid.Executor(fluid.CPUPlace())
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
         graph = IrGraph(core.Graph(main.desc), for_test=False)
         transform_pass = QuantizationTransformPass(
             scope=fluid.global_scope(),
-            program_exe=exe,
+            place=place,
             activation_quantize_type=quant_type)
         transform_pass.apply(graph)
         marked_nodes = set()
-        for op in graph.all_ops():
+        for op in graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 marked_nodes.add(op)
         graph.draw('.', 'quantize_residual_' + quant_type, marked_nodes)
@@ -182,7 +184,7 @@ class TestQuantizationTransformPass(unittest.TestCase):
         self.check_program(transform_pass, program)
         val_graph = IrGraph(core.Graph(program.desc), for_test=False)
         val_marked_nodes = set()
-        for op in val_graph.all_ops():
+        for op in val_graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 val_marked_nodes.add(op)
         val_graph.draw('.', 'val_residual_' + quant_type, val_marked_nodes)
@@ -231,17 +233,17 @@ class TestQuantizationFreezePass(unittest.TestCase):
         with fluid.scope_guard(scope):
             exe.run(startup)
         transform_pass = QuantizationTransformPass(
-            scope=scope, program_exe=exe, activation_quantize_type=quant_type)
+            scope=scope, place=place, activation_quantize_type=quant_type)
         transform_pass.apply(main_graph)
         transform_pass.apply(test_graph)
         dev_name = '_gpu_' if use_cuda else '_cpu_'
         marked_nodes = set()
-        for op in main_graph.all_ops():
+        for op in main_graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 marked_nodes.add(op)
         main_graph.draw('.', 'main' + dev_name + quant_type, marked_nodes)
         marked_nodes = set()
-        for op in test_graph.all_ops():
+        for op in test_graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 marked_nodes.add(op)
         test_graph.draw('.', 'test' + dev_name + quant_type, marked_nodes)
@@ -251,11 +253,6 @@ class TestQuantizationFreezePass(unittest.TestCase):
         iters = 5
         batch_size = 8
 
-        #train_exe = fluid.ParallelExecutor(
-        #    main_program=quantized_main_program,
-        #    use_cuda=bool(use_cuda),
-        #    loss_name=loss.name,
-        #    scope=scope)
         train_reader = paddle.batch(
             paddle.reader.shuffle(
                 paddle.dataset.mnist.train(), buf_size=500),
@@ -269,9 +266,7 @@ class TestQuantizationFreezePass(unittest.TestCase):
                 loss_v = exe.run(program=quantized_main_program,
                                  feed=feeder.feed(data),
                                  fetch_list=[loss])
-                #loss_v = train_exe.run(feed=feeder.feed(data),
-                #                       fetch_list=[loss.name])
-                #print('{}: {}'.format('loss' + dev_name + quant_type, loss_v))
+                print('{}: {}'.format('loss' + dev_name + quant_type, loss_v))
 
         test_data = next(test_reader())
         with fluid.program_guard(quantized_test_program):
@@ -287,7 +282,7 @@ class TestQuantizationFreezePass(unittest.TestCase):
         freeze_pass = QuantizationFreezePass(scope=scope, place=place)
         freeze_pass.apply(test_graph)
         marked_nodes = set()
-        for op in test_graph.all_ops():
+        for op in test_graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 marked_nodes.add(op)
         test_graph.draw('.', 'test_freeze' + dev_name + quant_type,
@@ -299,21 +294,21 @@ class TestQuantizationFreezePass(unittest.TestCase):
                                  feed=feeder.feed(test_data),
                                  fetch_list=[loss])
         self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
-        #print('{}: {}'.format('test_loss1' + dev_name + quant_type, test_loss1))
-        #print('{}: {}'.format('test_loss2' + dev_name + quant_type, test_loss2))
+        print('{}: {}'.format('test_loss1' + dev_name + quant_type, test_loss1))
+        print('{}: {}'.format('test_loss2' + dev_name + quant_type, test_loss2))
         w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
         # Maybe failed, this is due to the calculation precision
         # self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
-        #print('{}: {}'.format('w_freeze' + dev_name + quant_type,
-        #                      np.sum(w_freeze)))
-        #print('{}: {}'.format('w_quant' + dev_name + quant_type,
-        #                      np.sum(w_quant)))
+        print('{}: {}'.format('w_freeze' + dev_name + quant_type,
+                              np.sum(w_freeze)))
+        print('{}: {}'.format('w_quant' + dev_name + quant_type,
+                              np.sum(w_quant)))
 
         # Convert parameter to 8-bit.
         convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
         convert_int8_pass.apply(test_graph)
         marked_nodes = set()
-        for op in test_graph.all_ops():
+        for op in test_graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 marked_nodes.add(op)
         test_graph.draw('.', 'test_int8' + dev_name + quant_type, marked_nodes)
@@ -330,14 +325,14 @@ class TestQuantizationFreezePass(unittest.TestCase):
         w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
         self.assertEqual(w_8bit.dtype, np.int8)
         self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
-        #print('{}: {}'.format('w_8bit' + dev_name + quant_type, np.sum(w_8bit)))
-        #print('{}: {}'.format('w_freeze' + dev_name + quant_type,
-        #                      np.sum(w_freeze)))
+        print('{}: {}'.format('w_8bit' + dev_name + quant_type, np.sum(w_8bit)))
+        print('{}: {}'.format('w_freeze' + dev_name + quant_type,
+                              np.sum(w_freeze)))
 
         mobile_pass = TransformForMobilePass()
         mobile_pass.apply(test_graph)
         marked_nodes = set()
-        for op in test_graph.all_ops():
+        for op in test_graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 marked_nodes.add(op)
         test_graph.draw('.', 'test_mobile' + dev_name + quant_type,
...
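For orientation, these are the three post-training stages the test walks through, in order; `test_graph`, `scope` and `place` are assumed to come from a quantization-aware training setup like the one above:

from paddle.fluid.contrib.slim.quantization import (
    ConvertToInt8Pass, QuantizationFreezePass, TransformForMobilePass)

freeze_pass = QuantizationFreezePass(scope=scope, place=place)
freeze_pass.apply(test_graph)         # fold fake quant/dequant into weights

convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
convert_int8_pass.apply(test_graph)   # persist weights as INT8 tensors

mobile_pass = TransformForMobilePass()
mobile_pass.apply(test_graph)         # rewrite fake ops into quantize/dequantize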
@@ -261,45 +261,42 @@ def _as_lodtensor(data, place):
 class Executor(object):
     """
-    An Executor in Python, only support the single-GPU running. For multi-cards, please refer to
-    ParallelExecutor.
-    Python executor takes a program, add feed operators and fetch operators to this program according
+    An Executor in Python, supports single/multiple-GPU running, and single/multiple-CPU running.
+    Python executor takes a program, adds feed operators and fetch operators to this program according
     to feed map and fetch_list. Feed map provides input data for the program. fetch_list provides
-    the variables(or names) that user want to get after program run. Note: the executor will run all
+    the variables(or names) that user wants to get after program runs. Note: the executor will run all
     operators in the program but not only the operators dependent by the fetch_list.
-    It store the global variables into the global scope, and create a local scope for the temporary
-    variables. The local scope contents will be discarded after every minibatch forward/backward finished.
-    But the global scope variables will be persistent through different runs.
-    All of ops in program will be running in sequence.
+    It stores the global variables into the global scope, and creates a local scope for the temporary
+    variables. The contents in local scope may be discarded after every minibatch forward/backward
+    finished. But the global scope variables will be persistent through different runs.
 
     Example:
-    .. code-block:: python
-        # First create the Executor.
-        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-        exe = fluid.Executor(place)
-
-        # Run the startup program once and only once.
-        # Not need to optimize/compile the startup program.
-        exe.run(fluid.default_startup_program())
-
-        # Run the main program directly without compile.
-        loss, = exe.run(fluid.default_main_program(),
-                        feed=feed_dict,
-                        fetch_list=[loss.name])
-        # Or, compiled the program and run. See `CompiledProgram` for more detail.
-        compiled_prog = compiler.CompiledProgram(
-            fluid.default_main_program()).with_data_parallel(
-                loss_name=loss.name)
-        loss, = exe.run(compiled_prog,
-                        feed=feed_dict,
-                        fetch_list=[loss.name])
+
+        .. code-block:: python
+
+            # First create the Executor.
+            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+            exe = fluid.Executor(place)
+
+            # Run the startup program once and only once.
+            # Not need to optimize/compile the startup program.
+            exe.run(fluid.default_startup_program())
+
+            # Run the main program directly without compile.
+            loss, = exe.run(fluid.default_main_program(),
+                            feed=feed_dict,
+                            fetch_list=[loss.name])
+            # Or, compiled the program and run. See `CompiledProgram` for more detail.
+            compiled_prog = compiler.CompiledProgram(
+                fluid.default_main_program()).with_data_parallel(
+                    loss_name=loss.name)
+            loss, = exe.run(compiled_prog,
+                            feed=feed_dict,
+                            fetch_list=[loss.name])
 
     Args:
         place(core.CPUPlace|core.CUDAPlace(n)): indicate the executor run on which device
-
-    Note: For debugging complicated network in parallel-GPUs, you can test it on the executor.
-    They has the exactly same arguments, and expected the same results.
     """
 
     def __init__(self, place):
@@ -382,6 +379,12 @@ class Executor(object):
         ]
         return outs
 
+    '''
+    TODO(typhoonzero): Define "no longer use" meaning? Can user create
+    a new Executor for the same program and run?
+    TODO(panyx0718): Why ParallelExecutor doesn't have close?
+    '''
+
     def close(self):
         """
         Close this executor.
@@ -389,9 +392,6 @@ class Executor(object):
         You can no longer use this executor after calling this method.
         For the distributed training, this method would free the resource on PServers related to
         the current Trainer.
-        TODO(typhoonzero): Define "no longer use" meaning? Can user create
-        a new Executor for the same program and run?
-        TODO(panyx0718): Why ParallelExecutor doesn't have close?
 
         Example:
             >>> cpu = core.CPUPlace()
...
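A minimal sketch of the documented lifecycle, including the close() contract described above (a no-op concern for purely local runs; it frees pserver-side resources in distributed training):

import paddle.fluid as fluid

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
# ... build and run a main program ...
exe.close()  # after this, the executor must not be reused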
@@ -87,15 +87,6 @@ def _current_expected_place():
     return _imperative_current_expected_place_
 
-def is_pserver_mode(main_program):
-    main = main_program if main_program \
-        else default_main_program()
-    for op in main.global_block().ops:
-        if op.type in ["send", "recv"]:
-            return True
-    return False
-
-
 class NameScope(object):
     def __init__(self, name="", parent=None):
         self._children = dict()
@@ -393,6 +384,9 @@ class Variable(object):
             if not self._ivar:
                 self._ivar = core.VarBase(stop_gradient)
             self._ivar.desc = self.desc
+            self._ivar.block = block.desc
+            self._ivar.name = name
+            self._ivar.persistable = persistable
             if persistable:
                 self.block.vars[name] = self
         else:
@@ -721,7 +715,9 @@ class Operator(object):
                 out_arg_names = []
                 for arg in out_args:
                     out_arg_names.append(cpt.to_text(arg.name))
-                    arg.op = self
+                    # TODO(minqiyang): could we remove variable's op in static mode?
+                    if not _in_imperative_mode():
+                        arg.op = self
                 self.desc.set_output(out_proto.name, out_arg_names)
 
         if op_attrs is not None:
@@ -1200,15 +1196,6 @@ class Block(object):
             else:
                 raise ValueError("Var {0} is not found recursively".format(name))
 
-    def _clear_block(self):
-        # TODO(minqiyang): move this to backward_hooks
-        self.desc._clear_block()
-
-        for name in self.vars.keys():
-            assert self.vars[name].persistable
-
-        del self.ops[:]
-
     def all_parameters(self):
         return list(self.iter_parameters())
@@ -1345,26 +1332,13 @@ class Block(object):
             #
             # TODO(minqiyang): add op stop_gradient support in static mode too.
             # currently, we only support stop_gradient in imperative mode.
-            self._trace_op(op, kwargs.get("stop_gradient", False))
-        self.ops.append(op)
+            _imperative_tracer().trace_op(op,
+                                          kwargs.get("stop_gradient", False))
+        else:
+            self.ops.append(op)
 
         return op
 
-    def _trace_op(self, op, stop_gradient=False):
-        backward_refs = _imperative_tracer().trace(
-            op.iop, op.inputs, op.outputs, self.desc,
-            _imperative_current_expected_place_, stop_gradient)
-
-        # TODO(minqiyang): support backward_hooks to eager remove backward_refs
-        op.backward_refs = defaultdict(list)
-        for k, v in six.iteritems(op.inputs):
-            if k in backward_refs:
-                op.backward_refs[k] = op.inputs[k]
-
-        for k, v in six.iteritems(op.outputs):
-            if k in backward_refs:
-                op.backward_refs[k] = op.outputs[k]
-
     def _insert_op(self, index, *args, **kwargs):
         """
         Insert a Operator according to the giving arguments.
@@ -1417,9 +1391,11 @@ class Block(object):
             inputs=kwargs.get("inputs", None),
             outputs=kwargs.get("outputs", None),
             attrs=kwargs.get("attrs", None))
-        self.ops.insert(0, op)
         if _in_imperative_mode():
-            self._trace_op(op, kwargs.get("stop_gradient", False))
+            _imperative_tracer().trace_op(op,
+                                          kwargs.get("stop_gradient", False))
+        else:
+            self.ops.insert(0, op)
         return op
 
     def _sync_with_cpp(self):
@@ -1566,10 +1542,397 @@ class Block(object):
         return ret_var
 
+class IrNode(object):
+    """
+    Python IrNode. Beneath it is a core.Node, which is used for Ir Pass.
+    """
+
+    def __init__(self, node):
+        """
+        Construct an IrNode using core.Node.
+
+        Args:
+            node(core.Node): C++ Node.
+        """
+        assert isinstance(node,
+                          core.Node), 'node must be the instance of core.Node.'
+        self.node = node
+
+    def name(self):
+        """
+        Return the node name.
+
+        Returns:
+            str: node name.
+        """
+        return self.node.name()
+
+    def node_type(self):
+        """
+        Return the node type.
+
+        Returns:
+            core.Node.Type: node type(core.Node.Type.Operation or core.Node.Type.Variable).
+        """
+        return self.node.node_type()
+
+    def var(self):
+        """
+        Return the node variable description.
+
+        Returns:
+            core.VarDesc: node variable description.
+        """
+        return self.node.var()
+
+    def op(self):
+        """
+        Return the node operator description.
+
+        Returns:
+            core.OpDesc: node operator description.
+        """
+        return self.node.op()
+
+    def id(self):
+        """
+        Return the node id.
+
+        Returns:
+            int: node id.
+        """
+        return self.node.id()
+
+    def is_op(self):
+        """
+        If the node is an operator, then return true.
+
+        Returns:
+            bool: indicate whether the node is an operator.
+        """
+        return self.node.is_op()
+
+    def is_var(self):
+        """
+        If the node is a variable, then return true.
+
+        Returns:
+            bool: indicate whether the node is a variable.
+        """
+        return self.node.is_var()
+
+    def is_ctrl_var(self):
+        """
+        If the node is a control dependence variable, then return true.
+
+        Returns:
+            bool: indicate whether the node is a control dependence variable.
+        """
+        return self.node.is_ctrl_var()
+
+    def clear_inputs(self):
+        """
+        Clear the node inputs. After executing the `clear_inputs` function,
+        the node inputs will be empty.
+        """
+        self.node.clear_inputs()
+
+    def remove_input_by_id(self, node_id):
+        """
+        Remove a node from inputs by the given node id.
+
+        Args:
+            node_id(int): the given node id.
+        """
+        self.node.remove_input(node_id)
+
+    def remove_input(self, node):
+        """
+        Remove a node from inputs.
+
+        Args:
+            node(IrNode): the node being removed.
+        """
+        self.node.remove_input(node.node)
+
+    def append_input(self, node):
+        """
+        Append a node in inputs.
+
+        Args:
+            node(IrNode): the node being appended.
+        """
+        self.node.append_input(node.node)
+
+    def clear_outputs(self):
+        """
+        Clear the node outputs. After executing the `clear_outputs` function,
+        the node outputs will be empty.
+        """
+        self.node.clear_outputs()
+
+    def remove_output_by_id(self, node_id):
+        """
+        Remove a node from outputs by the given node id.
+
+        Args:
+            node_id(int): the given node id.
+        """
+        self.node.remove_output(node_id)
+
+    def remove_output(self, node):
+        """
+        Remove a node from outputs.
+
+        Args:
+            node(IrNode): the node being removed.
+        """
+        self.node.remove_output(node.node)
+
+    def append_output(self, node):
+        """
+        Append a node in outputs.
+
+        Args:
+            node(IrNode): the node being appended.
+        """
+        self.node.append_output(node.node)
+
+    @property
+    def inputs(self):
+        """
+        Return the node inputs.
+
+        Returns:
+            list(IrNode): node inputs wrapped by IrNode.
+        """
+        return [IrNode(n) for n in self.node.inputs]
+
+    @property
+    def outputs(self):
+        """
+        Return the node outputs.
+
+        Returns:
+            list(IrNode): node outputs wrapped by IrNode.
+        """
+        return [IrNode(n) for n in self.node.outputs]
+
+
+class IrVarNode(IrNode):
+    """
+    Python IrVarNode. Beneath it is a core.Node, it inherits from IrNode.
+    """
+
+    def __init__(self, node):
+        """
+        Construct an IrVarNode using core.Node.
+
+        Args:
+            node(core.Node): C++ Node.
+        """
+        assert isinstance(node, core.Node) and node.is_var(), \
+            'node must be the instance of core.Node and it must be a variable node.'
+        super(IrVarNode, self).__init__(node)
+        self.node = node
+
+    def set_shape(self, shape):
+        """
+        Set the node variable shape.
+
+        Args:
+            shape(list): shape to be set.
+        """
+        assert self.node.var() is not None, \
+            "The node variable description cannot be None."
+        self.node.var().set_shape(shape)
+
+    def persistable(self):
+        """
+        If the variable node is a persistable variable, then return true.
+
+        Returns:
+            bool: indicate whether the variable is persistable.
+        """
+        assert self.node.var() is not None, \
+            "The node variable description cannot be None."
+        return self.node.var().persistable()
+
+    def type(self):
+        """
+        Return the variable type.
+
+        Returns:
+            core.VarDesc.VarType: the variable type.
+        """
+        assert self.node.var() is not None, \
+            "The node variable description cannot be None."
+        return self.node.var().type()
+
+    def dtype(self):
+        """
+        Return the variable data type.
+
+        Returns:
+            core.VarDesc.VarType: the variable data type.
+        """
+        assert self.node.var() is not None, \
+            "The node variable description cannot be None."
+        return self.node.var().dtype()
+
+    def shape(self):
+        """
+        Return the variable shape.
+
+        Returns:
+            list: the variable shape.
+        """
+        assert self.node.var() is not None, \
+            "The node variable description cannot be None."
+        return self.node.var().shape()
+
+    @property
+    def inputs(self):
+        """
+        Return the node inputs.
+
+        Returns:
+            list(IrOpNode): node inputs wrapped by IrOpNode.
+        """
+        return [IrOpNode(n) for n in self.node.inputs]
+
+    @property
+    def outputs(self):
+        """
+        Return the node outputs.
+
+        Returns:
+            list(IrOpNode): node outputs wrapped by IrOpNode.
+        """
+        return [IrOpNode(n) for n in self.node.outputs]
+
+
+class IrOpNode(IrNode):
+    """
+    Python IrOpNode. Beneath it is a core.Node, it inherits from IrNode.
+    """
+
+    def __init__(self, node):
+        """
+        Construct an IrOpNode using core.Node.
+
+        Args:
+            node(core.Node): C++ Node.
+        """
+        assert isinstance(node, core.Node) and node.is_op(), \
+            'node must be the instance of core.Node and it must be a operator node.'
+        super(IrOpNode, self).__init__(node)
+        self.node = node
+
+    def rename_input(self, old_input_name, new_input_name):
+        """
+        Rename the input of this node.
+
+        Args:
+            old_input_name(str): the old input name.
+            new_input_name(str): the new input name.
+        """
+        assert self.node.op() is not None, \
+            "The node operator description cannot be None."
+        self.node.op()._rename_input(old_input_name, new_input_name)
+
+    def input(self, name):
+        """
+        Get the argument name list by the parameter name for input.
+
+        Args:
+            name(str): the parameter name.
+
+        Returns:
+            list(str): the argument name list.
+        """
+        assert self.node.op() is not None, \
+            "The node operator description cannot be None."
+        return self.node.op().input(name)
+
+    def output(self, name):
+        """
+        Get the argument name list by the parameter name for output.
+
+        Args:
+            name(str): the parameter name.
+
+        Returns:
+            list(str): the argument name list.
+        """
+        assert self.node.op() is not None, \
+            "The node operator description cannot be None."
+        return self.node.op().output(name)
+
+    def set_type(self, new_type):
+        """
+        Change the operator type into new type.
+
+        Args:
+            new_type(str): new operator type to be set.
+        """
+        assert self.node.op() is not None, \
+            "The node operator description cannot be None."
+        return self.node.op().set_type(new_type)
+
+    def set_attr(self, name, val):
+        """
+        Set the value of attribute by attribute's name.
+
+        Args:
+            name(str): the attribute name.
+            val(bool|int|str|float|list): the value of the attribute.
+        """
+        self._update_desc_attr(name, val)
+
+    def _update_desc_attr(self, name, val):
+        """
+        Update the value of the op desc's attribute by attribute's name.
+        """
+        assert self.node.op() is not None, \
+            "The node operator description cannot be None."
+        desc = self.node.op()
+        if isinstance(val, Block):
+            desc.set_block_attr(name, val.desc)
+        elif isinstance(val, list) and val and \
+                all(isinstance(v, Block) for v in val):
+            desc.set_blocks_attr(name, [v.desc for v in val])
+        elif isinstance(val, core.BlockDesc) or \
+                isinstance(val, core.ProgramDesc):
+            desc.set_serialized_attr(name, val.serialize_to_string())
+        else:
+            desc._set_attr(name, val)
+
+    @property
+    def inputs(self):
+        """
+        Return the node inputs.
+
+        Returns:
+            list(IrVarNode): node inputs wrapped by IrVarNode.
+        """
+        return [IrVarNode(n) for n in self.node.inputs]
+
+    @property
+    def outputs(self):
+        """
+        Return the node outputs.
+
+        Returns:
+            list(IrVarNode): node outputs wrapped by IrVarNode.
+        """
+        return [IrVarNode(n) for n in self.node.outputs]
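How the three new wrappers compose in practice: `IrGraph` hands back `IrOpNode`/`IrVarNode` objects whose `inputs`/`outputs` properties re-wrap the underlying `core.Node` neighbours, so passes no longer reach through `node.op()`/`node.var()`. A small sketch, assuming a trivially built program:

import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.framework import IrGraph

x = fluid.layers.data(name='x', shape=[1], dtype='float32')
y = fluid.layers.fc(input=x, size=1)

graph = IrGraph(core.Graph(fluid.default_main_program().desc), for_test=False)
for op in graph.all_op_nodes():                      # IrOpNode instances
    print(op.name(), [v.name() for v in op.inputs])  # op.inputs -> IrVarNode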
class IrGraph(object): class IrGraph(object):
""" """
Python IrGraph. Beneath it is a core.Graph, which is used for Python IrGraph. Beneath it is a core.Graph, which is used for
create a c++ Ir Pass Graph. An IrGraph is just a graph view of creating a c++ Ir Pass Graph. An IrGraph is just a graph view of
a Program. In an IrGraph, both Variables and Operators are graph a Program. In an IrGraph, both Variables and Operators are graph
nodes. nodes.
""" """
...@@ -1597,15 +1960,15 @@ class IrGraph(object): ...@@ -1597,15 +1960,15 @@ class IrGraph(object):
""" """
Return all nodes included in the graph as a set. Return all nodes included in the graph as a set.
""" """
return {node for node in self.graph.nodes()} return {IrNode(node) for node in self.graph.nodes()}
def all_vars(self): def all_var_nodes(self):
""" """
Return all variable nodes included in the graph as a set. Return all variable nodes included in the graph as a set.
""" """
return {node for node in self.graph.nodes() if node.is_var()} return {IrVarNode(node) for node in self.graph.nodes() if node.is_var()}
def all_persistable_vars(self): def all_persistable_nodes(self):
""" """
Return all persistable variable nodes included in the graph as a set. Return all persistable variable nodes included in the graph as a set.
""" """
...@@ -1614,13 +1977,13 @@ class IrGraph(object): ...@@ -1614,13 +1977,13 @@ class IrGraph(object):
if node.is_var() and node.var() is not None and node.var( if node.is_var() and node.var() is not None and node.var(
).persistable(): ).persistable():
persistable_nodes.add(node) persistable_nodes.add(node)
return persistable_nodes return {IrVarNode(p) for p in persistable_nodes}
def all_ops(self): def all_op_nodes(self):
""" """
Return all operator nodes included in the graph as a set. Return all operator nodes included in the graph as a set.
""" """
return {node for node in self.graph.nodes() if node.is_op()} return {IrOpNode(node) for node in self.graph.nodes() if node.is_op()}
def var_node(self, name): def var_node(self, name):
""" """
...@@ -1634,14 +1997,14 @@ class IrGraph(object): ...@@ -1634,14 +1997,14 @@ class IrGraph(object):
doesn't have a variable with the giving name. doesn't have a variable with the giving name.
Returns: Returns:
core.Node: the variable node with the giving name. IrVarNode: the variable node with the giving name.
""" """
if not isinstance(name, six.string_types): if not isinstance(name, six.string_types):
raise TypeError( raise TypeError(
"var require string as parameter, but get %s instead." % "var require string as parameter, but get %s instead." %
(type(name))) (type(name)))
target_var_node = None target_var_node = None
var_nodes = self.all_vars() var_nodes = self.all_var_nodes()
for var_node in var_nodes: for var_node in var_nodes:
if var_node.name() == name: if var_node.name() == name:
target_var_node = var_node target_var_node = var_node
...@@ -1649,7 +2012,7 @@ class IrGraph(object): ...@@ -1649,7 +2012,7 @@ class IrGraph(object):
raise ValueError("var_node %s not in this graph" % name) raise ValueError("var_node %s not in this graph" % name)
return target_var_node return target_var_node
def create_param_node(self, name, var_type, shape, var_dtype): def create_persistable_node(self, name, var_type, shape, var_dtype):
""" """
Create a persistable variable node in the graph. In IrGraph, Create a persistable variable node in the graph. In IrGraph,
it can not distinguish between persistable variables and parameters. it can not distinguish between persistable variables and parameters.
...@@ -1661,14 +2024,14 @@ class IrGraph(object): ...@@ -1661,14 +2024,14 @@ class IrGraph(object):
var_dtype(core.VarDesc.VarType): the data type of the persistable variable node. var_dtype(core.VarDesc.VarType): the data type of the persistable variable node.
Returns: Returns:
core.Node: the created persistable variable node. IrVarNode: the created persistable variable node.
""" """
var_desc = core.VarDesc(name) var_desc = core.VarDesc(name)
var_desc.set_type(var_type) var_desc.set_type(var_type)
var_desc.set_shape(shape) var_desc.set_shape(shape)
var_desc.set_dtype(var_dtype) var_desc.set_dtype(var_dtype)
var_desc.set_persistable(True) var_desc.set_persistable(True)
return self.graph.create_var_node(var_desc) return IrVarNode(self.graph.create_var_node(var_desc))
def create_var_node(self, name, var_type, shape, var_dtype): def create_var_node(self, name, var_type, shape, var_dtype):
""" """
...@@ -1682,14 +2045,14 @@ class IrGraph(object): ...@@ -1682,14 +2045,14 @@ class IrGraph(object):
var_dtype(core.VarDesc.VarType): the data type of the variable node. var_dtype(core.VarDesc.VarType): the data type of the variable node.
Returns: Returns:
core.Node: the created variable node. IrVarNode: the created variable node.
""" """
var_desc = core.VarDesc(name) var_desc = core.VarDesc(name)
var_desc.set_type(var_type) var_desc.set_type(var_type)
var_desc.set_shape(shape) var_desc.set_shape(shape)
var_desc.set_dtype(var_dtype) var_desc.set_dtype(var_dtype)
return self.graph.create_var_node(var_desc) return IrVarNode(self.graph.create_var_node(var_desc))
def create_var_node_from_desc(self, var_desc): def create_var_node_from_desc(self, var_desc):
""" """
...@@ -1700,9 +2063,9 @@ class IrGraph(object): ...@@ -1700,9 +2063,9 @@ class IrGraph(object):
var_desc(core.VarDesc): the given variable description. var_desc(core.VarDesc): the given variable description.
Returns: Returns:
core.Node: the created variable node. IrVarNode: the created variable node.
""" """
return self.graph.create_var_node(var_desc) return IrVarNode(self.graph.create_var_node(var_desc))
def create_op_node(self, op_type, attrs, inputs, outputs): def create_op_node(self, op_type, attrs, inputs, outputs):
""" """
...@@ -1715,7 +2078,7 @@ class IrGraph(object): ...@@ -1715,7 +2078,7 @@ class IrGraph(object):
outputs(dict): the outputs of the operator node. outputs(dict): the outputs of the operator node.
Returns: Returns:
core.Node: the created operator node. IrOpNode: the created operator node.
""" """
op_desc = core.OpDesc() op_desc = core.OpDesc()
op_desc.set_type(op_type) op_desc.set_type(op_type)
...@@ -1731,7 +2094,7 @@ class IrGraph(object): ...@@ -1731,7 +2094,7 @@ class IrGraph(object):
var_nodes = [var_nodes] var_nodes = [var_nodes]
op_desc.set_output(output_name, op_desc.set_output(output_name,
[var_node.name() for var_node in var_nodes]) [var_node.name() for var_node in var_nodes])
return self.graph.create_op_node(op_desc) return IrOpNode(self.graph.create_op_node(op_desc))
def create_op_node_from_desc(self, op_desc): def create_op_node_from_desc(self, op_desc):
""" """
...@@ -1741,40 +2104,40 @@ class IrGraph(object): ...@@ -1741,40 +2104,40 @@ class IrGraph(object):
op_desc(core.OpDesc): the given operator description. op_desc(core.OpDesc): the given operator description.
Returns: Returns:
core.Node: the created operator node. IrOpNode: the created operator node.
""" """
return self.graph.create_op_node(op_desc) return IrOpNode(self.graph.create_op_node(op_desc))
def update_input_link(self, old_input_node, new_input_node, op_node): def update_input_link(self, old_input_node, new_input_node, op_node):
""" """
Update the input link of an operator node. Update the input link of an operator node.
Args: Args:
old_input_node(core.Node): the old input node of the given op_node. old_input_node(IrNode): the old input node of the given op_node.
new_input_node(core.Node): the new input node of the given op_node. new_input_node(IrNode): the new input node of the given op_node.
op_node(core.Node): the operator node whose input link needs to be updated. op_node(IrOpNode): the operator node whose input link needs to be updated.
""" """
assert old_input_node in self.graph.nodes() and new_input_node in \ assert old_input_node.node in self.graph.nodes() and new_input_node.node in \
self.graph.nodes() and op_node in self.graph.nodes(), \ self.graph.nodes() and op_node.node in self.graph.nodes(), \
'The three arguments(old_input_node&new_input_node&op_node) must be in the graph nodes.' 'The three arguments(old_input_node&new_input_node&op_node) must be in the graph nodes.'
old_input_node.outputs_remove(op_node) old_input_node.remove_output(op_node)
op_node.inputs_remove(old_input_node) op_node.remove_input(old_input_node)
new_input_node.outputs_append(op_node) new_input_node.append_output(op_node)
op_node.inputs_append(new_input_node) op_node.append_input(new_input_node)
op_node.op()._rename_input(old_input_node.name(), new_input_node.name()) op_node.rename_input(old_input_node.name(), new_input_node.name())
def link_to(self, node_in, node_out): def link_to(self, node_in, node_out):
""" """
Connect two nodes. Connect two nodes.
Args: Args:
node_in(core.Node): the input node. node_in(IrNode): the input node.
node_out(core.Node): the output node. node_out(IrNode): the output node.
""" """
assert node_in in self.graph.nodes() and node_out in self.graph.nodes(), \ assert node_in.node in self.graph.nodes() and node_out.node in self.graph.nodes(), \
'The two arguments(node_in&node_out) must be in the graph nodes.' 'The two arguments(node_in&node_out) must be in the graph nodes.'
node_in.outputs_append(node_out) node_in.append_output(node_out)
node_out.inputs_append(node_in) node_out.append_input(node_in)
def safe_remove_nodes(self, remove_nodes): def safe_remove_nodes(self, remove_nodes):
""" """
...@@ -1789,7 +2152,8 @@ class IrGraph(object): ...@@ -1789,7 +2152,8 @@ class IrGraph(object):
remove_nodes = set(remove_nodes) remove_nodes = set(remove_nodes)
else: else:
remove_nodes = {remove_nodes} remove_nodes = {remove_nodes}
core.graph_safe_remove_nodes(self.graph, remove_nodes) original_nodes = {n.node for n in remove_nodes}
core.graph_safe_remove_nodes(self.graph, original_nodes)
def has_circle(self): def has_circle(self):
""" """
...@@ -1816,18 +2180,23 @@ class IrGraph(object): ...@@ -1816,18 +2180,23 @@ class IrGraph(object):
Notes: the `graph` cannot contain a cycle. Notes: the `graph` cannot contain a cycle.
Returns: Returns:
set(core.Node): nodes in topological order. set(IrNode): nodes in topological order.
""" """
return core.topology_sort(self.graph) ordered_nodes = core.topology_sort(self.graph)
return {IrNode(n) for n in ordered_nodes}
def build_adjacency_list(self): def build_adjacency_list(self):
""" """
Build an adjacency list of operations for the `graph`. Build an adjacency list of operations for the `graph`.
Returns: Returns:
dict{core.Node: set(core.Node)}: the adjacency list. dict{IrNode: set(IrNode)}: the adjacency list.
""" """
return core.build_adjacency_list(self.graph) adj_list = core.build_adjacency_list(self.graph)
wrapped_adj_list = dict()
for k, v in six.iteritems(adj_list):
wrapped_adj_list[IrNode(k)] = {IrNode(n) for n in v}
return wrapped_adj_list
def draw(self, save_path, name, marked_nodes=None, remove_ctr_var=True): def draw(self, save_path, name, marked_nodes=None, remove_ctr_var=True):
""" """
...@@ -1837,7 +2206,7 @@ class IrGraph(object): ...@@ -1837,7 +2206,7 @@ class IrGraph(object):
Args: Args:
save_path(str): the save path of drawn graph. save_path(str): the save path of drawn graph.
name(str): the name of drawn graph. name(str): the name of drawn graph.
marked_nodes(set(core.Node)): nodes that need to be marked. marked_nodes(set(IrNode)): nodes that need to be marked.
Default value is None. Default value is None.
remove_ctr_var(bool): If it is set True, all control variable nodes remove_ctr_var(bool): If it is set True, all control variable nodes
in the graph will be removed. Default value is True. in the graph will be removed. Default value is True.
...@@ -1852,20 +2221,22 @@ class IrGraph(object): ...@@ -1852,20 +2221,22 @@ class IrGraph(object):
print('The {} is saved as the dot filetype.'.format( print('The {} is saved as the dot filetype.'.format(
dot_file_path)) dot_file_path))
remove_ctr_vars = set()
if remove_ctr_var: if remove_ctr_var:
remove_ctr_vars = set() for node in self.all_var_nodes():
for node in self.graph.nodes():
if node.is_ctrl_var(): if node.is_ctrl_var():
remove_ctr_vars.add(node) remove_ctr_vars.add(node)
self.safe_remove_nodes(remove_ctr_vars) self.safe_remove_nodes(remove_ctr_vars)
ops_num = 0 print('Total ops num = {}.'.format(len(self.all_op_nodes())))
for node in self.graph.nodes():
if node.is_op():
ops_num += 1
print('Total ops num = {}.'.format(ops_num))
if marked_nodes is not None: if marked_nodes is not None:
if not isinstance(marked_nodes, set): if not isinstance(marked_nodes, set):
marked_nodes = set(marked_nodes) if isinstance(marked_nodes, Iterable):
marked_nodes = set(marked_nodes)
else:
marked_nodes = {marked_nodes}
marked_nodes = {n.node for n in marked_nodes}
remove_ctr_vars = {n.node for n in remove_ctr_vars}
marked_nodes = marked_nodes - remove_ctr_vars marked_nodes = marked_nodes - remove_ctr_vars
if self.graph.has('__graphviz__marked_node__'): if self.graph.has('__graphviz__marked_node__'):
self.graph.erase('__graphviz__marked_node__') self.graph.erase('__graphviz__marked_node__')
......
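For orientation, a minimal sketch of the reworked IrGraph API after this change, in which the IrNode/IrOpNode/IrVarNode wrappers replace raw core.Node throughout. The import path and the program construction are assumptions, not part of the diff:

# A hedged sketch, assuming IrGraph is importable from paddle.fluid.framework
# and that `main_program` has been populated by user code elsewhere.
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.framework import IrGraph

main_program = fluid.Program()  # assumed to be built elsewhere
graph = IrGraph(core.Graph(main_program.desc), for_test=False)

# Iteration now yields IrOpNode/IrVarNode wrappers instead of core.Node.
for op_node in graph.all_op_nodes():
    print(op_node.name())

# safe_remove_nodes unwraps each wrapper via its .node attribute, so it
# expects wrapped nodes as input.
ctrl_vars = {n for n in graph.all_var_nodes() if n.node.is_ctrl_var()}
graph.safe_remove_nodes(ctrl_vars)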
...@@ -23,7 +23,11 @@ from .layers import * ...@@ -23,7 +23,11 @@ from .layers import *
from . import nn from . import nn
from .nn import * from .nn import *
from . import tracer
from .tracer import *
__all__ = [] __all__ = []
__all__ += layers.__all__ __all__ += layers.__all__
__all__ += base.__all__ __all__ += base.__all__
__all__ += nn.__all__ __all__ += nn.__all__
__all__ += tracer.__all__
...@@ -16,6 +16,7 @@ import numpy as np ...@@ -16,6 +16,7 @@ import numpy as np
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid import framework from paddle.fluid import framework
from .tracer import Tracer
__all__ = ['enabled', 'guard', 'to_variable'] __all__ = ['enabled', 'guard', 'to_variable']
...@@ -28,7 +29,7 @@ def enabled(): ...@@ -28,7 +29,7 @@ def enabled():
def guard(place=None): def guard(place=None):
train = framework.Program() train = framework.Program()
startup = framework.Program() startup = framework.Program()
tracer = core.Tracer(train.current_block().desc) tracer = Tracer(train.current_block().desc)
if place is None: if place is None:
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
......
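For context, a minimal sketch of entering the guard above, which now installs the Python-level Tracer in place of a bare core.Tracer; the tensor value is illustrative:

import numpy as np
import paddle.fluid as fluid

# Ops created under the guard are recorded by the Python Tracer wrapper.
with fluid.imperative.guard():
    var = fluid.imperative.to_variable(np.ones([2, 2], dtype='float32'))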
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import six
from collections import defaultdict
from paddle.fluid import core
from paddle.fluid import framework
__all__ = ['Tracer']
def release_op(op):
del framework._imperative_tracer()._ops[op._trace_id]
class Tracer(core.Tracer):
"""
Python wrapper of imperative tracer
"""
def __init__(self, block):
super(Tracer, self).__init__(block)
self._ops = defaultdict()
self._trace_id = 0
def trace_op(self, op, stop_gradient=False):
# record op's trace id
op.iop._trace_id = self._trace_id
# trace op and save it
backward_refs = self.trace(op.iop, op.inputs, op.outputs, op.block.desc,
framework._current_expected_place(),
stop_gradient)
if not stop_gradient:
self._trace_id += 1
self._ops[op.iop._trace_id] = op
# register backward hooks and variables if needed
if len(backward_refs) > 0:
op.iop.register_backward_hooks(release_op)
# TODO(minqiyang): remove all inputs and outputs after separating
# var and grad
op.backward_refs = defaultdict(list)
for k, v in six.iteritems(op.inputs):
if k in backward_refs:
op.backward_refs[k] = op.inputs[k]
for k, v in six.iteritems(op.outputs):
if k in backward_refs:
op.backward_refs[k] = op.outputs[k]
...@@ -19,6 +19,7 @@ import numpy as np ...@@ -19,6 +19,7 @@ import numpy as np
from .wrapped_decorator import signature_safe_contextmanager from .wrapped_decorator import signature_safe_contextmanager
from .core import VarDesc from .core import VarDesc
from . import unique_name from . import unique_name
from .imperative import base as imperative_base
__all__ = [ __all__ = [
'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier', 'Bilinear', 'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier', 'Bilinear',
...@@ -165,7 +166,8 @@ class ConstantInitializer(Initializer): ...@@ -165,7 +166,8 @@ class ConstantInitializer(Initializer):
'force_cpu': self._force_cpu or force_init_on_cpu() 'force_cpu': self._force_cpu or force_init_on_cpu()
}, },
stop_gradient=True) stop_gradient=True)
var.op = op if not imperative_base.enabled():
var.op = op
return op return op
...@@ -244,7 +246,8 @@ class UniformInitializer(Initializer): ...@@ -244,7 +246,8 @@ class UniformInitializer(Initializer):
attrs={"in_dtype": out_var.dtype, attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype}) "out_dtype": var.dtype})
var.op = op if not imperative_base.enabled():
var.op = op
return op return op
...@@ -322,7 +325,8 @@ class NormalInitializer(Initializer): ...@@ -322,7 +325,8 @@ class NormalInitializer(Initializer):
outputs={"Out": var}, outputs={"Out": var},
attrs={"in_dtype": out_var.dtype, attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype}) "out_dtype": var.dtype})
var.op = op if not imperative_base.enabled():
var.op = op
return op return op
...@@ -400,7 +404,8 @@ class TruncatedNormalInitializer(Initializer): ...@@ -400,7 +404,8 @@ class TruncatedNormalInitializer(Initializer):
outputs={"Out": var}, outputs={"Out": var},
attrs={"in_dtype": out_var.dtype, attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype}) "out_dtype": var.dtype})
var.op = op if not imperative_base.enabled():
var.op = op
return op return op
...@@ -505,7 +510,8 @@ class XavierInitializer(Initializer): ...@@ -505,7 +510,8 @@ class XavierInitializer(Initializer):
"seed": self._seed "seed": self._seed
}, },
stop_gradient=True) stop_gradient=True)
var.op = op if not imperative_base.enabled():
var.op = op
return op return op
...@@ -605,7 +611,8 @@ class MSRAInitializer(Initializer): ...@@ -605,7 +611,8 @@ class MSRAInitializer(Initializer):
"seed": self._seed "seed": self._seed
}, },
stop_gradient=True) stop_gradient=True)
var.op = op if not imperative_base.enabled():
var.op = op
return op return op
...@@ -703,7 +710,8 @@ class BilinearInitializer(Initializer): ...@@ -703,7 +710,8 @@ class BilinearInitializer(Initializer):
'shape': list(shape), 'shape': list(shape),
value_name: values value_name: values
}) })
var.op = op if not imperative_base.enabled():
var.op = op
return op return op
...@@ -761,7 +769,8 @@ class NumpyArrayInitializer(Initializer): ...@@ -761,7 +769,8 @@ class NumpyArrayInitializer(Initializer):
value_name: values value_name: values
}, },
stop_gradient=True) stop_gradient=True)
var.op = op if not imperative_base.enabled():
var.op = op
return op return op
......
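A hedged illustration of the guard added above: in declarative (graph) mode the initializer still records the creating op on the variable, while under the imperative guard it skips that bookkeeping. The parameter setup here is illustrative only:

import paddle.fluid as fluid
from paddle.fluid.initializer import Constant

# Declarative mode: imperative_base.enabled() is False here, so the
# initializer attaches the creating op via `var.op = op`.
param = fluid.layers.create_parameter(
    shape=[4], dtype='float32', default_initializer=Constant(value=1.0))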
...@@ -468,9 +468,10 @@ def save_persistables(executor, dirname, main_program=None, filename=None): ...@@ -468,9 +468,10 @@ def save_persistables(executor, dirname, main_program=None, filename=None):
exe = fluid.Executor(fluid.CPUPlace()) exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model" param_path = "./my_paddle_model"
# `prog` can be a program defined by the user
prog = fluid.default_main_program() prog = fluid.default_main_program()
fluid.io.save_persistables(executor=exe, dirname=param_path, fluid.io.save_persistables(executor=exe, dirname=param_path,
main_program=None) main_program=prog)
""" """
if main_program and main_program._is_distributed: if main_program and main_program._is_distributed:
......
...@@ -28,10 +28,12 @@ from . import ops ...@@ -28,10 +28,12 @@ from . import ops
from . import tensor from . import tensor
from ..initializer import init_on_cpu from ..initializer import init_on_cpu
from ..framework import default_main_program, Parameter, unique_name, name_scope from ..framework import default_main_program, Parameter, unique_name, name_scope
import math
__all__ = [ __all__ = [
'exponential_decay', 'natural_exp_decay', 'inverse_time_decay', 'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS' 'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS',
'cosine_decay'
] ]
...@@ -307,6 +309,41 @@ def piecewise_decay(boundaries, values): ...@@ -307,6 +309,41 @@ def piecewise_decay(boundaries, values):
return lr return lr
def cosine_decay(learning_rate, step_each_epoch, epochs):
"""
Applies cosine decay to the learning rate.
When training a model, it is often recommended to lower the learning rate as the
training progresses. By using this function, the learning rate will be decayed
following the cosine decay strategy.
decayed_lr = learning_rate * 0.5 * (math.cos(epoch * math.pi / epochs) + 1)
Args:
learning_rate(Variable|float): The initial learning rate.
step_each_epoch(int): the number of steps in an epoch.
epochs(int): the number of epochs.
Returns:
Variable: The decayed learning rate.
Examples:
.. code-block:: python
base_lr = 0.1
lr = fluid.layers.cosine_decay(
learning_rate = base_lr, step_each_epoch=10000, epochs=120)
"""
with default_main_program()._lr_schedule_guard():
global_step = _decay_step_counter()
cur_epoch = ops.floor(global_step / step_each_epoch)
decayed_lr = learning_rate * 0.5 * (
ops.cos(cur_epoch * math.pi / epochs) + 1)
return decayed_lr
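A quick numeric check of the decay formula above, using the docstring's values:

import math

def cosine_decay_value(lr, epoch, epochs):
    # Mirrors: decayed_lr = lr * 0.5 * (cos(epoch * pi / epochs) + 1)
    return lr * 0.5 * (math.cos(epoch * math.pi / epochs) + 1)

assert abs(cosine_decay_value(0.1, 0, 120) - 0.1) < 1e-9    # start of training
assert abs(cosine_decay_value(0.1, 60, 120) - 0.05) < 1e-9  # halfway
assert abs(cosine_decay_value(0.1, 120, 120) - 0.0) < 1e-9  # fully decayed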
def append_LARS(params_grads, learning_rate, weight_decay): def append_LARS(params_grads, learning_rate, weight_decay):
""" """
Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
......
...@@ -1767,7 +1767,7 @@ def sequence_softmax(input, use_cudnn=False, name=None): ...@@ -1767,7 +1767,7 @@ def sequence_softmax(input, use_cudnn=False, name=None):
return softmax_out return softmax_out
def softmax(input, use_cudnn=True, name=None): def softmax(input, use_cudnn=False, name=None):
""" """
The input of the softmax operator is a tensor of any rank. The output tensor The input of the softmax operator is a tensor of any rank. The output tensor
has the same shape as the input. has the same shape as the input.
...@@ -1795,7 +1795,8 @@ def softmax(input, use_cudnn=True, name=None): ...@@ -1795,7 +1795,8 @@ def softmax(input, use_cudnn=True, name=None):
Args: Args:
input (Variable): The input variable. input (Variable): The input variable.
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \ use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \
library is installed. library is installed. To improve numerical stability, use_cudnn is \
set to False by default. Default: False
name (str|None): A name for this layer(optional). If set None, the layer name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically. Default: None. will be named automatically. Default: None.
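A hedged usage sketch reflecting the new default; the data layer shape is illustrative:

import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[10], dtype='float32')
# use_cudnn now defaults to False for numerical stability; opt back in
# explicitly when the cuDNN kernel is wanted.
probs = fluid.layers.softmax(x)
probs_cudnn = fluid.layers.softmax(x, use_cudnn=True)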
...@@ -3041,7 +3042,6 @@ def data_norm(input, ...@@ -3041,7 +3042,6 @@ def data_norm(input,
param_attr=None, param_attr=None,
data_layout='NCHW', data_layout='NCHW',
in_place=False, in_place=False,
use_mkldnn=False,
name=None, name=None,
moving_mean_name=None, moving_mean_name=None,
moving_variance_name=None, moving_variance_name=None,
...@@ -3075,7 +3075,6 @@ def data_norm(input, ...@@ -3075,7 +3075,6 @@ def data_norm(input,
param_attr(ParamAttr): The parameter attribute for Parameter `scale`. param_attr(ParamAttr): The parameter attribute for Parameter `scale`.
data_layout(string, default NCHW): NCHW|NHWC data_layout(string, default NCHW): NCHW|NHWC
in_place(bool, Default False): Make the input and output of batch norm reuse memory. in_place(bool, Default False): Make the input and output of batch norm reuse memory.
use_mkldnn(bool, Default false): ${use_mkldnn_comment}
name(string, Default None): A name for this layer(optional). If set None, the layer name(string, Default None): A name for this layer(optional). If set None, the layer
will be named automatically. will be named automatically.
moving_mean_name(string, Default None): The name of moving_mean which store the global Mean. moving_mean_name(string, Default None): The name of moving_mean which store the global Mean.
...@@ -3156,8 +3155,7 @@ def data_norm(input, ...@@ -3156,8 +3155,7 @@ def data_norm(input,
outputs={"Y": data_norm_out, outputs={"Y": data_norm_out,
"Means": means, "Means": means,
"Scales": scales}, "Scales": scales},
attrs={"epsilon": epsilon, attrs={"epsilon": epsilon})
"use_mkldnn": use_mkldnn})
return helper.append_activation(data_norm_out) return helper.append_activation(data_norm_out)
...@@ -5756,7 +5754,7 @@ def softmax_with_cross_entropy(logits, ...@@ -5756,7 +5754,7 @@ def softmax_with_cross_entropy(logits,
label, label,
soft_label=False, soft_label=False,
ignore_index=kIgnoreIndex, ignore_index=kIgnoreIndex,
numeric_stable_mode=False, numeric_stable_mode=True,
return_softmax=False): return_softmax=False):
""" """
**Softmax With Cross Entropy Operator.** **Softmax With Cross Entropy Operator.**
...@@ -5820,7 +5818,7 @@ def softmax_with_cross_entropy(logits, ...@@ -5820,7 +5818,7 @@ def softmax_with_cross_entropy(logits,
When soft_label is True or CPU is used, When soft_label is True or CPU is used,
the algorithm is always numerically stable. the algorithm is always numerically stable.
Note that the speed may be slower when using Note that the speed may be slower when using
the stable algorithm. Default: False the stable algorithm. Default: True
return_softmax (bool): A flag indicating whether to return the softmax return_softmax (bool): A flag indicating whether to return the softmax
along with the cross entropy loss. Default: False along with the cross entropy loss. Default: False
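A short sketch of the new default in action; shapes are illustrative:

import paddle.fluid as fluid

logits = fluid.layers.data(name='logits', shape=[10], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# numeric_stable_mode is now True by default; pass False explicitly to
# recover the previous (faster but less stable) GPU behavior.
loss = fluid.layers.softmax_with_cross_entropy(logits, label)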
...@@ -6846,56 +6844,58 @@ def image_resize(input, ...@@ -6846,56 +6844,58 @@ def image_resize(input,
Example: Example:
For scale: .. code-block:: text
if align_corners = True && out_size > 1 :
scale_factor = (in_size-1.0)/(out_size-1.0) For scale:
else: if align_corners = True && out_size > 1 :
scale_factor = (in_size-1.0)/(out_size-1.0)
else:
scale_factor = float(in_size/out_size)
scale_factor = float(in_size/out_size) Nearest neighbor interpolation:
if:
Nearest neighbor interpolation: align_corners = False
if:
align_corners = False
input : (N,C,H_in,W_in) input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where: output: (N,C,H_out,W_out) where:
H_out = \left \lfloor {H_{in} * scale_{}factor}} \right \rfloor H_out = floor (H_{in} * scale_{factor})
W_out = \left \lfloor {W_{in} * scale_{}factor}} \right \rfloor W_out = floor (W_{in} * scale_{factor})
else: else:
align_corners = True align_corners = True
input : (N,C,H_in,W_in) input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where: output: (N,C,H_out,W_out) where:
H_out = round(H_{in} * scale_{factor}) H_out = round(H_{in} * scale_{factor})
W_out = round(W_{in} * scale_{factor}) W_out = round(W_{in} * scale_{factor})
Bilinear interpolation: Bilinear interpolation:
if: if:
align_corners = False , align_mode = 0 align_corners = False , align_mode = 0
input : (N,C,H_in,W_in) input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where: output: (N,C,H_out,W_out) where:
H_out = (H_{in}+0.5) * scale_{factor} - 0.5 H_out = (H_{in}+0.5) * scale_{factor} - 0.5
W_out = (W_{in}+0.5) * scale_{factor} - 0.5 W_out = (W_{in}+0.5) * scale_{factor} - 0.5
else: else:
input : (N,C,H_in,W_in) input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where: output: (N,C,H_out,W_out) where:
H_out = H_{in} * scale_{factor} H_out = H_{in} * scale_{factor}
W_out = W_{in} * scale_{factor} W_out = W_{in} * scale_{factor}
For details of nearest neighbor interpolation, please refer to Wikipedia: For details of nearest neighbor interpolation, please refer to Wikipedia:
https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation. https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation.
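A numeric check of the "For scale" rules above, with illustrative sizes:

in_size = 5.0
# align_corners = True and out_size > 1, with out_size = 9:
assert (in_size - 1.0) / (9 - 1.0) == 0.5
# align_corners = False, with out_size = 10:
assert float(in_size / 10) == 0.5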
...@@ -7050,41 +7050,39 @@ def resize_bilinear(input, ...@@ -7050,41 +7050,39 @@ def resize_bilinear(input,
Align_corners and align_mode are optional parameters; the calculation Align_corners and align_mode are optional parameters; the calculation
method of interpolation can be selected by them. method of interpolation can be selected by them.
Align_corners and align_mode are optional parameters; the calculation method
of interpolation can be selected by them.
Example: Example:
For scale: .. code-block:: text
if align_corners = True && out_size > 1 :
scale_factor = (in_size-1.0)/(out_size-1.0) For scale:
else:
scale_factor = float(in_size/out_size) if align_corners = True && out_size > 1 :
Bilinear interpolation: scale_factor = (in_size-1.0)/(out_size-1.0)
else:
scale_factor = float(in_size/out_size)
if: Bilinear interpolation:
align_corners = False , align_mode = 0
if:
input : (N,C,H_in,W_in) align_corners = False , align_mode = 0
output: (N,C,H_out,W_out) where:
input : (N,C,H_in,W_in)
H_out = (H_{in}+0.5) * scale_{factor} - 0.5 output: (N,C,H_out,W_out) where:
W_out = (W_{in}+0.5) * scale_{factor} - 0.5
H_out = (H_{in}+0.5) * scale_{factor} - 0.5
W_out = (W_{in}+0.5) * scale_{factor} - 0.5
else: else:
input : (N,C,H_in,W_in) input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where: output: (N,C,H_out,W_out) where:
H_out = H_{in} * scale_{factor} H_out = H_{in} * scale_{factor}
W_out = W_{in} * scale_{factor} W_out = W_{in} * scale_{factor}
...@@ -7136,42 +7134,44 @@ def resize_nearest(input, ...@@ -7136,42 +7134,44 @@ def resize_nearest(input,
align_corners=True): align_corners=True):
""" """
Resize input by performing nearest neighbor interpolation in both the Resize input by performing nearest neighbor interpolation in both the
3rd dimention(in height direction) and the 4th dimention(in width 3rd dimension(in height direction) and the 4th dimension(in width
direction) based on given output shape which specified by actual_shape, direction) based on given output shape which is specified by actual_shape,
out_shape and scale in priority order. out_shape and scale in priority order.
Example: Example:
For scale: .. code-block:: text
if align_corners = True && out_size > 1 :
scale_factor = (in_size-1.0)/(out_size-1.0) For scale:
else:
scale_factor = float(in_size/out_size) if align_corners = True && out_size > 1 :
Nearest neighbor interpolation:
if:
align_corners = False
input : (N,C,H_in,W_in) scale_factor = (in_size-1.0)/(out_size-1.0)
output: (N,C,H_out,W_out) where:
else:
scale_factor = float(in_size/out_size)
Nearest neighbor interpolation:
if:
align_corners = False
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = \left \lfloor {H_{in} * scale_{}factor}} \right \rfloor H_out = floor(H_{in} * scale_{factor})
W_out = \left \lfloor {W_{in} * scale_{}factor}} \right \rfloor W_out = floor(W_{in} * scale_{factor})
else: else:
align_corners = True align_corners = True
input : (N,C,H_in,W_in) input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where: output: (N,C,H_out,W_out) where:
H_out = round(H_{in} * scale_{factor}) H_out = round(H_{in} * scale_{factor})
W_out = round(W_{in} * scale_{factor}) W_out = round(W_{in} * scale_{factor})
For details of nearest neighbor interpolation, please refer to Wikipedia: For details of nearest neighbor interpolation, please refer to Wikipedia:
...@@ -9945,6 +9945,7 @@ def teacher_student_sigmoid_loss(input, ...@@ -9945,6 +9945,7 @@ def teacher_student_sigmoid_loss(input,
Examples: Examples:
.. code-block:: python .. code-block:: python
cost = fluid.layers.teacher_student_sigmoid_loss(input=similarity, label=label) cost = fluid.layers.teacher_student_sigmoid_loss(input=similarity, label=label)
""" """
helper = LayerHelper('teacher_student_sigmoid_loss', **locals()) helper = LayerHelper('teacher_student_sigmoid_loss', **locals())
......
...@@ -13,15 +13,11 @@ ...@@ -13,15 +13,11 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import multiprocessing
from . import core from . import core
from . import framework from . import framework
from . import executor from . import executor
from .. import compat as cpt from . import compiler
import warnings
import sys import sys
import six
import os
__all__ = ['ParallelExecutor'] __all__ = ['ParallelExecutor']
...@@ -92,104 +88,31 @@ class ParallelExecutor(object): ...@@ -92,104 +88,31 @@ class ParallelExecutor(object):
num_trainers=1, num_trainers=1,
trainer_id=0, trainer_id=0,
scope=None): scope=None):
# step1: get places, the places are used in run too. sys.stderr.write(
self._places = [] 'ParallelExecutor is deprecated. '
if use_cuda: 'Please use CompiledProgram and Executor. CompiledProgram '
gpus_env = os.getenv("FLAGS_selected_gpus") 'is a central place for optimization and Executor is the '
if gpus_env: 'unified executor. Example can be found in compiler.py.\n')
gpus = [int(s) for s in gpus_env.split(",")]
else:
gpus = [
i for i in six.moves.range(core.get_cuda_device_count())
]
self._places = [core.CUDAPlace(i) for i in gpus]
else:
cpu_num = int(
os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
self._places = [core.CPUPlace() for _ in six.moves.range(cpu_num)]
assert self._places, "no place for execution"
# step2: init exec_strategy
if exec_strategy is None:
exec_strategy = ExecutionStrategy()
exec_strategy.use_cuda = use_cuda
if exec_strategy.num_threads == 0:
if use_cuda:
# Experiments on se-resnext shows that too many threads hurt
# performance. Worth tuning for other models in the future.
exec_strategy.num_threads = len(self._places) * 4
else:
cpu_num = int(
os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
exec_strategy.num_threads = cpu_num * 2
# step3: init build_strategy
if build_strategy is None: if build_strategy is None:
build_strategy = BuildStrategy() build_strategy = BuildStrategy()
build_strategy.num_trainers = num_trainers build_strategy.num_trainers = num_trainers
build_strategy.trainer_id = trainer_id build_strategy.trainer_id = trainer_id
# FIXME(zcd): is_distribution_ is a temporary field, because in pserver mode,
# num_trainers is 1, so the current fields of build_strategy doesn't tell if
# it's distributed model.
build_strategy.is_distribution = framework.is_pserver_mode(
main_program) or num_trainers > 1
# step4: get main_program, scope, local_scopes
main = main_program if main_program \
else framework.default_main_program()
# FIXME(dzhwinter): enable_inplace should be after memory_optimize
# if turn on python memory optimize, turn off the inplace_pass.
if build_strategy.memory_optimize is None:
build_strategy.memory_optimize = False if main._is_mem_optimized else True
if build_strategy.enable_inplace is None:
build_strategy.enable_inplace = False if main._is_mem_optimized else True
scope = scope if scope is not None else executor.global_scope()
if share_vars_from and not isinstance(share_vars_from,
ParallelExecutor):
raise TypeError("share_vars_from must be ParallelExecutor.")
local_scopes = share_vars_from.executor.local_scopes()\
if share_vars_from else []
# step5: check trainers_endpoints, it is used for distribution.
trainers_endpoints = main._trainers_endpoints
if num_trainers > 1 and trainers_endpoints:
assert num_trainers == len(
trainers_endpoints), "num_trainers == len(endpoints)"
build_strategy.trainers_endpoints = trainers_endpoints
# step6: get persistable_vars, places. persistable_vars
# need be broadcast to other local_scope.
persistable_vars = set([
cpt.to_text(v.name) for v in [
var for var in main.list_vars()
if var.persistable and var.type != core.VarDesc.VarType.RAW
]
])
def place_obj(place):
p = core.Place()
p.set_place(place)
return p
places = list(map(place_obj, self._places))
# step7: init ParallelExecutor self._places = compiler.get_available_places(use_cuda)
# ParallelExecutor API will be deprecated, don't support parallel graph. self._scope = scope if scope is not None else executor.global_scope()
self._graphs = []
if build_strategy.async_mode:
for _ in range(cpu_num):
self._graphs.append(core.Graph(main.desc))
else:
self._graphs.append(core.Graph(main.desc))
self.executor = core.ParallelExecutor( main_program = main_program if main_program is not None \
places, persistable_vars, else framework.default_main_program()
cpt.to_text(loss_name) if loss_name else six.u(''), scope, self._compiled_program = compiler.CompiledProgram(main_program)
local_scopes, exec_strategy, build_strategy, self._graphs) self._compiled_program.with_data_parallel(
loss_name=loss_name,
self.scope = scope build_strategy=build_strategy,
exec_strategy=exec_strategy,
share_vars_from=share_vars_from)
self._place = core.CUDAPlace(0) if use_cuda else core.CPUPlace()
self._executor = executor.Executor(self._place)
self._compiled_program._compile(place=self._place, scope=self._scope)
def run(self, fetch_list, feed=None, feed_dict=None, return_numpy=True): def run(self, fetch_list, feed=None, feed_dict=None, return_numpy=True):
""" """
...@@ -256,56 +179,11 @@ class ParallelExecutor(object): ...@@ -256,56 +179,11 @@ class ParallelExecutor(object):
loss = pe.run(feed=feeder.feed(cur_batch), loss = pe.run(feed=feeder.feed(cur_batch),
fetch_list=[avg_cost.name])) fetch_list=[avg_cost.name]))
""" """
if feed is None and feed_dict is not None: return self._executor.run(program=self._compiled_program,
feed = feed_dict scope=self._scope,
print( feed=feed,
"`feed_dict` is deprecated. Please use `feed=`", fetch_list=fetch_list,
file=sys.stderr) return_numpy=return_numpy)
if isinstance(feed, dict):
feed_tensor_dict = dict()
for feed_name in feed:
feed_tensor = feed[feed_name]
if not isinstance(feed_tensor, core.LoDTensor):
feed_tensor = core.LoDTensor()
# always set to CPU place, since the tensor needs to be split,
# which is fast on CPU
feed_tensor.set(feed[feed_name], core.CPUPlace())
feed_tensor_dict[feed_name] = feed_tensor
self.executor.feed_and_split_tensor_into_local_scopes(
feed_tensor_dict)
elif isinstance(feed, list) or isinstance(feed, tuple):
if len(feed) != len(self._places):
raise ValueError(
"Feed a list of tensor, the list should be the same size as places"
)
res = list()
for i, each in enumerate(feed):
if not isinstance(each, dict):
raise TypeError(
"Each element of feed list should be a dict")
res_dict = dict()
for feed_name in each:
tensor = each[feed_name]
if not isinstance(tensor, core.LoDTensor):
tmp = core.LoDTensor()
tmp.set(tensor, self._places[i])
tensor = tmp
res_dict[feed_name] = tensor
res.append(res_dict)
self.executor.feed_tensors_into_local_scopes(res)
fetch_var_name = 'fetch'
self.executor.run(fetch_list, fetch_var_name)
arr = self.scope.find_var(fetch_var_name).get_lod_tensor_array()
if return_numpy:
return executor.as_numpy(arr)
return [arr[i] for i in range(len(arr))]
@property @property
def device_count(self): def device_count(self):
......
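As the deprecation notice above directs, new code moves to CompiledProgram plus the unified Executor; a hedged sketch with an illustrative one-layer network:

import os
import numpy as np
import paddle.fluid as fluid
from paddle.fluid import compiler

os.environ['CPU_NUM'] = '1'  # keep the data-parallel split trivial

# Illustrative network.
x = fluid.layers.data(name='x', shape=[1], dtype='float32')
loss = fluid.layers.mean(fluid.layers.fc(input=x, size=1))
fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

# CompiledProgram is the central place for optimization; Executor runs it,
# which is what ParallelExecutor now wraps internally.
compiled = compiler.CompiledProgram(
    fluid.default_main_program()).with_data_parallel(loss_name=loss.name)
out, = exe.run(compiled,
               feed={'x': np.ones([2, 1], dtype='float32')},
               fetch_list=[loss.name])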
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import six
import unittest
import time
import math
import multiprocessing
import numpy as np
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid import compiler
# open eager delete mode
os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0'
os.environ['FLAGS_fast_eager_deletion_mode'] = 'true'
os.environ['CPU_NUM'] = '2'
class BuildIrMemOptBase(unittest.TestCase):
def check_network_convergence(self,
network,
use_cuda=True,
memory_opt=True,
use_ir_memory_optimize=True,
enable_inplace=True,
iter=5):
if use_cuda and not core.is_compiled_with_cuda():
print('Skip use_cuda=True because Paddle is not compiled with cuda')
return
if os.name == 'nt':
print(
'Skip use_parallel_executor=True because Paddle comes without parallel support on Windows'
)
return
fluid.default_startup_program().random_seed = 100
fluid.default_main_program().random_seed = 100
batch_size = 32
batch_size *= fluid.core.get_cuda_device_count() if use_cuda else int(
os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
# build network
word_dict = paddle.dataset.imdb.word_dict()
train_reader = paddle.batch(
paddle.dataset.imdb.train(word_dict), batch_size=batch_size)
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
cost = network(data, label, len(word_dict))
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
optimizer.minimize(cost)
if memory_opt:
fluid.memory_optimize(fluid.default_main_program())
# execution
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
reader = feeder.decorate_reader(train_reader, multi_devices=True)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
train_cp = compiler.CompiledProgram(fluid.default_main_program())
train_cp = train_cp.with_data_parallel(loss_name=cost.name)
fetch_list = [cost.name]
begin = time.time()
first_loss, last_loss = None, None
step_id = 0
custom_iter = getattr(self, "iter", None)
if custom_iter is not None:
iter = custom_iter
for data in reader():
ret = exe.run(train_cp, feed=data, fetch_list=fetch_list)
print(ret)
step_id += 1
if step_id == 1:
first_loss = ret[0]
if step_id == iter:
last_loss = ret[0]
break
end = time.time()
print("%.4f Instance per second" % (
(batch_size * iter) / (end - begin)))
print(first_loss, last_loss)
avg_last_loss_val = np.array(last_loss).mean()
avg_first_loss_val = np.array(first_loss).mean()
if math.isnan(float(avg_last_loss_val)) or math.isnan(
float(avg_first_loss_val)):
sys.exit("got NaN loss, training failed.")
return first_loss, last_loss
class TestIrMemOptBase(BuildIrMemOptBase):
def setUp(self):
self.network = None
def test_network(self):
if self.network is None or not core.is_compiled_with_cuda():
return
baseline_first_loss, baseline_last_loss = None, None
for use_cuda in [True]:
for use_python_mem_opt in [True, False]:
print(
'network: {}, use_cuda: {}, use_python_mem_opt: {}, use_ir_mem_opt : {}'.
format(self.network.__name__, use_cuda, use_python_mem_opt,
not use_python_mem_opt))
with fluid.program_guard(fluid.Program(), fluid.Program()):
with fluid.scope_guard(core.Scope()):
if use_cuda is True and use_python_mem_opt is True:
baseline_first_loss, baseline_last_loss = self.check_network_convergence(
self.network,
use_cuda=use_cuda,
memory_opt=use_python_mem_opt)
else:
cur_first_loss, cur_last_loss = self.check_network_convergence(
self.network,
use_cuda=use_cuda,
memory_opt=use_python_mem_opt)
self.assertAlmostEqual(
np.mean(baseline_last_loss),
np.mean(cur_last_loss),
delta=1e-2)
self.assertAlmostEqual(
np.mean(baseline_first_loss),
np.mean(cur_first_loss),
delta=1e-2)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import paddle.fluid.core as core
import paddle.fluid as fluid
def check_if_mkldnn_primitives_exist_in_bwd(test_case, op_type, x, out,
out_grad, x_grad):
def __assert_close(tensor, np_array, msg, atol=1e-4):
test_case.assertTrue(
np.allclose(
np.array(tensor), np_array, atol=atol), msg)
place = core.CPUPlace()
var_dict = {'x': x, 'out': out, 'out@GRAD': out_grad, 'x@GRAD': x_grad}
var_names = list(var_dict.keys())
ground_truth = {name: var_dict[name] for name in var_names}
program = fluid.Program()
with fluid.program_guard(program):
block = program.global_block()
for name in ground_truth:
block.create_var(
name=name, dtype=np.float32, shape=ground_truth[name].shape)
op = block.append_op(
type=op_type,
inputs={'X': block.var('x'), },
outputs={'Out': block.var('out')},
attrs={'use_mkldnn': True})
# Generate backward op_desc
grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(op.desc,
set(), [])
grad_op_desc = grad_op_desc_list[0]
new_op_desc = block.desc.append_op()
new_op_desc.copy_from(grad_op_desc)
for var_name in grad_op_desc.output_arg_names():
block.desc.var(var_name.encode('ascii'))
grad_op_desc.infer_var_type(block.desc)
grad_op_desc.infer_shape(block.desc)
for arg in grad_op_desc.output_arg_names():
grad_var = block.desc.find_var(arg.encode('ascii'))
grad_var.set_dtype(core.VarDesc.VarType.FP32)
exe = fluid.Executor(place)
# Do at least 2 iterations
for i in range(2):
out = exe.run(
program,
feed={name: var_dict[name]
for name in ['x', 'out@GRAD']},
fetch_list=['x@GRAD', 'out'])
__assert_close(x_grad, out[0], 'x@GRAD')
...@@ -19,7 +19,7 @@ import numpy as np ...@@ -19,7 +19,7 @@ import numpy as np
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.tests.unittests.op_test import OpTest from paddle.fluid.tests.unittests.op_test import OpTest
from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs
import paddle.fluid as fluid from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd
class TestMKLDNNReluDim2(TestRelu): class TestMKLDNNReluDim2(TestRelu):
...@@ -98,62 +98,24 @@ class TestMKLDNNAbsDim4(TestAbs): ...@@ -98,62 +98,24 @@ class TestMKLDNNAbsDim4(TestAbs):
# Check if primitives already exist in backward # Check if primitives already exist in backward
class TestMKLDNNReluPrimitivesAlreadyExist(unittest.TestCase): class TestMKLDNNAbsPrimitivesAlreadyExist(unittest.TestCase):
def __assert_close(self, tensor, np_array, msg, atol=1e-4): def setUp(self):
self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg) super(TestMKLDNNAbsPrimitivesAlreadyExist, self).setUp()
def test_check_forward_backward(self):
place = core.CPUPlace()
np.random.seed(123) np.random.seed(123)
x = np.random.uniform(-1, 1, [2, 2]).astype(np.float32) self.op_type = 'abs'
out = np.abs(x) self.x = np.random.uniform(-1, 1, [2, 2]).astype(np.float32)
self.out = np.abs(self.x)
out_grad = np.random.random_sample(x.shape).astype(np.float32) self.out_grad = np.random.random_sample(self.x.shape).astype(np.float32)
x_grad = out_grad * np.sign(x) # Abs grad calculation self.x_grad = self.__abs_bwd(self.x, self.out_grad)
var_dict = {'x': x, 'out': out, 'out@GRAD': out_grad, 'x@GRAD': x_grad} # Abs grad calculation
var_names = list(var_dict.keys()) def __abs_bwd(self, x, out_grad):
ground_truth = {name: var_dict[name] for name in var_names} return out_grad * np.sign(x)
program = fluid.Program() def test_check(self):
with fluid.program_guard(program): check_if_mkldnn_primitives_exist_in_bwd(
block = program.global_block() self, self.op_type, self.x, self.out, self.out_grad, self.x_grad)
for name in ground_truth:
block.create_var(
name=name, dtype='float32', shape=ground_truth[name].shape)
relu_op = block.append_op(
type="abs",
inputs={"X": block.var('x'), },
outputs={"Out": block.var('out')},
attrs={"use_mkldnn": True})
# Generate backward op_desc
grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
relu_op.desc, set(), [])
grad_op_desc = grad_op_desc_list[0]
new_op_desc = block.desc.append_op()
new_op_desc.copy_from(grad_op_desc)
for var_name in grad_op_desc.output_arg_names():
block.desc.var(var_name.encode("ascii"))
grad_op_desc.infer_var_type(block.desc)
grad_op_desc.infer_shape(block.desc)
for arg in grad_op_desc.output_arg_names():
grad_var = block.desc.find_var(arg.encode("ascii"))
grad_var.set_dtype(core.VarDesc.VarType.FP32)
exe = fluid.Executor(place)
# Do at least 2 iterations
for i in range(2):
out = exe.run(
program,
feed={name: var_dict[name]
for name in ['x', 'out@GRAD']},
fetch_list=['x@GRAD'])
self.__assert_close(x_grad, out[0], "x@GRAD")
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -15,44 +15,139 @@ ...@@ -15,44 +15,139 @@
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import numpy as np
from paddle.fluid.tests.unittests.test_conv2d_op import TestConv2dOp, TestWithPad, TestWithStride, TestWithGroup, TestWith1x1, TestWithInput1x1Filter1x1 import paddle.fluid.core as core
from paddle.fluid.tests.unittests.op_test import OpTest
from paddle.fluid.tests.unittests.test_conv2d_op import TestConv2dOp
class TestMKLDNN(TestConv2dOp): def conv2d_bias_naive(out, bias):
def init_kernel_type(self): _, out_c, _, _ = out.shape
self.use_mkldnn = True
self.data_format = "NCHW"
for l in range(out_c):
out[:, l, :, :] = out[:, l, :, :] + bias[l]
return out
class TestMKLDNNWithPad(TestWithPad):
def init_kernel_type(self):
self.use_mkldnn = True
self.data_format = "NCHW"
def conv2d_residual_naive(out, residual):
assert out.shape == residual.shape
out = np.add(out, residual)
return out
class TestMKLDNNWithStride(TestWithStride):
def init_kernel_type(self):
self.use_mkldnn = True
self.data_format = "NCHW"
class TestConv2dMKLDNNOp(TestConv2dOp):
def init_group(self):
self.groups = 1
class TestMKLDNNWithGroup(TestWithGroup):
def init_kernel_type(self): def init_kernel_type(self):
self.use_mkldnn = True
self.data_format = "NCHW" self.data_format = "NCHW"
self.use_mkldnn = True
self._cpu_only = True
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
class TestMKLDNNWith1x1(TestWith1x1): def setUp(self):
def init_kernel_type(self): self.fuse_bias = False
self.use_mkldnn = True self.bias_size = None
self.data_format = "NCHW" self.fuse_relu = False
self.fuse_residual_connection = False
self.input_residual_size = None
TestConv2dOp.setUp(self)
output = self.outputs['Output']
class TestMKLDNNWithInput1x1Filter1x1(TestWithInput1x1Filter1x1): # MKL-DNN only supports either conv-sum-relu or conv-relu.
def init_kernel_type(self): if self.fuse_bias and self.bias_size is not None:
self.use_mkldnn = True bias = np.random.random(self.bias_size).astype(self.dtype)
self.data_format = "NCHW" output = conv2d_bias_naive(output, bias)
output = output.astype(self.dtype)
self.attrs['fuse_bias'] = self.fuse_bias
self.inputs['Bias'] = OpTest.np_dtype_to_fluid_dtype(bias)
if self.fuse_residual_connection and self.input_residual_size is not None:
input_residual = np.random.random(self.input_residual_size).astype(
self.dtype)
output = conv2d_residual_naive(output, input_residual)
self.attrs[
'fuse_residual_connection'] = self.fuse_residual_connection
self.inputs['ResidualData'] = OpTest.np_dtype_to_fluid_dtype(
input_residual)
if self.fuse_relu:
output = np.maximum(output, 0).astype(self.dtype)
output = output.astype(self.dtype)
self.attrs['fuse_bias'] = self.fuse_bias
self.attrs['fuse_relu'] = self.fuse_relu
self.attrs['fuse_residual_connection'] = self.fuse_residual_connection
self.outputs['Output'] = output
class TestWithFuse(TestConv2dMKLDNNOp):
def init_test_case(self):
TestConv2dMKLDNNOp.init_test_case(self)
self.pad = [1, 1]
self.fuse_bias = True
self.bias_size = [6]
self.fuse_residual_connection = True
self.input_residual_size = [2, 6, 5, 5]
def test_check_grad(self):
pass
def test_check_grad_no_filter(self):
pass
def test_check_grad_no_input(self):
pass
class TestWithPadWithBias(TestConv2dMKLDNNOp):
def init_test_case(self):
TestConv2dMKLDNNOp.init_test_case(self)
self.pad = [1, 1]
self.input_size = [2, 3, 6, 6]
class TestWithStride(TestConv2dMKLDNNOp):
def init_test_case(self):
TestConv2dMKLDNNOp.init_test_case(self)
self.pad = [1, 1]
self.stride = [2, 2]
self.input_size = [2, 3, 6, 6]
class TestWithGroup(TestConv2dMKLDNNOp):
def init_group(self):
self.groups = 3
class TestWith1x1(TestConv2dMKLDNNOp):
def init_test_case(self):
TestConv2dMKLDNNOp.init_test_case(self)
self.filter_size = [6, 3, 1, 1]
class TestWithInput1x1Filter1x1(TestConv2dMKLDNNOp):
def init_test_case(self):
TestConv2dMKLDNNOp.init_test_case(self)
self.input_size = [2, 3, 1, 1] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 1, 1]
def init_group(self):
self.groups = 3
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -18,6 +18,24 @@ import unittest ...@@ -18,6 +18,24 @@ import unittest
from paddle.fluid.tests.unittests.test_pool2d_op import TestPool2D_Op, TestCase1, TestCase2, TestCase3, TestCase4, TestCase5 from paddle.fluid.tests.unittests.test_pool2d_op import TestPool2D_Op, TestCase1, TestCase2, TestCase3, TestCase4, TestCase5
def create_test_mkldnn_use_ceil_class(parent):
class TestMKLDNNPool2DUseCeilCase(parent):
def init_kernel_type(self):
self.use_mkldnn = True
def init_ceil_mode(self):
self.ceil_mode = True
cls_name = "{0}_{1}".format(parent.__name__, "MKLDNNCeilModeCast")
TestMKLDNNPool2DUseCeilCase.__name__ = cls_name
globals()[cls_name] = TestMKLDNNPool2DUseCeilCase
create_test_mkldnn_use_ceil_class(TestPool2D_Op)
create_test_mkldnn_use_ceil_class(TestCase1)
create_test_mkldnn_use_ceil_class(TestCase2)
def create_test_mkldnn_class(parent): def create_test_mkldnn_class(parent):
class TestMKLDNNCase(parent): class TestMKLDNNCase(parent):
def init_kernel_type(self): def init_kernel_type(self):
......
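The ceil-mode factory above reuses this file's pattern of synthesizing test classes at import time; a standalone sketch of that idiom with hypothetical names:

import unittest

def create_test_variant(parent):
    class Variant(parent):
        def flag(self):
            return True

    # Rename and register the class so unittest discovery picks it up.
    Variant.__name__ = "{0}_{1}".format(parent.__name__, "Variant")
    globals()[Variant.__name__] = Variant

class BaseCase(unittest.TestCase):
    def flag(self):
        return False

    def test_flag(self):
        self.assertIn(self.flag(), (True, False))

create_test_variant(BaseCase)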
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from paddle.fluid.tests.unittests.op_test import OpTest
import paddle.fluid.core as core
from paddle.fluid.tests.unittests.test_softmax_op import TestSoftmaxOp, stable_softmax
from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd
class TestSoftmaxMKLDNNOp(TestSoftmaxOp):
def init_kernel_type(self):
self.use_mkldnn = True
class TestSoftmaxMKLDNNOp2(TestSoftmaxMKLDNNOp):
def get_x_shape(self):
return [2, 3, 4, 5]
# Check if primitives already exist in backward
class TestSoftmaxMKLDNNPrimitivesAlreadyExist(unittest.TestCase):
def setUp(self):
super(TestSoftmaxMKLDNNPrimitivesAlreadyExist, self).setUp()
np.random.seed(123)
self.op_type = 'softmax'
self.x = np.random.uniform(-1, 1, 2).astype(np.float32)
self.out = stable_softmax(self.x)
self.out_grad = np.random.random_sample(self.x.shape).astype(np.float32)
self.x_grad = self.__softmax_bwd(self.out, self.out_grad)
# Softmax grad calculation
def __softmax_bwd(self, out, out_grad):
return out * (out_grad - np.dot(out, out_grad))
def test_check(self):
check_if_mkldnn_primitives_exist_in_bwd(
self, self.op_type, self.x, self.out, self.out_grad, self.x_grad)
if __name__ == '__main__':
unittest.main()
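A quick numeric sanity check of the __softmax_bwd formula used above:

import numpy as np

out = np.array([0.3, 0.7], dtype=np.float32)       # softmax output
out_grad = np.array([1.0, 0.0], dtype=np.float32)  # upstream gradient
# dL/dx = out * (out_grad - dot(out, out_grad)); here dot(out, out_grad) = 0.3
x_grad = out * (out_grad - np.dot(out, out_grad))
assert np.allclose(x_grad, [0.21, -0.21])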
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
from paddle.fluid.tests.unittests.test_adam_op import TestAdamOp1, TestAdamOp2, TestAdamOpMultipleSteps, TestSparseAdamOp
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
from paddle.fluid.tests.unittests.test_concat_op import TestConcatOp, TestConcatOp2, TestConcatOp3
if __name__ == '__main__':
unittest.main()
...@@ -26,6 +26,7 @@ class TestActivation(OpTest): ...@@ -26,6 +26,7 @@ class TestActivation(OpTest):
self.op_type = "exp" self.op_type = "exp"
self.dtype = np.float32 self.dtype = np.float32
self.init_dtype() self.init_dtype()
self.init_kernel_type()
x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
out = np.exp(x) out = np.exp(x)
...@@ -44,6 +45,9 @@ class TestActivation(OpTest): ...@@ -44,6 +45,9 @@ class TestActivation(OpTest):
def init_dtype(self): def init_dtype(self):
self.dtype = np.float32 self.dtype = np.float32
def init_kernel_type(self):
pass
class TestSigmoid(TestActivation): class TestSigmoid(TestActivation):
def setUp(self): def setUp(self):
...@@ -601,6 +605,25 @@ class TestSwish(TestActivation): ...@@ -601,6 +605,25 @@ class TestSwish(TestActivation):
self.check_grad(['X'], 'Out', max_relative_error=0.008) self.check_grad(['X'], 'Out', max_relative_error=0.008)
#------------------ Test Cudnn Activation----------------------
def create_test_act_cudnn_class(parent, atol=1e-3, grad_atol=1e-3):
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestActCudnn(parent):
def init_kernel_type(self):
self.attrs = {"use_cudnn": True}
cls_name = "{0}_{1}".format(parent.__name__, "cudnn")
TestActCudnn.__name__ = cls_name
globals()[cls_name] = TestActCudnn
create_test_act_cudnn_class(TestRelu)
create_test_act_cudnn_class(TestRelu6)
create_test_act_cudnn_class(TestSigmoid)
create_test_act_cudnn_class(TestTanh)
#------------------ Test Fp16 ---------------------- #------------------ Test Fp16 ----------------------
def create_test_act_fp16_class(parent, def create_test_act_fp16_class(parent,
atol=1e-3, atol=1e-3,
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import unittest
import numpy as np
from op_test import OpTest


class TestAllocContinuousSpace(OpTest):
    def setUp(self):
        self.op_type = "alloc_continuous_space"
        self.dtype = np.float32
        attrs = self.init_attr()
        self.copy_data = attrs["copy_data"]
        self.constant = attrs["constant"]
        self.set_constant = attrs["set_constant"]
        self.Inputs = self.init_input()
        self.FusedOutput = self.init_output(self.Inputs, self.set_constant,
                                            self.constant)
        self.inputs = {'Input': self.Inputs}
        self.attrs = attrs
        self.outputs = {'Output': self.Inputs, 'FusedOutput': self.FusedOutput}

    def init_dtype(self):
        self.dtype = np.float32

    def init_input(self):
        inputs = []
        inputs.append(("x1", np.random.random([20, 3]).astype(self.dtype)))
        inputs.append(("x2", np.random.random([20]).astype(self.dtype)))
        inputs.append(("x3", np.random.random([1]).astype(self.dtype)))
        inputs.append(("x4", np.random.random([200, 30]).astype(self.dtype)))
        inputs.append(("x5", np.random.random([30]).astype(self.dtype)))
        inputs.append(("x6", np.random.random([1]).astype(self.dtype)))
        return inputs

    def init_attr(self):
        return {"copy_data": True, "set_constant": False, "constant": 0.0}

    def init_output(self, input_list, set_constant, constant):
        inputs = [input[1].flatten() for input in input_list]
        output = np.concatenate(inputs)
        if set_constant:
            output = np.ones((len(output))) * constant
        return output

    def test_check_output(self):
        self.check_output()


class TestAllocContinuousSpace2(TestAllocContinuousSpace):
    def init_attr(self):
        return {"copy_data": False, "set_constant": True, "constant": 0.5}

    def test_check_output(self):
        self.check_output(no_check_set=["Output"])


if __name__ == '__main__':
    unittest.main()
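The expected FusedOutput above is nothing more than each input flattened and laid out back to back; a short numpy sketch of that reference computation (the shapes here are arbitrary):

import numpy as np

tensors = [np.random.random([20, 3]), np.random.random([20]), np.random.random([1])]
# Mirror init_output: flatten each tensor in order into one contiguous buffer.
fused = np.concatenate([t.flatten() for t in tensors])
assert fused.size == sum(t.size for t in tensors)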
@@ -56,6 +56,8 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2):
         train_reader, multi_devices=use_parallel_executor)
 
     exe = fluid.Executor(place)
+    fluid.default_startup_program().random_seed = 1
+    fluid.default_main_program().random_seed = 1
     exe.run(fluid.default_startup_program())
 
     train_cp = compiler.CompiledProgram(fluid.default_main_program())
...
@@ -35,7 +35,7 @@ class TestFakeQuantizeOp(OpTest):
         self.check_output()
 
 
-class TestFakeQuantizeOp(OpTest):
+class TestFakeQuantizeRangeAbsMaxOp(OpTest):
     def setUp(self):
         self.op_type = "fake_quantize_range_abs_max"
         self.attrs = {
@@ -43,8 +43,10 @@ class TestFakeQuantizeOp(OpTest):
             'window_size': int(1),
             'is_test': False
         }
+        x = (np.random.random((8, 16, 7, 7)) - 0.5) * 10
+        x = x.astype("float32")
         self.inputs = {
-            'X': np.random.random((8, 16, 7, 7)).astype("float32"),
+            'X': x,
             'Iter': np.zeros(1).astype("int64"),
             'InScale': np.zeros(1).astype("float32")
         }
@@ -62,5 +64,36 @@ class TestFakeQuantizeOp(OpTest):
         self.check_output()
 
 
+class TestFakeQuantizeRangeAbsMaxOp2(OpTest):
+    def setUp(self):
+        self.op_type = "fake_quantize_range_abs_max"
+        self.attrs = {
+            'bit_length': int(8),
+            'window_size': int(1),
+            'is_test': True
+        }
+        x = (np.random.random((8, 16, 7, 7)) - 0.5) * 10
+        x = x.astype("float32")
+        scale = np.max(np.abs(x)).astype("float32") - 1.0
+        out_scales = np.zeros(self.attrs['window_size']).astype("float32")
+        out_scales[0] = scale
+        self.inputs = {
+            'X': x,
+            'Iter': np.zeros(1).astype("int64"),
+            'InScale': scale.astype("float32")
+        }
+        xs = np.clip(x, -scale, scale)
+        qs = np.round(xs / scale * ((1 << (self.attrs['bit_length'] - 1)) - 1))
+        self.outputs = {
+            'Out': qs,
+            'OutScale': scale.astype("float32"),
+            'OutScales': out_scales,
+        }
+
+    def test_check_output(self):
+        self.check_output(no_check_set=set(['OutScale', 'OutScales']))
+
+
 if __name__ == "__main__":
     unittest.main()
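For reference, the expected 'Out' above follows the usual abs-max quantization recipe: clip to [-scale, scale], then map linearly onto the signed bit_length range. A worked numeric sketch (the sample values and the scale are made up):

import numpy as np

bit_length = 8
x = np.array([-6.0, -0.5, 0.0, 2.5, 6.0], dtype="float32")
scale = 5.0  # stand-in for the InScale carried over from training

clipped = np.clip(x, -scale, scale)
# (1 << (bit_length - 1)) - 1 == 127 for int8.
quantized = np.round(clipped / scale * ((1 << (bit_length - 1)) - 1))
print(quantized)  # [-127.  -13.    0.   64.  127.]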
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import print_function
+
 import contextlib
 import unittest
 import numpy as np
@@ -142,8 +144,6 @@ class TestImperativeMnist(unittest.TestCase):
                 sgd.minimize(avg_loss)
                 mnist.clear_gradients()
 
-                fluid.default_main_program().global_block()._clear_block()
-
                 dy_param_value = {}
                 for param in mnist.parameters():
                     dy_param_value[param.name] = param._numpy()
...
@@ -243,7 +243,9 @@ class TestImperativePtbRnn(unittest.TestCase):
             dy_loss = None
             last_hidden = None
             last_cell = None
-            for i in range(2):
+            batch_num = 50
+            for i in range(batch_num):
                 x_data = np.arange(12).reshape(4, 3).astype('int64')
                 y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                 x_data = x_data.reshape((-1, num_steps, 1))
@@ -302,7 +304,7 @@ class TestImperativePtbRnn(unittest.TestCase):
         static_loss_value = None
         static_last_cell_value = None
         static_last_hidden_value = None
-        for i in range(2):
+        for i in range(batch_num):
             x_data = np.arange(12).reshape(4, 3).astype('int64')
             y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
             x_data = x_data.reshape((-1, num_steps, 1))
...
@@ -231,7 +231,7 @@ class TestImperativeResnet(unittest.TestCase):
         seed = 90
 
         batch_size = train_parameters["batch_size"]
-        batch_num = 2
+        batch_num = 20
         with fluid.imperative.guard():
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
@@ -286,8 +286,6 @@ class TestImperativeResnet(unittest.TestCase):
                 optimizer.minimize(avg_loss)
                 resnet.clear_gradients()
 
-                fluid.default_main_program().global_block()._clear_block()
-
                 dy_param_value = {}
                 for param in resnet.parameters():
                     dy_param_value[param.name] = param._numpy()
...
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NLP model: a stack of ops operating on LoD. It's a classical test case for the optimize pass.
from __future__ import print_function

import unittest

import paddle.fluid as fluid
from ir_memory_optimize_net_base import TestIrMemOptBase


def lstm_net(data,
             label,
             dict_dim,
             emb_dim=128,
             hid_dim=128,
             hid_dim2=96,
             class_dim=2,
             emb_lr=30.0):
    emb = fluid.layers.embedding(
        input=data,
        size=[dict_dim, emb_dim],
        param_attr=fluid.ParamAttr(learning_rate=emb_lr))
    fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
    lstm_h, c = fluid.layers.dynamic_lstm(
        input=fc0, size=hid_dim * 4, is_reverse=False)
    lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
    lstm_max_tanh = fluid.layers.tanh(lstm_max)
    fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
    prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    return avg_cost


class TestIrMemOptRNN(TestIrMemOptBase):
    def setUp(self):
        self.network = lstm_net


if __name__ == "__main__":
    unittest.main()
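TestIrMemOptBase is expected to supply the data and label variables; a rough sketch of wiring lstm_net up by hand, assuming the usual fluid data-layer setup (the 'words'/'label' names and the dict_dim value are illustrative, not from the commit):

import paddle.fluid as fluid

data = fluid.layers.data(
    name="words", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
avg_cost = lstm_net(data, label, dict_dim=5000)
fluid.optimizer.Adam(learning_rate=0.001).minimize(avg_cost)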
@@ -13,21 +13,44 @@
 # limitations under the License.
 
 import os
+import sys
 import unittest
+from timeit import default_timer as timer
 
+import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
+import paddle.dataset.wmt16 as wmt16
 
 os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0"
 os.environ[
     'RECORDIO_FILENAME'] = '/tmp/ir_memory_optimize_transformer.wmt16.recordio'
 
-from test_parallel_executor_transformer import TestTransformer
-from test_parallel_executor_transformer import transformer
+from test_parallel_executor_transformer import transformer, ModelHyperParams, transformer_model, transformer, prepare_batch_input
+from parallel_executor_test_base import TestParallelExecutorBase
 
 # NOTE(dzhwinter): test different strategy collisions.
 # open the eager delete tensor strategy by default.
-class TestTransformerWithIR(TestTransformer):
+class TestTransformerWithIR(TestParallelExecutorBase):
+    @classmethod
+    def setUpClass(cls):
+        os.environ['CPU_NUM'] = str(4)
+        reader = paddle.batch(
+            wmt16.train(ModelHyperParams.src_vocab_size,
+                        ModelHyperParams.trg_vocab_size),
+            batch_size=transformer_model.batch_size)
+        with fluid.recordio_writer.create_recordio_writer(
+                os.environ.get("RECORDIO_FILENAME")) as writer:
+            for batch in reader():
+                for tensor in prepare_batch_input(
+                        batch, ModelHyperParams.src_pad_idx,
+                        ModelHyperParams.trg_pad_idx, ModelHyperParams.n_head):
+                    t = fluid.LoDTensor()
+                    t.set(tensor, fluid.CPUPlace())
+                    writer.append_tensor(t)
+                writer.complete_append_tensor()
+
     def test_main(self):
         if core.is_compiled_with_cuda():
             # check python transpiler
@@ -35,13 +58,15 @@ class TestTransformerWithIR(TestTransformer):
                 transformer,
                 use_cuda=True,
                 memory_opt=True,
-                use_ir_memory_optimize=False)
+                use_ir_memory_optimize=False,
+                iter=2)
             # check IR memory optimize
             self.check_network_convergence(
                 transformer,
                 use_cuda=True,
                 memory_opt=False,
-                use_ir_memory_optimize=True)
+                use_ir_memory_optimize=True,
+                iter=2)
 
 if __name__ == '__main__':
...
@@ -82,6 +82,13 @@ def piecewise_decay(global_step, boundaries, values):
         return values[len(values) - 1]
 
 
+def cosine_decay(global_step, learning_rate, step_each_epoch, epochs):
+    cur_epoch = math.floor(global_step / step_each_epoch)
+    decayed_lr = learning_rate * 0.5 * (
+        math.cos(cur_epoch * math.pi / epochs) + 1)
+    return decayed_lr
+
+
 class TestLearningRateDecay(unittest.TestCase):
     def check_decay(self, python_decay_fn, fluid_decay_fn, kwargs):
         places = [fluid.CPUPlace()]
@@ -149,6 +156,11 @@ class TestLearningRateDecay(unittest.TestCase):
                 "boundaries": [3, 6, 9],
                 "values": [0.1, 0.2, 0.3, 0.4]
             }),
+            (cosine_decay, layers.cosine_decay, {
+                "learning_rate": 0.1,
+                "step_each_epoch": 100,
+                "epochs": 120
+            }),
         ]
 
         for py_decay_fn, fluid_decay_fn, kwargs in decay_fns:
...
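A quick numeric check of the python-side cosine_decay above, using the same kwargs the test registers (lr=0.1, 100 steps per epoch, 120 epochs):

import math


def cosine_decay(global_step, learning_rate, step_each_epoch, epochs):
    cur_epoch = math.floor(global_step / step_each_epoch)
    return learning_rate * 0.5 * (math.cos(cur_epoch * math.pi / epochs) + 1)


print(cosine_decay(0, 0.1, 100, 120))      # 0.1  (epoch 0, cos(0) = 1)
print(cosine_decay(6000, 0.1, 100, 120))   # 0.05 (epoch 60, cos(pi/2) = 0)
print(cosine_decay(11900, 0.1, 100, 120))  # ~1.7e-05 (epoch 119, near zero)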
@@ -24,17 +24,28 @@ from op_test import OpTest
 class TestSGDOp(OpTest):
     def setUp(self):
         self.op_type = "sgd"
-        w = np.random.random((102, 105)).astype("float32")
-        g = np.random.random((102, 105)).astype("float32")
+        self.conf()
+        w = np.random.random((self.h, self.w)).astype("float32")
+        g = np.random.random((self.h, self.w)).astype("float32")
         lr = np.array([0.1]).astype("float32")
 
         self.inputs = {'Param': w, 'Grad': g, 'LearningRate': lr}
         self.outputs = {'ParamOut': w - lr * g}
 
+    def conf(self):
+        self.h = 102
+        self.w = 105
+
     def test_check_output(self):
         self.check_output()
 
 
+class TestSGDOpCase8X(TestSGDOp):
+    def conf(self):
+        self.h = 10
+        self.w = 64
+
+
 class TestSparseSGDOp(unittest.TestCase):
     def check_with_place(self, place):
         scope = core.Scope()
@@ -42,12 +53,12 @@ class TestSparseSGDOp(unittest.TestCase):
         # create and initialize Grad Variable
         height = 10
         rows = [0, 4, 7]
-        row_numel = 12
+        self.conf()
 
         grad_selected_rows = scope.var('Grad').get_selected_rows()
         grad_selected_rows.set_height(height)
         grad_selected_rows.set_rows(rows)
-        np_array = np.ones((len(rows), row_numel)).astype("float32")
+        np_array = np.ones((len(rows), self.row_numel)).astype("float32")
         np_array[0, 0] = 2.0
         np_array[2, 8] = 4.0
@@ -56,7 +67,7 @@ class TestSparseSGDOp(unittest.TestCase):
         # create and initialize Param Variable
         param = scope.var('Param').get_tensor()
-        param_array = np.full((height, row_numel), 5.0).astype("float32")
+        param_array = np.full((height, self.row_numel), 5.0).astype("float32")
         param.set(param_array, place)
 
         # create and initialize LearningRate Variable
@@ -98,6 +109,14 @@ class TestSparseSGDOp(unittest.TestCase):
         for place in places:
             self.check_with_place(place)
 
+    def conf(self):
+        self.row_numel = 12
+
+
+class TestSparseSGDOpCase8X(TestSparseSGDOp):
+    def conf(self):
+        self.row_numel = 16
+
+
 class TestSGDOpOptimizeSelectedRows(unittest.TestCase):
     def check_with_place(self, place):
...
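The SelectedRows path exercised by TestSparseSGDOp only touches the rows carried by the sparse gradient; a numpy sketch of that update (shapes and initial values mirror the test's setup, the 0.1 learning rate is illustrative):

import numpy as np

height, row_numel, lr = 10, 12, 0.1
rows = [0, 4, 7]  # rows present in the sparse gradient
param = np.full((height, row_numel), 5.0).astype("float32")
grad_rows = np.ones((len(rows), row_numel)).astype("float32")

# SGD on SelectedRows: update only the listed rows.
for i, r in enumerate(rows):
    param[r] -= lr * grad_rows[i]

assert np.isclose(param[0, 0], 5.0 - 0.1 * 1.0)
assert param[1, 0] == np.float32(5.0)  # rows not in the gradient stay put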
...@@ -144,15 +144,5 @@ class TestSoftmaxFP16CUDNNOp2(TestSoftmaxFP16CUDNNOp): ...@@ -144,15 +144,5 @@ class TestSoftmaxFP16CUDNNOp2(TestSoftmaxFP16CUDNNOp):
return [2, 3, 4, 5] return [2, 3, 4, 5]
class TestSoftmaxMKLDNNOp(TestSoftmaxOp):
def init_kernel_type(self):
self.use_mkldnn = True
class TestSoftmaxMKLDNNOp2(TestSoftmaxMKLDNNOp):
def get_x_shape(self):
return [2, 3, 4, 5]
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
@@ -15,6 +15,7 @@
 from __future__ import print_function
 
 import os
+import sys
 import numpy as np
 from .. import core
 from ..framework import Program
@@ -50,6 +51,9 @@ class InferenceTranspiler(object):
             place (Place): inference place
             scope (Scope|None): inference Scope
         '''
+        sys.stderr.write("InferenceTranspiler is deprecated since it's not "
+                         "safe. Users should be "
+                         "responsible for constructing the inference program\n")
         if not isinstance(program, Program):
             raise TypeError("program should be as Program type")
         if not isinstance(place, core.CPUPlace) and not isinstance(
...
@@ -15,6 +15,7 @@
 from __future__ import print_function
 
 import six
+import sys
 from collections import defaultdict, MutableSet
 from .. import core
 from ... import compat as cpt
@@ -509,6 +510,8 @@ def memory_optimize(input_program,
     Returns:
         None
     """
+    sys.stderr.write('memory_optimize is deprecated. '
+                     'Use CompiledProgram and Executor\n')
 
     def to_name_str(var):
         if isinstance(var, Variable):
...
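The deprecation message points users at CompiledProgram plus Executor; the parallel-executor test earlier in this commit already shows that flow, and a minimal sketch of it looks like this (feed/fetch details elided):

import paddle.fluid as fluid
import paddle.fluid.compiler as compiler

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
# Compile the main program instead of mutating it with memory_optimize().
train_cp = compiler.CompiledProgram(fluid.default_main_program())
# exe.run(train_cp, feed={...}, fetch_list=[...])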
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import os
+import six
 
 
 class PlotData(object):
@@ -60,9 +61,9 @@ class Ploter(object):
     def append(self, title, step, value):
         """
         Feed data
 
         Args:
             title: assign the group data to this subtitle.
             step: the x_axis of data.
             value: the y_axis of data.
@@ -71,9 +72,9 @@ class Ploter(object):
             .. code-block:: python
 
             plot_curve = Ploter("Curve 1", "Curve 2")
             plot_curve.append(title="Curve 1", step=1, value=1)
         """
-        assert isinstance(title, basestring)
-        assert self.__plot_data__.has_key(title)
+        assert isinstance(title, six.string_types)
+        assert title in self.__plot_data__
         data = self.__plot_data__[title]
         assert isinstance(data, PlotData)
         data.append(step, value)
@@ -89,7 +90,7 @@ class Ploter(object):
             .. code-block:: python
 
             plot_curve = Ploter()
             plot_curve.plot()
         """
         if self.__plot_is_disabled__():
             return
...
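The two rewritten assertions are the stock Python 2 to 3 portability fixes: six.string_types covers both str and unicode, and dict membership replaces the removed has_key(). In isolation:

import six

plot_data = {"Curve 1": [], "Curve 2": []}
title = "Curve 1"

assert isinstance(title, six.string_types)  # py2: str/unicode, py3: str
assert title in plot_data                   # has_key() no longer exists in py3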
@@ -122,7 +122,7 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
     def create_dataset_from_list(self, path):
         data = []
         label_set = []
-        for line in open(file_list):
+        for line in open(path):
             items = line.rstrip.split()
             image_path = items[0]
             label_name = items[1]
@@ -141,7 +141,7 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
             path: the path of the image dataset.
         """
         if self.from_list:
-            return create_dataset_from_list(path)
+            return self.create_dataset_from_list(path)
         label_set = preprocess_util.get_label_set_from_dir(path)
         data = []
         for l_name in list(label_set.keys()):
...
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import ast
import hashlib
import importlib
import paddle.fluid

files = [
    "paddle.fluid", "paddle.fluid.average", "paddle.fluid.backward",
    "paddle.fluid.clip", "paddle.fluid.data_feeder", "paddle.fluid.executor",
    "paddle.fluid.initializer", "paddle.fluid.io", "paddle.fluid.layers",
    "paddle.fluid.metrics", "paddle.fluid.nets", "paddle.fluid.optimizer",
    "paddle.fluid.profiler", "paddle.fluid.recordio_writer",
    "paddle.fluid.regularizer", "paddle.fluid.transpiler"
]


def md5(doc):
    hash = hashlib.md5()
    hash.update(str(doc))
    return hash.hexdigest()


def get_module():
    for fi in files:
        fi_lib = importlib.import_module(fi)
        doc_function = getattr(fi_lib, "__all__")
        for api in doc_function:
            api_name = fi + "." + api
            try:
                doc_module = getattr(eval(api_name), "__doc__")
            except:
                pass
            doc_md5_code = md5(doc_module)
            doc_dict[api_name] = doc_md5_code


def doc_md5_dict(doc_md5_path):
    with open(doc_md5_path, "rb") as f:
        doc_md5 = f.read()
        doc_md5_dict = ast.literal_eval(doc_md5)
    return doc_md5_dict


def check_doc_md5():
    for k, v in doc_dict.items():
        try:
            if doc_ci_dict[k] != v:
                return doc_dict
        except:
            return doc_dict
    return True


if __name__ == "__main__":
    doc_dict = {}
    doc_ci_dict = {}
    doc_md5_file = "/root/.cache/doc_md5.txt"
    if not os.path.exists(doc_md5_file):
        os.mknod(doc_md5_file)
    else:
        doc_ci_dict = doc_md5_dict(doc_md5_file)
    get_module()
    if not os.path.getsize(doc_md5_file):
        with open(doc_md5_file, 'w') as f:
            f.write(str(doc_dict))
        check_dic = True
        print(check_dic)
    else:
        check_dic = check_doc_md5()
        print(check_dic)
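The script's core trick is fingerprinting each public API's docstring so CI can flag silent documentation edits. The same idea in isolation (doc_fingerprint is an illustrative name, shown with the explicit encode() that Python 3 requires):

import hashlib


def doc_fingerprint(obj):
    # Hash the docstring; a changed hash means the doc text changed.
    return hashlib.md5(str(obj.__doc__).encode("utf-8")).hexdigest()


baseline = doc_fingerprint(hashlib.md5)
assert doc_fingerprint(hashlib.md5) == baseline  # unchanged doc, same hash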
 #!/bin/bash
 TOTAL_ERRORS=0
+if [[ ! $TRAVIS_BRANCH ]]; then
+    # install cpplint on local machine.
+    if [[ ! $(which cpplint) ]]; then
+        pip install cpplint
+    fi
+    # diff files on local machine.
+    files=$(git diff --cached --name-status | awk '$1 != "D" {print $2}')
+else
+    # diff files between PR and latest commit on Travis CI.
+    branch_ref=$(git rev-parse "$TRAVIS_BRANCH")
+    head_ref=$(git rev-parse HEAD)
+    files=$(git diff --name-status $branch_ref $head_ref | awk '$1 != "D" {print $2}')
+fi
+
 # The trick to remove deleted files: https://stackoverflow.com/a/2413151
-for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'); do
-    if [[ $file =~ ^(paddle/legacy/api/.*|paddle/legacy/capi/.*|paddle/contrib/.*|paddle/legacy/cuda/.*|paddle/legacy/function/.*|paddle/legacy/gserver/.*|paddle/legacy/math/.*|paddle/legacy/optimizer/.*|paddle/legacy/parameter/.*|paddle/legacy/pserver/.*|paddle/legacy/trainer/.*|paddle/legacy/utils/.*|paddle/testing/TestUtil.*|patches/grpc/.*) ]]; then
+for file in $files; do
+    if [[ $file =~ ^(patches/grpc/.*) ]]; then
         continue;
     else
         cpplint --filter=-readability/fn_size $file;
@@ -13,4 +25,3 @@ for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'); do
 done
 
-
 exit $TOTAL_ERRORS
@@ -26,4 +26,10 @@ for each_diff in result:
     print(each_diff)
 
 if error:
+    print(
+        '''If you modify/add/delete the API files, including code and comment, please follow these steps in order to pass the CI:
+        1. cd ${paddle_path}, compile paddle;
+        2. pip install build/python/dist/(build whl package);
+        3. run "python tools/print_signatures.py paddle.fluid, paddle.reader > paddle/fluid/API.spec"'''
+    )
     sys.exit(1)
@@ -24,12 +24,19 @@ import inspect
 import collections
 import sys
 import pydoc
+import hashlib
 
 member_dict = collections.OrderedDict()
 
 experimental_namespace = {"paddle.fluid.imperative"}
 
 
+def md5(doc):
+    hash = hashlib.md5()
+    hash.update(str(doc).encode('utf-8'))
+    return hash.hexdigest()
+
+
 def visit_member(parent_name, member):
     cur_name = ".".join([parent_name, member.__name__])
     if inspect.isclass(member):
@@ -39,7 +46,10 @@ def visit_member(parent_name, member):
             visit_member(cur_name, value)
     elif callable(member):
         try:
-            member_dict[cur_name] = inspect.getargspec(member)
+            doc = ('document', md5(member.__doc__))
+            args = inspect.getargspec(member)
+            all = (args, doc)
+            member_dict[cur_name] = all
         except TypeError:  # special for PyBind method
             member_dict[cur_name] = " ".join([
                 line.strip() for line in pydoc.render_doc(member).split('\n')
...
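With this change each API.spec entry becomes an (argspec, doc-hash) pair, so both signature and documentation drift show up in the spec diff. A standalone sketch of that record (fingerprint and sample are illustrative names; getargspec is fine on the Python versions this tool targets):

import hashlib
import inspect


def fingerprint(member):
    doc = ('document', hashlib.md5(str(member.__doc__).encode('utf-8')).hexdigest())
    args = inspect.getargspec(member)  # removed in Python 3.11+
    return (args, doc)


def sample(a, b=1):
    """Toy function."""
    return a + b


print(fingerprint(sample))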