Commit e70b1727 authored by Qiao Longfei

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add-async-ssa-graph-executor

.pre-commit-config.yaml
@@ -42,12 +42,6 @@ repos:
entry: bash ./tools/codestyle/pylint_pre_commit.hook
language: system
files: \.(py)$
- repo: https://github.com/PaddlePaddle/pre-commit-golang
sha: 8337620115c25ff8333f1b1a493bd031049bd7c0
hooks:
- id: go-fmt
types:
- go
- repo: local
hooks:
- id: copyright_checker
......
cmake/external/mklml.cmake
@@ -39,8 +39,10 @@ IF(WIN32)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.lib)
SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/mklml.dll)
SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.dll)
ELSE()
SET(MKLML_VER "mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
ELSE()
#TODO(intel-huying):
# Now enable Erf function in mklml library temporarily, it will be updated as official version later.
SET(MKLML_VER "Glibc225_vsErf_mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so)
......
cmake/external/ngraph.cmake
@@ -37,7 +37,7 @@ INCLUDE(GNUInstallDirs)
INCLUDE(ExternalProject)
SET(NGRAPH_PROJECT "extern_ngraph")
SET(NGRAPH_GIT_TAG "20bd8bbc79ae3a81c57313846a2be7313e5d1dab")
SET(NGRAPH_GIT_TAG "a444f7a959b7d87f2c117c9b57a4c387759e481e")
SET(NGRAPH_SOURCES_DIR ${THIRD_PARTY_PATH}/ngraph)
SET(NGRAPH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/ngraph)
SET(NGRAPH_INC_DIR ${NGRAPH_INSTALL_DIR}/include)
@@ -69,7 +69,7 @@ ExternalProject_Add(
CMAKE_ARGS -DNGRAPH_DEX_ONLY=TRUE
CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
CMAKE_ARGS -DMKLDNN_INCLUDE_DIR=${MKLDNN_INC_DIR}
CMAKE_ARGS -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/lib
CMAKE_ARGS -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}
CMAKE_ARGS -DMKLML_LIB_DIR=${MKLML_INSTALL_DIR}/lib
)
......
cmake/operators.cmake
@@ -153,7 +153,11 @@ function(op_library TARGET)
# pybind USE_OP_DEVICE_KERNEL for CUDNN
list(LENGTH cudnn_cu_cc_srcs cudnn_cu_cc_srcs_len)
if (WITH_GPU AND ${cudnn_cu_cc_srcs_len} GREATER 0)
if(${TARGET} STREQUAL "activation")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n")
else()
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
endif()
endif()
# pybind USE_OP_DEVICE_KERNEL for MIOPEN
@@ -168,6 +172,9 @@ function(op_library TARGET)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, MKLDNN);\n")
elseif(${MKLDNN_FILE} STREQUAL "conv_mkldnn_op")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);\n")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, S8);\n")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, U8);\n")
else()
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MKLDNN);\n")
endif()
......
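For readers who do not follow CMake, below is a hypothetical Python mirror of the two op_library() branches above. It only restates which USE_OP_DEVICE_KERNEL lines get appended to the auto-generated pybind registration file; the authoritative logic is the CMake shown in the diff, and the helper names here are invented for illustration.

```python
# Hypothetical Python mirror of the op_library() pybind branches above.
# The real logic is CMake (file(APPEND ${pybind_file} ...)); this sketch only
# restates which registration macros end up in the generated pybind source.

def cudnn_pybind_lines(target):
    # The 'activation' target is special-cased: pybind must reference relu,
    # the kernel its CUDNN source actually registers, not 'activation'.
    op = 'relu' if target == 'activation' else target
    return ['USE_OP_DEVICE_KERNEL(%s, CUDNN);' % op]

def mkldnn_pybind_lines(target, mkldnn_file):
    if mkldnn_file == 'activation_mkldnn_op':
        return ['USE_OP_DEVICE_KERNEL(relu, MKLDNN);']
    if mkldnn_file == 'conv_mkldnn_op':
        # conv2d now registers one kernel per data type (FP32 plus the new
        # INT8 S8/U8 variants), hence USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE.
        return ['USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, %s);' % t
                for t in ('FP32', 'S8', 'U8')]
    return ['USE_OP_DEVICE_KERNEL(%s, MKLDNN);' % target]

print(cudnn_pybind_lines('activation'))          # ['USE_OP_DEVICE_KERNEL(relu, CUDNN);']
print(mkldnn_pybind_lines('conv2d', 'conv_mkldnn_op'))
```

paddle/fluid/API.spec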
paddle.fluid.Program.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.block ArgSpec(args=['self', 'index'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.clone ArgSpec(args=['self', 'for_test'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.Program.current_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.global_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.list_vars ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.parse_from_string ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.to_string ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.default_startup_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.default_main_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.program_guard ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.name_scope ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.Executor.__init__ ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Executor.close ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Executor.run ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False))
paddle.fluid.global_scope ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.scope_guard ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False))
paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.Program.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.Program.block (ArgSpec(args=['self', 'index'], varargs=None, keywords=None, defaults=None), ('document', 'af5346376065ff4cf6832a8ac0ae0945'))
paddle.fluid.Program.clone (ArgSpec(args=['self', 'for_test'], varargs=None, keywords=None, defaults=(False,)), ('document', 'ebb7765b2962bd2be041d19720e49d0f'))
paddle.fluid.Program.current_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '5e162d3bf8dd625703463d9e4be36adb'))
paddle.fluid.Program.global_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'cfb7e05a002b2e64650778cabde7301c'))
paddle.fluid.Program.list_vars (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '1c8647b14fe57c7824b1c9562394dd3c'))
paddle.fluid.Program.parse_from_string (ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None), ('document', 'b6a7ffb239a30bf2ce58cfaca8d8b8d5'))
paddle.fluid.Program.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', 'faec17e5a04af28e3776160e34504d15'))
paddle.fluid.default_startup_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '99e5d53d92d82797093332719c9e3ccd'))
paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '5430f54ab4895f9f47db6bebbaf71659'))
paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b54f403e57825a1592aece03afe3afb6'))
paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '0ef753f5cec69fef9ae6ad8b867b33a2'))
paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '78e512cabeda9c7f42cb7c7e88967ae7'))
paddle.fluid.Executor.run (ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)), ('document', 'aba8093edebf2d5c869b735b92811e45'))
paddle.fluid.global_scope (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'e148d3ab1ed8edf3e928212a375959c0'))
paddle.fluid.scope_guard (ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None), ('document', 'b94d1f6bcc29c4fb58fc0058561250c2'))
paddle.fluid.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.DistributeTranspiler.get_pserver_program (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '292ab72977afbe58e6a3bde175452680'))
paddle.fluid.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '78f4949aedf317666a89ca74b3748ba8'))
paddle.fluid.DistributeTranspiler.get_startup_program (ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'd796fc0c8d51503b556fcf6dc15c4f0c'))
paddle.fluid.DistributeTranspiler.get_trainer_program (ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)), ('document', '736330e31a7a54abccc0c7fd9119d9ff'))
paddle.fluid.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '06ce55338dfe96311ad1078235ab3bf4'))
paddle.fluid.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', 'eda17d0f1639bc6ca215cecf87f588a4'))
paddle.fluid.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ac4114d3df16264f1946deb3a8434a6f'))
paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True))
paddle.fluid.create_lod_tensor ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None)
paddle.fluid.create_random_int_lodtensor ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DataFeedDesc.__init__ ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DataFeedDesc.desc ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DataFeedDesc.set_batch_size ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DataFeedDesc.set_dense_slots ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DataFeedDesc.set_use_slots ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.AsyncExecutor.__init__ ArgSpec(args=['self', 'place', 'run_mode'], varargs=None, keywords=None, defaults=(None, ''))
paddle.fluid.AsyncExecutor.config_distributed_nodes ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.AsyncExecutor.download_data ArgSpec(args=['self', 'afs_path', 'local_path', 'fs_default_name', 'ugi', 'file_cnt', 'hadoop_home', 'process_num'], varargs=None, keywords=None, defaults=('$HADOOP_HOME', 12))
paddle.fluid.AsyncExecutor.get_instance ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.AsyncExecutor.init_model ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.AsyncExecutor.init_server ArgSpec(args=['self', 'dist_desc'], varargs=None, keywords=None, defaults=None)
paddle.fluid.AsyncExecutor.init_worker ArgSpec(args=['self', 'dist_desc', 'startup_program'], varargs=None, keywords=None, defaults=None)
paddle.fluid.AsyncExecutor.run ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'mode', 'debug'], varargs=None, keywords=None, defaults=('', False))
paddle.fluid.AsyncExecutor.save_model ArgSpec(args=['self', 'save_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.AsyncExecutor.stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.CompiledProgram.__init__ ArgSpec(args=['self', 'program_or_graph'], varargs=None, keywords=None, defaults=None)
paddle.fluid.CompiledProgram.with_data_parallel ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.CompiledProgram.with_inference_optimize ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=None)
paddle.fluid.ParallelExecutor.__init__ (ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.ParallelExecutor.run (ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '2cb4bd74481861345c70228a0f57620c'))
paddle.fluid.create_lod_tensor (ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None), ('document', '8e7bb21e83ff4604f5b379672e285b94'))
paddle.fluid.create_random_int_lodtensor (ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None), ('document', '368f638b99f1dfe59e9b02aa6f077752'))
paddle.fluid.DataFeedDesc.__init__ (ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.DataFeedDesc.desc (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '4294493e31c4bc9fc4bd48753044235f'))
paddle.fluid.DataFeedDesc.set_batch_size (ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '8d9f44601e0a99dd431f14fd9250cd21'))
paddle.fluid.DataFeedDesc.set_dense_slots (ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None), ('document', 'eb894b464bbcd1b4bc8038398954f766'))
paddle.fluid.DataFeedDesc.set_use_slots (ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None), ('document', '415c56600ce4e198c071cad01409a690'))
paddle.fluid.AsyncExecutor.__init__ (ArgSpec(args=['self', 'place', 'run_mode'], varargs=None, keywords=None, defaults=(None, '')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.AsyncExecutor.config_distributed_nodes (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '4810dbe1870452f16b3c60b6c5fd1459'))
paddle.fluid.AsyncExecutor.download_data (ArgSpec(args=['self', 'afs_path', 'local_path', 'fs_default_name', 'ugi', 'file_cnt', 'hadoop_home', 'process_num'], varargs=None, keywords=None, defaults=('$HADOOP_HOME', 12)), ('document', '799a2066cc26819f1ed31f47c15ad083'))
paddle.fluid.AsyncExecutor.get_instance (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f8688f76a2db1243c7097a60c507b182'))
paddle.fluid.AsyncExecutor.init_model (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '504f39be2007404a17e5cabea1256c7d'))
paddle.fluid.AsyncExecutor.init_server (ArgSpec(args=['self', 'dist_desc'], varargs=None, keywords=None, defaults=None), ('document', 'c403ab46c5d3ef25c0f7e94ae75dcb68'))
paddle.fluid.AsyncExecutor.init_worker (ArgSpec(args=['self', 'dist_desc', 'startup_program'], varargs=None, keywords=None, defaults=None), ('document', 'dcf08f4bf2f3282acf11391f5d39c536'))
paddle.fluid.AsyncExecutor.run (ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'mode', 'debug'], varargs=None, keywords=None, defaults=('', False)), ('document', '848fc53484e8326f6325feea87fe955c'))
paddle.fluid.AsyncExecutor.save_model (ArgSpec(args=['self', 'save_path'], varargs=None, keywords=None, defaults=None), ('document', 'c8ac0dfcb3b187aba25d03af7fea56b2'))
paddle.fluid.AsyncExecutor.stop (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '5f23d043607bb5d55e466ec3f578e093'))
paddle.fluid.CompiledProgram.__init__ (ArgSpec(args=['self', 'program_or_graph'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.CompiledProgram.with_data_parallel (ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'e1af7fd53cf868554f312779fc803864'))
paddle.fluid.CompiledProgram.with_inference_optimize (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=None), ('document', '9e5b009d850191a010e859189c127fd8'))
paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.ExecutionStrategy) -> None
paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.GradientScaleStrategy, arg0: int) -> None
paddle.fluid.BuildStrategy.ReduceStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.ReduceStrategy, arg0: int) -> None
paddle.fluid.BuildStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy) -> None
paddle.fluid.io.save_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.io.save_params ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.io.save_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.io.load_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.io.load_params ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.io.load_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.io.save_inference_model ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True))
paddle.fluid.io.load_inference_model ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.initializer.ConstantInitializer.__init__ ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False))
paddle.fluid.initializer.UniformInitializer.__init__ ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0))
paddle.fluid.initializer.NormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0))
paddle.fluid.initializer.TruncatedNormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0))
paddle.fluid.initializer.XavierInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'fan_out', 'seed'], varargs=None, keywords=None, defaults=(True, None, None, 0))
paddle.fluid.initializer.BilinearInitializer.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.initializer.MSRAInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'seed'], varargs=None, keywords=None, defaults=(True, None, 0))
paddle.fluid.initializer.force_init_on_cpu ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.initializer.init_on_cpu ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.initializer.NumpyArrayInitializer.__init__ ArgSpec(args=['self', 'value'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.fc ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', 'act', 'is_test', 'name'], varargs=None, keywords=None, defaults=(1, None, None, None, False, None))
paddle.fluid.layers.embedding ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32'))
paddle.fluid.layers.dynamic_lstm ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None))
paddle.fluid.layers.dynamic_lstmp ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None, None, None, None, None))
paddle.fluid.layers.dynamic_gru ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False))
paddle.fluid.layers.gru_unit ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False))
paddle.fluid.layers.linear_chain_crf ArgSpec(args=['input', 'label', 'param_attr'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.crf_decoding ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.cos_sim ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.cross_entropy ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100))
paddle.fluid.layers.bpr_loss ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.square_error_cost ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.chunk_eval ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_conv ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None, None))
paddle.fluid.layers.conv2d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None))
paddle.fluid.layers.conv3d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None))
paddle.fluid.layers.sequence_pool ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None))
paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
paddle.fluid.layers.adaptive_pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None))
paddle.fluid.layers.adaptive_pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None))
paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False))
paddle.fluid.layers.data_norm ArgSpec(args=['input', 'act', 'epsilon', 'param_attr', 'data_layout', 'in_place', 'use_mkldnn', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var'], varargs=None, keywords=None, defaults=(None, 1e-05, None, 'NCHW', False, False, None, None, None, False))
paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
paddle.fluid.layers.conv3d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
paddle.fluid.layers.sequence_expand ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None))
paddle.fluid.layers.sequence_expand_as ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_pad ArgSpec(args=['x', 'pad_value', 'maxlen', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.sequence_unpad ArgSpec(args=['x', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.lstm_unit ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None))
paddle.fluid.layers.reduce_sum ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
paddle.fluid.layers.reduce_mean ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
paddle.fluid.layers.reduce_max ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
paddle.fluid.layers.reduce_min ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
paddle.fluid.layers.reduce_prod ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
paddle.fluid.layers.sequence_first_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.sequence_last_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.sequence_slice ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.dropout ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 'downgrade_in_infer'))
paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None))
paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.edit_distance ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None))
paddle.fluid.layers.l2_normalize ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None))
paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None))
paddle.fluid.layers.topk ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, False, False))
paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False))
paddle.fluid.layers.sampled_softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'num_samples', 'num_true', 'remove_accidental_hits', 'use_customized_samples', 'customized_samples', 'customized_probabilities', 'seed'], varargs=None, keywords=None, defaults=(1, True, False, None, None, 0))
paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False))
paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False))
paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None))
paddle.fluid.layers.group_norm ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None))
paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, False, False))
paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.autoincreased_step_counter ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1))
paddle.fluid.layers.reshape ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, False, None))
paddle.fluid.layers.squeeze ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.unsqueeze ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.lod_reset ArgSpec(args=['x', 'y', 'target_lod'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.lrn ArgSpec(args=['input', 'n', 'k', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(5, 1.0, 0.0001, 0.75, None))
paddle.fluid.layers.pad ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None))
paddle.fluid.layers.pad_constant_like ArgSpec(args=['x', 'y', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None))
paddle.fluid.layers.label_smooth ArgSpec(args=['label', 'prior_dist', 'epsilon', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 0.1, 'float32', None))
paddle.fluid.layers.roi_pool ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0))
paddle.fluid.layers.roi_align ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None))
paddle.fluid.layers.dice_loss ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,))
paddle.fluid.layers.image_resize ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1))
paddle.fluid.layers.image_resize_short ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',))
paddle.fluid.layers.resize_bilinear ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1))
paddle.fluid.layers.resize_nearest ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners'], varargs=None, keywords=None, defaults=(None, None, None, None, True))
paddle.fluid.layers.gather ArgSpec(args=['input', 'index'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.scatter ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_scatter ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.random_crop ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.mean_iou ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.relu ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.selu ArgSpec(args=['x', 'scale', 'alpha', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.log ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.margin_rank_loss ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None))
paddle.fluid.layers.elu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
paddle.fluid.layers.relu6 ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None))
paddle.fluid.layers.pow ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
paddle.fluid.layers.stanh ArgSpec(args=['x', 'scale_a', 'scale_b', 'name'], varargs=None, keywords=None, defaults=(0.6666666666666666, 1.7159, None))
paddle.fluid.layers.hard_sigmoid ArgSpec(args=['x', 'slope', 'offset', 'name'], varargs=None, keywords=None, defaults=(0.2, 0.5, None))
paddle.fluid.layers.swish ArgSpec(args=['x', 'beta', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
paddle.fluid.layers.prelu ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.brelu ArgSpec(args=['x', 't_min', 't_max', 'name'], varargs=None, keywords=None, defaults=(0.0, 24.0, None))
paddle.fluid.layers.leaky_relu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(0.02, None))
paddle.fluid.layers.soft_relu ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(40.0, None))
paddle.fluid.layers.flatten ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None))
paddle.fluid.layers.sequence_mask ArgSpec(args=['x', 'maxlen', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 'int64', None))
paddle.fluid.layers.stack ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,))
paddle.fluid.layers.pad2d ArgSpec(args=['input', 'paddings', 'mode', 'pad_value', 'data_format', 'name'], varargs=None, keywords=None, defaults=([0, 0, 0, 0], 'constant', 0.0, 'NCHW', None))
paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None))
paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None))
paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None))
paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None))
paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None))
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None))
paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0))
paddle.fluid.layers.gaussian_random ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32'))
paddle.fluid.layers.sampling_id ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32'))
paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32'))
paddle.fluid.layers.sum ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.slice ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.shape ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.logical_and ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.logical_or ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.logical_xor ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.logical_not ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.clip ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.clip_by_norm ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None))
paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'ignore_index', 'name', 'normalize'], varargs=None, keywords=None, defaults=(-100, None, False))
paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.space_to_depth ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.affine_grid ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_reverse ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None))
paddle.fluid.layers.similarity_focus ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None))
paddle.fluid.layers.grid_sampler ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.log_loss ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None))
paddle.fluid.layers.add_position_encoding ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.bilinear_tensor_product ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.layers.merge_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.get_tensor_from_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.lstm ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1))
paddle.fluid.layers.shuffle_channel ArgSpec(args=['x', 'group', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.py_func ArgSpec(args=['func', 'x', 'out', 'backward_func', 'skip_vars_in_backward_input'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.psroi_pool ArgSpec(args=['input', 'rois', 'output_channels', 'spatial_scale', 'pooled_height', 'pooled_width', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.teacher_student_sigmoid_loss ArgSpec(args=['input', 'label', 'soft_max_up_bound', 'soft_max_lower_bound'], varargs=None, keywords=None, defaults=(15.0, -15.0))
paddle.fluid.layers.huber_loss ArgSpec(args=['input', 'label', 'delta'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.tree_conv ArgSpec(args=['nodes_vector', 'edge_set', 'output_size', 'num_filters', 'max_depth', 'act', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 2, 'tanh', None, None, None))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.shuffle ArgSpec(args=['reader', 'buffer_size'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.batch ArgSpec(args=['reader', 'batch_size'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.double_buffer ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.random_data_generator ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.layers.py_reader ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True))
paddle.fluid.layers.create_py_reader_by_data ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True))
paddle.fluid.layers.Preprocessor.__init__ ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.Preprocessor.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Preprocessor.inputs ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Preprocessor.outputs ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None)
paddle.fluid.layers.load ArgSpec(args=['out', 'file_path', 'load_as_fp16'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.create_tensor ArgSpec(args=['dtype', 'name', 'persistable'], varargs=None, keywords=None, defaults=(None, False))
paddle.fluid.layers.create_parameter ArgSpec(args=['shape', 'dtype', 'name', 'attr', 'is_bias', 'default_initializer'], varargs=None, keywords=None, defaults=(None, None, False, None))
paddle.fluid.layers.create_global_var ArgSpec(args=['shape', 'value', 'dtype', 'persistable', 'force_cpu', 'name'], varargs=None, keywords=None, defaults=(False, False, None))
paddle.fluid.layers.cast ArgSpec(args=['x', 'dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.tensor_array_to_tensor ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None))
paddle.fluid.layers.concat ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.sums ArgSpec(args=['input', 'out'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.assign ArgSpec(args=['input', 'output'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.fill_constant_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'value', 'input_dim_idx', 'output_dim_idx'], varargs=None, keywords=None, defaults=(0, 0))
paddle.fluid.layers.fill_constant ArgSpec(args=['shape', 'dtype', 'value', 'force_cpu', 'out'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.layers.argmin ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,))
paddle.fluid.layers.argmax ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,))
paddle.fluid.layers.argsort ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(-1, None))
paddle.fluid.layers.ones ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.zeros ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.reverse ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.has_inf ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.has_nan ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.isfinite ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.layers.While.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Switch.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.Switch.case ArgSpec(args=['self', 'condition'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Switch.default ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.increment ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True))
paddle.fluid.layers.array_write ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.create_array ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.less_than ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None))
paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.array_read ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.array_length ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.IfElse.false_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.output ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None)
paddle.fluid.layers.IfElse.true_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.DynamicRNN.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.memory ArgSpec(args=['self', 'init', 'shape', 'value', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, False, 'float32'))
paddle.fluid.layers.DynamicRNN.output ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.static_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.update_memory ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.StaticRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.StaticRNN.memory ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1))
paddle.fluid.layers.StaticRNN.output ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None)
paddle.fluid.layers.StaticRNN.step ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.StaticRNN.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.StaticRNN.step_output ArgSpec(args=['self', 'o'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.StaticRNN.update_memory ArgSpec(args=['self', 'mem', 'var'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.reorder_lod_tensor_by_rank ArgSpec(args=['x', 'rank_table'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Print ArgSpec(args=['input', 'first_n', 'message', 'summarize', 'print_tensor_name', 'print_tensor_type', 'print_tensor_shape', 'print_tensor_lod', 'print_phase'], varargs=None, keywords=None, defaults=(-1, None, -1, True, True, True, True, 'both'))
paddle.fluid.layers.is_empty ArgSpec(args=['x', 'cond'], varargs=None, keywords='ignored', defaults=(None,))
paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.exp ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.tanh ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.tanh_shrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.softshrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sqrt ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.abs ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.ceil ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.floor ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.cos ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sin ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.round ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.reciprocal ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.square ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.softplus ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.softsign ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.uniform_random ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', -1.0, 1.0, 0))
paddle.fluid.layers.hard_shrink ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.cumsum ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.thresholded_relu ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.prior_box ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False))
paddle.fluid.layers.density_prior_box ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None))
paddle.fluid.layers.multi_box_head ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False))
paddle.fluid.layers.bipartite_match ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.target_assign ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.detection_output ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0))
paddle.fluid.layers.ssd_loss ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None))
paddle.fluid.layers.detection_map ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral'))
paddle.fluid.layers.rpn_target_assign ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True))
paddle.fluid.layers.anchor_generator ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None))
paddle.fluid.layers.roi_perspective_transform ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,))
paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True))
paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None))
paddle.fluid.layers.generate_mask_labels ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0))
paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.box_clip ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.multiclass_nms ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None))
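As a hedged usage sketch for the detection block above, iou_similarity pairs two box tensors; the [xmin, ymin, xmax, ymax] layout and the shapes here are illustrative:

import paddle.fluid as fluid

# Each row of x and y is one box; the op emits the pairwise IoU matrix.
boxes_a = fluid.layers.data(name='boxes_a', shape=[4], dtype='float32')
boxes_b = fluid.layers.data(name='boxes_b', shape=[4], dtype='float32')
iou = fluid.layers.iou_similarity(x=boxes_a, y=boxes_b)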
paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1))
paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.natural_exp_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.inverse_time_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.polynomial_decay ArgSpec(args=['learning_rate', 'decay_steps', 'end_learning_rate', 'power', 'cycle'], varargs=None, keywords=None, defaults=(0.0001, 1.0, False))
paddle.fluid.layers.piecewise_decay ArgSpec(args=['boundaries', 'values'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.noam_decay ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.append_LARS ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None)
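The decay helpers above each return a Variable holding the current learning rate, which an optimizer consumes directly. A minimal sketch:

import paddle.fluid as fluid

# Decay 0.1 by a factor of 0.9 every 1000 steps, in discrete (staircase) jumps.
lr = fluid.layers.exponential_decay(
    learning_rate=0.1, decay_steps=1000, decay_rate=0.9, staircase=True)
sgd = fluid.optimizer.SGD(learning_rate=lr)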
paddle.fluid.contrib.InitState.__init__ ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32'))
paddle.fluid.contrib.StateCell.__init__ ArgSpec(args=['self', 'inputs', 'states', 'out_state', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.StateCell.compute_state ArgSpec(args=['self', 'inputs'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.StateCell.get_input ArgSpec(args=['self', 'input_name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.StateCell.get_state ArgSpec(args=['self', 'state_name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.StateCell.out_state ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.StateCell.set_state ArgSpec(args=['self', 'state_name', 'state_value'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.StateCell.state_updater ArgSpec(args=['self', 'updater'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.StateCell.update_states ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.TrainingDecoder.__init__ ArgSpec(args=['self', 'state_cell', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.TrainingDecoder.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.TrainingDecoder.output ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None)
paddle.fluid.contrib.TrainingDecoder.static_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.TrainingDecoder.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.BeamSearchDecoder.__init__ ArgSpec(args=['self', 'state_cell', 'init_ids', 'init_scores', 'target_dict_dim', 'word_dim', 'input_var_dict', 'topk_size', 'sparse_emb', 'max_len', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=({}, 50, True, 100, 1, 1, None))
paddle.fluid.contrib.BeamSearchDecoder.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.BeamSearchDecoder.decode ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.BeamSearchDecoder.early_stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init', 'is_ids', 'is_scores'], varargs=None, keywords=None, defaults=(False, False))
paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.op_freq_statistic ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000))
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.contrib.Calibrator.__init__ ArgSpec(args=['self'], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.contrib.Calibrator.sample_data ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.Calibrator.save_int8_model ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
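A sketch of the QuantizeTranspiler flow implied by the signatures above; this is an assumption-laden outline, not a verified recipe, and train_prog/startup_prog carry only a toy forward pass:

import paddle.fluid as fluid
from paddle.fluid.contrib import QuantizeTranspiler

# Toy program standing in for a real training graph (assumption).
train_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(train_prog, startup_prog):
    x = fluid.layers.data(name='x', shape=[784], dtype='float32')
    y = fluid.layers.fc(input=x, size=10, act='softmax')

qt = QuantizeTranspiler(weight_bits=8, activation_bits=8,
                        activation_quantize_type='abs_max')
qt.training_transpile(train_prog, startup_prog)
# After training, one would call qt.freeze_program(...) and, optionally,
# qt.convert_to_int8(...) on the inference program.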
paddle.fluid.contrib.reader.ctr_reader.ctr_reader ArgSpec(args=['feed_dict', 'file_type', 'file_format', 'dense_slot_index', 'sparse_slot_index', 'capacity', 'thread_num', 'batch_size', 'file_list', 'slots', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.build_compressor ArgSpec(args=['place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'config'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None))
paddle.fluid.contrib.CompressPass.__init__ ArgSpec(args=['self', 'place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'program_exe'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None))
paddle.fluid.contrib.CompressPass.add_strategy ArgSpec(args=['self', 'strategy'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.CompressPass.apply ArgSpec(args=['self', 'graph'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.ImitationGraph.__init__ ArgSpec(args=['self', 'program'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.ImitationGraph.all_parameters ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.SensitivePruneStrategy.__init__ ArgSpec(args=['self', 'pruner', 'start_epoch', 'end_epoch', 'delta_rate', 'acc_loss_threshold', 'sensitivities'], varargs=None, keywords=None, defaults=(None, 0, 10, 0.2, 0.2, None))
paddle.fluid.contrib.SensitivePruneStrategy.on_batch_begin ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.SensitivePruneStrategy.on_batch_end ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.SensitivePruneStrategy.on_compress_begin ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.SensitivePruneStrategy.on_compress_end ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_begin ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_end ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.MagnitudePruner.__init__ ArgSpec(args=['self', 'threshold'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.MagnitudePruner.prune ArgSpec(args=['self', 'param', 'threshold'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.RatioPruner.__init__ ArgSpec(args=['self', 'ratios'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.RatioPruner.prune ArgSpec(args=['self', 'param', 'ratio'], varargs=None, keywords=None, defaults=(None,))
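The compression entries above suggest a pruner-plus-strategy pipeline. A speculative sketch under that reading; the ratios-dict format and the parameter name 'conv1_weights' are assumptions, not documented behavior:

from paddle.fluid.contrib import (CompressPass, RatioPruner,
                                  SensitivePruneStrategy)

# Prune half of one parameter's weights between epochs 0 and 10.
pruner = RatioPruner(ratios={'conv1_weights': 0.5})   # assumed dict format
strategy = SensitivePruneStrategy(pruner=pruner, start_epoch=0, end_epoch=10)
compress = CompressPass()          # all constructor args default to None
compress.add_strategy(strategy)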
paddle.fluid.contrib.load_persistables_for_increment ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.load_persistables_for_inference ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.convert_dist_to_sparse_program ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.__init__ ArgSpec(args=['self', 'hadoop_home', 'configs'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.delete ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.download ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'unzip'], varargs=None, keywords=None, defaults=(False, False))
paddle.fluid.contrib.HDFSClient.is_dir ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.HDFSClient.is_exist ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.HDFSClient.ls ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.lsr ArgSpec(args=['self', 'hdfs_path', 'only_file', 'sort'], varargs=None, keywords=None, defaults=(True, True))
paddle.fluid.contrib.HDFSClient.make_local_dirs ArgSpec(args=['local_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.makedirs ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.rename ArgSpec(args=['self', 'hdfs_src_path', 'hdfs_dst_path', 'overwrite'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.contrib.HDFSClient.upload ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'retry_times'], varargs=None, keywords=None, defaults=(False, 5))
paddle.fluid.contrib.multi_download ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,))
paddle.fluid.contrib.multi_upload ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True))
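A hedged HDFSClient sketch matching the signatures above; the hadoop_home path, endpoint, and credentials are placeholders:

from paddle.fluid.contrib import HDFSClient

configs = {
    'fs.default.name': 'hdfs://namenode:9000',   # placeholder endpoint
    'hadoop.job.ugi': 'user,passwd',             # placeholder credentials
}
client = HDFSClient(hadoop_home='/usr/local/hadoop', configs=configs)
if not client.is_exist('/remote/dir'):
    client.makedirs('/remote/dir')
client.upload('/remote/dir', '/local/file', overwrite=False, retry_times=5)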
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False))
paddle.fluid.transpiler.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.HashName.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.HashName.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.HashName.reset ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.RoundRobin.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.RoundRobin.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.RoundRobin.reset ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
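A minimal DistributeTranspiler sketch, assuming default_main_program() already contains a model with an optimizer applied; the endpoints are placeholders:

import paddle.fluid as fluid

# Split one program into per-role programs for distributed training.
t = fluid.transpiler.DistributeTranspiler()
t.transpile(trainer_id=0,
            pservers='127.0.0.1:6174,127.0.0.1:6175',
            trainers=2)
trainer_prog = t.get_trainer_program()
pserver_prog = t.get_pserver_program('127.0.0.1:6174')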
paddle.fluid.io.save_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'b55d6193a1d4198d45b013fc5779e1f2'))
paddle.fluid.io.save_params (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '3a7a99abac3e1bf898871fe609354218'))
paddle.fluid.io.save_persistables (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '9141bb5f32caf7975eb3fd88c8a1b2da'))
paddle.fluid.io.load_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '0a5308f496632ab1ec3ba1f1377e6f95'))
paddle.fluid.io.load_params (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '41779819cef32f2246e83aebc5a002e2'))
paddle.fluid.io.load_persistables (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '28df5bfe26ca7a077f91156abb0fe6d2'))
paddle.fluid.io.save_inference_model (ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True)), ('document', '582d87b8df75a5a639a107db8ff86f9c'))
paddle.fluid.io.load_inference_model (ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '7a5255386075dac3c75b7058254fcdcb'))
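The io pair above round-trips an inference model to disk. A self-contained sketch with a toy network and a local directory:

import paddle.fluid as fluid

exe = fluid.Executor(fluid.CPUPlace())
image = fluid.layers.data(name='image', shape=[784], dtype='float32')
prediction = fluid.layers.fc(input=image, size=10, act='softmax')
exe.run(fluid.default_startup_program())

# Persist the pruned inference graph plus its parameters, then load it back.
fluid.io.save_inference_model(dirname='./infer_model',
                              feeded_var_names=['image'],
                              target_vars=[prediction],
                              executor=exe)
program, feed_names, fetch_targets = fluid.io.load_inference_model(
    dirname='./infer_model', executor=exe)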
paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.initializer.UniformInitializer.__init__ (ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.initializer.NormalInitializer.__init__ (ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.initializer.TruncatedNormalInitializer.__init__ (ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.initializer.XavierInitializer.__init__ (ArgSpec(args=['self', 'uniform', 'fan_in', 'fan_out', 'seed'], varargs=None, keywords=None, defaults=(True, None, None, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.initializer.BilinearInitializer.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'd389912dc079cbef432335a00017cec0'))
paddle.fluid.initializer.MSRAInitializer.__init__ (ArgSpec(args=['self', 'uniform', 'fan_in', 'seed'], varargs=None, keywords=None, defaults=(True, None, 0)), ('document', '53c757bed9345f2ad3361902531e7cf5'))
paddle.fluid.initializer.force_init_on_cpu (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '6d0f3e22c90d9d500d36ff57daf056ee'))
paddle.fluid.initializer.init_on_cpu (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'a6d7011ca3d8c0d454dac3a56eae0c29'))
paddle.fluid.initializer.NumpyArrayInitializer.__init__ (ArgSpec(args=['self', 'value'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
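Initializers are attached to parameters through ParamAttr rather than called directly; a minimal sketch:

import paddle.fluid as fluid

# Initialize the fc weight with Xavier; bias keeps its default initializer.
x = fluid.layers.data(name='x', shape=[32], dtype='float32')
fc = fluid.layers.fc(
    input=x, size=64,
    param_attr=fluid.ParamAttr(
        initializer=fluid.initializer.XavierInitializer(uniform=True)))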
paddle.fluid.layers.fc (ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', 'act', 'is_test', 'name'], varargs=None, keywords=None, defaults=(1, None, None, None, False, None)), ('document', '1929058262994f212620599c63aea6bd'))
paddle.fluid.layers.embedding (ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32')), ('document', '89c2c55a0b0656b106064048e068e77a'))
paddle.fluid.layers.dynamic_lstm (ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None)), ('document', 'dfbb624f85015df29e994ca6999e8ff6'))
paddle.fluid.layers.dynamic_lstmp (ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None, None, None, None, None)), ('document', 'b4b608b986eb9617aa0525e1be21d32d'))
paddle.fluid.layers.dynamic_gru (ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False)), ('document', '4ec4845fd7d991bcac822f8b0dfc101f'))
paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False)), ('document', 'e0e2439f7af069b57badca18a6ba60b8'))
paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr'], varargs=None, keywords=None, defaults=(None,)), ('document', '7c49ef4bbf0adfd4b9a1d98e2e5f3fea'))
paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,)), ('document', '7642373ab65d3fc3b96d16d10fef1538'))
paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', 'd740824aa7316b807c4b4a3c6c8c0bbe'))
paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', '025b364dafb4b7975c801eb33e7831a1'))
paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '30add751a0f99347a6257634c03ff254'))
paddle.fluid.layers.square_error_cost (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', '44b6eef4a0f2bc15f7d9745782406736'))
paddle.fluid.layers.chunk_eval (ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ee152a7ba3036e7b9ede9184545179b4'))
paddle.fluid.layers.sequence_conv (ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None, None)), ('document', 'b6543768e1afaa2ecb869709d6e9c7e2'))
paddle.fluid.layers.conv2d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '8ca6121acd6d23cd8806a93f493c2e17'))
paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '37042620f9bd3a2da6e5d3138b2f724b'))
paddle.fluid.layers.sequence_pool (ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,)), ('document', 'a194fb80614023f543df3949fbd0d0b8'))
paddle.fluid.layers.sequence_softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '19ef6f9cdd27feac8a1ae060f19c10b4'))
paddle.fluid.layers.softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', 'f19dd380864e61134ce3814e4be0de4b'))
paddle.fluid.layers.pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', 'bbd84e855e660cd1084bb71a2fd0cdaa'))
paddle.fluid.layers.pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', '043de7333b79ee0ac55053c14ed81625'))
paddle.fluid.layers.adaptive_pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '859b887174d06f361658f69cb7c06d95'))
paddle.fluid.layers.adaptive_pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '120f4323a3d7ed9c0916f15a59f0e497'))
paddle.fluid.layers.batch_norm (ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)), ('document', 'c527b71b8a4c60dca8df8a745c2b598d'))
paddle.fluid.layers.data_norm (ArgSpec(args=['input', 'act', 'epsilon', 'param_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var'], varargs=None, keywords=None, defaults=(None, 1e-05, None, 'NCHW', False, None, None, None, False)), ('document', 'e45e09e65a2658e07cad987222f0d9ab'))
paddle.fluid.layers.beam_search_decode (ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b0b8d53821716cd50c42e09b593f3feb'))
paddle.fluid.layers.conv2d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)), ('document', '03993955ab1e6d3044c44e6f17fc85e9'))
paddle.fluid.layers.conv3d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)), ('document', 'ec113c6a3686ac94f8fccd1a7953d445'))
paddle.fluid.layers.sequence_expand (ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '79c375214fa427faac504043d162dae9'))
paddle.fluid.layers.sequence_expand_as (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '9d2611f84ab364c5da545e6a82f1770a'))
paddle.fluid.layers.sequence_pad (ArgSpec(args=['x', 'pad_value', 'maxlen', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6a1adf3067b20f6e4bcb354d71c19184'))
paddle.fluid.layers.sequence_unpad (ArgSpec(args=['x', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd12803c903c99aa36ec03aaac5f0cc5b'))
paddle.fluid.layers.lstm_unit (ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None)), ('document', '027723966f3ef0d7bc598f22287a96cc'))
paddle.fluid.layers.reduce_sum (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'b69998ce3ff4980fb21da0df05565f1b'))
paddle.fluid.layers.reduce_mean (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'd4d80dd98a1a5839f41eeb3a0f85f370'))
paddle.fluid.layers.reduce_max (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', '66a622db727551761ce4eb73eaa7f6a4'))
paddle.fluid.layers.reduce_min (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'd50ac552b5d131468ed466d08bb2d38c'))
paddle.fluid.layers.reduce_prod (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'fcd8301a0ce15f219c7a4bcd0c1e8eca'))
paddle.fluid.layers.sequence_first_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '2b290d3d77882bfe9bb8d331cac8cdd3'))
paddle.fluid.layers.sequence_last_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', 'c16a892f44f7fe71bfa5afc32d3f34ce'))
paddle.fluid.layers.sequence_slice (ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'fdcea0e8b5bc7d8d4b1b072c521014e6'))
paddle.fluid.layers.dropout (ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 'downgrade_in_infer')), ('document', 'dc7042734c6d8b8ce97321f017f01d6f'))
paddle.fluid.layers.split (ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '652625345c2acb900029c78cc75f8aa6'))
paddle.fluid.layers.ctc_greedy_decoder (ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ebbf2adbd79683dc93db03454dfa18c2'))
paddle.fluid.layers.edit_distance (ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None)), ('document', '97f0262f97602644c83142789d784571'))
paddle.fluid.layers.l2_normalize (ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None)), ('document', '6e428384ce6a77207fa2c70d9f011990'))
paddle.fluid.layers.matmul (ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None)), ('document', 'b4cbe1ac451005df6dad12e9ffdccca9'))
paddle.fluid.layers.topk (ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd3570c02f71bcd78e60b3f31dc8f5b32'))
paddle.fluid.layers.warpctc (ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, False, False)), ('document', 'aaba49c038ba927f0a8e45c0c9a686ab'))
paddle.fluid.layers.sequence_reshape (ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None), ('document', 'a10ab9bf88d4a7e328882d411abb6fd1'))
paddle.fluid.layers.transpose (ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a1feac48b843d679db82312dc85885f4'))
paddle.fluid.layers.im2sequence (ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None)), ('document', '3ce01160ede80b1c26f776f8fef9340f'))
paddle.fluid.layers.nce (ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False)), ('document', 'fddad4896dee5193e1cdf70882c2a347'))
paddle.fluid.layers.sampled_softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'num_samples', 'num_true', 'remove_accidental_hits', 'use_customized_samples', 'customized_samples', 'customized_probabilities', 'seed'], varargs=None, keywords=None, defaults=(1, True, False, None, None, 0)), ('document', '5db30b8a74e8c93687943a3e8d221da0'))
paddle.fluid.layers.hsigmoid (ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False)), ('document', '80641ee6810b1cdc3fd6e14fc89ecc9d'))
paddle.fluid.layers.beam_search (ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False)), ('document', 'b350b9a30a18e7efd7e1bb740eef6996'))
paddle.fluid.layers.row_conv (ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)), ('document', '17485788fffe4e2d36dc58c2ac8d174e'))
paddle.fluid.layers.multiplex (ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None), ('document', '013795af319e2e86d3506741941078ee'))
paddle.fluid.layers.layer_norm (ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)), ('document', 'de6a906950bae9f3c245cb744d22b94e'))
paddle.fluid.layers.group_norm (ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None)), ('document', '419c3a24a83cc89219a029cf4092788b'))
paddle.fluid.layers.softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, True, False)), ('document', 'bce1b75e3d95b75cacd1099655cbb3c3'))
paddle.fluid.layers.smooth_l1 (ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'c6b175d253c55baf4b9c0eca9b1dda88'))
paddle.fluid.layers.one_hot (ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None), ('document', '6148b6a555cbfb62fdcd030d8982c18c'))
paddle.fluid.layers.autoincreased_step_counter (ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1)), ('document', '3f6c828594720c9b2da89c464be94478'))
paddle.fluid.layers.reshape (ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, False, None)), ('document', '323c019f257e55ddea4a824a362de62f'))
paddle.fluid.layers.squeeze (ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '3229d06517f794e86ca3da14c38b1465'))
paddle.fluid.layers.unsqueeze (ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'bbd62da391b1df984a1909d069a759b2'))
paddle.fluid.layers.lod_reset (ArgSpec(args=['x', 'y', 'target_lod'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'f122194c562bd674f6ecdccf33785f99'))
paddle.fluid.layers.lrn (ArgSpec(args=['input', 'n', 'k', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(5, 1.0, 0.0001, 0.75, None)), ('document', '0795e9940e42dcd62953514ff7e09f77'))
paddle.fluid.layers.pad (ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)), ('document', '2f28153bdd2d5ea6f7bad5867bd03eeb'))
paddle.fluid.layers.pad_constant_like (ArgSpec(args=['x', 'y', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)), ('document', 'd2e1f45fef51b2c214e3f2aa8976c46c'))
paddle.fluid.layers.label_smooth (ArgSpec(args=['label', 'prior_dist', 'epsilon', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 0.1, 'float32', None)), ('document', '70c113658102a11cc5d8e3d45145737a'))
paddle.fluid.layers.roi_pool (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0)), ('document', 'c317aa595deb31649083c8faa91cdb97'))
paddle.fluid.layers.roi_align (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None)), ('document', '12c5bbb8b38c42e623fbc47611d766e1'))
paddle.fluid.layers.dice_loss (ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,)), ('document', '1ba0508d573f65feecf3564dce22aa1d'))
paddle.fluid.layers.image_resize (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1)), ('document', 'b3ecb819454832885c1f0f3ab9a5b938'))
paddle.fluid.layers.image_resize_short (ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',)), ('document', '06211aefc50c5a3e940d7204d859cdf7'))
paddle.fluid.layers.resize_bilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1)), ('document', 'e4fb4ed511b2293b8f04f7e872afbfd7'))
paddle.fluid.layers.resize_nearest (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners'], varargs=None, keywords=None, defaults=(None, None, None, None, True)), ('document', '735fa9758a6d7ff3b47d7b827f961c1d'))
paddle.fluid.layers.gather (ArgSpec(args=['input', 'index'], varargs=None, keywords=None, defaults=None), ('document', '98f1c86716b9b7f4dda83f20e2adeee2'))
paddle.fluid.layers.scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '65f8e9d8ddfd0b412f940579c4faa342'))
paddle.fluid.layers.sequence_scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '15b522457dfef103f0c20ca9d397678b'))
paddle.fluid.layers.random_crop (ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c9ab9e460ef0a1823249935a30e82c66'))
paddle.fluid.layers.mean_iou (ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None), ('document', '35cbbdfa585d027bb490707c95a176b9'))
paddle.fluid.layers.relu (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '866ffa1cc93f29e23662b526a7596537'))
paddle.fluid.layers.selu (ArgSpec(args=['x', 'scale', 'alpha', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '9044c7fe667b76cb2d9264f2db11f417'))
paddle.fluid.layers.log (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '98247c59d1c9b40af6730001b2aea73d'))
paddle.fluid.layers.crop (ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '883104791204d3127e24234bb630b2e7'))
paddle.fluid.layers.rank_loss (ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c542e39ac6add24a6bef6e79bf5617e2'))
paddle.fluid.layers.margin_rank_loss (ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None)), ('document', '6d19dcc19917080b7ff3e03bde451bc8'))
paddle.fluid.layers.elu (ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', '463258ee9f8b60760eb1e26357cc9bfa'))
paddle.fluid.layers.relu6 (ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None)), ('document', '6f367339caf6c7124bc262fe1475df70'))
paddle.fluid.layers.pow (ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', 'a5117c1eb84aca2ac0b0abab337a4799'))
paddle.fluid.layers.stanh (ArgSpec(args=['x', 'scale_a', 'scale_b', 'name'], varargs=None, keywords=None, defaults=(0.6666666666666666, 1.7159, None)), ('document', '959936a477efc6c1447a9c8bf8ce94bb'))
paddle.fluid.layers.hard_sigmoid (ArgSpec(args=['x', 'slope', 'offset', 'name'], varargs=None, keywords=None, defaults=(0.2, 0.5, None)), ('document', 'c82059b6fea1aa730f9aac911807b756'))
paddle.fluid.layers.swish (ArgSpec(args=['x', 'beta', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', 'ef745e55a48763ee7b46b21a81dc7e84'))
paddle.fluid.layers.prelu (ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'f6acef7ff7d887e49ff499fbb1dad4a9'))
paddle.fluid.layers.brelu (ArgSpec(args=['x', 't_min', 't_max', 'name'], varargs=None, keywords=None, defaults=(0.0, 24.0, None)), ('document', '3db337c195e156e6ef2b8b4a57113600'))
paddle.fluid.layers.leaky_relu (ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(0.02, None)), ('document', 'f878486c82b576938151daad0de995a0'))
paddle.fluid.layers.soft_relu (ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(40.0, None)), ('document', '869adce548c342d6cc1bd88a948d83c9'))
paddle.fluid.layers.flatten (ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'cb295c13cb957db85cd9609269d7784d'))
paddle.fluid.layers.sequence_mask (ArgSpec(args=['x', 'maxlen', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 'int64', None)), ('document', 'f0dd6eddd3bff015a3c05269d82fcbd8'))
paddle.fluid.layers.stack (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)), ('document', '367cfbb642839beacb5d117e2d2b4041'))
paddle.fluid.layers.pad2d (ArgSpec(args=['input', 'paddings', 'mode', 'pad_value', 'data_format', 'name'], varargs=None, keywords=None, defaults=([0, 0, 0, 0], 'constant', 0.0, 'NCHW', None)), ('document', '7f4d46320cc077ca2e8db600c35f4030'))
paddle.fluid.layers.unstack (ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None)), ('document', '98eb9d633116efcfc6f90c114bd44fd6'))
paddle.fluid.layers.sequence_enumerate (ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None)), ('document', 'f6028537085dc296103bbbd85fa7763d'))
paddle.fluid.layers.expand (ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '117d3607d1ffa0571835bbaebc7857ff'))
paddle.fluid.layers.sequence_concat (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '3a1d155dd1bf6e72a0a3e3e1519591d1'))
paddle.fluid.layers.scale (ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None)), ('document', '30190413b2fa442e7466d6cf2ce5ea07'))
paddle.fluid.layers.elementwise_add (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '6bfbe72cbadc95ac7ab88c05ed5bf9f0'))
paddle.fluid.layers.elementwise_div (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', 'cc6e6cc1cb942a152dde3ef08d5f165c'))
paddle.fluid.layers.elementwise_sub (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', 'a12abdab09c3e57af5a6e1e9f138684a'))
paddle.fluid.layers.elementwise_mul (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '422c77dbfcff355a57b5fdd4ec876daa'))
paddle.fluid.layers.elementwise_max (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', 'f0bb0b2c454541cfafa761021a5cc776'))
paddle.fluid.layers.elementwise_min (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '8a9cdefefbccbf9f6b0991c0946a21e9'))
paddle.fluid.layers.elementwise_pow (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '1aea4e197c552a284f83888a3c67a32e'))
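The elementwise_* family above shares the (x, y, axis=-1, act=None, name=None) signature; axis controls how a lower-rank y broadcasts into x, and act fuses an activation onto the result. A tiny sketch:

import paddle.fluid as fluid

x = fluid.layers.fill_constant(shape=[2, 3], dtype='float32', value=1.0)
y = fluid.layers.fill_constant(shape=[2, 3], dtype='float32', value=2.0)
# Same-shape add with a fused relu; axis only matters when y is lower-rank.
out = fluid.layers.elementwise_add(x, y, act='relu')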
paddle.fluid.layers.uniform_random_batch_size_like (ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0)), ('document', '129e0a3257f1d532a948eedf9d5bf671'))
paddle.fluid.layers.gaussian_random (ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')), ('document', '389dafe36e099841b6a7fb18d11f1b4c'))
paddle.fluid.layers.sampling_id (ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')), ('document', '840fdac643d1341c1cae218d4511dbb9'))
paddle.fluid.layers.gaussian_random_batch_size_like (ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32')), ('document', '840026b4766613c5705e06563cd103b6'))
paddle.fluid.layers.sum (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'a418e3ccb5e2ac21bd60f5cc221d5860'))
paddle.fluid.layers.slice (ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None), ('document', '01dbb91e7c74cb11336cd531013de51a'))
paddle.fluid.layers.shape (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '17db0f814eb7bb5a3fac1ca6e60e16d8'))
paddle.fluid.layers.logical_and (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'cdcf20c494c92060d10feb9374532f42'))
paddle.fluid.layers.logical_or (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '0eae3f726a4afe590757552fa3ced012'))
paddle.fluid.layers.logical_xor (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'b0daaa3fa4a0aa62f9b58c43d959eb25'))
paddle.fluid.layers.logical_not (ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'cd1c8cf31e040427d4e05711044caeb6'))
paddle.fluid.layers.clip (ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b020b7aab59719be98a4ae229a76deba'))
paddle.fluid.layers.clip_by_norm (ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a1ea0bc5a926f427458c4254ca022749'))
paddle.fluid.layers.mean (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd638d915195ce86a8d7963b81110d4c8'))
paddle.fluid.layers.mul (ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None)), ('document', 'ccd37fa6b53f074adbfb732d738c4c2d'))
paddle.fluid.layers.sigmoid_cross_entropy_with_logits (ArgSpec(args=['x', 'label', 'ignore_index', 'name', 'normalize'], varargs=None, keywords=None, defaults=(-100, None, False)), ('document', '180c284317ea45ef89a460d8d79c0b72'))
paddle.fluid.layers.maxout (ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '891870d069a6aea746d34cc53b61690c'))
paddle.fluid.layers.space_to_depth (ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5f207ae10589ebe38a63575ef6ff8e1e'))
paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '51def402b8910e163cbace9d0c0526ed'))
paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '77a6d80aa5551ca70324fc975c44507f'))
paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None)), ('document', '2f46f1ff39a13ab00857e7b9f44b2fa7'))
paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '70e3b5182a18b40b47ecabd7c8490a35'))
paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', '9bb77f8dc002dd2ce75d4769eaaf5007'))
paddle.fluid.layers.grid_sampler (ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd256cba1c41a5ed92ce3f31e24a2ca6d'))
paddle.fluid.layers.log_loss (ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)), ('document', '4b5a2341023afe63157a066c14254f98'))
paddle.fluid.layers.add_position_encoding (ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '4b9c2e8af5817937d831820874b5aa77'))
paddle.fluid.layers.bilinear_tensor_product (ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'aa7540a0fa73ff69a02e11b4091aab75'))
paddle.fluid.layers.merge_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'dc63315b84f591ac79ecca0c3632027a'))
paddle.fluid.layers.get_tensor_from_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '7ffc849e71f31dfe29030ff94e662de6'))
paddle.fluid.layers.lstm (ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1)), ('document', 'd5e6c494ac35100e2ed4d4bd9a1ed932'))
paddle.fluid.layers.shuffle_channel (ArgSpec(args=['x', 'group', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2fa6782d43d02ae64482d21235a82949'))
paddle.fluid.layers.py_func (ArgSpec(args=['func', 'x', 'out', 'backward_func', 'skip_vars_in_backward_input'], varargs=None, keywords=None, defaults=(None, None)), ('document', '8404e472ac12b4a30a505d3d3a3e5fdb'))
paddle.fluid.layers.psroi_pool (ArgSpec(args=['input', 'rois', 'output_channels', 'spatial_scale', 'pooled_height', 'pooled_width', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '1546136806fef5c08f6918544bd9151d'))
paddle.fluid.layers.teacher_student_sigmoid_loss (ArgSpec(args=['input', 'label', 'soft_max_up_bound', 'soft_max_lower_bound'], varargs=None, keywords=None, defaults=(15.0, -15.0)), ('document', '2f6ff96864054a31aa4bb659c6722c99'))
paddle.fluid.layers.huber_loss (ArgSpec(args=['input', 'label', 'delta'], varargs=None, keywords=None, defaults=None), ('document', '431a4301c35032166ec029f7432c80a7'))
paddle.fluid.layers.tree_conv (ArgSpec(args=['nodes_vector', 'edge_set', 'output_size', 'num_filters', 'max_depth', 'act', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 2, 'tanh', None, None, None)), ('document', '34ea12ac9f10a65dccbc50100d12e607'))
paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '33bbd42027d872b3818b3d64ec52e139'))
paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'b1ae2e1cc0750e58726374061ea90ecc'))
paddle.fluid.layers.read_file (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', 'b0a1c2fc51c27a106da28f3308c41f5e'))
paddle.fluid.layers.shuffle (ArgSpec(args=['reader', 'buffer_size'], varargs=None, keywords=None, defaults=None), ('document', 'f967a73426db26f970bc70bfb03cffca'))
paddle.fluid.layers.batch (ArgSpec(args=['reader', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', 'f563d376d35e1a4c4db100fd11b381a0'))
paddle.fluid.layers.double_buffer (ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '07e5b796674796eb1ef3fee9c10d24e3'))
paddle.fluid.layers.random_data_generator (ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,)), ('document', '9b7f0f86ec24bbc97643cadcb6499cff'))
paddle.fluid.layers.py_reader (ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '13dabc57863f62ab3141586784ee356b'))
paddle.fluid.layers.create_py_reader_by_data (ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True)), ('document', '350f74d93fab9adb2ac4950f1c26416b'))
paddle.fluid.layers.Preprocessor.__init__ (ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Preprocessor.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Preprocessor.inputs (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Preprocessor.outputs (ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
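A py_reader sketch for the reader block above; the shapes and dtypes are illustrative:

import paddle.fluid as fluid

# A double-buffered in-graph reader fed from Python.
reader = fluid.layers.py_reader(capacity=64,
                                shapes=[(-1, 3, 224, 224), (-1, 1)],
                                dtypes=['float32', 'int64'])
image, label = fluid.layers.read_file(reader)
# reader.decorate_paddle_reader(...) would then bind a Python generator.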
paddle.fluid.layers.load (ArgSpec(args=['out', 'file_path', 'load_as_fp16'], varargs=None, keywords=None, defaults=(None,)), ('document', '9d1a4bc97bbce9fa1d4f7a4200a771ff'))
paddle.fluid.layers.create_tensor (ArgSpec(args=['dtype', 'name', 'persistable'], varargs=None, keywords=None, defaults=(None, False)), ('document', 'c0c3d0194f83fff8ea99ce0820657dae'))
paddle.fluid.layers.create_parameter (ArgSpec(args=['shape', 'dtype', 'name', 'attr', 'is_bias', 'default_initializer'], varargs=None, keywords=None, defaults=(None, None, False, None)), ('document', 'd62b866c899bc1fedb5385f95b88e1f8'))
paddle.fluid.layers.create_global_var (ArgSpec(args=['shape', 'value', 'dtype', 'persistable', 'force_cpu', 'name'], varargs=None, keywords=None, defaults=(False, False, None)), ('document', 'ab914fac893607e29ac6e52bbdbea1a4'))
paddle.fluid.layers.cast (ArgSpec(args=['x', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '60cb8f843d625abf33f8bf12455b8f99'))
paddle.fluid.layers.tensor_array_to_tensor (ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'b12717d3d4567e6119589f7f655b0cbb'))
paddle.fluid.layers.concat (ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(0, None)), ('document', 'b19b79be4f05e85d1d6cec642c9fb535'))
paddle.fluid.layers.sums (ArgSpec(args=['input', 'out'], varargs=None, keywords=None, defaults=(None,)), ('document', '42912092418620b4be07f36af31e7816'))
paddle.fluid.layers.assign (ArgSpec(args=['input', 'output'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b690184f3537df5501e4d9d8f31152a5'))
paddle.fluid.layers.fill_constant_batch_size_like (ArgSpec(args=['input', 'shape', 'dtype', 'value', 'input_dim_idx', 'output_dim_idx'], varargs=None, keywords=None, defaults=(0, 0)), ('document', 'd4059a2f5763036b07018d76429f9acb'))
paddle.fluid.layers.fill_constant (ArgSpec(args=['shape', 'dtype', 'value', 'force_cpu', 'out'], varargs=None, keywords=None, defaults=(False, None)), ('document', '1d8b14729639fa38509c79b9784740fa'))
paddle.fluid.layers.argmin (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)), ('document', '2778a1d34be49263a51211885599ea37'))
paddle.fluid.layers.argmax (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)), ('document', '04114996cfb98994ba222804a1a6109f'))
paddle.fluid.layers.argsort (ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '68ec45c6fb6b93e47de9c9a0945fb98e'))
paddle.fluid.layers.ones (ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)), ('document', 'b402489c62e668df42e7daceb63c142b'))
paddle.fluid.layers.zeros (ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)), ('document', 'c155e2efc56ffa5ed4658cca0272e491'))
paddle.fluid.layers.reverse (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None), ('document', '8ee7cb6ca639e7460e825f953b65d94d'))
paddle.fluid.layers.has_inf (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '8f8c0306117ea441f20dcbbdba1f0ecc'))
paddle.fluid.layers.has_nan (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '2e53e83127dbfd86e7098bdfe9a549e8'))
paddle.fluid.layers.isfinite (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '0a437011c3906079fd8947ed3e52d292'))
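Illustrative sketch (not part of the spec file): chaining the tensor creation and manipulation ops listed above; shapes and values are arbitrary.

    import paddle.fluid as fluid

    ones = fluid.layers.fill_constant(shape=[2, 3], dtype='float32', value=1.0)
    zeros = fluid.layers.zeros(shape=[2, 3], dtype='float32')
    both = fluid.layers.concat([ones, zeros], axis=0)   # shape [4, 3]
    as_f64 = fluid.layers.cast(both, dtype='float64')
    col = fluid.layers.argmax(as_f64, axis=1)
    ok = fluid.layers.isfinite(as_f64)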
paddle.fluid.layers.While.__init__ (ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.While.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Switch.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Switch.case (ArgSpec(args=['self', 'condition'], varargs=None, keywords=None, defaults=None), ('document', 'f7c7160014c1b46cfeda9dd5808d1789'))
paddle.fluid.layers.Switch.default (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '50853ae884df03d9c36703bb46d9ef07'))
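Illustrative sketch (not part of the spec file): the Switch control-flow block in its common learning-rate warmup pattern; the step counts and rates are assumptions.

    import paddle.fluid as fluid

    step = fluid.layers.fill_constant(shape=[1], dtype='float32', value=5.0)
    warmup = fluid.layers.fill_constant(shape=[1], dtype='float32', value=100.0)
    lr = fluid.layers.create_global_var(shape=[1], value=0.0, dtype='float32',
                                        persistable=True, name='lr')
    with fluid.layers.Switch() as switch:
        with switch.case(fluid.layers.less_than(step, warmup)):
            fluid.layers.assign(fluid.layers.fill_constant(
                shape=[1], dtype='float32', value=0.01), lr)
        with switch.default():
            fluid.layers.assign(fluid.layers.fill_constant(
                shape=[1], dtype='float32', value=0.1), lr)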
paddle.fluid.layers.increment (ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True)), ('document', '73bb96ec4783ec1a11e760e8851b0e77'))
paddle.fluid.layers.array_write (ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,)), ('document', '40b6d15f4c86b2b09df340d7778ad713'))
paddle.fluid.layers.create_array (ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None), ('document', '2d4f20087080ba5105b55205ad5c5b6a'))
paddle.fluid.layers.less_than (ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None)), ('document', '067bbc799c66289ca8b8924c26b6673f'))
paddle.fluid.layers.equal (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '80c29b1dc64718f0116de90d1ac88a77'))
paddle.fluid.layers.array_read (ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None), ('document', '0275133f1dde2aed528b4d3230edf823'))
paddle.fluid.layers.array_length (ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None), ('document', 'ffb8b9578ec66db565b223d313aa82a2'))
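Illustrative sketch (not part of the spec file): a While loop wired from less_than, increment, and the array ops above; the loop bound is arbitrary.

    import paddle.fluid as fluid

    i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0)
    limit = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
    arr = fluid.layers.create_array(dtype='int64')

    cond = fluid.layers.less_than(x=i, y=limit)
    while_op = fluid.layers.While(cond=cond)
    with while_op.block():
        fluid.layers.array_write(i, i=i, array=arr)      # arr[i] = i
        i = fluid.layers.increment(x=i, in_place=True)
        fluid.layers.less_than(x=i, y=limit, cond=cond)  # refresh loop condition
    n = fluid.layers.array_length(arr)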
paddle.fluid.layers.IfElse.__init__ (ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.IfElse.false_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.IfElse.input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.IfElse.output (ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.IfElse.true_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.DynamicRNN.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.DynamicRNN.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6d3e0a5d9aa519a9773a36e1620ea9b7'))
paddle.fluid.layers.DynamicRNN.memory (ArgSpec(args=['self', 'init', 'shape', 'value', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, False, 'float32')), ('document', 'b9174d4e91505b0c8ecc193eb51e248d'))
paddle.fluid.layers.DynamicRNN.output (ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None), ('document', 'b439a176a3328de8a75bdc5c08eece4a'))
paddle.fluid.layers.DynamicRNN.static_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', 'f29ad2478b6b2ad4f413d2936a331ea0'))
paddle.fluid.layers.DynamicRNN.step_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '169d694d2224f62b4f3afdc3dbc19e95'))
paddle.fluid.layers.DynamicRNN.update_memory (ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None), ('document', '5d83987da13b98363d6a807a52d8024f'))
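Illustrative sketch (not part of the spec file): a minimal DynamicRNN over a LoD sequence input; the variable name, hidden size, and activation are assumptions.

    import paddle.fluid as fluid

    sentence = fluid.layers.data(name='sentence', shape=[32], dtype='float32',
                                 lod_level=1)
    drnn = fluid.layers.DynamicRNN()
    with drnn.block():
        word = drnn.step_input(sentence)
        prev = drnn.memory(shape=[32], value=0.0)
        hidden = fluid.layers.fc(input=[word, prev], size=32, act='relu')
        drnn.update_memory(prev, hidden)
        drnn.output(hidden)
    last = fluid.layers.sequence_last_step(drnn())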
paddle.fluid.layers.StaticRNN.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.StaticRNN.memory (ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1)), ('document', 'c24e368e23afac1ed91a78a639d7a9c7'))
paddle.fluid.layers.StaticRNN.output (ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.StaticRNN.step (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.StaticRNN.step_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.StaticRNN.step_output (ArgSpec(args=['self', 'o'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.StaticRNN.update_memory (ArgSpec(args=['self', 'mem', 'var'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.reorder_lod_tensor_by_rank (ArgSpec(args=['x', 'rank_table'], varargs=None, keywords=None, defaults=None), ('document', '3545f529ef04e8f6ecb76b47fa3df01a'))
paddle.fluid.layers.Print (ArgSpec(args=['input', 'first_n', 'message', 'summarize', 'print_tensor_name', 'print_tensor_type', 'print_tensor_shape', 'print_tensor_lod', 'print_phase'], varargs=None, keywords=None, defaults=(-1, None, -1, True, True, True, True, 'both')), ('document', '5fef91b0e21c93610785f2b1f7161732'))
paddle.fluid.layers.is_empty (ArgSpec(args=['x', 'cond'], varargs=None, keywords='ignored', defaults=(None,)), ('document', 'bbe578dbb49ad13e15b014e98c22b519'))
paddle.fluid.layers.sigmoid (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '29a25ba78de79152076cacfc5443137d'))
paddle.fluid.layers.logsigmoid (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '81ccb7acafd06c7728e11581f5d342e3'))
paddle.fluid.layers.exp (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e6b3e769413d96aab4176f96db25984b'))
paddle.fluid.layers.tanh (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e9d586a0b5bd05f67ee78048f9d503b6'))
paddle.fluid.layers.tanh_shrink (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '1e521554b9fdda9061ec6d306f0709b7'))
paddle.fluid.layers.softshrink (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '9eef31597bbafa2bd49691e072296e13'))
paddle.fluid.layers.sqrt (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '072a8541e0f632366bba10f67cb0db27'))
paddle.fluid.layers.abs (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '64650ac42cf82e9920cb0b172b1d29fd'))
paddle.fluid.layers.ceil (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c75d67dc5fe28f68e4cfffead4f698ad'))
paddle.fluid.layers.floor (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '647b16c5da5ef909649ae02abb434973'))
paddle.fluid.layers.cos (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '485f2686bcc2fe37a4bd893769c8a3e2'))
paddle.fluid.layers.sin (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '01f1766aa76eff1df30147505b59f7c4'))
paddle.fluid.layers.round (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b47f5da13913d3e56bdb1e612a73f3f2'))
paddle.fluid.layers.reciprocal (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'cc6ac2f14f03c52aaa83a59bf83b8d26'))
paddle.fluid.layers.square (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '48dfb45d773dbc30126c3a7f777de5ee'))
paddle.fluid.layers.softplus (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '459c5781e9d1dd88283b7c5769d7872a'))
paddle.fluid.layers.softsign (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '80846bcd4bd457207457a6d5411f4148'))
paddle.fluid.layers.uniform_random (ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', -1.0, 1.0, 0)), ('document', '308b619af849caa82bbc31e897f5e641'))
paddle.fluid.layers.hard_shrink (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c142f5884f3255e0d6075c286bbd531e'))
paddle.fluid.layers.cumsum (ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '944d7c03057f5fc88bc78acd4d82f926'))
paddle.fluid.layers.thresholded_relu (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '90566ea449ea4c681435546e2f70610a'))
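Illustrative sketch (not part of the spec file): composing a few of the elementwise activations above on a random tensor; the shape and threshold are arbitrary.

    import paddle.fluid as fluid

    x = fluid.layers.uniform_random(shape=[2, 3], dtype='float32',
                                    min=-1.0, max=1.0)
    y = fluid.layers.softsign(fluid.layers.tanh(x))
    running = fluid.layers.cumsum(y, axis=1)
    out = fluid.layers.thresholded_relu(running, threshold=0.1)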
paddle.fluid.layers.prior_box (ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False)), ('document', '14cac0ee643fa6e026ad82aeeee75bd8'))
paddle.fluid.layers.density_prior_box (ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None)), ('document', 'a0d762bb08de9ce93bc780aa57cd5cd9'))
paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)), ('document', 'a6ab47a2fe681e52fabb7057ddf0efdd'))
paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '3ddb9b966f193900193a95a3df77c3c1'))
paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'c0b334f917828f95056f6ebe10907b1c'))
paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0)), ('document', 'c33093a82a46e3091e789e5572588db1'))
paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '6d5028fd09d01ab82d296adc0ea95aee'))
paddle.fluid.layers.detection_map (ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral')), ('document', '1467d91b50c22cd52103b4aa1ee9d0a1'))
paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', '1dddef3eb4b3cbd4df8e03ac480dbf97'))
paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)), ('document', '23337cc57bbf5be73884b6bd0f849603'))
paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,)), ('document', '5761f9ed83654314416e24372b33bb84'))
paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True)), ('document', '87863717edeb7fe87a1268976cbc015d'))
paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', '57ab49f3f324f310b7eed322e7c1057a'))
paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', 'f73706a65468e9ca3e0bee4a31521b0a'))
paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '587845f60c5d97ffdf2dfd21da52eca1'))
paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '032d0f4b7d8f6235ee5d91e473344f0e'))
paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '0e5ac2507723a0b5adec473f9556799b'))
paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '991e934c3e09abf0edec7c9c978b4691'))
paddle.fluid.layers.box_clip (ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '397e9e02b451d99c56e20f268fa03f2e'))
paddle.fluid.layers.multiclass_nms (ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)), ('document', 'ca7d1107b6c5d2d6d8221039a220fde0'))
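Illustrative sketch (not part of the spec file): generating prior boxes for a detection head from the prior_box signature above; the image size, filter count, and min_sizes are assumptions.

    import paddle.fluid as fluid

    image = fluid.layers.data(name='image', shape=[3, 300, 300], dtype='float32')
    conv = fluid.layers.conv2d(input=image, num_filters=16, filter_size=3)
    # One set of prior boxes per feature-map location, plus their variances.
    box, var = fluid.layers.prior_box(input=conv, image=image,
                                      min_sizes=[100.0], flip=True, clip=True)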
paddle.fluid.layers.accuracy (ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)), ('document', '9808534c12c5e739a10f73ebb0b4eafd'))
paddle.fluid.layers.auc (ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1)), ('document', 'e0e95334fce92d16c2d9db6e7caffc47'))
paddle.fluid.layers.exponential_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)), ('document', '98a5050bee8522fcea81aa795adaba51'))
paddle.fluid.layers.natural_exp_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)), ('document', '676a7bc2a218691db50bca233903d21e'))
paddle.fluid.layers.inverse_time_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)), ('document', 'd07e767d59c4a5e6c930f3e6756d3f82'))
paddle.fluid.layers.polynomial_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'end_learning_rate', 'power', 'cycle'], varargs=None, keywords=None, defaults=(0.0001, 1.0, False)), ('document', '882634f420f626642f0874481263da40'))
paddle.fluid.layers.piecewise_decay (ArgSpec(args=['boundaries', 'values'], varargs=None, keywords=None, defaults=None), ('document', 'c717d9d1d78a53c809d01b8bc56f3cae'))
paddle.fluid.layers.noam_decay (ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None), ('document', 'd9a95746353fd574be36dc28d8726c28'))
paddle.fluid.layers.append_LARS (ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None), ('document', 'd24fa1e7d62ac8a534fc6a86002f84f8'))
paddle.fluid.layers.cosine_decay (ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None), ('document', '9588c64c26ffaef3c466e404a6af9d9b'))
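Illustrative sketch (not part of the spec file): feeding a decayed learning-rate variable into an optimizer; the boundaries and values are assumptions.

    import paddle.fluid as fluid

    lr = fluid.layers.piecewise_decay(boundaries=[10000, 20000],
                                      values=[0.1, 0.01, 0.001])
    opt = fluid.optimizer.MomentumOptimizer(learning_rate=lr, momentum=0.9)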
paddle.fluid.contrib.InitState.__init__ (ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.StateCell.__init__ (ArgSpec(args=['self', 'inputs', 'states', 'out_state', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.StateCell.compute_state (ArgSpec(args=['self', 'inputs'], varargs=None, keywords=None, defaults=None), ('document', '92973b3f222081a1d17069c683cf4a99'))
paddle.fluid.contrib.StateCell.get_input (ArgSpec(args=['self', 'input_name'], varargs=None, keywords=None, defaults=None), ('document', '6f24a007cfa184e32f01a960703bfd70'))
paddle.fluid.contrib.StateCell.get_state (ArgSpec(args=['self', 'state_name'], varargs=None, keywords=None, defaults=None), ('document', '630a4945cfe659ea4f307598fbbce5d2'))
paddle.fluid.contrib.StateCell.out_state (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '7ad681dff0393ddf13a724194e720f28'))
paddle.fluid.contrib.StateCell.set_state (ArgSpec(args=['self', 'state_name', 'state_value'], varargs=None, keywords=None, defaults=None), ('document', 'd4e0e08cd5d9d9a571cbc52d114f5ae9'))
paddle.fluid.contrib.StateCell.state_updater (ArgSpec(args=['self', 'updater'], varargs=None, keywords=None, defaults=None), ('document', 'd5afe1b7665d94fb023b15cf913ca510'))
paddle.fluid.contrib.StateCell.update_states (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'fe0b0f1338723516a35a30247899c81b'))
paddle.fluid.contrib.TrainingDecoder.__init__ (ArgSpec(args=['self', 'state_cell', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.TrainingDecoder.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '98d88fa1c989748410a12517c6a585bf'))
paddle.fluid.contrib.TrainingDecoder.output (ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None), ('document', 'f0a457dee586559036202087ce2eff69'))
paddle.fluid.contrib.TrainingDecoder.static_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', 'a024c72664fe815068423ba630b7658a'))
paddle.fluid.contrib.TrainingDecoder.step_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '4659db7a888a2495e71c1838a0483909'))
paddle.fluid.contrib.BeamSearchDecoder.__init__ (ArgSpec(args=['self', 'state_cell', 'init_ids', 'init_scores', 'target_dict_dim', 'word_dim', 'input_var_dict', 'topk_size', 'sparse_emb', 'max_len', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=({}, 50, True, 100, 1, 1, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.BeamSearchDecoder.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '98d88fa1c989748410a12517c6a585bf'))
paddle.fluid.contrib.BeamSearchDecoder.decode (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '1e47c60f080c1343ebb6ceaef89656b2'))
paddle.fluid.contrib.BeamSearchDecoder.early_stop (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3a84a7454ed6707f79b9e954d92a7575'))
paddle.fluid.contrib.BeamSearchDecoder.read_array (ArgSpec(args=['self', 'init', 'is_ids', 'is_scores'], varargs=None, keywords=None, defaults=(False, False)), ('document', 'aa89eb8fd5e4cabaf5cc1bcae14665a4'))
paddle.fluid.contrib.BeamSearchDecoder.update_array (ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None), ('document', '5754e9b3212b7c09497151516a0de5a7'))
paddle.fluid.contrib.memory_usage (ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '8fcb2f93bb743693baa8d4860a5ccc47'))
paddle.fluid.contrib.op_freq_statistic (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '4d43687113c4bf5b29d15aee2f4e4afa'))
paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000)), ('document', '14b39f1fcd5667ff556b1aad94357d1d'))
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.QuantizeTranspiler.freeze_program (ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)), ('document', '909675a1ab055c69b436a7893fcae4fd'))
paddle.fluid.contrib.QuantizeTranspiler.training_transpile (ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6dd9909f10b283ba2892a99058a72884'))
paddle.fluid.contrib.Calibrator.__init__ (ArgSpec(args=['self'], varargs='args', keywords='kwargs', defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.Calibrator.sample_data (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3b8c85ca1e2cf753cc8c90a6c6992958'))
paddle.fluid.contrib.Calibrator.save_int8_model (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
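Illustrative sketch (not part of the spec file): applying QuantizeTranspiler for quantization-aware training; using the default programs here is an assumption about the surrounding setup.

    import paddle.fluid as fluid

    quant = fluid.contrib.QuantizeTranspiler(weight_bits=8, activation_bits=8,
                                             activation_quantize_type='abs_max')
    # Rewrites the programs in place to insert fake-quantize/dequantize ops.
    quant.training_transpile(fluid.default_main_program(),
                             fluid.default_startup_program())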
paddle.fluid.contrib.reader.ctr_reader.ctr_reader (ArgSpec(args=['feed_dict', 'file_type', 'file_format', 'dense_slot_index', 'sparse_slot_index', 'capacity', 'thread_num', 'batch_size', 'file_list', 'slots', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b2ebf3de2a6ef1af2c3b88d2db7591ab'))
paddle.fluid.contrib.build_compressor (ArgSpec(args=['place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'config'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.CompressPass.__init__ (ArgSpec(args=['self', 'place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'program_exe'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.CompressPass.add_strategy (ArgSpec(args=['self', 'strategy'], varargs=None, keywords=None, defaults=None), ('document', '3bf6010b6f47d3c86df0ec8957be95e0'))
paddle.fluid.contrib.CompressPass.apply (ArgSpec(args=['self', 'graph'], varargs=None, keywords=None, defaults=None), ('document', 'a92bf85d4b59bd4f2ac1706d7c4899a6'))
paddle.fluid.contrib.ImitationGraph.__init__ (ArgSpec(args=['self', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.ImitationGraph.all_parameters (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.__init__ (ArgSpec(args=['self', 'pruner', 'start_epoch', 'end_epoch', 'delta_rate', 'acc_loss_threshold', 'sensitivities'], varargs=None, keywords=None, defaults=(None, 0, 10, 0.2, 0.2, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_batch_begin (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_batch_end (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_compress_begin (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_compress_end (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_begin (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_end (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.MagnitudePruner.__init__ (ArgSpec(args=['self', 'threshold'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.MagnitudePruner.prune (ArgSpec(args=['self', 'param', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.RatioPruner.__init__ (ArgSpec(args=['self', 'ratios'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e7a81a325b296a9ca502ee5adb4fc85d'))
paddle.fluid.contrib.RatioPruner.prune (ArgSpec(args=['self', 'param', 'ratio'], varargs=None, keywords=None, defaults=(None,)), ('document', '358cbf2978c91028fb96a195a9884645'))
paddle.fluid.contrib.load_persistables_for_increment (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None), ('document', '11fbf7e8dd2289805de291b453a33ee7'))
paddle.fluid.contrib.load_persistables_for_inference (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None), ('document', '5b5577bb3d24070da819674255d16196'))
paddle.fluid.contrib.convert_dist_to_sparse_program (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '4efbd93876832d4d35497cdbc7a1e6d8'))
paddle.fluid.contrib.HDFSClient.__init__ (ArgSpec(args=['self', 'hadoop_home', 'configs'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.HDFSClient.delete (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', 'c3721aa2d4d9ef5a857dd47b2681c03e'))
paddle.fluid.contrib.HDFSClient.download (ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'unzip'], varargs=None, keywords=None, defaults=(False, False)), ('document', 'ca55bde92184d3fd0f9f5c963b25e634'))
paddle.fluid.contrib.HDFSClient.is_dir (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,)), ('document', '45bde1bae02605a205c8245b58b9156d'))
paddle.fluid.contrib.HDFSClient.is_exist (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,)), ('document', 'be9c94bccff7ba0c1d95883ac62b5864'))
paddle.fluid.contrib.HDFSClient.ls (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', '808acac504870c7e46594b95674f8a86'))
paddle.fluid.contrib.HDFSClient.lsr (ArgSpec(args=['self', 'hdfs_path', 'only_file', 'sort'], varargs=None, keywords=None, defaults=(True, True)), ('document', 'fae835aa3354eb6a0434c0f9ba3c2747'))
paddle.fluid.contrib.HDFSClient.make_local_dirs (ArgSpec(args=['local_path'], varargs=None, keywords=None, defaults=None), ('document', 'e76b89c8e7f019b5da576c0026fcf689'))
paddle.fluid.contrib.HDFSClient.makedirs (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', '44d9972aae390aedf40aaea731a37e4b'))
paddle.fluid.contrib.HDFSClient.rename (ArgSpec(args=['self', 'hdfs_src_path', 'hdfs_dst_path', 'overwrite'], varargs=None, keywords=None, defaults=(False,)), ('document', '0eb133644d9a9f4da45bb39261ff0955'))
paddle.fluid.contrib.HDFSClient.upload (ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'retry_times'], varargs=None, keywords=None, defaults=(False, 5)), ('document', '7d053b4bfd6dcfdd2c9dda0e0dbd9665'))
paddle.fluid.contrib.multi_download (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,)), ('document', '100927be598ed8f9eaa1f3ef1b23568a'))
paddle.fluid.contrib.multi_upload (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True)), ('document', '183f34c83d30dbe16e09e8716c41958a'))
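Illustrative sketch (not part of the spec file): basic HDFSClient usage from the signatures above; the hadoop home, config values, and paths are all placeholders.

    from paddle.fluid.contrib import HDFSClient

    configs = {'fs.default.name': 'hdfs://namenode:8020',   # placeholder
               'hadoop.job.ugi': 'user,passwd'}             # placeholder
    client = HDFSClient('/opt/hadoop', configs)
    if not client.is_exist('/user/data'):
        client.makedirs('/user/data')
    client.upload(hdfs_path='/user/data', local_path='./train.txt',
                  overwrite=False, retry_times=5)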
paddle.fluid.transpiler.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '292ab72977afbe58e6a3bde175452680'))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '78f4949aedf317666a89ca74b3748ba8'))
paddle.fluid.transpiler.DistributeTranspiler.get_startup_program (ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'd796fc0c8d51503b556fcf6dc15c4f0c'))
paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program (ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)), ('document', '736330e31a7a54abccc0c7fd9119d9ff'))
paddle.fluid.transpiler.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '06ce55338dfe96311ad1078235ab3bf4'))
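Illustrative sketch (not part of the spec file): the typical transpile flow for one trainer and one pserver; it assumes a main program with an optimizer has already been built, and the endpoint is a placeholder.

    import paddle.fluid as fluid

    t = fluid.transpiler.DistributeTranspiler()
    t.transpile(trainer_id=0, pservers='127.0.0.1:6174', trainers=1)
    trainer_prog = t.get_trainer_program()                  # run on trainers
    pserver_prog = t.get_pserver_program('127.0.0.1:6174')  # run on the pserver
    pserver_startup = t.get_startup_program('127.0.0.1:6174', pserver_prog)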
paddle.fluid.transpiler.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', 'eda17d0f1639bc6ca215cecf87f588a4'))
paddle.fluid.transpiler.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ac4114d3df16264f1946deb3a8434a6f'))
paddle.fluid.transpiler.HashName.__init__ (ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.HashName.dispatch (ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.HashName.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.RoundRobin.__init__ (ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.RoundRobin.dispatch (ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.RoundRobin.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.transpiler.DistributeTranspilerConfig.__init__
paddle.fluid.nets.simple_img_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, True))
paddle.fluid.nets.sequence_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type', 'bias_attr'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max', None))
paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,))
paddle.fluid.nets.scaled_dot_product_attention ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0))
paddle.fluid.nets.img_conv_group ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True))
paddle.fluid.optimizer.SGDOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.optimizer.SGDOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.SGDOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.SGDOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
paddle.fluid.optimizer.MomentumOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.MomentumOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.MomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name', 'initial_accumulator_value'], varargs=None, keywords=None, defaults=(1e-06, None, None, 0.0))
paddle.fluid.optimizer.AdagradOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.AdagradOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.AdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdamOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name', 'lazy_mode'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None, False))
paddle.fluid.optimizer.AdamOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.AdamOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.AdamOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdamaxOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None))
paddle.fluid.optimizer.AdamaxOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.AdamaxOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.AdamaxOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, None, None))
paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.DecayedAdagradOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.FtrlOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.0, 0.0, -0.5, None, None))
paddle.fluid.optimizer.FtrlOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.FtrlOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.FtrlOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, 0.0, False, None, None))
paddle.fluid.optimizer.RMSPropOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.RMSPropOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.RMSPropOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdadeltaOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None))
paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.AdadeltaOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.AdadeltaOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.ModelAverage.__init__ ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window', 'regularization', 'name'], varargs=None, keywords=None, defaults=(10000, 10000, None, None))
paddle.fluid.optimizer.ModelAverage.apply ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.optimizer.ModelAverage.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.ModelAverage.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.ModelAverage.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.ModelAverage.restore ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.LarsMomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'lars_coeff', 'lars_weight_decay', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.0005, None, None))
paddle.fluid.optimizer.LarsMomentumOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.LarsMomentumOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.optimizer.LarsMomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.backward.append_backward ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.regularizer.L1DecayRegularizer.__init__ ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,))
paddle.fluid.regularizer.L2DecayRegularizer.__init__ ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,))
paddle.fluid.nets.simple_img_conv_pool (ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, True)), ('document', 'e0f67f35abf27f666f81003113b90244'))
paddle.fluid.nets.sequence_conv_pool (ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type', 'bias_attr'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max', None)), ('document', '48c434dd7bb827f69d90e5135d77470f'))
paddle.fluid.nets.glu (ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,)), ('document', '08c1c57e1db6b20bf87b264cb7cf3ca8'))
paddle.fluid.nets.scaled_dot_product_attention (ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0)), ('document', '921714c9bfb351b41403418265393203'))
paddle.fluid.nets.img_conv_group (ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True)), ('document', '3802be78fbfb206dae64a2d9f8480970'))
paddle.fluid.optimizer.SGDOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.SGDOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.SGDOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.SGDOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.MomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.MomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.MomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.MomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.AdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name', 'initial_accumulator_value'], varargs=None, keywords=None, defaults=(1e-06, None, None, 0.0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.AdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.AdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.AdamOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name', 'lazy_mode'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdamOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.AdamOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.AdamOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.AdamaxOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdamaxOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.AdamaxOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.AdamaxOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.DecayedAdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.FtrlOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.0, 0.0, -0.5, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.FtrlOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.FtrlOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.FtrlOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.RMSPropOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, 0.0, False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.RMSPropOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.RMSPropOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.RMSPropOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.AdadeltaOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.AdadeltaOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.AdadeltaOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.ModelAverage.__init__ (ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window', 'regularization', 'name'], varargs=None, keywords=None, defaults=(10000, 10000, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.ModelAverage.apply (ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,)), ('document', '46234a5470590feb336346f70a3db715'))
paddle.fluid.optimizer.ModelAverage.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.ModelAverage.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.ModelAverage.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.optimizer.ModelAverage.restore (ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None), ('document', '18db9c70be9c4dd466f9844457b21bfe'))
paddle.fluid.optimizer.LarsMomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'lars_coeff', 'lars_weight_decay', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.0005, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.optimizer.LarsMomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
paddle.fluid.optimizer.LarsMomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
paddle.fluid.optimizer.LarsMomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
paddle.fluid.backward.append_backward (ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '1a79bd7d10ae54ca763ec81bca36ba24'))
paddle.fluid.regularizer.L1DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.regularizer.L2DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
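Illustrative sketch (not part of the spec file): tying the optimizer and regularizer signatures above together in a tiny regression graph; the feature size and coefficient are assumptions.

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    w_attr = fluid.ParamAttr(
        regularizer=fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=1e-4))
    pred = fluid.layers.fc(input=x, size=1, param_attr=w_attr)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))
    fluid.optimizer.AdamOptimizer(learning_rate=0.001).minimize(loss)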
paddle.fluid.LoDTensor.__init__ 1. __init__(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None 2. __init__(self: paddle.fluid.core.LoDTensor) -> None
paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> bool
paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core.LoDTensor) -> List[List[int]]
......@@ -482,38 +483,38 @@ paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray
paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> None
paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None
paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None
paddle.fluid.ParamAttr.__init__ ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False))
paddle.fluid.WeightNormParamAttr.__init__ ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False))
paddle.fluid.DataFeeder.__init__ ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DataFeeder.decorate_reader ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True))
paddle.fluid.DataFeeder.feed ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DataFeeder.feed_parallel ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.clip.ErrorClipByValue.__init__ ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.clip.GradientClipByValue.__init__ ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.clip.GradientClipByNorm.__init__ ArgSpec(args=['self', 'clip_norm'], varargs=None, keywords=None, defaults=None)
paddle.fluid.clip.GradientClipByGlobalNorm.__init__ ArgSpec(args=['self', 'clip_norm', 'group_name'], varargs=None, keywords=None, defaults=('default_group',))
paddle.fluid.profiler.cuda_profiler ArgSpec(args=['output_file', 'output_mode', 'config'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.profiler.reset_profiler ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.profiler.profiler ArgSpec(args=['state', 'sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile'))
paddle.fluid.profiler.start_profiler ArgSpec(args=['state'], varargs=None, keywords=None, defaults=None)
paddle.fluid.profiler.stop_profiler ArgSpec(args=['sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile'))
paddle.fluid.unique_name.generate ArgSpec(args=['key'], varargs=None, keywords=None, defaults=None)
paddle.fluid.unique_name.switch ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.unique_name.guard ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.recordio_writer.convert_reader_to_recordio_file ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
paddle.fluid.recordio_writer.convert_reader_to_recordio_files ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', '0eed2f198dc73c08a41b61edbc755753'))
paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', '459e316301279dfd82001b46f0b8ffca'))
paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '543863d1f9d4853758adb613b8659e85'))
paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.clip.GradientClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.clip.GradientClipByNorm.__init__ (ArgSpec(args=['self', 'clip_norm'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.clip.GradientClipByGlobalNorm.__init__ (ArgSpec(args=['self', 'clip_norm', 'group_name'], varargs=None, keywords=None, defaults=('default_group',)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.profiler.cuda_profiler (ArgSpec(args=['output_file', 'output_mode', 'config'], varargs=None, keywords=None, defaults=(None, None)), ('document', '2e2fb1cfc469a67f19fb578a2ed6be79'))
paddle.fluid.profiler.reset_profiler (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '397ce757fabbe5c622e0c3458c41fcd0'))
paddle.fluid.profiler.profiler (ArgSpec(args=['state', 'sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile')), ('document', 'bd3a07eeb68e384f4d2d416cb2e28d86'))
paddle.fluid.profiler.start_profiler (ArgSpec(args=['state'], varargs=None, keywords=None, defaults=None), ('document', '88da8fb6dbebaee2f7520188a09574f9'))
paddle.fluid.profiler.stop_profiler (ArgSpec(args=['sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile')), ('document', 'a7500e39dd033f1e64f562e909333a8a'))
paddle.fluid.unique_name.generate (ArgSpec(args=['key'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.unique_name.switch (ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.unique_name.guard (ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.recordio_writer.convert_reader_to_recordio_file (ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)), ('document', '65c7523e86f0c50bb729b01667f36310'))
paddle.fluid.recordio_writer.convert_reader_to_recordio_files (ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)), ('document', 'bc643f0f5f1b9db57ff0d8a57d379bd7'))
paddle.fluid.Scope Scope() -> paddle.fluid.core._Scope
paddle.reader.map_readers ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None)
paddle.reader.buffered ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None)
paddle.reader.compose ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None)
paddle.reader.chain ArgSpec(args=[], varargs='readers', keywords=None, defaults=None)
paddle.reader.shuffle ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None)
paddle.reader.firstn ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None)
paddle.reader.xmap_readers ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,))
paddle.reader.PipeReader.__init__ ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain'))
paddle.reader.PipeReader.get_line ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n'))
paddle.reader.multiprocess_reader ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000))
paddle.reader.Fake.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.reader.creator.np_array ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
paddle.reader.creator.text_file ArgSpec(args=['path'], varargs=None, keywords=None, defaults=None)
paddle.reader.creator.recordio ArgSpec(args=['paths', 'buf_size'], varargs=None, keywords=None, defaults=(100,))
paddle.reader.map_readers (ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None), ('document', '77cbadb09df588e21e5cc0819b69c87d'))
paddle.reader.buffered (ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None), ('document', '0d6186f109feceb99f60ec50a0a624cb'))
paddle.reader.compose (ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None), ('document', '884291104e1c3f37f33aae44b7deeb0d'))
paddle.reader.chain (ArgSpec(args=[], varargs='readers', keywords=None, defaults=None), ('document', 'd22c34e379a53901ae67a6bca7f4def4'))
paddle.reader.shuffle (ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', 'e42ea6fee23ce26b23cb142cd1d6522d'))
paddle.reader.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'c5bb8f7dd4f917f1569a368aab5b8aad'))
paddle.reader.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '283bc0b8a0e26ae186b8b9bee4aec560'))
paddle.reader.PipeReader.__init__ (ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.reader.PipeReader.get_line (ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n')), ('document', '5f80a7ed70052f01665e4c74acccfa69'))
paddle.reader.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '7d8b3a96e592107c893d5d51ce968ba0'))
paddle.reader.Fake.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.reader.creator.np_array (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '28d457fbc9a71efa4ac91a3be179cada'))
paddle.reader.creator.text_file (ArgSpec(args=['path'], varargs=None, keywords=None, defaults=None), ('document', '44fe286ab6175a5464d3a961a68c266a'))
paddle.reader.creator.recordio (ArgSpec(args=['paths', 'buf_size'], varargs=None, keywords=None, defaults=(100,)), ('document', '11b3704ea42cfd537953387a7e58dae8'))
......@@ -13,7 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/block_desc.h"
#include <queue>
#include <unordered_set>
#include <utility>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
......@@ -155,6 +159,16 @@ void BlockDesc::RemoveOp(size_t s, size_t e) {
ops_.erase(ops_.begin() + s, ops_.begin() + e);
}
void BlockDesc::RemoveOpInternal(const OpDesc *op_desc) {
// TODO(minqiyang): make this faster
for (auto it = ops_.begin(); it != ops_.end(); ++it) {
if (it->get() == op_desc) {
ops_.erase(it);
break;
}
}
}
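// A minimal standalone sketch (names invented, not Paddle code) of the same
// erase-by-pointer pattern RemoveOpInternal uses, written with <algorithm>.
// The scan stays O(n), which is what the TODO above refers to; a side index
// such as unordered_map<const OpDesc*, size_t> could make it O(1) at the
// cost of keeping that index in sync on every insert and erase.
#include <algorithm>
#include <memory>
#include <vector>

template <typename T>
void EraseByPointer(std::vector<std::unique_ptr<T>>* v, const T* target) {
  auto it = std::find_if(
      v->begin(), v->end(),
      [target](const std::unique_ptr<T>& p) { return p.get() == target; });
  if (it != v->end()) v->erase(it);  // still a linear scan, as above
}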
std::vector<OpDesc *> BlockDesc::AllOps() const {
std::vector<OpDesc *> res;
for (const auto &op : ops_) {
......@@ -163,20 +177,6 @@ std::vector<OpDesc *> BlockDesc::AllOps() const {
return res;
}
void BlockDesc::Clear() {
// clear all ops
ops_.clear();
// clear all vars which are not persistable
for (auto it = vars_.begin(); it != vars_.end();) {
if (it->second->Persistable()) {
++it;
} else {
vars_.erase(it++);
}
}
}
void BlockDesc::Flush() {
for (auto &op_desc : ops_) {
op_desc->Flush();
......
......@@ -93,12 +93,12 @@ class BlockDesc {
*/
void RemoveOp(size_t s, size_t e);
void RemoveOpInternal(const OpDesc *op_desc);
void RemoveVar(const std::string &name) { vars_.erase(name); }
std::vector<OpDesc *> AllOps() const;
void Clear();
size_t OpSize() const { return ops_.size(); }
OpDesc *Op(int idx) const { return ops_.at(idx).get(); }
......
......@@ -12,7 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h"
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/details/fetch_op_handle.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
......@@ -55,7 +57,7 @@ FeedFetchList FastThreadedSSAGraphExecutor::Run(
std::vector<FetchOpHandle *> fetch_ops;
for (auto &fetch_var_name : fetch_tensors) {
for (auto &var_map : graph_->Get<details::GraphVars>("vars")) {
for (auto &var_map : graph_->Get<details::GraphVars>(details::kGraphVars)) {
auto it = var_map.find(fetch_var_name);
if (it != var_map.end()) {
fetched_vars[fetch_var_name].push_back(*it->second.rbegin());
......
......@@ -20,6 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/framework/transfer_scope_cache.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/operators/distributed/distributed.h"
......
......@@ -105,4 +105,5 @@ if (WITH_MKLDNN)
cc_test(test_conv_bias_mkldnn_fuse_pass SRCS mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc DEPS conv_bias_mkldnn_fuse_pass naive_executor)
cc_test(test_conv_relu_mkldnn_fuse_pass SRCS mkldnn/conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass)
cc_test(test_mkldnn_placement_pass SRCS mkldnn/mkldnn_placement_pass_tester.cc DEPS mkldnn_placement_pass)
endif ()
......@@ -44,10 +44,14 @@ struct TestIsReachable {
using func = std::function<bool(const std::string&, const std::string&)>;
auto operator()(const std::unique_ptr<ir::Graph>& graph) -> func {
auto find_node = [](const std::unique_ptr<ir::Graph>& graph,
const std::string& name) -> Node* {
auto hash = [](const Node* node) -> std::string {
return node->Name() + std::to_string(node->id());
};
auto find_node = [&](const std::unique_ptr<ir::Graph>& graph,
const std::string& name) -> Node* {
for (auto& node : GraphTraits::DFS(*graph)) {
if (name == node.Name()) {
if (name == hash(&node)) {
return &node;
}
}
......@@ -55,13 +59,17 @@ struct TestIsReachable {
return nullptr;
};
return [&](std::string from, const std::string to) -> bool {
// map the from/to names to their hashed equivalents while walking the graph
return [&](std::string from, std::string to) -> bool {
if (from == to) return true;
std::map<std::string, bool> visited;
for (auto& node : GraphTraits::DFS(*graph)) {
visited[node.Name()] = false;
auto hashed = hash(&node);
if (node.Name() == from) from = hashed;
if (node.Name() == to) to = hashed;
visited[hashed] = false;
}
visited[from] = true;
......@@ -72,15 +80,15 @@ struct TestIsReachable {
while (!queue.empty()) {
auto cur = find_node(graph, queue.front());
queue.pop_front();
if (cur == nullptr) return false;
for (auto n : cur->outputs) {
if (n->Name() == to) return true;
auto hashed_name = hash(n);
if (hashed_name == to) return true;
if (!visited[n->Name()]) {
visited[n->Name()] = true;
queue.push_back(n->Name());
if (!visited[hashed_name]) {
visited[hashed_name] = true;
queue.push_back(hashed_name);
}
}
}
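// A minimal standalone sketch (types and names invented) of why this test
// keys reachability on Name() + id: a graph can hold several nodes sharing
// a Name(), e.g. two "conv2d" ops, and only the combined key keeps the
// `visited` map well-defined.
#include <iostream>
#include <string>

struct FakeNode {  // stand-in for ir::Node, illustration only
  std::string name;
  int id;
};

static std::string HashNode(const FakeNode& n) {
  return n.name + std::to_string(n.id);
}

int main() {
  FakeNode a{"conv2d", 0}, b{"conv2d", 1};
  // Same Name(), different keys -- this is what disambiguates the BFS.
  std::cout << HashNode(a) << " != " << HashNode(b) << std::endl;
  return 0;
}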
......@@ -166,6 +174,28 @@ TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionAsYWithElementwiseAddRelu) {
RunPassAndAssert(&prog, "a", "relu", 1);
}
TEST(ConvElementwiseAddMKLDNNFusePass,
ConvolutionProjectionAsYWithElementwiseAddRelu) {
auto prog = BuildProgramDesc({"a", "b", "c", "d", "e", "f"},
{"bias", "weights", "bias2", "weights2"});
SetOp(&prog, "sigmoid", {{"X", "a"}}, {"Out", "b"});
// right branch
SetOp(&prog, "conv2d",
{{"Input", "b"}, {"Bias", "bias"}, {"Filter", "weights"}},
{"Output", "c"});
// left branch
SetOp(&prog, "conv2d",
{{"Input", "a"}, {"Bias", "bias2"}, {"Filter", "weights2"}},
{"Output", "f"});
SetOp(&prog, "elementwise_add", {{"X", "f"}, {"Y", "c"}}, {"Out", "d"});
SetOp(&prog, "relu", {{"X", "d"}}, {"Out", "e"});
RunPassAndAssert(&prog, "a", "relu", 2);
}
TEST(ConvElementwiseAddMKLDNNFusePass,
ConvolutionAsYWithElementwiseAddReluNoBias) {
auto prog = BuildProgramDesc({"a", "b", "c", "d", "e"}, {"weights"});
......
......@@ -21,7 +21,7 @@ namespace ir {
std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const {
VLOG(3) << "Aplies MKL-DNN placement strategy.";
VLOG(3) << "Applies MKL-DNN placement strategy.";
const auto& op_types_list =
Get<std::unordered_set<std::string>>("mkldnn_enabled_op_types");
for (const Node* n : graph->Nodes()) {
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h"
#include <gtest/gtest.h>
#include <boost/logic/tribool.hpp>
namespace paddle {
namespace framework {
namespace ir {
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs, boost::tribool use_mkldnn) {
auto* op = prog->MutableBlock(0)->AppendOp();
op->SetType(type);
if (!boost::indeterminate(use_mkldnn)) op->SetAttr("use_mkldnn", use_mkldnn);
if (type == "conv2d") {
op->SetAttr("name", name);
op->SetInput("Input", {inputs[0]});
op->SetInput("Filter", {inputs[1]});
op->SetInput("Bias", {inputs[2]});
} else if (type == "relu") {
op->SetInput("X", inputs);
} else if (type == "concat") {
op->SetAttr("axis", 1);
op->SetInput("X", {inputs[0], inputs[1]});
} else if (type == "pool2d") {
op->SetInput("X", {inputs[0]});
} else {
FAIL() << "Unexpected operator type.";
}
op->SetOutput("Out", {outputs[0]});
}
// operator use_mkldnn
// ---------------------------------------
// (a,b)->concat->c none
// (c,weights,bias)->conv->f none
// f->relu->g false
// g->pool->h false
// (h,weights2,bias2)->conv->k true
// k->relu->l true
ProgramDesc BuildProgramDesc() {
ProgramDesc prog;
for (auto& v :
std::vector<std::string>({"a", "b", "c", "weights", "bias", "f", "g",
"h", "weights2", "bias2", "k", "l"})) {
auto* var = prog.MutableBlock(0)->Var(v);
var->SetType(proto::VarType::SELECTED_ROWS);
if (v == "weights" || v == "bias") {
var->SetPersistable(true);
}
}
SetOp(&prog, "concat", "concat1", std::vector<std::string>({"a", "b"}),
std::vector<std::string>({"c"}), boost::indeterminate);
SetOp(&prog, "conv2d", "conv1",
std::vector<std::string>({"c", "weights", "bias"}),
std::vector<std::string>({"f"}), boost::indeterminate);
SetOp(&prog, "relu", "relu1", std::vector<std::string>({"f"}),
std::vector<std::string>({"g"}), false);
SetOp(&prog, "pool2d", "pool1", std::vector<std::string>({"g"}),
std::vector<std::string>({"h"}), false);
SetOp(&prog, "conv2d", "conv2",
std::vector<std::string>({"h", "weights2", "bias2"}),
std::vector<std::string>({"k"}), true);
SetOp(&prog, "relu", "relu2", std::vector<std::string>({"k"}),
std::vector<std::string>({"l"}), true);
return prog;
}
void MainTest(std::initializer_list<std::string> mkldnn_enabled_op_types,
unsigned expected_use_mkldnn_true_count) {
auto prog = BuildProgramDesc();
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
auto pass = PassRegistry::Instance().Get("mkldnn_placement_pass");
pass->Set("mkldnn_enabled_op_types",
new std::unordered_set<std::string>(mkldnn_enabled_op_types));
graph = pass->Apply(std::move(graph));
unsigned use_mkldnn_true_count = 0;
for (auto* node : graph->Nodes()) {
if (node->IsOp()) {
auto* op = node->Op();
if (op->HasAttr("use_mkldnn") &&
boost::get<bool>(op->GetAttr("use_mkldnn"))) {
++use_mkldnn_true_count;
}
}
}
EXPECT_EQ(use_mkldnn_true_count, expected_use_mkldnn_true_count);
}
TEST(MKLDNNPlacementPass, enable_conv_relu) {
// 1 conv (1 conv is always true) + 2 relu (1 relu is always true) + 0 pool
MainTest({"conv2d", "relu"}, 3);
}
TEST(MKLDNNPlacementPass, enable_relu_pool) {
// 1 conv (1 conv is always true) + 2 relu (1 relu is always true) + 1 pool
MainTest({"relu", "pool2d"}, 4);
}
TEST(MKLDNNPlacementPass, enable_all) {
// 1 conv (1 conv is always true) + 2 relu (1 relu is always true) + 1 pool
MainTest({}, 4);
}
} // namespace ir
} // namespace framework
} // namespace paddle
USE_PASS(mkldnn_placement_pass);
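// A minimal sketch (helper name invented) of the rule the expected counts
// above imply: an op whose desc already carries a use_mkldnn attribute is
// switched on when the enabled set is empty (enable all) or contains its
// type; ops built with boost::indeterminate carry no attribute, which is why
// concat1 and conv1 never count, and ops created with use_mkldnn=true keep
// the attribute either way.
#include <string>
#include <unordered_set>

static bool ShouldEnableMKLDNN(
    bool has_use_mkldnn_attr, const std::string& op_type,
    const std::unordered_set<std::string>& enabled) {
  return has_use_mkldnn_attr &&
         (enabled.empty() || enabled.count(op_type) > 0);
}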
......@@ -290,7 +290,7 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
"USE_OP_DEVICE_KERNEL must be in global namespace"); \
extern int \
TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE##_##customized_name(); \
UNUSED static int use_op_kernel_##op_type##_##LIBRARY_TYPE##_##DEFAULT_TYPE##_ = /* NOLINT */ \
UNUSED static int use_op_kernel_##op_type##_##LIBRARY_TYPE##_##customized_name##_ = /* NOLINT */ \
TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE##_##customized_name()
#define USE_OP_DEVICE_KERNEL(op_type, LIBRARY_TYPE) \
......
......@@ -882,7 +882,8 @@ class RuntimeInferShapeContext : public InferShapeContext {
const RuntimeContext& ctx_;
};
static void CheckTensorNANOrInf(const std::string& name,
static void CheckTensorNANOrInf(const std::string& op_type,
const std::string& name,
const framework::Tensor& tensor) {
if (tensor.memory_size() == 0) {
return;
......@@ -892,9 +893,9 @@ static void CheckTensorNANOrInf(const std::string& name,
return;
}
PADDLE_ENFORCE(!framework::TensorContainsInf(tensor),
"Tensor %s contains Inf", name);
"Operator %s output Tensor %s contains Inf", op_type, name);
PADDLE_ENFORCE(!framework::TensorContainsNAN(tensor),
"Tensor %s contains NAN", name);
"Operator %s output Tensor %s contains NAN", op_type, name);
}
void OperatorWithKernel::RuntimeInferShape(const Scope& scope,
......@@ -988,9 +989,10 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
auto* var = exec_scope.FindVar(vname);
if (var == nullptr) continue;
if (var->IsType<framework::LoDTensor>()) {
CheckTensorNANOrInf(vname, var->Get<framework::LoDTensor>());
CheckTensorNANOrInf(type_, vname, var->Get<framework::LoDTensor>());
} else if (var->IsType<framework::SelectedRows>()) {
CheckTensorNANOrInf(vname, var->Get<framework::SelectedRows>().value());
CheckTensorNANOrInf(type_, vname,
var->Get<framework::SelectedRows>().value());
}
}
}
......
......@@ -24,3 +24,11 @@ limitations under the License. */
#pragma pop_macro("_XOPEN_SOURCE")
#pragma pop_macro("_POSIX_C_SOURCE")
#if !defined(PYBIND11_HIDDEN)
#ifdef _WIN32
#define PYBIND11_HIDDEN __declspec(dllexport)
#else
#define PYBIND11_HIDDEN __attribute__((visibility("hidden")))
#endif
#endif
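// A hedged reading of the macro above: on Windows it expands to
// __declspec(dllexport); elsewhere it gives the class hidden ELF visibility,
// matching pybind11's own hidden-visibility types, so binding-only classes
// (such as OpBase below, which holds py::object hooks) are not exported and
// do not trigger visibility warnings. Illustrative use only:
//
//   class PYBIND11_HIDDEN MyBindingOnlyClass { /* holds py::object */ };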
......@@ -14,6 +14,8 @@
#include "paddle/fluid/framework/tensor_util.h"
#include <algorithm>
#include <limits>
#include <memory>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
......
......@@ -18,6 +18,7 @@
#include <limits>
#include <map>
#include <random>
#include <unordered_set>
#include <utility>
#include "paddle/fluid/framework/lod_tensor.h"
......@@ -139,6 +140,8 @@ class Autograd {
}
}
}
ready_op->InvokeBackwardHooks();
}
}
......@@ -156,8 +159,10 @@ class Autograd {
for (auto it : candidate->pre_ops_) {
for (OpBase* pre_op : it.second) {
if (!pre_op) continue;
VLOG(5) << "op dep " << candidate->op_desc_->Type() << " <---- "
<< it.first << " <---- " << pre_op->op_desc_->Type();
VLOG(5) << "op dep " << candidate->op_desc_->Type() << " trace id "
<< candidate->trace_id_ << " <---- " << it.first << " <---- "
<< pre_op->op_desc_->Type() << " trace id "
<< pre_op->trace_id_;
if (visited.find(pre_op) == visited.end()) {
visited.insert(pre_op);
queue.push_back(pre_op);
......@@ -211,6 +216,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
return {};
}
VLOG(3) << "apply op grad: " << op_desc_->Type();
std::vector<framework::VariableValueMap> grad_outputs;
if (backward_id_ > 0) {
VLOG(3) << "py_layer_grad";
......@@ -272,6 +278,22 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
return input_vars_;
}
void OpBase::InvokeBackwardHooks() {
VLOG(3) << "call backward hooks, hooks num: " << backward_hooks_.size();
// call backward hooks
for (py::object& callable : backward_hooks_) {
callable(this);
}
}
void OpBase::RegisterBackwardHooks(const py::object& callable) {
VLOG(3) << "Register backward hooks " << trace_id_;
// TODO(minqiyang): check the callable format
backward_hooks_.push_back(callable);
}
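// A minimal standalone sketch of the hook flow above (names invented;
// py::object replaced by std::function so the sketch compiles without
// pybind11). Autograd invokes the hooks right after an op's gradient has
// been applied, as the ready_op->InvokeBackwardHooks() line earlier in this
// diff shows.
#include <functional>
#include <iostream>
#include <utility>
#include <vector>

struct FakeOp {
  std::vector<std::function<void(FakeOp*)>> backward_hooks_;

  void RegisterBackwardHook(std::function<void(FakeOp*)> hook) {
    backward_hooks_.push_back(std::move(hook));
  }

  void InvokeBackwardHooks() {  // mirrors OpBase::InvokeBackwardHooks
    for (auto& hook : backward_hooks_) hook(this);
  }
};

int main() {
  FakeOp op;
  op.RegisterBackwardHook([](FakeOp*) { std::cout << "grad ready\n"; });
  op.InvokeBackwardHooks();
  return 0;
}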
void VarBase::RunBackward() {
if (!pre_op_) return;
......
......@@ -123,22 +123,32 @@ class VarBase {
private:
VarBase(framework::Variable* var, VarBase* grad, bool stop_gradient)
: var_desc_(nullptr),
: name_(),
var_desc_(nullptr),
var_(var),
grads_(grad),
block_(nullptr),
persistable_(false),
stop_gradient_(stop_gradient),
pre_op_(nullptr),
pre_op_out_name_(),
pre_op_out_idx_(-1) {}
public:
virtual ~VarBase() {
// TODO(minqiyang): remove var desc from block desc
if (var_) {
delete var_;
var_ = nullptr;
}
if (grads_) {
delete grads_;
grads_ = nullptr;
}
pre_op_ = nullptr;
pre_op_out_idx_ = -1;
}
inline OpBase* PreOp() const { return pre_op_; }
......@@ -151,6 +161,14 @@ class VarBase {
void RunBackward();
inline void ResetPreOp(OpBase* op) {
if (op == pre_op_) {
// clear the pre_op info when op matches this var's pre_op
pre_op_ = nullptr;
pre_op_out_idx_ = -1;
}
}
void TrackPreOp(OpBase* pre_op, const std::string& pre_op_out_name,
int pre_op_out_idx, bool pre_op_stop_gradient) {
pre_op_ = pre_op;
......@@ -184,11 +202,15 @@ class VarBase {
return string::Sprintf("%s@IGrad", var_desc_->Name());
}
std::string name_;
framework::VarDesc* var_desc_;
framework::Variable* var_;
VarBase* grads_;
framework::BlockDesc* block_;
bool persistable_;
private:
bool stop_gradient_;
OpBase* pre_op_;
......@@ -199,15 +221,27 @@ class VarBase {
/* The wrapper for OpDesc, which holds an OpDesc and the OpDesc of its
* gradient. This object should be managed entirely by the Python interpreter.
*/
class OpBase {
class PYBIND11_HIDDEN OpBase {
public:
OpBase()
: op_desc_(nullptr),
forward_id_(-1),
backward_id_(-1),
place_(platform::CPUPlace()) {}
trace_id_(-1),
place_(platform::CPUPlace()),
backward_hooks_() {}
virtual ~OpBase() {
// TODO(minqiyang): remove op_desc from block_desc in tracer
//
// reset all output vars' pre op
for (auto iter : output_vars_) {
for (VarBase* var : iter.second) {
var->ResetPreOp(this);
}
}
// release resource
for (framework::OpDesc* desc : grad_op_descs_) {
delete desc;
}
......@@ -215,6 +249,10 @@ class OpBase {
std::map<std::string, std::vector<VarBase*>> ApplyGrad();
void RegisterBackwardHooks(const py::object& callable);
void InvokeBackwardHooks();
// One of `op_desc_` or `forward_id_` is set, not both.
// For a pure Python PyLayer, use `forward_id_`; otherwise, use `op_desc_`.
framework::OpDesc* op_desc_;
......@@ -225,6 +263,7 @@ class OpBase {
// Note: each fwd op corresponds to a vector of bwd ops.
std::vector<framework::OpDesc*> grad_op_descs_;
int backward_id_;
int trace_id_;
platform::Place place_;
......@@ -239,6 +278,8 @@ class OpBase {
std::vector<framework::VariableValueMap> grad_output_vars_;
framework::BlockDesc* block_;
std::vector<py::object> backward_hooks_;
};
class Layer {
......
......@@ -14,15 +14,32 @@
#include "paddle/fluid/imperative/tracer.h"
#include <memory>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#ifdef WITH_GPERFTOOLS
#include "gperftools/profiler.h"
#endif
DEFINE_string(
tracer_profile_fname, "",
"Profiler filename for imperative tracer, which generated by gperftools."
"Only valid when compiled `WITH_PROFILER=ON`. Empty if disable.");
namespace paddle {
namespace imperative {
static std::once_flag gTracerProfileOnce;
#ifdef WITH_GPERFTOOLS
static bool gTracerProfilerStarted = false;
#endif
void CreateGradOp(const framework::OpDesc& op_desc,
const std::unordered_set<std::string>& no_grad_set,
const std::vector<framework::BlockDesc*>& grad_sub_block,
......@@ -68,15 +85,36 @@ platform::Place GetExpectedPlace(platform::Place place, VarBasePtrMap inputs) {
return result;
}
Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {
if (!FLAGS_tracer_profile_fname.empty()) {
std::call_once(gTracerProfileOnce, [] {
#ifdef WITH_GPERFTOOLS
ProfilerStart(FLAGS_tracer_profile_fname.c_str());
gTracerProfilerStarted = true;
#else
LOG(WARNING) << "Paddle is not compiled with gperftools. "
"FLAGS_tracer_profile_fname will be ignored";
#endif
});
}
}
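// Hedged note on the constructor above: std::call_once ensures only the
// first Tracer ever constructed starts the gperftools profiler (or logs the
// warning); later tracers share it, and Trace() merely flushes it when
// gTracerProfilerStarted is set.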
std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
const VarBasePtrMap& outputs,
framework::BlockDesc* block,
const platform::Place expected_place,
const bool stop_gradient) {
#ifdef WITH_GPERFTOOLS
if (gTracerProfilerStarted) {
ProfilerFlush();
}
#endif
std::map<std::string, VarBase*> vars;
framework::OpDesc* op_desc = op->op_desc_;
VLOG(3) << "tracer tracing " << op_desc->Type();
VLOG(3) << "tracer tracing " << op_desc->Type() << " trace id "
<< op->trace_id_;
op_desc->InferShape(*block);
op_desc->InferVarType(block);
......@@ -99,11 +137,13 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
if (inp->PreOp() && !inp->IsStopGradient()) {
op->pre_ops_[it.first].push_back(inp->PreOp());
op->pre_ops_out_idx_[it.first].push_back(inp->PreOpOutIdx());
VLOG(3) << "add pre op " << inp->PreOp()->op_desc_->Type();
} else {
op->pre_ops_[it.first].push_back(nullptr);
}
VLOG(3) << "input vname " << inp->var_desc_->Name() << " "
<< inp->var_->IsInitialized();
<< inp->var_->IsInitialized() << " stop_gradient "
<< inp->IsStopGradient();
}
}
......@@ -155,6 +195,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
op->grad_input_vars_.resize(op->grad_op_descs_.size());
op->grad_output_vars_.resize(op->grad_op_descs_.size());
for (size_t i = 0; i < op->grad_op_descs_.size(); ++i) {
framework::OpDesc* grad_op_desc = op->grad_op_descs_[i];
for (auto it : grad_op_desc->Inputs()) {
......@@ -167,7 +208,6 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
PADDLE_ENFORCE(fwd_var_it != vars.end());
// Forward inputs or outputs.
grad_in_vars.push_back(fwd_var_it->second->var_);
vars_saved_for_backward.insert(it.first);
} else {
VarBase* var = vars[var_it->second];
if (!var->grads_->var_->IsInitialized()) {
......@@ -177,6 +217,8 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
// Douts.
grad_in_vars.push_back(var->grads_->var_);
}
vars_saved_for_backward.insert(it.first);
}
}
......
......@@ -40,7 +40,7 @@ platform::Place GetExpectedPlace(platform::Place place, VarBasePtrMap inputs);
class Tracer {
public:
explicit Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {}
explicit Tracer(framework::BlockDesc* root_block);
virtual ~Tracer() {}
......
......@@ -16,6 +16,7 @@ add_subdirectory(utils)
if (TENSORRT_FOUND)
add_subdirectory(tensorrt)
endif()
# add_subdirectory(anakin)
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)
......
cc_library(anakin_engine SRCS engine.cc)
target_link_libraries(anakin_engine anakin anakin_saber_common)
cc_test(test_anakin_engine SRCS test_anakin_engine.cc DEPS anakin_engine)
add_subdirectory(convert)
cc_library(anakin_op_converter SRCS fc.cc registrar.cc DEPS anakin_engine framework_proto scope)
cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op)
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/fc.h"
#include <algorithm>
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::Precision;
using anakin::saber::NV;
using anakin::saber::X86;
using anakin::saber::Shape;
using anakin::PBlock;
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
void FcOpConverter::operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto x_name = op_desc.Input("X").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto *y_v = scope.FindVar(op_desc.Input("Y").front());
PADDLE_ENFORCE_NOT_NULL(y_v);
auto *y_t = y_v->GetMutable<framework::LoDTensor>();
auto input_name = op_desc.Input("X").front();
auto output_name = op_desc.Output("Out").front();
auto weight_shape = framework::vectorize2int(y_t->dims());
engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
engine_->AddOpAttr(op_name, "bias_term", false);
engine_->AddOpAttr(op_name, "axis", 1);
int out_dim = weight_shape[1];
engine_->AddOpAttr(op_name, "out_dim", out_dim);
weight_shape.push_back(1);
weight_shape.push_back(1);
Shape anakin_shape(weight_shape);
framework::LoDTensor weight_tensor;
weight_tensor.Resize(y_t->dims());
TensorCopySync((*y_t), platform::CPUPlace(), &weight_tensor);
auto *weight1 =
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
std::copy_n(weight_tensor.data<float>(), weight_tensor.numel(), cpu_data);
weight1->d_tensor().set_shape(anakin_shape);
weight1->d_tensor().copy_from(weight1->h_tensor());
engine_->AddOpAttr(op_name, "weight_1", *weight1);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
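// A minimal standalone sketch (helper name invented) of the weight-shape
// handling in FcOpConverter above: the 2-D mul weight [in_dim, out_dim] is
// padded with two trailing 1s, presumably because anakin's saber Shape is
// 4-D.
#include <vector>

static std::vector<int> To4D(std::vector<int> shape) {
  while (shape.size() < 4) shape.push_back(1);  // {64, 128} -> {64, 128, 1, 1}
  return shape;
}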
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
class FcOpConverter : public AnakinOpConverter {
public:
FcOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope,
bool test_mode) override;
virtual ~FcOpConverter() {}
private:
};
static Registrar<FcOpConverter> register_fc_op_converter("fc");
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "framework/core/types.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/anakin/convert/registrar.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "saber/saber_types.h"
namespace paddle {
namespace inference {
namespace anakin {
using AnakinNvEngine =
AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>;
class AnakinOpConverter {
public:
AnakinOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope, bool test_mode) {}
void ConvertOp(const framework::proto::OpDesc &op,
const std::unordered_set<std::string> &parameters,
const framework::Scope &scope, AnakinNvEngine *engine,
bool test_mode = false) {
framework::OpDesc op_desc(op, nullptr);
std::string op_type = op_desc.Type();
std::shared_ptr<AnakinOpConverter> it{nullptr};
if (op_type == "mul") {
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL);
std::string Y = op_desc.Input("Y")[0];
std::cout << Y << parameters.count(Y) << std::endl;
if (parameters.count(Y)) {
it = OpRegister::instance()->Get("fc");
}
}
if (!it) {
it = OpRegister::instance()->Get(op_type);
}
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", op_type);
it->SetEngine(engine);
(*it)(op, scope, test_mode);
}
void ConvertBlock(const framework::proto::BlockDesc &block,
const std::unordered_set<std::string> &parameters,
const framework::Scope &scope, AnakinNvEngine *engine) {
std::unique_lock<std::mutex> lock(mutex_);
for (auto i = 0; i < block.ops_size(); i++) {
auto &op = block.ops(i);
ConvertOp(op, parameters, scope, engine);
}
}
void SetEngine(AnakinNvEngine *engine) { engine_ = engine; }
virtual ~AnakinOpConverter() {}
protected:
bool test_mode_;
AnakinNvEngine *engine_{nullptr};
private:
std::unordered_map<std::string, AnakinOpConverter *> converters_;
framework::Scope *scope_{nullptr};
std::mutex mutex_;
};
} // namespace anakin
} // namespace inference
} // namespace paddle
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
struct anakin_##op_type__##_converter \
: public ::paddle::framework::Registrar { \
anakin_##op_type__##_converter() { \
::paddle::inference:: \
Registry<paddle::inference::anakin::AnakinOpConverter>::Register< \
::paddle::inference::anakin::Converter__>(#op_type__); \
} \
}; \
anakin_##op_type__##_converter anakin_##op_type__##_converter__; \
int TouchConverterRegister_anakin_##op_type__() { \
anakin_##op_type__##_converter__.Touch(); \
return 0; \
}
#define USE_ANAKIN_CONVERTER(op_type__) \
extern int TouchConverterRegister_anakin_##op_type__(); \
static int use_op_converter_anakin_##op_type__ __attribute__((unused)) = \
TouchConverterRegister_anakin_##op_type__();
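// Hedged usage sketch for the two macros above (op and converter names taken
// from the fc converter earlier in this patch; illustration only):
//
//   // In the converter's .cc file, defines and touches the registrar:
//   REGISTER_ANAKIN_OP_CONVERTER(fc, FcOpConverter);
//
//   // In any translation unit that must force the registration to link:
//   USE_ANAKIN_CONVERTER(fc);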
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/registrar.h"
namespace paddle {
namespace inference {
namespace anakin {
std::shared_ptr<AnakinOpConverter> OpRegister::Get(const std::string &name) {
auto it = registry_.find(name);
if (it == registry_.end()) return nullptr;
return it->second();
}
OpRegister *OpRegister::instance() {
static OpRegister factory;
return &factory;
}
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
namespace paddle {
namespace inference {
namespace anakin {
class AnakinOpConverter;
class OpRegister {
public:
OpRegister() = default;
std::shared_ptr<AnakinOpConverter> Get(const std::string &name);
static OpRegister *instance();
void OpRegisterFn(const std::string &name,
std::function<std::shared_ptr<AnakinOpConverter>()> fn) {
registry_[name] = fn;
}
private:
using RegisterFnType = std::function<std::shared_ptr<AnakinOpConverter>()>;
std::map<std::string, std::function<std::shared_ptr<AnakinOpConverter>()>>
registry_;
};
template <typename T, typename... Args>
class Registrar {
public:
Registrar(const std::string &name, Args... args) {
std::shared_ptr<AnakinOpConverter> converter =
std::make_shared<T>(std::move(args)...);
OpRegister::instance()->OpRegisterFn(name,
[converter]() { return converter; });
}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
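// Hedged design note: the Registrar above constructs one shared converter and
// registers a lambda capturing it, so every OpRegister::Get(name) call
// returns that same instance rather than a fresh converter per call.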
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/fc.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
TEST(fc_op, test) {
auto fc_converter = OpRegister::instance()->Get("fc");
ASSERT_TRUE(fc_converter != nullptr);
// Registrar<FcOpConverter> register_fc("fc");
// auto fc = std::make_shared<FcOpConverter>();
std::unordered_set<std::string> parameters({"mul_y"});
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
validator.DeclInputVar("mul_x", {1, 1, 1, 1});
validator.DeclParamVar("mul_y", {1, 2});
validator.DeclOutputVar("mul_out", {1, 1, 1, 2});
// Prepare Op description
framework::OpDesc desc;
desc.SetType("mul");
desc.SetInput("X", {"mul_x"});
desc.SetInput("Y", {"mul_y"});
desc.SetOutput("Out", {"mul_out"});
int num_flatten_dims = 3;
desc.SetAttr("x_num_col_dims", num_flatten_dims);
validator.SetOp(*desc.Proto());
validator.Execute(10);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(mul);
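// Hedged note: USE_OP(mul) links the Fluid mul kernels into this test
// because AnakinConvertValidation::Execute first runs the original op to
// produce the reference output that the anakin result is logged against.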
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/enforce.h"
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::Precision;
using anakin::saber::NV;
using anakin::saber::X86;
using anakin::saber::Shape;
using anakin::PBlock;
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
/*
* Get a random float value in the range [low, high].
*/
float random(float low, float high) {
static std::random_device rd;
static std::mt19937 mt(rd());
std::uniform_real_distribution<double> dist(low, high);
return dist(mt);
}
void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
const platform::DeviceContext& ctx) {
auto dims = tensor->dims();
size_t num_elements = analysis::AccuDims(dims, dims.size());
PADDLE_ENFORCE_GT(num_elements, 0);
platform::CPUPlace cpu_place;
framework::LoDTensor temp_tensor;
temp_tensor.Resize(dims);
auto* temp_data = temp_tensor.mutable_data<float>(cpu_place);
for (size_t i = 0; i < num_elements; i++) {
*(temp_data + i) = random(0., 1.);
}
TensorCopySync(temp_tensor, place, tensor);
}
/*
* Helper to validate the correctness between a Fluid Op and the
* corresponding anakin layer.
*/
class AnakinConvertValidation {
using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>;
public:
AnakinConvertValidation() = delete;
AnakinConvertValidation(const std::unordered_set<std::string>& parameters,
const framework::Scope& scope)
: parameters_(parameters), scope_(scope), place_(0) {
PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
engine_.reset(new AnakinEngine<NV, Precision::FP32>(true));
}
// Declare a Variable as input with random initialization.
void DeclInputVar(const std::string& name,
const std::vector<int> tensor_dims) {
DeclVar(name, tensor_dims);
// should declare the anakin input here.
}
void DeclParamVar(const std::string& name, const std::vector<int> dim_vec) {
DeclVar(name, dim_vec);
}
void DeclOutputVar(const std::string& name, const std::vector<int> dim_vec) {
DeclVar(name, dim_vec);
// should declare anakin output here.
}
void DeclVar(const std::string& name, const std::vector<int> dim_vec) {
platform::CUDADeviceContext ctx(place_);
auto* x = scope_.Var(name);
auto* x_tensor = x->GetMutable<framework::LoDTensor>();
x_tensor->Resize(framework::make_ddim(dim_vec));
RandomizeTensor(x_tensor, place_, ctx);
}
void SetOp(const framework::proto::OpDesc& desc) {
op_ = framework::OpRegistry::CreateOp(desc);
op_desc_.reset(new framework::OpDesc(desc, nullptr));
// should init anakin engine here.
Singleton<AnakinOpConverter>::Global().ConvertOp(
desc, parameters_, scope_, engine_.get(), true /*test_mode*/);
engine_->Freeze();
for (const auto& input : op_desc_->InputArgumentNames()) {
if (parameters_.count(input)) continue;
auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(scope_,
input);
auto t_shape = framework::vectorize2int(t.dims());
engine_->SetInputShape(input, t_shape);
}
engine_->Optimize();
engine_->InitGraph();
}
// We use the set 'neglected_output' here because, for some Ops like batch
// norm, the outputs specified in the op desc are only used during training,
// so we should neglect those outputs during inference.
void Execute(int batch_size,
std::unordered_set<std::string> neglected_output = {}) {
// Execute Fluid Op
platform::CUDADeviceContext ctx(place_);
op_->Run(scope_, place_);
// std::vector<framework::LoDTensor> input_vector;
// std::vector<framework::LoDTensor> output_vector;
std::map<std::string, framework::LoDTensor*> inputs;
for (const auto& input : op_desc_->InputArgumentNames()) {
if (parameters_.count(input)) continue;
auto* var = scope_.FindVar(input);
auto tensor = var->GetMutable<framework::LoDTensor>();
inputs.insert({input, tensor});
}
std::map<std::string, framework::LoDTensor*> outputs;
std::vector<std::vector<float>> fluid_outputs;
for (const auto& output : op_desc_->OutputArgumentNames()) {
if (neglected_output.count(output)) continue;
std::vector<float> fluid_out;
auto* var = scope_.FindVar(output);
auto tensor = var->GetMutable<framework::LoDTensor>();
framework::TensorToVector(*tensor, ctx, &fluid_out);
fluid_outputs.push_back(fluid_out);
// size_t fluid_out_size = fluid_out.size();
/*for (size_t i = 0; i < fluid_out_size; i++) {
std::cout << fluid_out[i] << std::endl;
}*/
outputs.insert({output, tensor});
}
engine_->Execute(inputs, outputs);
int i_output = 0;
for (const auto& output : op_desc_->OutputArgumentNames()) {
if (neglected_output.count(output)) continue;
std::vector<float> anakin_out;
auto* var = scope_.FindVar(output);
auto tensor = var->GetMutable<framework::LoDTensor>();
framework::TensorToVector(*tensor, ctx, &anakin_out);
size_t anakin_out_size = anakin_out.size();
auto fluid_out = fluid_outputs[i_output++];
for (size_t i = 0; i < anakin_out_size; i++) {
LOG(INFO) << "Output[" << i << "]: anakin[" << anakin_out[i] << "], "
<< "fluid[" << fluid_out[i] << "]";
}
}
}
framework::Scope& scope() { return scope_; }
private:
std::unique_ptr<AnakinNvEngineT> engine_{nullptr};
cudaStream_t stream_;
std::unique_ptr<framework::OperatorBase> op_;
std::unique_ptr<framework::OpDesc> op_desc_;
const std::unordered_set<std::string>& parameters_;
framework::Scope& scope_;
platform::CUDAPlace place_;
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/engine.h"
#include <algorithm>
#include <cstring>
#include <map>
#include <utility>
#include "paddle/fluid/framework/ddim.h"
using anakin::Precision;
using anakin::OpRunType;
using paddle::framework::LoDTensor;
template <typename T, Precision P, OpRunType O>
using AnakinNetT = anakin::Net<T, P, O>;
template <typename T, Precision P>
using AnakinGraphT = anakin::graph::Graph<T, P>;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(bool need_summary)
: graph_(new AnakinGraphT<TargetT, PrecisionType>()),
net_(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary)) {}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
AnakinEngine<TargetT, PrecisionType, RunType>::~AnakinEngine() {}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::SetInputShape(
const std::string &name, std::vector<int> shape) {
graph_->AddOpAttr<::anakin::PTuple<int>>(name, "input_shape",
std::move(shape));
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::InitGraph() {
net_->init(*graph_);
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::AddOp(
const std::string &name, const std::string &type,
const std::vector<std::string> &inputs,
const std::vector<std::string> &outputs) {
PADDLE_ENFORCE(graph_->AddOp(name, type, inputs, outputs), "Add operation.");
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
const std::map<std::string, framework::LoDTensor *> &inputs,
const std::map<std::string, framework::LoDTensor *> &outputs) {
for (const auto &input : inputs) {
auto *tensor = input.second;
auto *data = tensor->data<float>();
auto shape = framework::vectorize2int(tensor->dims());
::anakin::saber::Shape anakin_shape(shape);
auto *anakin_input = net_->get_in(input.first);
::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
anakin_shape);
anakin_input->share_from(tmp_anakin_tensor);
}
for (const auto &output : outputs) {
auto *tensor = output.second;
auto *data = tensor->data<float>();
auto shape = framework::vectorize2int(tensor->dims());
::anakin::saber::Shape anakin_shape(shape);
auto *anakin_output = net_->get_out(output.first);
::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
anakin_shape);
anakin_output->share_from(tmp_anakin_tensor);
}
net_->prediction();
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Freeze() {
PADDLE_ENFORCE(graph_->Freeze(), "Freeze anakin subgraph.");
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Optimize() {
PADDLE_ENFORCE(graph_->Optimize(), "Graph optimization.");
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
std::unique_ptr<AnakinEngine<TargetT, PrecisionType, RunType>>
AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {
auto *engine = new AnakinEngine();
engine->net_ = std::move(net_->Clone());
return std::unique_ptr<AnakinEngine>(engine);
}
template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>;
} // namespace anakin
} // namespace inference
} // namespace paddle
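// Hedged note: the explicit instantiation above keeps the template member
// definitions in this .cc file; only the <NV, Precision::FP32> combination
// is compiled, matching the AnakinNvEngine alias used by the converters.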
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "framework/core/net/net.h"
#include "framework/core/types.h"
#include "framework/graph/graph.h"
#include "saber/saber_types.h"
namespace anakin {
template <typename, Precision, OpRunType>
class Net;
namespace graph {
template <typename, Precision>
class Graph;
} // namespace graph
} // namespace anakin
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionType,
::anakin::OpRunType RunType = ::anakin::OpRunType::ASYNC>
class AnakinEngine {
public:
explicit AnakinEngine(bool need_summary = false);
~AnakinEngine();
void InitGraph();
void SetInputShape(const std::string &name, std::vector<int> shape);
void AddOp(const std::string &name, const std::string &type,
const std::vector<std::string> &inputs,
const std::vector<std::string> &outputs);
template <typename T>
void AddOpAttr(const std::string &op_name, const std::string &attr_name,
const T &attr_value) {
PADDLE_ENFORCE(graph_->AddOpAttr(op_name, attr_name, attr_value),
"Add operation's attribution.");
}
std::unique_ptr<AnakinEngine> Clone();
void Freeze();
void Optimize();
void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
const std::map<std::string, framework::LoDTensor *> &outputs);
private:
using NetT = ::anakin::Net<TargetT, PrecisionType, RunType>;
using GraphT = ::anakin::graph::Graph<TargetT, PrecisionType>;
std::unique_ptr<GraphT> graph_;
std::unique_ptr<NetT> net_;
};
} // namespace anakin
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <map>
#include "framework/core/net/net.h"
#include "framework/graph/graph.h"
#include "framework/graph/graph_global_mem.h"
#include "paddle/fluid/inference/anakin/engine.h"
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::Precision;
using anakin::saber::NV;
using anakin::saber::X86;
using anakin::saber::Shape;
using anakin::PBlock;
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
class TestAnakinEngine : public ::testing::Test {
protected:
void SetUp() override;
void TearDown() override {}
protected:
using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>;
std::unique_ptr<AnakinNvEngineT> engine_{nullptr};
};
void TestAnakinEngine::SetUp() {
engine_.reset(new AnakinEngine<NV, Precision::FP32>(true));
}
TEST_F(TestAnakinEngine, Execute) {
engine_->AddOp("op1", "Dense", {"x"}, {"y"});
engine_->AddOpAttr("op1", "out_dim", 2);
engine_->AddOpAttr("op1", "bias_term", false);
engine_->AddOpAttr("op1", "axis", 1);
std::vector<int> shape = {1, 1, 1, 2};
Shape tmp_shape(shape);
// PBlock<NV> weight1(tmp_shape);
auto *weight1 =
GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(tmp_shape);
// auto *weight1 = new PBlock<NV>(tmp_shape, AK_FLOAT);
float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
cpu_data[0] = 2.;
weight1->d_tensor().set_shape(tmp_shape);
weight1->d_tensor().copy_from(weight1->h_tensor());
engine_->AddOpAttr("op1", "weight_1", *weight1);
engine_->Freeze();
// PTuple<int> input_shape = {1};
// engine_->AddOpAttr("x", "input_shape", input_shape);
engine_->SetInputShape("x", {1, 1, 1, 1});
engine_->Optimize();
engine_->InitGraph();
framework::LoDTensor x;
framework::LoDTensor y;
x.Resize({1, 1, 1, 1});
y.Resize({1, 1, 1, 2});
auto *x_data = x.mutable_data<float>(platform::CUDAPlace());
float x_data_cpu[] = {1.};
cudaMemcpy(x_data, x_data_cpu, sizeof(float), cudaMemcpyHostToDevice);
std::map<std::string, framework::LoDTensor *> inputs = {{"x", &x}};
auto *y_data = y.mutable_data<float>(platform::CUDAPlace());
std::map<std::string, framework::LoDTensor *> outputs = {{"y", &y}};
engine_->Execute(inputs, outputs);
auto *y_data_gpu = y_data;
float y_data_cpu[2];
cudaMemcpy(y_data_cpu, y_data_gpu, sizeof(float) * 2, cudaMemcpyDeviceToHost);
LOG(INFO) << "output value: " << y_data_cpu[0] << ", " << y_data_cpu[1];
}
} // namespace anakin
} // namespace inference
} // namespace paddle
......@@ -13,7 +13,9 @@
// limitations under the License.
#include "paddle/fluid/inference/api/paddle_pass_builder.h"
#ifdef PADDLE_WITH_CUDA
#include <cudnn.h>
#endif
#include <glog/logging.h>
namespace paddle {
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/cudnn_desc.h"
namespace paddle {
namespace operators {
using framework::Tensor;
using platform::ActivationDescriptor;
using platform::TensorDescriptor;
template <typename Functor>
class CudnnActivationKernel
    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    const framework::Tensor* X = nullptr;
    framework::Tensor* Out = nullptr;
    ExtractActivationTensor(context, &X, &Out);
    ActivationDescriptor act_desc;
    TensorDescriptor x_desc, out_desc;
    x_desc.set(detail::Ref(X));
    out_desc.set(detail::Ref(Out));
  }
};
} // namespace operators
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/cudnn_desc.h"
namespace paddle {
namespace operators {
using framework::Tensor;
using platform::ActivationDescriptor;
using platform::TensorDescriptor;
using platform::CUDADeviceContext;
template <typename T>
struct CudnnActivationFunctor {
using ELEMENT_TYPE = T;
CudnnActivationFunctor(const CUDADeviceContext& ctx, const T& c,
const cudnnActivationMode_t& m)
: ctx_(ctx), coef_(c), mode_(m) {}
void operator()(const Tensor& x, Tensor* out) {
ActivationDescriptor act_desc;
act_desc.set(mode_, coef_);
TensorDescriptor x_desc, out_desc;
x_desc.set(x);
out_desc.set(detail::Ref(out));
PADDLE_ENFORCE(platform::dynload::cudnnActivationForward(
ctx_.cudnn_handle(), act_desc.desc(),
platform::CudnnDataType<T>::kOne(), x_desc.desc(), x.data<T>(),
platform::CudnnDataType<T>::kZero(), out_desc.desc(),
out->mutable_data<T>(ctx_.GetPlace())));
}
const CUDADeviceContext& ctx_;
const T coef_;
const cudnnActivationMode_t mode_;
};
template <typename T>
struct CudnnActivationGradFunctor {
using ELEMENT_TYPE = T;
CudnnActivationGradFunctor(const CUDADeviceContext& ctx, const T& c,
const cudnnActivationMode_t& m)
: ctx_(ctx), coef_(c), mode_(m) {}
  void operator()(const Tensor& x, const Tensor& out, const Tensor& dout,
Tensor* dx) {
ActivationDescriptor act_desc;
act_desc.set(mode_, coef_);
TensorDescriptor x_desc, out_desc, dout_desc, dx_desc;
x_desc.set(x);
out_desc.set(out);
dout_desc.set(dout);
dx_desc.set(detail::Ref(dx));
PADDLE_ENFORCE(platform::dynload::cudnnActivationBackward(
ctx_.cudnn_handle(), act_desc.desc(),
platform::CudnnDataType<T>::kOne(), out_desc.desc(), out.data<T>(),
dout_desc.desc(), dout.data<T>(), x_desc.desc(), x.data<T>(),
platform::CudnnDataType<T>::kZero(), dx_desc.desc(),
dx->mutable_data<T>(ctx_.GetPlace())));
}
const CUDADeviceContext& ctx_;
const T coef_;
const cudnnActivationMode_t mode_;
};
template <typename T>
struct CudnnReluFunctor : public CudnnActivationFunctor<T> {
explicit CudnnReluFunctor(const CUDADeviceContext& ctx)
: CudnnActivationFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_RELU) {}
};
template <typename T>
struct CudnnReluGradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnReluGradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_RELU) {}
};
template <typename T>
struct CudnnRelu6Functor : public CudnnActivationFunctor<T> {
explicit CudnnRelu6Functor(const CUDADeviceContext& ctx)
: CudnnActivationFunctor<T>(ctx, 6.0, CUDNN_ACTIVATION_CLIPPED_RELU) {}
};
template <typename T>
struct CudnnRelu6GradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnRelu6GradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 6.0, CUDNN_ACTIVATION_CLIPPED_RELU) {
}
};
template <typename T>
struct CudnnSigmoidFunctor : public CudnnActivationFunctor<T> {
explicit CudnnSigmoidFunctor(const CUDADeviceContext& ctx)
: CudnnActivationFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_SIGMOID) {}
};
template <typename T>
struct CudnnSigmoidGradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnSigmoidGradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_SIGMOID) {}
};
template <typename T>
struct CudnnTanhFunctor : public CudnnActivationFunctor<T> {
explicit CudnnTanhFunctor(const CUDADeviceContext& ctx)
: CudnnActivationFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_TANH) {}
};
template <typename T>
struct CudnnTanhGradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnTanhGradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_TANH) {}
};
template <typename Functor>
class CudnnActivationKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
using T = typename Functor::ELEMENT_TYPE;
void Compute(const framework::ExecutionContext& context) const override {
const framework::Tensor* X = nullptr;
framework::Tensor* Out = nullptr;
ExtractActivationTensor(context, &X, &Out);
Out->mutable_data<T>(context.GetPlace());
auto& dev_ctx = context.template device_context<CUDADeviceContext>();
Functor functor(dev_ctx);
functor(detail::Ref(X), Out);
}
};
template <typename Functor>
class CudnnActivationGradKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
using T = typename Functor::ELEMENT_TYPE;
void Compute(const framework::ExecutionContext& context) const override {
const framework::Tensor *X, *Out, *dOut;
X = Out = dOut = nullptr;
framework::Tensor* dX = nullptr;
ExtractActivationGradTensor(context, &X, &Out, &dOut, &dX);
dX->mutable_data<T>(context.GetPlace());
auto& dev_ctx = context.template device_context<CUDADeviceContext>();
Functor functor(dev_ctx);
functor(detail::Ref(X), detail::Ref(Out), detail::Ref(dOut), dX);
}
};
} // namespace operators
} // namespace paddle
namespace plat = paddle::platform;
namespace ops = paddle::operators;
#define FOR_EACH_CUDNN_OP_FUNCTOR(__macro) \
__macro(relu, CudnnReluFunctor, CudnnReluGradFunctor); \
__macro(relu6, CudnnRelu6Functor, CudnnRelu6GradFunctor); \
  __macro(sigmoid, CudnnSigmoidFunctor, CudnnSigmoidGradFunctor);       \
__macro(tanh, CudnnTanhFunctor, CudnnTanhGradFunctor)
#define REGISTER_ACTIVATION_CUDNN_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_KERNEL(act_type, CUDNN, plat::CUDAPlace, \
ops::CudnnActivationKernel<ops::functor<float>>, \
ops::CudnnActivationKernel<ops::functor<double>>); \
REGISTER_OP_KERNEL( \
act_type##_grad, CUDNN, plat::CUDAPlace, \
ops::CudnnActivationGradKernel<ops::grad_functor<float>>, \
ops::CudnnActivationGradKernel<ops::grad_functor<double>>);
FOR_EACH_CUDNN_OP_FUNCTOR(REGISTER_ACTIVATION_CUDNN_KERNEL);
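// The two macros above follow the X-macro pattern: FOR_EACH_CUDNN_OP_FUNCTOR
// fixes the list of (op, functor, grad_functor) triples once, and the
// registration macro is applied to every entry. A minimal standalone sketch of
// the same pattern (illustrative names only, not Paddle APIs):
#include <cstdio>

#define FOR_EACH_ACT(__macro) \
  __macro(relu);              \
  __macro(tanh);              \
  __macro(sigmoid)

#define PRINT_ACT(name) std::printf("registered: %s\n", #name)

int main() {
  FOR_EACH_ACT(PRINT_ACT);  // expands to three std::printf calls
  return 0;
}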
......@@ -16,29 +16,36 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/operators/mkldnn/mkldnn_activation_op.h"
#include "paddle/fluid/platform/port.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cudnn_helper.h"
#endif
namespace paddle {
namespace operators {
using paddle::framework::Tensor;
#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \
class OP_NAME##OpMaker \
: public ::paddle::framework::OpProtoAndCheckerMaker { \
public: \
void Make() override { \
AddInput("X", "Input of " #OP_NAME " operator"); \
AddOutput("Out", "Output of " #OP_NAME " operator"); \
AddAttr<bool>("use_mkldnn", \
"(bool, default false) Only used in mkldnn kernel") \
.SetDefault(false); \
AddAttr<bool>("use_cudnn", \
"(bool, default false) Only used in cudnn kernel, need " \
"install cudnn") \
.SetDefault(false); \
AddAttr<bool>( \
"is_test", \
"(bool, default false) Set to true for inference only, false " \
"for training. Some layers may run faster when this is true.") \
.SetDefault(false); \
AddComment(OP_COMMENT); \
} \
}
#define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME, KERNEL_TYPE) \
......@@ -67,6 +74,12 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx,
const std::string& name) {
framework::LibraryType library{framework::LibraryType::kPlain};
framework::DataLayout layout = framework::DataLayout::kAnyLayout;
#ifdef PADDLE_WITH_CUDA
auto it1 = oper.Attrs().find("use_cudnn");
if (it1 != oper.Attrs().end() && platform::CanCUDNNBeUsed(ctx)) {
library = framework::LibraryType::kCUDNN;
}
#endif
#ifdef PADDLE_WITH_MKLDNN
auto it = oper.Attrs().find("use_mkldnn");
if (library == framework::LibraryType::kPlain && it != oper.Attrs().end() &&
......
......@@ -11,6 +11,7 @@ limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <algorithm>
#include <string>
#include <unordered_set>
#include <utility>
......@@ -24,6 +25,7 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/platform/float16.h"
#ifdef PADDLE_WITH_MKLDNN
......@@ -41,53 +43,115 @@ static std::unordered_set<std::string> InplaceOpSet = {
"floor", "reciprocal", "relu6", "soft_relu", "hard_sigmoid",
};
static bool IsInplace(const std::string& op) {
  bool inplace = InplaceOpSet.count(op);
  // also check the forward op of a *_grad op, e.g. "relu_grad" -> "relu"
  const std::string kGradSuffix = "_grad";
  if (op.size() > kGradSuffix.size() &&
      op.compare(op.size() - kGradSuffix.size(), kGradSuffix.size(),
                 kGradSuffix) == 0) {
    inplace = InplaceOpSet.count(op.substr(0, op.size() - kGradSuffix.size()));
  }
  return inplace;
}
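// A standalone check of the "_grad" suffix handling above (hypothetical helper,
// not part of the Paddle sources): a grad op such as "relu_grad" should be
// treated as inplace-capable whenever its forward op "relu" is in the set.
#include <cassert>
#include <string>

static std::string StripGradSuffix(const std::string& op) {
  const std::string kGradSuffix = "_grad";
  if (op.size() > kGradSuffix.size() &&
      op.compare(op.size() - kGradSuffix.size(), kGradSuffix.size(),
                 kGradSuffix) == 0) {
    return op.substr(0, op.size() - kGradSuffix.size());  // "relu_grad" -> "relu"
  }
  return op;
}

int main() {
  assert(StripGradSuffix("relu_grad") == "relu");
  assert(StripGradSuffix("relu") == "relu");
  return 0;
}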
/* The following operator can be used to process SelectedRows, because the
* output of those operator for zero is zero too.
*/
static std::unordered_set<std::string> CanBeUsedBySelectedRows = {
"abs", "abs_grad", "square", "square_grad", "sqrt", "sqrt_grad"};
inline void ExtractActivationTensor(const framework::ExecutionContext& context,
const framework::Tensor** X,
framework::Tensor** Out) {
auto x_var = context.InputVar("X");
auto out_var = context.OutputVar("Out");
PADDLE_ENFORCE(x_var != nullptr,
"Cannot get input Variable X, variable name = %s",
context.op().Input("X"));
PADDLE_ENFORCE(out_var != nullptr,
"Cannot get output Variable Out, variable name = %s",
context.op().Output("Out"));
if (CanBeUsedBySelectedRows.count(context.op().Type())) {
*X = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var);
*Out = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
out_var);
} else {
*X = context.Input<framework::Tensor>("X");
*Out = context.Output<framework::Tensor>("Out");
}
PADDLE_ENFORCE(*Out != nullptr,
"Cannot get output tensor Out, variable name = %s",
context.op().Output("Out"));
}
inline void ExtractActivationGradTensor(
const framework::ExecutionContext& context, const framework::Tensor** X,
const framework::Tensor** Out, const framework::Tensor** dOut,
framework::Tensor** dX) {
auto out_var = context.InputVar("Out");
auto out_grad_var = context.InputVar(framework::GradVarName("Out"));
auto x_grad_var = context.OutputVar(framework::GradVarName("X"));
PADDLE_ENFORCE(out_var != nullptr,
"Cannot get input Variable Out, variable name = %s",
context.op().Input("Out"));
PADDLE_ENFORCE(out_grad_var != nullptr,
"Cannot get input Variable %s, variable name = %s",
framework::GradVarName("Out"),
context.op().Input(framework::GradVarName("Out")));
PADDLE_ENFORCE(x_grad_var != nullptr,
"Cannot get output Variable %s, variable name = %s",
framework::GradVarName("X"),
context.op().Output(framework::GradVarName("X")));
if (CanBeUsedBySelectedRows.count(context.op().Type())) {
*Out = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var);
*dOut = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(
*out_grad_var);
*dX = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
x_grad_var);
} else {
*Out = context.Input<framework::Tensor>("Out");
*dOut = context.Input<framework::Tensor>(framework::GradVarName("Out"));
*dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
}
PADDLE_ENFORCE(*dX != nullptr,
"Cannot get output tensor %s, variable name = %s",
framework::GradVarName("X"),
context.op().Output(framework::GradVarName("X")));
  bool inplace = IsInplace(context.op().Type());
  if (!inplace) {
    auto x_var = context.InputVar("X");
    PADDLE_ENFORCE(x_var != nullptr,
                   "Cannot get input tensor X, variable name = %s",
                   context.op().Input("X"));
    if (CanBeUsedBySelectedRows.count(context.op().Type())) {
      *X = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var);
    } else {
      *X = context.Input<framework::Tensor>("X");
    }
  } else {
    VLOG(10) << " Inplace activation of Op : " << context.op().Type();
    *X = *dX;
  }
}
template <typename DeviceContext, typename Functor>
class ActivationKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
using T = typename Functor::ELEMENT_TYPE;
void Compute(const framework::ExecutionContext& context) const override {
const framework::Tensor* X = nullptr;
framework::Tensor* Out = nullptr;
ExtractActivationTensor(context, &X, &Out);
Out->mutable_data<T>(context.GetPlace());
    auto x = framework::EigenVector<T>::Flatten(detail::Ref(X));
    auto out = framework::EigenVector<T>::Flatten(detail::Ref(Out));
auto* place =
context.template device_context<DeviceContext>().eigen_device();
Functor functor;
......@@ -106,55 +170,15 @@ class ActivationGradKernel
public:
using T = typename Functor::ELEMENT_TYPE;
void Compute(const framework::ExecutionContext& context) const override {
auto out_var = context.InputVar("Out");
auto out_grad_var = context.InputVar(framework::GradVarName("Out"));
auto x_grad_var = context.OutputVar(framework::GradVarName("X"));
PADDLE_ENFORCE(out_var != nullptr,
"Cannot get input Variable Out, variable name = %s",
context.op().Input("Out"));
PADDLE_ENFORCE(out_grad_var != nullptr,
"Cannot get input Variable %s, variable name = %s",
framework::GradVarName("Out"),
context.op().Input(framework::GradVarName("Out")));
PADDLE_ENFORCE(x_grad_var != nullptr,
"Cannot get output Variable %s, variable name = %s",
framework::GradVarName("X"),
context.op().Output(framework::GradVarName("X")));
framework::Tensor Out, dOut, *dX;
if (CanBeUsedBySelectedRows.count(context.op().Type())) {
Out = detail::Ref(
paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var),
"Cannot get input Tensor Out, variable name = %s",
context.op().Input("Out"));
dOut =
detail::Ref(paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(
*out_grad_var),
"Cannot get input Tensor %s, variable name = %s",
framework::GradVarName("Out"),
context.op().Input(framework::GradVarName("Out")));
dX = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
x_grad_var);
} else {
Out = detail::Ref(context.Input<framework::Tensor>("Out"),
"Cannot get input Tensor Out, variable name = %s",
context.op().Input("Out"));
dOut = detail::Ref(
context.Input<framework::Tensor>(framework::GradVarName("Out")),
"Cannot get input Tensor %s, variable name = %s",
framework::GradVarName("Out"),
context.op().Input(framework::GradVarName("Out")));
dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
}
PADDLE_ENFORCE(dX != nullptr,
"Cannot get output tensor %s, variable name = %s",
framework::GradVarName("X"),
context.op().Output(framework::GradVarName("X")));
const framework::Tensor *X, *Out, *dOut;
framework::Tensor* dX = nullptr;
X = Out = dOut = nullptr;
ExtractActivationGradTensor(context, &X, &Out, &dOut, &dX);
dX->mutable_data<T>(context.GetPlace());
    auto dout = framework::EigenVector<T>::Flatten(detail::Ref(dOut));
    auto out = framework::EigenVector<T>::Flatten(detail::Ref(Out));
    auto dx = framework::EigenVector<T>::Flatten(detail::Ref(dX));
auto x = framework::EigenVector<T>::Flatten(detail::Ref(X));
auto* place =
context.template device_context<DeviceContext>().eigen_device();
Functor functor;
......@@ -162,27 +186,7 @@ class ActivationGradKernel
for (auto& attr : attrs) {
*attr.second = context.Attr<float>(attr.first);
}
functor(*place, x, out, dout, dx);
}
};
......@@ -214,7 +218,6 @@ struct SigmoidFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
bool Inplace() const { return IsInplace("sigmoid"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -269,7 +272,6 @@ struct ExpFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct ExpGradFunctor : public BaseActivationFunctor<T> {
bool Inplace() const { return IsInplace("exp"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -288,7 +290,6 @@ struct ReluFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct ReluGradFunctor : public BaseActivationFunctor<T> {
bool Inplace() const { return IsInplace("relu"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -301,8 +302,28 @@ template <typename T>
struct GeluFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
// Because the execution context or device context cannot be passed in here,
// keep the macro guard for NVCC.
#if defined(PADDLE_WITH_MKLML) && !defined(_WIN32) && !defined(__APPLE__) && \
!defined(__OSX__) && !defined(PADDLE_WITH_CUDA)
auto x_data = x.data();
auto out_data = out.data();
int n = std::min(x.size(), out.size());
std::memset(out_data, 0, n * sizeof(T));
math::CBlas<T>::AXPY(n, static_cast<T>(M_SQRT1_2), x_data, 1, out_data, 1);
math::CBlas<T>::VMERF(n, out_data, out_data, VML_LA);
for (int i = 0; i < n; i++) {
out_data[i] += static_cast<T>(1);
}
math::CBlas<T>::VMUL(n, x_data, out_data, out_data);
for (int i = 0; i < n; i++) {
out_data[i] *= static_cast<T>(0.5);
}
#else
auto temp = (x * static_cast<T>(M_SQRT1_2)).erf();
out.device(d) = x * static_cast<T>(0.5) * (static_cast<T>(1) + temp);
#endif
}
};
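// Both branches of GeluFunctor above compute the same closed form,
// out = 0.5 * x * (1 + erf(x / sqrt(2))); the MKLML branch just vectorizes it
// with AXPY/VMERF/VMUL. A minimal standalone reference sketch (assumes
// M_SQRT1_2 from <cmath>; on MSVC define _USE_MATH_DEFINES first):
#include <cmath>
#include <cstdio>

static double GeluRef(double x) {
  return 0.5 * x * (1.0 + std::erf(x * M_SQRT1_2));
}

int main() {
  const double xs[] = {-2.0, -0.5, 0.0, 0.5, 2.0};
  for (double x : xs) {
    std::printf("gelu(%+.2f) = %+.6f\n", x, GeluRef(x));  // gelu(0) == 0
  }
  return 0;
}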
......@@ -331,7 +352,6 @@ struct TanhFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct TanhGradFunctor : public BaseActivationFunctor<T> {
bool Inplace() const { return IsInplace("tanh"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -437,7 +457,6 @@ struct SqrtFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct SqrtGradFunctor : public BaseActivationFunctor<T> {
bool Inplace() const { return IsInplace("sqrt"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -456,7 +475,6 @@ struct CeilFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct ZeroGradFunctor : public BaseActivationFunctor<T> {
bool Inplace() const { return IsInplace("ceil"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -573,7 +591,6 @@ struct ReciprocalFunctor : public BaseActivationFunctor<T> {
template <typename T>
struct ReciprocalGradFunctor : public BaseActivationFunctor<T> {
bool Inplace() const { return IsInplace("reciprocal"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -673,7 +690,6 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
bool Inplace() const { return IsInplace("relu6"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -755,7 +771,6 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
bool Inplace() const { return IsInplace("soft_relu"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......@@ -936,7 +951,6 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor<T> {
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"slope", &slope}, {"offset", &offset}};
}
bool Inplace() { return IsInplace("hard_sigmoid"); }
template <typename Device, typename X, typename Out, typename dOut,
typename dX>
void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
namespace operators {
static framework::proto::VarType::Type kDefaultDtype =
framework::proto::VarType::Type::VarType_Type_BOOL;
template <typename DeviceContext, typename T>
class AllocContinuousSpaceKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
auto &in_var_names = context.Inputs("Input");
auto &out_var_names = context.Outputs("Output");
auto &in_vars = context.MultiInputVar("Input");
auto out_vars = context.MultiOutputVar("Output");
PADDLE_ENFORCE_GT(in_var_names.size(), static_cast<size_t>(0));
PADDLE_ENFORCE_EQ(in_var_names.size(), out_var_names.size());
for (size_t i = 0; i < in_var_names.size(); ++i) {
// Only support LoDTensor
      PADDLE_ENFORCE_NOT_NULL(in_vars[i], "%s should not be nullptr.",
                              in_var_names[i]);
      PADDLE_ENFORCE_NOT_NULL(out_vars[i], "%s should not be nullptr.",
                              out_var_names[i]);
PADDLE_ENFORCE(in_vars[i]->IsType<framework::LoDTensor>());
PADDLE_ENFORCE(out_vars[i]->IsType<framework::LoDTensor>());
}
auto in_tensors = context.MultiInput<framework::LoDTensor>("Input");
if (context.Attr<bool>("check_name")) {
for (size_t i = 0; i < in_var_names.size(); ++i) {
PADDLE_ENFORCE_EQ(in_var_names[i], out_var_names[i]);
}
} else {
// Init the output as input
for (size_t i = 0; i < in_tensors.size(); ++i) {
out_vars[i]->GetMutable<framework::LoDTensor>()->Resize(
in_tensors[i]->dims());
}
}
auto &dev_ctx = context.template device_context<DeviceContext>();
// Get numel and dtype
size_t numel = 0;
auto dtype = kDefaultDtype;
GetMemSizeAndDtype(in_tensors, in_var_names, &numel, &dtype);
// Alloc the continuous space
auto fused_tensor = context.Output<framework::LoDTensor>("FusedOutput");
fused_tensor->Resize(framework::make_ddim({static_cast<int64_t>(numel)}))
.mutable_data(context.GetPlace(), dtype);
// Init the continuous space
auto out_tensors = context.MultiOutput<framework::LoDTensor>("Output");
int64_t offset = 0;
if (context.Attr<bool>("copy_data")) {
for (size_t i = 0; i < in_var_names.size(); ++i) {
int64_t len = out_tensors[i]->numel();
auto sub_tensor = fused_tensor->Slice(offset, offset + len);
offset += len;
framework::TensorCopy(*out_tensors[i], context.GetPlace(), dev_ctx,
&sub_tensor);
}
} else if (context.Attr<bool>("set_constant")) {
math::SetConstant<DeviceContext, T> set_constant;
set_constant(dev_ctx, fused_tensor,
static_cast<T>(context.Attr<float>("constant")));
}
// Make the outputs point to the continuous space.
offset = 0;
for (size_t i = 0; i < out_tensors.size(); ++i) {
int64_t len = out_tensors[i]->numel();
auto dim = out_tensors[i]->dims();
out_tensors[i]
->ShareDataWith(fused_tensor->Slice(offset, offset + len))
.Resize(dim);
offset += len;
VLOG(10) << "alloc_space_for_vars: output(" << out_var_names[i]
<< ") ,dim:(" << dim << ")"
<< " Address: " << out_tensors[i]->data<void>();
}
}
void GetMemSizeAndDtype(
const std::vector<const framework::LoDTensor *> &lod_tensors,
      const std::vector<std::string> &var_names, size_t *numel,
framework::proto::VarType::Type *dtype) const {
PADDLE_ENFORCE_EQ(lod_tensors.size(), var_names.size());
*numel = 0;
for (size_t i = 0; i < var_names.size(); ++i) {
PADDLE_ENFORCE(lod_tensors[i]->IsInitialized(), "%s is not initialized.",
var_names[i]);
auto p_dtype = lod_tensors[i]->type();
if (*dtype == kDefaultDtype) {
PADDLE_ENFORCE_NE(p_dtype, kDefaultDtype, "%s's type should not be %s.",
var_names[i], kDefaultDtype);
*dtype = p_dtype;
}
      PADDLE_ENFORCE_EQ(p_dtype, *dtype,
                        "The dtypes of input vars are not the same.");
auto size = lod_tensors[i]->numel();
PADDLE_ENFORCE_GT(size, 0);
VLOG(10) << "alloc_space_for_vars: input(" << var_names[i] << ") ,dim:("
<< lod_tensors[i]->dims() << ")";
*numel += size;
}
}
};
class AllocContinuousSpaceOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {}
};
class AllocContinuousSpaceOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Input",
"(vector<LoDTensor>) The input tensors of"
" alloc_continuous_space operator.")
.AsDuplicable();
AddOutput("Output",
"(vector<LoDTensor>) The output "
"tensors of alloc_continuous_space operator. And the address "
"of output tensors are continuous, they are sliced from the "
"tensor of FusedOutput.")
.AsDuplicable();
AddOutput("FusedOutput",
"(LoDTensor) The output tensor "
"of alloc_continuous_space operator. And the tensors of"
" Output is sliced from the tensor of FusedOutput.");
AddAttr<bool>("copy_data", "Whether to copy the Input value to Output.")
.SetDefault(false);
AddAttr<bool>("set_constant",
"Whether to set the Output with a constant value.")
.SetDefault(false);
AddAttr<float>("constant",
"If set_constant is true, the constant value will be used "
"to set the Output.")
.SetDefault(0.0);
AddAttr<bool>("check_name",
"Whether to check the name of Input and Output to ensure "
"they are the same separately.")
.SetDefault(false);
AddComment(R"DOC(
AllocContinuousSpace Operator.
alloc_continuous_space is used to make the addresses of the Output tensors
continuous according to the Input. This op allocates one big tensor whose
dtype is the same as that of the input tensors and whose size is the sum of
the input tensors' numel, so the dim of the big tensor is {sum(numel)}. The
big tensor is stored in FusedOutput, and the tensors of Output are sliced
from the tensor of FusedOutput.
Note that the dtypes of all Input tensors should be the same, and the dims
of each Input and its corresponding Output should be equal.
The tensors of Input and Output can be the same or different, and
alloc_continuous_space allows copying the values of Input to Output, or
setting the Output with a constant value.
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OPERATOR(alloc_continuous_space,
paddle::operators::AllocContinuousSpaceOp,
paddle::operators::AllocContinuousSpaceOpMaker);
namespace ops = paddle::operators;
REGISTER_OP_CPU_KERNEL(
alloc_continuous_space,
ops::AllocContinuousSpaceKernel<paddle::platform::CPUDeviceContext, int>,
ops::AllocContinuousSpaceKernel<paddle::platform::CPUDeviceContext, float>,
ops::AllocContinuousSpaceKernel<paddle::platform::CPUDeviceContext,
double>);
#ifdef PADDLE_WITH_CUDA
REGISTER_OP_CUDA_KERNEL(
alloc_continuous_space,
ops::AllocContinuousSpaceKernel<paddle::platform::CUDADeviceContext, int>,
ops::AllocContinuousSpaceKernel<paddle::platform::CUDADeviceContext, float>,
ops::AllocContinuousSpaceKernel<paddle::platform::CUDADeviceContext,
double>);
#endif
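// The kernel above lays every output out back-to-back inside one fused buffer:
// numel is summed, one allocation is made, and each output becomes a slice at
// a running offset. A standalone sketch of that offset arithmetic with plain
// pointers standing in for LoDTensor/Slice (illustrative only):
#include <cstdio>
#include <vector>

int main() {
  const std::vector<int> numels = {6, 10, 4};  // numel of each output tensor
  int total = 0;
  for (int n : numels) total += n;             // fused buffer numel: 20
  std::vector<float> fused(total, 0.f);        // the "FusedOutput" storage

  int offset = 0;
  for (size_t i = 0; i < numels.size(); ++i) {
    // each output is a view: Slice(offset, offset + len) in the op above
    std::printf("output %zu -> fused[%d, %d)\n", i, offset, offset + numels[i]);
    offset += numels[i];
  }
  return 0;
}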
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/benchmark/op_tester.h"
#include <fstream>
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_info.h"
......@@ -28,6 +29,7 @@ namespace operators {
namespace benchmark {
DEFINE_string(op_config_list, "", "Path of op config file.");
DEFINE_int32(specified_config_id, -1, "Test the specified op config.");
void OpTester::Init(const std::string &filename) {
Init(OpTesterConfig(filename));
......@@ -147,7 +149,7 @@ void OpTester::CreateInputVarDesc() {
var->SetShape(input->dims);
op_desc_.SetInput(name, {var_name});
input_lods_[var_name] = input->lod;
}
}
......@@ -162,7 +164,6 @@ void OpTester::CreateOutputVarDesc() {
var->SetDataType(framework::proto::VarType::FP32);
op_desc_.SetOutput(name, {var_name});
}
}
......@@ -218,16 +219,26 @@ void OpTester::CreateVariables(framework::Scope *scope) {
}
}
  // Allocate memory for input tensors
  for (auto &item : input_lods_) {
    auto &var_name = item.first;
    VLOG(3) << "Allocate memory for tensor " << var_name;
    auto &var_desc = vars_[var_name];
    std::vector<int64_t> shape = var_desc->GetShape();
    auto *var = scope->Var(var_name);
auto *tensor = var->GetMutable<framework::LoDTensor>();
SetupTensor<float>(tensor, shape, static_cast<float>(0.0),
static_cast<float>(1.0));
VLOG(3) << "Set lod for tensor " << var_name;
std::vector<std::vector<size_t>> &lod_vec = item.second;
framework::LoD lod;
for (size_t i = 0; i < lod_vec.size(); ++i) {
lod.push_back(lod_vec[i]);
}
tensor->set_lod(lod);
}
}
......@@ -282,10 +293,32 @@ std::string OpTester::DebugString() {
}
TEST(op_tester, base) {
  if (!FLAGS_op_config_list.empty()) {
    std::ifstream fin(FLAGS_op_config_list, std::ios::in | std::ios::binary);
std::ifstream fin(FLAGS_op_config_list, std::ios::in | std::ios::binary);
PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot open file %s",
FLAGS_op_config_list.c_str());
std::vector<OpTesterConfig> op_configs;
while (!fin.eof()) {
OpTesterConfig config;
bool result = config.Init(fin);
if (result) {
op_configs.push_back(config);
}
}
if (FLAGS_specified_config_id >= 0 &&
FLAGS_specified_config_id < static_cast<int>(op_configs.size())) {
OpTester tester;
tester.Init(op_configs[FLAGS_specified_config_id]);
tester.Run();
} else {
for (size_t i = 0; i < op_configs.size(); ++i) {
OpTester tester;
tester.Init(op_configs[i]);
tester.Run();
}
}
} else {
OpTester tester;
OpTesterConfig config;
config.op_type = "elementwise_add";
config.inputs.resize(2);
......@@ -294,8 +327,8 @@ TEST(op_tester, base) {
config.inputs[1].name = "Y";
config.inputs[1].dims = {64, 1};
tester.Init(config);
tester.Run();
}
}
} // namespace benchmark
......
......@@ -57,8 +57,7 @@ class OpTester {
std::string type_;
framework::OpDesc op_desc_;
std::unordered_map<std::string, std::unique_ptr<framework::VarDesc>> vars_;
std::unordered_map<std::string, std::vector<std::vector<size_t>>> input_lods_;
std::unique_ptr<framework::OperatorBase> op_;
platform::Place place_;
std::unique_ptr<framework::Scope> scope_;
......
......@@ -33,21 +33,64 @@ static bool EndWith(const std::string& str, const std::string& substr) {
return str.rfind(substr) == (str.length() - substr.length());
}
static void EraseEndSep(std::string* str,
std::string substr = kSepBetweenItems) {
if (EndWith(*str, substr)) {
str->erase(str->length() - substr.length(), str->length());
}
}
void OpInputConfig::ParseDims(std::istream& is) {
std::string dims_str;
is >> dims_str;
dims.clear();
std::string token;
std::istringstream token_stream(dims_str);
while (std::getline(token_stream, token, 'x')) {
dims.push_back(std::stoi(token));
}
}
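// ParseDims above consumes an 'x'-separated shape token. A standalone sketch of
// the same tokenization, e.g. "32x64x1" -> {32, 64, 1} (illustrative only):
#include <cstdint>
#include <cstdio>
#include <sstream>
#include <string>
#include <vector>

int main() {
  std::vector<int64_t> dims;
  std::istringstream token_stream("32x64x1");
  std::string token;
  while (std::getline(token_stream, token, 'x')) {
    dims.push_back(std::stoi(token));
  }
  for (int64_t d : dims) std::printf("%lld ", static_cast<long long>(d));
  std::printf("\n");  // prints: 32 64 1
  return 0;
}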
void OpInputConfig::ParseLoD(std::istream& is) {
std::string lod_str;
std::string start_sep =
std::string(kStartSeparator) + std::string(kStartSeparator);
std::string end_sep = std::string(kEndSeparator) + std::string(kEndSeparator);
std::string sep;
is >> sep;
if (StartWith(sep, start_sep)) {
lod_str += sep;
while (!EndWith(sep, end_sep)) {
is >> sep;
lod_str += sep;
}
}
EraseEndSep(&lod_str);
PADDLE_ENFORCE_GE(lod_str.length(), 4U);
VLOG(4) << "lod: " << lod_str << ", length: " << lod_str.length();
// Parse the lod_str
lod.clear();
for (size_t i = 1; i < lod_str.length() - 1;) {
if (lod_str[i] == '{') {
std::vector<size_t> level;
while (lod_str[i] != '}') {
++i;
std::string number;
while (lod_str[i] >= '0' && lod_str[i] <= '9') {
number += lod_str[i];
++i;
}
level.push_back(atoi(number.c_str()));
}
lod.push_back(level);
} else if (lod_str[i] == '}') {
++i;
}
}
}
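// ParseLoD above walks a nested-brace encoding of LoD levels. A standalone
// sketch of the same walk on "{{0,2,5}{0,1,3}}" -> {{0,2,5},{0,1,3}}; the
// exact surrounding separators are an assumption read off the parser above,
// not a documented format:
#include <cctype>
#include <cstdio>
#include <string>
#include <vector>

int main() {
  const std::string lod_str = "{{0,2,5}{0,1,3}}";
  std::vector<std::vector<size_t>> lod;
  for (size_t i = 1; i + 1 < lod_str.length();) {
    if (lod_str[i] == '{') {
      std::vector<size_t> level;
      ++i;
      while (i < lod_str.length() && lod_str[i] != '}') {
        if (std::isdigit(static_cast<unsigned char>(lod_str[i]))) {
          size_t value = 0;
          while (i < lod_str.length() &&
                 std::isdigit(static_cast<unsigned char>(lod_str[i]))) {
            value = value * 10 + static_cast<size_t>(lod_str[i] - '0');
            ++i;
          }
          level.push_back(value);
        } else {
          ++i;  // skip ',' between numbers
        }
      }
      ++i;  // skip the closing '}'
      lod.push_back(level);
    } else {
      ++i;
    }
  }
  for (const auto& level : lod) {
    for (size_t v : level) std::printf("%zu ", v);
    std::printf("| ");
  }
  std::printf("\n");  // prints: 0 2 5 | 0 1 3 |
  return 0;
}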
OpInputConfig::OpInputConfig(std::istream& is) {
......@@ -60,9 +103,9 @@ OpInputConfig::OpInputConfig(std::istream& is) {
is >> name;
EraseEndSep(&name);
} else if (sep == "dims" || sep == "dims:") {
ParseDims(is);
} else if (sep == "lod" || sep == "lod:") {
ParseLoD(is);
}
}
}
......@@ -76,7 +119,7 @@ OpTesterConfig::OpTesterConfig(const std::string& filename) {
Init(fin);
}
bool OpTesterConfig::Init(std::istream& is) {
std::string sep;
is >> sep;
if (sep == kStartSeparator) {
......@@ -95,9 +138,40 @@ void OpTesterConfig::Init(std::istream& is) {
} else if (sep == "input" || sep == "input:") {
OpInputConfig input_config(is);
inputs.push_back(input_config);
} else if (sep == "attrs" || sep == "attrs:") {
ParseAttrs(is);
} else {
if (sep != kEndSeparator) {
return false;
}
}
}
} else {
return false;
}
return true;
}
bool OpTesterConfig::ParseAttrs(std::istream& is) {
std::string sep;
is >> sep;
if (sep == kStartSeparator) {
while (true) {
std::string key;
is >> key;
if (key == kEndSeparator) {
break;
}
std::string value;
is >> value;
EraseEndSep(&key, ":");
EraseEndSep(&value);
attrs[key] = value;
}
}
return true;
}
const OpInputConfig* OpTesterConfig::GetInput(const std::string& name) {
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <istream>
#include <string>
#include <unordered_map>
#include <vector>
namespace paddle {
......@@ -26,19 +27,27 @@ struct OpInputConfig {
OpInputConfig() {}
explicit OpInputConfig(std::istream& is);
void ParseDims(std::istream& is);
void ParseLoD(std::istream& is);
std::string name;
std::vector<int64_t> dims;
std::vector<std::vector<size_t>> lod;
};
struct OpTesterConfig {
OpTesterConfig() {}
explicit OpTesterConfig(const std::string& filename);
void Init(std::istream& is);
bool Init(std::istream& is);
bool ParseAttrs(std::istream& is);
const OpInputConfig* GetInput(const std::string& name);
std::string op_type;
std::vector<OpInputConfig> inputs;
std::unordered_map<std::string, std::string> attrs;
int device_id{-1}; // CPU: -1
int repeat{1};
int profile{0};
......
......@@ -81,6 +81,7 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
framework::OpKernelType::kDefaultCustomizedTypeValue;
framework::LibraryType library{framework::LibraryType::kPlain};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
auto input_data_type = ctx.Input<Tensor>("Input")->type();
std::string data_format = ctx.Attr<std::string>("data_format");
framework::DataLayout layout = framework::StringToDataLayout(data_format);
......@@ -94,11 +95,14 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
platform::CanMKLDNNBeUsed(ctx)) {
library = framework::LibraryType::kMKLDNN;
layout = framework::DataLayout::kMKLDNN;
customized_type_value =
(input_data_type == framework::DataTypeTrait<int8_t>::DataType ||
input_data_type == framework::DataTypeTrait<uint8_t>::DataType)
? kConvMKLDNNINT8
: kConvMKLDNNFP32;
}
#endif
auto input_data_type = ctx.Input<Tensor>("Input")->type();
if (input_data_type != framework::proto::VarType::INT8 &&
input_data_type != framework::proto::VarType::UINT8) {
auto filter_data_type = ctx.Input<Tensor>("Filter")->type();
......
......@@ -32,14 +32,23 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
int rank = x_dims.size();
PADDLE_ENFORCE_EQ(rank, label_dims.size(),
"Input(X) and Input(Label) shall have the same rank.");
bool check = true;
if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
framework::product(label_dims) <= 0)) {
check = false;
}
if (check) {
PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
framework::slice_ddim(label_dims, 0, rank - 1),
"Input(X) and Input(Label) shall have the same shape "
"except the last dimension.");
}
if (ctx->Attrs().Get<bool>("soft_label")) {
if (check) {
PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
"If Attr(soft_label) == true, the last dimension of "
"Input(X) and Input(Label) should be equal.");
}
} else {
PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL,
"If Attr(softLabel) == false, the last dimension of "
......@@ -82,20 +91,32 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
"Input(Y@Grad) and Input(X) should have the same rank.");
PADDLE_ENFORCE_EQ(label_dims.size(), rank,
"Input(Label) and Input(X) should have the same rank.");
bool check = true;
if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
framework::product(label_dims) <= 0)) {
check = false;
}
if (check) {
PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
framework::slice_ddim(label_dims, 0, rank - 1),
"The Input(X) and Input(Label) should have the same "
"shape except the last dimension.");
PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
framework::slice_ddim(dy_dims, 0, rank - 1),
"The Input(X) and Input(Y@Grad) should have the same "
"shape except the last dimension.");
}
PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1,
"The last dimension of Input(Y@Grad) should be 1.");
if (ctx->Attrs().Get<bool>("soft_label")) {
if (check) {
PADDLE_ENFORCE_EQ(
x_dims[rank - 1], label_dims[rank - 1],
"When Attr(soft_label) == true, the last dimension of "
"Input(X) and Input(Label) should be equal.");
}
} else {
PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1,
"When Attr(soft_label) == false, the last dimension of "
......
......@@ -140,9 +140,6 @@ class DataNormOpMaker : public framework::OpProtoAndCheckerMaker {
"Scales of the history data batch, "
"will apply to output when training")
.AsIntermediate();
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC(
Data Normalization.
......
......@@ -172,6 +172,10 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
framework::make_ddim({1, static_cast<int>(variances.size())}),
ctx.GetPlace());
auto var_et = framework::EigenTensor<T, 2>::From(var_t);
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for
#endif
for (size_t i = 0; i < variances.size(); ++i) {
var_et(0, i) = variances[i];
}
......@@ -181,8 +185,15 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
vars->Resize({box_num, static_cast<int>(variances.size())});
auto e_vars = framework::EigenMatrix<T, Eigen::RowMajor>::From(*vars);
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for collapse(2)
#endif
for (int i = 0; i < box_num; ++i) {
for (int j = 0; j < variances.size(); ++j) {
e_vars(i, j) = variances[j];
}
}
vars->Resize(var_dim);
}
};
......
......@@ -77,8 +77,7 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
} else {
functor.RunMidWise(n, pre, post);
}
z->set_mkldnn_prim_desc(x->get_mkldnn_prim_desc());
} else {
PADDLE_ENFORCE(x->layout() == DataLayout::kMKLDNN &&
x->format() != memory::format::format_undef,
......@@ -116,7 +115,8 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_pd);
// create mkldnn memory for dst
auto dst_mem_pd = sum_pd.dst_primitive_desc();
memory dst_memory = memory(dst_mem_pd, z_data);
std::vector<primitive::at> inputs;
inputs.push_back(srcs[0]);
......@@ -129,9 +129,7 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
pipeline.push_back(sum_prim);
stream(stream::kind::eager).submit(pipeline).wait();
z->set_mkldnn_prim_desc(dst_mem_pd);
}
}
};
......@@ -152,24 +150,19 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
auto* out = dout;
auto *x = dout, *y = dout;
if (dx->dims() == dy->dims()) {
auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, T>(ctx);
if (dx) {
blas.VCOPY(dout->numel(), dout->data<T>(),
dx->mutable_data<T>(ctx.GetPlace()));
dx->set_mkldnn_prim_desc(dout->get_mkldnn_prim_desc());
}
if (dy) {
blas.VCOPY(dout->numel(), dout->data<T>(),
dy->mutable_data<T>(ctx.GetPlace()));
        dy->set_mkldnn_prim_desc(dout->get_mkldnn_prim_desc());
      }
} else {
......
......@@ -31,7 +31,7 @@ template <typename T>
struct FindAbsMaxFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext& ctx, const T* in,
const int num, T* out) {
*out = std::abs(*(std::max_element(in + 0, in + num, Compare<T>())));
}
};
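// Why the std::abs(...) wrapper in the fix above matters: with a comparator
// ordering by |a| < |b|, std::max_element on {-3, 2} returns the element -3,
// and the quantization scale must be 3, not -3. Standalone sketch; AbsCompare
// is a stand-in for the Compare<T> functor used above:
#include <algorithm>
#include <cmath>
#include <cstdio>

struct AbsCompare {
  bool operator()(float a, float b) const { return std::abs(a) < std::abs(b); }
};

int main() {
  const float in[] = {-3.f, 2.f, 0.5f};
  float scale = std::abs(*std::max_element(in, in + 3, AbsCompare()));
  std::printf("abs max: %.1f\n", scale);  // prints 3.0, not -3.0
  return 0;
}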
......@@ -46,10 +46,8 @@ struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> {
platform::Transform<platform::CPUDeviceContext> trans;
trans(ctx, in.data<T>(), in.data<T>() + in.numel(),
out->mutable_data<T>(ctx.GetPlace()), ClipFunctor<T>(-s, s));
    auto out_e = framework::EigenVector<T>::Flatten(*out);
    out_e.device(*ctx.eigen_device()) = (bin_cnt / s * out_e).round();
}
};
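// Worked arithmetic for the clip-then-round fake quantization above:
// out = round(clip(x, -s, s) * bin_cnt / s); the change above rounds the
// already-clipped value (out_e) rather than the raw input (in_e).
// Standalone sketch:
#include <algorithm>
#include <cmath>
#include <cstdio>

static float FakeQuant(float x, float s, int bin_cnt) {
  float clipped = std::min(std::max(x, -s), s);
  return std::round(bin_cnt / s * clipped);
}

int main() {
  // with s = 1.0 and bin_cnt = 127 (int8 range):
  std::printf("%.0f\n", FakeQuant(0.5f, 1.f, 127));    // 0.5 * 127 -> 64
  std::printf("%.0f\n", FakeQuant(2.0f, 1.f, 127));    // clipped to s -> 127
  std::printf("%.0f\n", FakeQuant(-0.25f, 1.f, 127));  // -31.75 -> -32
  return 0;
}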
......
......@@ -23,6 +23,9 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
if (ctx->IsRuntime()) {
return;
}
PADDLE_ENFORCE(ctx->HasInput("W"),
"Input W of FusedEmbeddingSeqPoolOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Ids"),
......@@ -42,36 +45,15 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel {
// we only support sum now
PADDLE_ENFORCE_EQ(combiner, "sum");
    int64_t last_dim = FusedEmbeddingSeqPoolLastDim(table_dims, ids_dims);
    // in compile time, the lod level of ids must be 1
    framework::VarDesc* ids_desc =
        boost::get<framework::VarDesc*>(ctx->GetInputVarPtrs("Ids")[0]);
    PADDLE_ENFORCE_EQ(ids_desc->GetLoDLevel(), 1);
// in compile time, the shape from Ids -> output
// should be [-1, 1] -> [-1, embedding_size]
ctx->SetOutputDim("Out", framework::make_ddim({-1, last_dim}));
}
protected:
......
......@@ -61,6 +61,15 @@ struct EmbeddingVSumFunctor {
}
};
inline int FusedEmbeddingSeqPoolLastDim(const framework::DDim &table_dims,
const framework::DDim &ids_dims) {
int64_t last_dim = table_dims[1];
for (int i = 1; i != ids_dims.size(); ++i) {
last_dim *= ids_dims[i];
}
return last_dim;
}
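// Shape arithmetic of FusedEmbeddingSeqPoolLastDim above: with an embedding
// table of shape [V, E] and ids of shape [N, 1], last_dim = E * 1 = E, so the
// pooled output becomes [batch_size, E]. Standalone sketch over plain vectors:
#include <cassert>
#include <cstdint>
#include <vector>

static int64_t LastDim(const std::vector<int64_t>& table_dims,
                       const std::vector<int64_t>& ids_dims) {
  int64_t last_dim = table_dims[1];
  for (size_t i = 1; i != ids_dims.size(); ++i) last_dim *= ids_dims[i];
  return last_dim;
}

int main() {
  assert(LastDim({10000, 64}, {32, 1}) == 64);   // ids [N, 1] -> E
  assert(LastDim({10000, 64}, {32, 2}) == 128);  // trailing ids dims multiply
  return 0;
}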
template <typename T>
class FusedEmbeddingSeqPoolKernel : public framework::OpKernel<T> {
public:
......@@ -70,6 +79,17 @@ class FusedEmbeddingSeqPoolKernel : public framework::OpKernel<T> {
const LoDTensor *table_var = context.Input<LoDTensor>("W");
const std::string &combiner_type = context.Attr<std::string>("combiner");
int64_t last_dim =
FusedEmbeddingSeqPoolLastDim(table_var->dims(), ids_t->dims());
const auto &ids_lod = ids_t->lod();
// in run time, the LoD of ids must be 1
    PADDLE_ENFORCE_EQ(ids_lod.size(), 1u,
                      "The LoD level of Input(Ids) must be 1");
PADDLE_ENFORCE_GE(ids_lod[0].size(), 1u, "The LoD could NOT be empty");
int64_t batch_size = ids_lod[0].size() - 1;
// in run time, the shape from Ids -> output
// should be [seq_length, 1] -> [batch_size, embedding_size]
output_t->Resize({batch_size, last_dim});
if (combiner_type == "sum") {
EmbeddingVSumFunctor<T> functor;
functor(context, table_var, ids_t, output_t);
......
......@@ -14,7 +14,6 @@ limitations under the License. */
#include "paddle/fluid/operators/hash_op.h"
#include <string>
#include <vector>
namespace paddle {
namespace operators {
......@@ -27,6 +26,9 @@ class HashOp : public framework::OperatorWithKernel {
: OperatorWithKernel(type, inputs, outputs, attrs) {}
void InferShape(framework::InferShapeContext *ctx) const override {
if (ctx->IsRuntime()) {
return;
}
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of HashOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
......@@ -36,15 +38,8 @@ class HashOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(dims.size(), 2UL,
"The input of hash_op's dimensions must be 2");
    std::vector<int64_t> out_dims;
    int num_hash = ctx->Attrs().Get<int>("num_hash");
    HashOutputSize(dims, out_dims, num_hash);
ctx->SetOutputDim("Out", framework::make_ddim(out_dims));
ctx->ShareLoD("X", /*->*/ "Out");
......@@ -71,4 +66,4 @@ $$Out = scale * X$$
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(hash, ops::HashOp, ops::HashOpMaker);
REGISTER_OP_CPU_KERNEL(hash, ops::HashKernel<int>, ops::HashKernel<int64_t>);
......@@ -17,21 +17,34 @@ limitations under the License. */
extern "C" {
#include <xxhash.h>
}
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
inline void HashOutputSize(const framework::DDim& in_dims,
std::vector<int64_t>& out_dims, // NOLINT
int num_hash) {
out_dims.reserve(in_dims.size() + 1);
// copy all dims except the last one
for (int i = 0u; i != in_dims.size() - 1; ++i) {
out_dims.emplace_back(in_dims[i]);
}
out_dims.emplace_back(num_hash);
// keep the last dim to 1
out_dims.emplace_back(1);
}
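// Shape arithmetic of HashOutputSize above: input dims [N, M] with num_hash
// hash functions become output dims [N, num_hash, 1]. Standalone sketch:
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const std::vector<int64_t> in_dims = {5, 3};
  const int num_hash = 4;
  std::vector<int64_t> out_dims(in_dims.begin(), in_dims.end() - 1);
  out_dims.push_back(num_hash);
  out_dims.push_back(1);  // keep the last dim to 1
  assert((out_dims == std::vector<int64_t>{5, 4, 1}));
  return 0;
}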
template <typename T>
class HashKernel : public framework::OpKernel<T> {
public:
virtual void Compute(const framework::ExecutionContext& context) const {
auto* out_t = context.Output<framework::LoDTensor>("Out");
auto* in_t = context.Input<framework::LoDTensor>("X");
int mod_by = context.Attr<int>("mod_by");
int num_hash = context.Attr<int>("num_hash");
auto* output = out_t->mutable_data<T>(context.GetPlace());
auto in_dims = in_t->dims();
auto in_lod = in_t->lod();
......@@ -39,6 +52,11 @@ class HashKerel : public framework::OpKernel<T> {
static_cast<uint64_t>(in_dims[0]), in_lod[0].back(),
"The actual input data's size mismatched with LoD information.");
std::vector<int64_t> out_dims;
HashOutputSize(in_dims, out_dims, num_hash);
out_t->Resize(framework::make_ddim(out_dims));
auto* output = out_t->mutable_data<T>(context.GetPlace());
auto seq_length = in_dims[0];
auto last_dim = in_dims[in_dims.size() - 1];
auto* input = in_t->data<T>();
......@@ -49,6 +67,7 @@ class HashKerel : public framework::OpKernel<T> {
}
input += last_dim;
}
out_t->set_lod(in_t->lod());
}
};
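The elided loop above hashes each input row once per hash slot with xxHash; a standalone sketch of that inner computation (HashRow and the int element type are illustrative, while XXH64 is the real xxhash API):
extern "C" {
#include <xxhash.h>
}
// Hash one row of `last_dim` ints `num_hash` times, one seed per slot,
// bounding each result by `mod_by` as the kernel does.
void HashRow(const int* row, int last_dim, int num_hash, int mod_by,
             int* out) {
  for (int seed = 0; seed < num_hash; ++seed) {
    out[seed] = XXH64(row, sizeof(int) * last_dim, seed) % mod_by;
  }
}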
......
......@@ -84,13 +84,13 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault("bilinear");
AddAttr<bool>(
"align_corners",
"an optinal bool. Defaults to True. "
"an optional bool. Defaults to True. "
"If True, the centers of 4 corner pixels of the input and output "
"tensors are aligned, preserving the values at the corner pixels, "
"if Flase, are not aligned")
"If False, are not aligned")
.SetDefault(true);
AddAttr<int>("align_mode",
"(int, default \'1\'), optional for bilinear interpolation"
"(int, default \'1\'), optional for bilinear interpolation, "
"can be \'0\' for src_idx = scale*(dst_indx+0.5)-0.5 , "
"can be \'1\' for src_idx = scale*dst_index .")
.SetDefault(1);
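A quick numeric check of the two modes documented above:
// scale = 0.5, dst_index = 3:
//   align_mode 0: src_idx = 0.5 * (3 + 0.5) - 0.5 = 1.25
//   align_mode 1: src_idx = 0.5 * 3               = 1.5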
......
......@@ -34,9 +34,8 @@ class IsEmptyOp : public framework::OperatorWithKernel {
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
framework::OpKernelType kt = framework::OpKernelType(
ctx.Input<framework::LoDTensor>("X")->type(), platform::CPUPlace());
return kt;
auto *x = ctx.Input<framework::LoDTensor>("X");
return framework::OpKernelType(x->type(), x->place());
}
};
......@@ -58,7 +57,6 @@ It will just return product(tensor.ddims()) > 0;
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(is_empty, ops::IsEmptyOp, ops::IsEmptyOpMaker,
paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/is_empty_op.h"
#include "paddle/fluid/framework/op_registry.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
is_empty, ops::IsEmptyOpKernel<paddle::platform::CUDADeviceContext, float>,
ops::IsEmptyOpKernel<paddle::platform::CUDADeviceContext, double>,
ops::IsEmptyOpKernel<paddle::platform::CUDADeviceContext, int>,
ops::IsEmptyOpKernel<paddle::platform::CUDADeviceContext, int64_t>);
......@@ -28,6 +28,9 @@ class IsEmptyOpKernel : public framework::OpKernel<T> {
// get output
auto* output_tensor = context.Output<framework::LoDTensor>("Out");
// Note: is_empty is always executed on CPU and the output data should
// always be allocated for CPUPlace. We register a CUDA kernel for this op
// to avoid the unnecessary data transform.
output_tensor->mutable_data<bool>(platform::CPUPlace())[0] =
framework::product(input_tensor->dims()) == 0;
}
......
......@@ -332,6 +332,45 @@ void BenchEmbSeqPoolKernel() {
}
}
template <jit::KernelType KT, typename T, typename PlaceType>
void BenchSgdKernel() {
const T lr = 0.1;
auto UnDuplicatedRandomVec = [](int n, const int64_t lower,
const int64_t upper) -> std::vector<int64_t> {
PADDLE_ENFORCE_LE(static_cast<size_t>(upper - lower), n - 1);
PADDLE_ENFORCE_GT(n, 0);
std::vector<int64_t> all, out;
for (int i = 0; i < n; ++i) {
all.push_back(i);
}
std::random_shuffle(all.begin(), all.end());
out.insert(out.begin(), all.begin(), all.begin() + n);
return out;
};
for (int param_h : {1, 1000}) {
for (int grad_w : {1, 2, 8, 16, 30, 256}) {
// only benchmark inplace
Tensor param;
param.Resize({param_h, grad_w});
T* param_data = param.mutable_data<T>(PlaceType());
RandomVec<T>(param_h * grad_w, param_data, -2.f, 2.f);
for (int rows_size = 1; rows_size <= std::min(param_h, 10); ++rows_size) {
Tensor grad;
grad.Resize({rows_size, grad_w});
std::vector<int64_t> rows =
UnDuplicatedRandomVec(rows_size, 0, rows_size - 1);
RandomVec<T>(rows_size * grad_w, grad.mutable_data<T>(PlaceType()),
-2.f, 2.f);
const T* grad_data = grad.data<T>();
const int64_t* rows_data = rows.data();
jit::sgd_attr_t attr(param_h, grad_w, rows_size, grad_w, rows_size);
BenchAllImpls<KT, jit::SgdTuples<T>, PlaceType>(
attr, &lr, param_data, grad_data, rows_data, param_data, &attr);
}
}
}
}
template <jit::KernelType KT, typename T, typename PlaceType>
void BenchMatMulKernel() {
for (int m : {1, 2, 3, 4}) {
......@@ -477,6 +516,9 @@ BENCH_FP32_CPU(kEmbSeqPool) {
BenchEmbSeqPoolKernel<jit::kEmbSeqPool, T, CPUPlace>();
}
// sgd function
BENCH_FP32_CPU(kSgd) { BenchSgdKernel<jit::kSgd, T, CPUPlace>(); }
// matmul
BENCH_FP32_CPU(kMatMul) { BenchMatMulKernel<jit::kMatMul, T, CPUPlace>(); }
......
......@@ -32,3 +32,4 @@ USE_JITKERNEL_GEN(kSeqPool)
USE_JITKERNEL_GEN(kHMax)
USE_JITKERNEL_GEN(kHSum)
USE_JITKERNEL_GEN(kEmbSeqPool)
USE_JITKERNEL_GEN(kSgd)
......@@ -31,7 +31,8 @@ namespace gen {
// Application Binary Interface
constexpr Xbyak::Operand::Code abi_param1(Xbyak::Operand::RDI),
abi_param2(Xbyak::Operand::RSI), abi_param3(Xbyak::Operand::RDX),
abi_param4(Xbyak::Operand::RCX);
abi_param4(Xbyak::Operand::RCX), abi_param5(Xbyak::Operand::R8),
abi_param6(Xbyak::Operand::R9);
constexpr Xbyak::Operand::Code g_abi_regs[] = {
Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::R12,
......
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#include "paddle/fluid/operators/jit/gen/sgd.h"
#include <stddef.h> // offsetof
#include <vector>
#include "paddle/fluid/operators/jit/registry.h"
#include "paddle/fluid/platform/cpu_info.h"
namespace paddle {
namespace operators {
namespace jit {
namespace gen {
void SgdJitCode::genCode() {
preCode();
constexpr int block = YMM_FLOAT_BLOCK;
constexpr int max_num_regs = 7;
const int num_block = w_ / block;
const int num_groups = num_block / max_num_regs;
const size_t block_size = sizeof(float) * block;
const size_t width_size = w_ * sizeof(float);
std::vector<int> groups(num_groups, max_num_regs);
int rest_num_regs = num_block % max_num_regs;
if (rest_num_regs > 0) {
groups.push_back(rest_num_regs);
}
vbroadcastss(ymm_lr, ptr[param_lr]);
// protect rdx
mov(reg_ptr_grad_i, param_grad);
mov(reg_ptr_rows_i, param_rows);
mov(reg_rows_size_in_byte,
qword[param_attr + offsetof(sgd_attr_t, selected_rows_size)]);
mov(rax, sizeof(int64_t));
mul(reg_rows_size_in_byte);
mov(reg_rows_size_in_byte, rax);
add(reg_rows_size_in_byte, reg_ptr_rows_i);
Label l_next_row;
L(l_next_row);
{
mov(reg_row, qword[reg_ptr_rows_i]);
mov(rax, width_size);
mul(reg_row);
mov(reg_row, rax);
mov(reg_ptr_param_i, param_param);
mov(reg_ptr_out_i, param_out);
add(reg_ptr_param_i, reg_row);
add(reg_ptr_out_i, reg_row);
size_t w_offset = 0;
for (int num_regs : groups) {
// load grad
size_t inner_offset = w_offset;
for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
vmovups(ymm_t(reg_i), ptr[reg_ptr_grad_i + inner_offset]);
inner_offset += block_size;
}
// load param
inner_offset = w_offset;
for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
vmovups(ymm_t(reg_i + num_regs), ptr[reg_ptr_param_i + inner_offset]);
inner_offset += block_size;
}
// compute out
for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
vmulps(ymm_t(reg_i), ymm_t(reg_i), ymm_lr);
vsubps(ymm_t(reg_i + num_regs), ymm_t(reg_i + num_regs), ymm_t(reg_i));
}
// save out
inner_offset = w_offset;
for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
vmovups(ptr[reg_ptr_out_i + inner_offset], ymm_t(reg_i + num_regs));
inner_offset += block_size;
}
w_offset += (block_size * num_regs);
}
add(reg_ptr_grad_i, width_size);
add(reg_ptr_rows_i, sizeof(int64_t));
cmp(reg_ptr_rows_i, reg_rows_size_in_byte);
jl(l_next_row, T_NEAR);
}
postCode();
}
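A hedged scalar model of what the emitted code computes (the real code walks w_ in YMM blocks of 8 floats, up to max_num_regs blocks of grad and param per group, with ymm15 holding the broadcast learning rate):
void SgdScalarModel(const float* lr, const float* param, const float* grad,
                    const int64_t* rows, float* out, const sgd_attr_t& attr) {
  const int64_t w = attr.grad_width;
  for (int64_t i = 0; i < attr.selected_rows_size; ++i) {
    const int64_t row = rows[i];
    for (int64_t j = 0; j < w; ++j) {
      out[row * w + j] = param[row * w + j] - lr[0] * grad[i * w + j];
    }
  }
}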
class SgdCreator : public JitCodeCreator<sgd_attr_t> {
public:
bool UseMe(const sgd_attr_t& attr) const override {
return platform::MayIUse(platform::avx) &&
attr.grad_width % YMM_FLOAT_BLOCK == 0;
}
size_t CodeSize(const sgd_attr_t& attr) const override {
return 96 + (attr.grad_width / YMM_FLOAT_BLOCK) * 32 * 8;
}
std::unique_ptr<GenBase> CreateJitCode(
const sgd_attr_t& attr) const override {
PADDLE_ENFORCE_EQ(attr.param_width, attr.grad_width);
PADDLE_ENFORCE_LE(attr.selected_rows_size, attr.grad_height);
PADDLE_ENFORCE_GE(attr.selected_rows_size, 0);
return make_unique<SgdJitCode>(attr, CodeSize(attr));
}
};
} // namespace gen
} // namespace jit
} // namespace operators
} // namespace paddle
namespace gen = paddle::operators::jit::gen;
REGISTER_JITKERNEL_GEN(kSgd, gen::SgdCreator);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#pragma once
#include <string>
#include "glog/logging.h"
#include "paddle/fluid/operators/jit/gen/jitcode.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace operators {
namespace jit {
namespace gen {
class SgdJitCode : public JitCode {
public:
explicit SgdJitCode(const sgd_attr_t& attr, size_t code_size = 256 * 1024,
void* code_ptr = nullptr)
: JitCode(code_size, code_ptr), w_(attr.grad_width) {
this->genCode();
}
DECLARE_JIT_CODE(SgdJitCode);
void genCode() override;
private:
int w_;
reg64_t param_lr{abi_param1};
reg64_t param_param{abi_param2};
reg64_t param_grad{abi_param3};
reg64_t param_rows{abi_param4};
reg64_t param_out{abi_param5};
reg64_t param_attr{abi_param6};
ymm_t ymm_lr = ymm_t(15);
reg64_t reg_ptr_grad_i{r10};
reg64_t reg_ptr_rows_i{r11};
reg64_t reg_rows_size_in_byte{r12};
reg64_t reg_row{r13};
reg64_t reg_ptr_param_i{r14};
reg64_t reg_ptr_out_i{r15};
};
} // namespace gen
} // namespace jit
} // namespace operators
} // namespace paddle
......@@ -55,6 +55,7 @@ const char* to_string(KernelType kt) {
ONE_CASE(kHSum);
ONE_CASE(kSoftmax);
ONE_CASE(kEmbSeqPool);
ONE_CASE(kSgd);
default:
PADDLE_THROW("Not support type: %d, or forget to add it.", kt);
return "NOT JITKernel";
......
......@@ -181,6 +181,14 @@ inline std::ostream& operator<<(std::ostream& os,
return os;
}
inline std::ostream& operator<<(std::ostream& os, const sgd_attr_t& attr) {
os << "param_height[" << attr.param_height << "],param_width["
<< attr.param_width << "],grad_height[" << attr.grad_height
<< "],grad_width[" << attr.grad_width << "],selected_rows_size["
<< attr.selected_rows_size << "]";
return os;
}
inline std::ostream& operator<<(std::ostream& os, const matmul_attr_t& attr) {
os << "M[" << attr.m << "],N[" << attr.n << "],K[" << attr.k << "]";
return os;
......
......@@ -46,6 +46,7 @@ typedef enum {
kVMul,
kVRelu,
kVScal,
kSgd,
kVSigmoid,
kVSquare,
kVSub,
......@@ -173,6 +174,28 @@ struct EmbSeqPoolTuples {
const emb_seq_pool_attr_t*);
};
typedef struct sgd_attr_s {
int64_t param_height, param_width;
int64_t grad_height, grad_width;
int64_t selected_rows_size;
sgd_attr_s() = default;
explicit sgd_attr_s(int64_t param_h, int64_t param_w, int64_t grad_h,
int64_t grad_w, int64_t selected_rows_sz)
: param_height(param_h),
param_width(param_w),
grad_height(grad_h),
grad_width(grad_w),
selected_rows_size(selected_rows_sz) {}
} sgd_attr_t;
template <typename T>
struct SgdTuples {
typedef T data_type;
typedef sgd_attr_t attr_type;
typedef void (*func_type)(const T*, const T*, const T*, const int64_t*, T*,
const sgd_attr_t*);
};
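A hedged usage sketch of a kernel matching SgdTuples, mirroring the test code later in this diff (GetRefer and the kernels.h header are the lookup path used there):
#include <vector>
#include "paddle/fluid/operators/jit/kernels.h"
namespace jit = paddle::operators::jit;
void SgdExample() {
  float lr = 0.1f;
  std::vector<float> param(10 * 8, 1.f), out(10 * 8, 0.f);
  std::vector<float> grad(2 * 8, 0.5f);  // grad: [2, 8]
  std::vector<int64_t> rows = {3, 7};    // update rows 3 and 7 only
  jit::sgd_attr_t attr(/*param_h=*/10, /*param_w=*/8, /*grad_h=*/2,
                       /*grad_w=*/8, /*selected_rows_size=*/2);
  auto sgd = jit::GetRefer<jit::kSgd, jit::SgdTuples<float>>();
  // signature: (lr, param, grad, rows, out, attr)
  sgd(&lr, param.data(), grad.data(), rows.data(), out.data(), &attr);
  // out row 3 == param row 3 - 0.1f * grad row 0; unselected rows of out
  // stay uninitialized, per the note in the refer kernel below.
}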
typedef struct matmul_attr_s {
int m, n, k;
void* packed_weight{nullptr};
......
......@@ -13,6 +13,7 @@
* limitations under the License. */
#include "paddle/fluid/operators/jit/kernel_key.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace operators {
......@@ -23,14 +24,30 @@ size_t JitCodeKey<int>(const int& d) {
return d;
}
// TODO(TJ): refine and benchmark JitCodeKey generation
constexpr int act_type_shift = 3;  // support 2^3 act types
static inline int act_type_convert(KernelType type) {
if (type == kVIdentity) {
return 0;
} else if (type == kVExp) {
return 1;
} else if (type == kVRelu) {
return 2;
} else if (type == kVSigmoid) {
return 3;
} else if (type == kVTanh) {
return 4;
}
PADDLE_THROW("Unsupported act type %d", type);
return 0;
}
template <>
size_t JitCodeKey<lstm_attr_t>(const lstm_attr_t& attr) {
size_t key = attr.d;
int gate_key = static_cast<int>(attr.act_gate) << 1;
int cand_key = static_cast<int>(attr.act_cand) << (1 + act_type_shift);
int cell_key = static_cast<int>(attr.act_cell) << (1 + act_type_shift * 2);
int gate_key = act_type_convert(attr.act_gate) << 1;
int cand_key = act_type_convert(attr.act_cand) << (1 + act_type_shift);
int cell_key = act_type_convert(attr.act_cell) << (1 + act_type_shift * 2);
return (key << (1 + act_type_shift * 3)) + gate_key + cand_key + cell_key +
attr.use_peephole;
}
......@@ -38,8 +55,8 @@ size_t JitCodeKey<lstm_attr_t>(const lstm_attr_t& attr) {
template <>
size_t JitCodeKey<gru_attr_t>(const gru_attr_t& attr) {
size_t key = attr.d;
return (key << (act_type_shift * 2)) + static_cast<int>(attr.act_gate) +
(static_cast<int>(attr.act_cand) << act_type_shift);
return (key << (act_type_shift * 2)) + act_type_convert(attr.act_gate) +
(act_type_convert(attr.act_cand) << act_type_shift);
}
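A worked example of the packing, using the conversion table in act_type_convert above:
// gru_attr_t attr(9, kVSigmoid, kVTanh):
//   key = (9 << (act_type_shift * 2)) + act_type_convert(kVSigmoid)
//         + (act_type_convert(kVTanh) << act_type_shift)
//       = (9 << 6) + 3 + (4 << 3) = 576 + 3 + 32 = 611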
template <>
......@@ -61,6 +78,11 @@ size_t JitCodeKey<emb_seq_pool_attr_t>(const emb_seq_pool_attr_t& attr) {
return attr.table_width;
}
template <>
size_t JitCodeKey<sgd_attr_t>(const sgd_attr_t& attr) {
return attr.grad_width;
}
} // namespace jit
} // namespace operators
} // namespace paddle
......@@ -14,3 +14,4 @@ USE_JITKERNEL_MORE(kVTanh, mkl)
USE_JITKERNEL_MORE(kSeqPool, mkl)
USE_JITKERNEL_MORE(kSoftmax, mkl)
USE_JITKERNEL_MORE(kEmbSeqPool, mkl)
USE_JITKERNEL_MORE(kSgd, mkl)
......@@ -184,6 +184,16 @@ bool EmbSeqPoolKernel<double>::UseMe(const emb_seq_pool_attr_t& attr) const {
return true;
}
template <>
bool SgdKernel<float>::UseMe(const sgd_attr_t& attr) const {
return true;
}
template <>
bool SgdKernel<double>::UseMe(const sgd_attr_t& attr) const {
return true;
}
template <>
bool MatMulKernel<float>::UseMe(const matmul_attr_t& attr) const {
return platform::MayIUse(platform::avx);
......@@ -239,5 +249,6 @@ REGISTER_MKL_KERNEL(kVTanh, VTanh);
REGISTER_MKL_KERNEL(kSeqPool, SeqPool);
REGISTER_MKL_KERNEL(kEmbSeqPool, EmbSeqPool);
REGISTER_MKL_KERNEL(kSoftmax, Softmax);
REGISTER_MKL_KERNEL(kSgd, Sgd);
#undef REGISTER_MKL_KERNEL
......@@ -142,6 +142,32 @@ void Softmax(const T* x, T* y, int n, int bs) {
}
}
template <typename T>
void Sgd(const T* lr, const T* param, const T* grad, const int64_t* rows,
T* out, const sgd_attr_t* attr) {
PADDLE_ENFORCE_EQ(attr->param_width, attr->grad_width);
PADDLE_ENFORCE_LE(attr->selected_rows_size, attr->grad_height);
T scalar = -lr[0];
int width = attr->grad_width;
if (out == param) {
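// in-place: out already holds param, so a single AXPY
// (out_row += -lr * grad_row) applies the update directly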
for (int64_t i = 0; i < attr->selected_rows_size; ++i) {
auto h_idx = rows[i];
PADDLE_ENFORCE_LT(h_idx, attr->param_height);
PADDLE_ENFORCE_GE(h_idx, 0);
VAXPY(scalar, grad + i * width, out + h_idx * width, width);
}
} else {
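// out-of-place: write -lr * grad into the out row first, then add param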
for (int64_t i = 0; i < attr->selected_rows_size; ++i) {
auto h_idx = rows[i];
PADDLE_ENFORCE_LT(h_idx, attr->param_height);
PADDLE_ENFORCE_GE(h_idx, 0);
VScal(&scalar, grad + i * width, out + h_idx * width, width);
VAdd(param + h_idx * width, out + h_idx * width, out + h_idx * width,
width);
}
}
}
#define DECLARE_MKL_KERNEL(name, tuples) \
template <typename T> \
class name##Kernel : public KernelMore<tuples<T>> { \
......@@ -173,6 +199,8 @@ DECLARE_MKL_KERNEL(EmbSeqPool, EmbSeqPoolTuples);
DECLARE_MKL_KERNEL(Softmax, SoftmaxTuples);
DECLARE_MKL_KERNEL(Sgd, SgdTuples);
#undef DECLARE_MKL_KERNEL
} // namespace mkl
......
......@@ -33,3 +33,4 @@ USE_JITKERNEL_REFER(kHSum)
USE_JITKERNEL_REFER(kHMax)
USE_JITKERNEL_REFER(kSoftmax)
USE_JITKERNEL_REFER(kEmbSeqPool)
USE_JITKERNEL_REFER(kSgd)
......@@ -59,4 +59,6 @@ REGISTER_REFER_KERNEL(kSoftmax, Softmax);
REGISTER_REFER_KERNEL(kEmbSeqPool, EmbSeqPool);
REGISTER_REFER_KERNEL(kSgd, Sgd);
#undef REGISTER_REFER_KERNEL
......@@ -446,6 +446,36 @@ void EmbSeqPool(const T* table, const int64_t* idx, T* out,
}
}
// SGD algorithm:
// lr is a pointer to the learning rate scalar
// param is an input matrix with shape (param_h, param_w)
// grad is an input matrix with shape (grad_h, grad_w), where grad_w == param_w
// selected_rows is a vector<int64_t> of size selected_rows_size (<= grad_h)
// out is an output matrix with shape (param_h, param_w)
//
// supports both regular and sparse grad
// regular SGD: out[:] = param[:] - lr[0] * grad[:];
// sparse SGD: out[rows[i]][:] = param[rows[i]][:] - lr[0] * grad[i][:]
//
// Note: with sparse SGD, if out != param, the rows of out that are not
// selected are left unchanged, so they may be empty (uninitialized)
template <typename T>
void Sgd(const T* lr, const T* param, const T* grad, const int64_t* rows,
T* out, const sgd_attr_t* attr) {
PADDLE_ENFORCE_EQ(attr->param_width, attr->grad_width);
PADDLE_ENFORCE_LE(attr->selected_rows_size, attr->grad_height);
for (int64_t i = 0; i < attr->selected_rows_size; ++i) {
auto h_idx = rows[i];
PADDLE_ENFORCE_LT(h_idx, attr->param_height);
PADDLE_ENFORCE_GE(h_idx, 0);
for (int64_t j = 0; j < attr->grad_width; ++j) {
out[h_idx * attr->grad_width + j] =
param[h_idx * attr->grad_width + j] -
lr[0] * grad[i * attr->grad_width + j];
}
}
}
#define DECLARE_REFER_KERNEL(name, tuples) \
template <typename T> \
class name##Kernel : public ReferKernel<tuples<T>> { \
......@@ -496,6 +526,8 @@ DECLARE_REFER_KERNEL(Softmax, SoftmaxTuples);
DECLARE_REFER_KERNEL(EmbSeqPool, EmbSeqPoolTuples);
DECLARE_REFER_KERNEL(Sgd, SgdTuples);
#undef DECLARE_REFER_KERNEL
} // namespace refer
......
......@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <random>
#include <string>
#include <vector>
......@@ -36,14 +37,14 @@ void RandomVec(const int n, T* a, const T lower = static_cast<T>(-20.f),
}
template <typename T>
void ExpectEQ(const T* target, const T* refer, int n) {
void ExpectEQ(const T* target, const T* refer, size_t n) {
if (std::is_floating_point<T>::value) {
for (int i = 0; i < n; ++i) {
EXPECT_NEAR(target[i], refer[i], FLAGS_acc);
for (size_t i = 0; i < n; ++i) {
EXPECT_NEAR(target[i], refer[i], FLAGS_acc) << " at index : " << i;
}
} else {
for (int i = 0; i < n; ++i) {
EXPECT_EQ(target[i], refer[i]);
for (size_t i = 0; i < n; ++i) {
EXPECT_EQ(target[i], refer[i]) << " at index : " << i;
}
}
}
......@@ -296,6 +297,45 @@ struct TestFuncWithRefer<jit::EmbSeqPoolTuples<T>, std::vector<T>,
}
};
template <typename T>
struct TestFuncWithRefer<jit::SgdTuples<T>, T, std::vector<T>, std::vector<T>,
std::vector<int64_t>, std::vector<T>,
typename jit::SgdTuples<T>::attr_type> {
void operator()(const typename jit::SgdTuples<T>::func_type tgt, const T lr,
const std::vector<T>& param, const std::vector<T>& grad,
const std::vector<int64_t>& rows, const std::vector<T>& oref,
const typename jit::SgdTuples<T>::attr_type& attr) {
EXPECT_TRUE(tgt != nullptr);
EXPECT_EQ(param.size(),
static_cast<size_t>(attr.param_height * attr.param_width));
EXPECT_EQ(grad.size(),
static_cast<size_t>(attr.grad_height * attr.grad_width));
EXPECT_EQ(rows.size(), static_cast<size_t>(attr.selected_rows_size));
EXPECT_EQ(param.size(), oref.size());
const T* param_data = param.data();
const T* grad_data = grad.data();
const int64_t* rows_data = rows.data();
const T* oref_data = oref.data();
std::vector<T> out(oref.size());
T* o_data = out.data();
tgt(&lr, param_data, grad_data, rows_data, o_data, &attr);
// only the selected rows should be equal
for (size_t i = 0; i < rows.size(); ++i) {
ExpectEQ<T>(o_data + rows[i] * attr.grad_width,
oref_data + rows[i] * attr.grad_width, attr.grad_width);
}
// inplace
std::copy(param.begin(), param.end(), out.begin());
tgt(&lr, o_data, grad_data, rows_data, o_data, &attr);
for (size_t i = 0; i < rows.size(); ++i) {
ExpectEQ<T>(o_data + rows[i] * attr.grad_width,
oref_data + rows[i] * attr.grad_width, attr.grad_width);
}
}
};
template <typename T>
struct TestFuncWithRefer<jit::MatMulTuples<T>, std::vector<T>, std::vector<T>,
std::vector<T>,
......@@ -407,7 +447,7 @@ void TestAllImpls(const typename KernelTuples::attr_type& attr, Args... args) {
}
template <jit::KernelType KT, typename T, typename PlaceType>
void TestXYZNKernel() {
void TestKernelXYZNTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
for (int d : TestSizes()) {
auto ref = jit::GetRefer<KT, jit::XYZNTuples<T>>();
......@@ -440,7 +480,7 @@ void TestXYZNKernel() {
}
template <jit::KernelType KT, typename T, typename PlaceType>
void TestAXYNKernel() {
void TestKernelAXYNTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
for (int d : TestSizes()) {
auto ref = jit::GetRefer<KT, jit::AXYNTuples<T>>();
......@@ -466,7 +506,7 @@ void TestAXYNKernel() {
}
template <jit::KernelType KT, typename T, typename PlaceType>
void TestXRNKernel() {
void TestKernelXRNTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
auto last_acc = FLAGS_acc;
FLAGS_acc = 1e-4;
......@@ -484,7 +524,7 @@ void TestXRNKernel() {
}
template <jit::KernelType KT, typename T, typename PlaceType>
void TestXYNKernel() {
void TestKernelXYNTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
for (int d : TestSizes()) {
auto ref = jit::GetRefer<KT, jit::XYNTuples<T>>();
......@@ -509,10 +549,12 @@ void TestXYNKernel() {
}
template <jit::KernelType KT, typename T, typename PlaceType>
void TestLSTMKernel() {
void TestKernelLSTMTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
std::vector<std::string> all_acts = {"sigmoid", "tanh", "relu", "identity"};
for (int d : TestSizes()) {
auto test_sizes = TestSizes();
test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
for (int d : test_sizes) {
for (bool use_peephole : {true, false}) {
for (auto& act_gate : all_acts) {
for (auto& act_cand : all_acts) {
......@@ -559,10 +601,12 @@ void TestLSTMKernel() {
}
template <jit::KernelType KT, typename T, typename PlaceType>
void TestGRUKernel() {
void TestKernelGRUTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
std::vector<std::string> all_acts = {"sigmoid", "tanh", "relu", "identity"};
for (int d : TestSizes()) {
auto test_sizes = TestSizes();
test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
for (int d : test_sizes) {
for (auto& act_gate : all_acts) {
for (auto& act_cand : all_acts) {
const jit::gru_attr_t attr(d, jit::to_kerneltype(act_gate),
......@@ -593,14 +637,16 @@ void TestGRUKernel() {
}
template <jit::KernelType KT, typename T, typename PlaceType>
void TestSeqPoolKernel() {
void TestKernelSeqPoolTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
std::vector<jit::SeqPoolType> pool_types = {
jit::SeqPoolType::kSum, jit::SeqPoolType::kAvg, jit::SeqPoolType::kSqrt};
auto test_sizes = TestSizes();
test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
for (auto type : pool_types) {
for (int w : TestSizes()) {
for (int w : test_sizes) {
jit::seq_pool_attr_t attr(w, type);
for (int h : TestSizes()) {
for (int h : test_sizes) {
attr.h = h;
auto ref = jit::GetRefer<KT, jit::SeqPoolTuples<T>>();
EXPECT_TRUE(ref != nullptr);
......@@ -618,11 +664,11 @@ void TestSeqPoolKernel() {
}
template <jit::KernelType KT, typename T, typename PlaceType>
void TestMatMulKernel() {
void TestKernelMatMulTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
auto last_acc = FLAGS_acc;
// TODO(intel): fix MKL acc issue
// https://github.com/PaddlePaddle/Paddle/issues/15447
// export MKL_CBWR=AVX forces MKL to use AVX
// export KMP_DETERMINISTIC_REDUCTION=yes makes the result deterministic
FLAGS_acc = 1e-3;
for (int m : {1, 2, 3, 4}) {
for (int n : {1, 2, 3, 4}) {
......@@ -646,7 +692,7 @@ void TestMatMulKernel() {
}
template <jit::KernelType KT, typename T, typename PlaceType>
void TestSoftmaxKernel() {
void TestKernelSoftmaxTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
for (int bs : {1, 2, 10}) {
for (int n : TestSizes()) {
......@@ -671,12 +717,14 @@ void TestSoftmaxKernel() {
}
template <jit::KernelType KT, typename T, typename PlaceType>
void TestEmbSeqPoolKernel() {
void TestKernelEmbSeqPoolTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
int64_t tbl_h = 1e4;
std::vector<jit::SeqPoolType> pool_types = {
jit::SeqPoolType::kSum}; // only support sum yet
for (int tbl_w : TestSizes()) {
auto test_sizes = TestSizes();
test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
for (int tbl_w : test_sizes) {
std::vector<T> table(tbl_h * tbl_w);
RandomVec<T>(tbl_h * tbl_w, table.data(), -2.f, 2.f);
const T* table_data = table.data();
......@@ -705,7 +753,61 @@ void TestEmbSeqPoolKernel() {
}
template <jit::KernelType KT, typename T, typename PlaceType>
void TestNCHW16CMulNCKernel() {
void TestKernelSgdTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
const T lr = 0.1;
auto UnDuplicatedRandomVec = [](int n, const int64_t lower,
const int64_t upper) -> std::vector<int64_t> {
PADDLE_ENFORCE_LE(static_cast<size_t>(upper - lower), n - 1);
PADDLE_ENFORCE_GT(n, 0);
std::vector<int64_t> all, out;
for (int i = 0; i < n; ++i) {
all.push_back(i);
}
std::random_shuffle(all.begin(), all.end());
out.insert(out.begin(), all.begin(), all.begin() + n);
return out;
};
for (int param_h : {1, 10}) {
for (int grad_w : TestSizes()) {
std::vector<T> param(param_h * grad_w);
std::vector<T> param_out(param_h * grad_w);
RandomVec<T>(param_h * grad_w, param.data(), -2.f, 2.f);
const T* param_data = param.data();
T* out_data = param_out.data();
for (int rows_size = 1; rows_size <= param_h; ++rows_size) {
std::vector<T> grad(rows_size * grad_w);
std::vector<int64_t> rows =
UnDuplicatedRandomVec(rows_size, 0, rows_size - 1);
RandomVec<T>(rows_size * grad_w, grad.data(), -2.f, 2.f);
const int64_t* rows_data = rows.data();
const T* grad_data = grad.data();
auto ref = jit::GetRefer<KT, jit::SgdTuples<T>>();
EXPECT_TRUE(ref != nullptr);
jit::sgd_attr_t attr(param_h, grad_w, rows_size, grad_w, rows_size);
ref(&lr, param_data, grad_data, rows_data, out_data, &attr);
// inplace test
std::vector<T> inp(param.size());
std::copy(param.begin(), param.end(), inp.begin());
T* inp_data = inp.data();
ref(&lr, inp_data, grad_data, rows_data, inp_data, &attr);
// only the selected rows should be equal
for (int i = 0; i < rows_size; ++i) {
ExpectEQ<T>(inp_data + rows[i] * grad_w, out_data + rows[i] * grad_w,
grad_w);
}
TestAllImpls<KT, jit::SgdTuples<T>, PlaceType, T, std::vector<T>,
std::vector<T>, std::vector<int64_t>, std::vector<T>>(
attr, lr, param, grad, rows, param_out, attr);
}
}
}
}
template <jit::KernelType KT, typename T, typename PlaceType>
void TestKernelNCHW16CMulNCTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
const int n = 3, c = 16 * 4, h = 10, w = 10;
auto ref = jit::GetRefer<KT, jit::NCHW16CMulNCTuples<T>>();
......@@ -758,7 +860,7 @@ void TestNCHW16CMulNCKernel() {
}
template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType>
void TestLayerNormKernel() {
void TestKernelLayerNormTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
const T epsilon = 9.99999975e-06;
for (int n : {1, 2, 10}) {
......@@ -797,11 +899,13 @@ void TestLayerNormKernel() {
}
template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType>
void TestCRFDecodingKernel() {
void TestKernelCRFDecodingTuples() {
VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
constexpr int state_trans_base_idx = 2;
auto test_sizes = TestSizes();
test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
for (int seq_len : {1, 11, 17, 50}) {
for (int tag_num : TestSizes()) {
for (int tag_num : test_sizes) {
auto ref = jit::GetRefer<KT, jit::CRFDecodingTuples<T>>();
EXPECT_TRUE(ref != nullptr);
int x_sz = seq_len * tag_num;
......@@ -822,143 +926,76 @@ void TestCRFDecodingKernel() {
}
}
// XYZNTuple
TEST(JITKernel, kVMul) {
TestXYZNKernel<jit::kVMul, float, CPUPlace>();
TestXYZNKernel<jit::kVMul, double, CPUPlace>();
}
TEST(JITKernel, kVAdd) {
TestXYZNKernel<jit::kVAdd, float, CPUPlace>();
TestXYZNKernel<jit::kVAdd, double, CPUPlace>();
}
TEST(JITKernel, kVAddRelu) {
TestXYZNKernel<jit::kVAddRelu, float, CPUPlace>();
TestXYZNKernel<jit::kVAddRelu, double, CPUPlace>();
}
TEST(JITKernel, kVSub) {
TestXYZNKernel<jit::kVSub, float, CPUPlace>();
TestXYZNKernel<jit::kVSub, double, CPUPlace>();
}
// AXYNTuples
TEST(JITKernel, kVScal) {
TestAXYNKernel<jit::kVScal, float, CPUPlace>();
TestAXYNKernel<jit::kVScal, double, CPUPlace>();
}
TEST(JITKernel, kVAddBias) {
TestAXYNKernel<jit::kVAddBias, float, CPUPlace>();
TestAXYNKernel<jit::kVAddBias, double, CPUPlace>();
}
// XRNTuples
TEST(JITKernel, kHMax) {
TestXRNKernel<jit::kHMax, float, CPUPlace>();
TestXRNKernel<jit::kHMax, double, CPUPlace>();
}
TEST(JITKernel, kHSum) {
TestXRNKernel<jit::kHSum, float, CPUPlace>();
TestXRNKernel<jit::kHSum, double, CPUPlace>();
}
// XYNTuples
TEST(JITKernel, kVRelu) {
TestXYNKernel<jit::kVRelu, float, CPUPlace>();
TestXYNKernel<jit::kVRelu, double, CPUPlace>();
}
TEST(JITKernel, kVIdentity) {
TestXYNKernel<jit::kVIdentity, float, CPUPlace>();
TestXYNKernel<jit::kVIdentity, double, CPUPlace>();
}
TEST(JITKernel, kVSquare) {
TestXYNKernel<jit::kVSquare, float, CPUPlace>();
TestXYNKernel<jit::kVSquare, double, CPUPlace>();
}
TEST(JITKernel, kVExp) {
TestXYNKernel<jit::kVExp, float, CPUPlace>();
TestXYNKernel<jit::kVExp, double, CPUPlace>();
}
TEST(JITKernel, kVSigmoid) {
TestXYNKernel<jit::kVSigmoid, float, CPUPlace>();
TestXYNKernel<jit::kVSigmoid, double, CPUPlace>();
}
#define TEST_CPU_KERNEL(test_tuple, kernel_type) \
TEST(JITKernel, kernel_type) { \
TestKernel##test_tuple<jit::kernel_type, float, CPUPlace>(); \
TestKernel##test_tuple<jit::kernel_type, double, CPUPlace>(); \
}
TEST(JITKernel, kVTanh) {
TestXYNKernel<jit::kVTanh, float, CPUPlace>();
TestXYNKernel<jit::kVTanh, double, CPUPlace>();
}
TEST_CPU_KERNEL(XYZNTuples, kVMul);
TEST_CPU_KERNEL(XYZNTuples, kVAdd);
TEST_CPU_KERNEL(XYZNTuples, kVAddRelu);
TEST_CPU_KERNEL(XYZNTuples, kVSub);
// LSTM
TEST(JITKernel, kLSTMCtHt) {
TestLSTMKernel<jit::kLSTMCtHt, float, CPUPlace>();
TestLSTMKernel<jit::kLSTMCtHt, double, CPUPlace>();
}
TEST_CPU_KERNEL(AXYNTuples, kVScal);
TEST_CPU_KERNEL(AXYNTuples, kVAddBias);
TEST(JITKernel, kLSTMC1H1) {
TestLSTMKernel<jit::kLSTMC1H1, float, CPUPlace>();
TestLSTMKernel<jit::kLSTMC1H1, double, CPUPlace>();
}
TEST_CPU_KERNEL(XRNTuples, kHMax);
TEST_CPU_KERNEL(XRNTuples, kHSum);
// GRU
TEST(JITKernel, kGRUH1) {
TestGRUKernel<jit::kGRUH1, float, CPUPlace>();
TestGRUKernel<jit::kGRUH1, double, CPUPlace>();
}
TEST_CPU_KERNEL(XYNTuples, kVRelu);
TEST_CPU_KERNEL(XYNTuples, kVIdentity);
TEST_CPU_KERNEL(XYNTuples, kVSquare);
TEST_CPU_KERNEL(XYNTuples, kVExp);
TEST_CPU_KERNEL(XYNTuples, kVSigmoid);
TEST_CPU_KERNEL(XYNTuples, kVTanh);
TEST(JITKernel, kGRUHtPart1) {
TestGRUKernel<jit::kGRUHtPart1, float, CPUPlace>();
TestGRUKernel<jit::kGRUHtPart1, double, CPUPlace>();
}
TEST_CPU_KERNEL(LSTMTuples, kLSTMCtHt);
TEST_CPU_KERNEL(LSTMTuples, kLSTMC1H1);
TEST(JITKernel, kGRUHtPart2) {
TestGRUKernel<jit::kGRUHtPart2, float, CPUPlace>();
TestGRUKernel<jit::kGRUHtPart2, double, CPUPlace>();
}
TEST_CPU_KERNEL(GRUTuples, kGRUH1);
TEST_CPU_KERNEL(GRUTuples, kGRUHtPart1);
TEST_CPU_KERNEL(GRUTuples, kGRUHtPart2);
TEST(JITKernel, kSeqPool) {
TestSeqPoolKernel<jit::kSeqPool, float, CPUPlace>();
TestSeqPoolKernel<jit::kSeqPool, double, CPUPlace>();
}
TEST_CPU_KERNEL(NCHW16CMulNCTuples, kNCHW16CMulNC);
TEST(JITKernel, kMatMul) {
TestMatMulKernel<jit::kMatMul, float, CPUPlace>();
TestMatMulKernel<jit::kMatMul, double, CPUPlace>();
}
TEST_CPU_KERNEL(SeqPoolTuples, kSeqPool);
TEST_CPU_KERNEL(MatMulTuples, kMatMul);
TEST_CPU_KERNEL(SoftmaxTuples, kSoftmax);
TEST_CPU_KERNEL(EmbSeqPoolTuples, kEmbSeqPool);
TEST_CPU_KERNEL(SgdTuples, kSgd);
TEST_CPU_KERNEL(LayerNormTuples, kLayerNorm);
TEST_CPU_KERNEL(CRFDecodingTuples, kCRFDecoding);
TEST(JITKernel, kSoftmax) {
TestSoftmaxKernel<jit::kSoftmax, float, CPUPlace>();
TestSoftmaxKernel<jit::kSoftmax, double, CPUPlace>();
}
TEST(JITKernel_key, lstm) {
jit::lstm_attr_t attr1(8, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh);
jit::lstm_attr_t attr2(9, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh);
jit::lstm_attr_t attr3(9, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh);
jit::lstm_attr_t attr4(9, jit::kVRelu, jit::kVSigmoid, jit::kVTanh);
TEST(JITKernel, kEmbSeqPool) {
TestEmbSeqPoolKernel<jit::kEmbSeqPool, float, CPUPlace>();
TestEmbSeqPoolKernel<jit::kEmbSeqPool, double, CPUPlace>();
}
auto key1 = jit::JitCodeKey<jit::lstm_attr_t>(attr1);
auto key2 = jit::JitCodeKey<jit::lstm_attr_t>(attr2);
auto key3 = jit::JitCodeKey<jit::lstm_attr_t>(attr3);
auto key4 = jit::JitCodeKey<jit::lstm_attr_t>(attr4);
TEST(JITKernel, kNCHW16CMulNC) {
TestNCHW16CMulNCKernel<jit::kNCHW16CMulNC, float, CPUPlace>();
TestNCHW16CMulNCKernel<jit::kNCHW16CMulNC, double, CPUPlace>();
EXPECT_TRUE(key1 != key2);
EXPECT_TRUE(key2 == key3);
EXPECT_TRUE(key3 != key4);
}
TEST(JITKernel, kLayerNorm) {
TestLayerNormKernel<jit::kLayerNorm, float, paddle::platform::CPUPlace>();
TestLayerNormKernel<jit::kLayerNorm, double, paddle::platform::CPUPlace>();
}
TEST(JITKernel_key, gru) {
jit::gru_attr_t attr1(8, jit::kVSigmoid, jit::kVTanh);
jit::gru_attr_t attr2(9, jit::kVSigmoid, jit::kVTanh);
jit::gru_attr_t attr3(9, jit::kVSigmoid, jit::kVTanh);
jit::gru_attr_t attr4(9, jit::kVSigmoid, jit::kVIdentity);
TEST(JITKernel, kCRFDecoding) {
TestCRFDecodingKernel<jit::kCRFDecoding, float, paddle::platform::CPUPlace>();
TestCRFDecodingKernel<jit::kCRFDecoding, double,
paddle::platform::CPUPlace>();
}
auto key1 = jit::JitCodeKey<jit::gru_attr_t>(attr1);
auto key2 = jit::JitCodeKey<jit::gru_attr_t>(attr2);
auto key3 = jit::JitCodeKey<jit::gru_attr_t>(attr3);
auto key4 = jit::JitCodeKey<jit::gru_attr_t>(attr4);
TEST(JITKernel, pool) {
// TODO(TJ): add some test
EXPECT_TRUE(key1 != key2);
EXPECT_TRUE(key2 == key3);
EXPECT_TRUE(key3 != key4);
}
// TODO(TJ): add more test about key and pool
......@@ -119,6 +119,18 @@ __device__ __forceinline__ int SelectTopBeam(
__syncthreads();
}
if ((num_used_threads & 0x1) != 0) {
// If num_used_threads is an odd number, merge the local top_beam of
// threads 0 and num_used_threads - 1
if (tid_of_seq == 0) {
int index_in_sh = (num_used_threads - 1 + tid) * beam_size;
for (int i = 0; i < beam_size; i++) {
Insert(top_beam_local, top_beam[index_in_sh], beam_size);
index_in_sh++;
}
}
}
num_used_threads = num_used_threads >> 1;
if (tid_of_seq < num_used_threads) {
int index_in_sh = (num_used_threads + tid) * beam_size;
......
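The surrounding (elided) code halves num_used_threads each round; a CPU sketch of the same reduction shape, with the odd-count fixup above folded into entry 0 (Beam, Merge, and ReduceBeams are illustrative names, not the kernel's):
#include <algorithm>
#include <functional>
#include <vector>
using Beam = std::vector<float>;  // one thread's beam_size best scores
// Fold src into dst, keeping only the best dst->size() scores.
static void Merge(Beam* dst, const Beam& src) {
  const size_t beam_size = src.size();
  dst->insert(dst->end(), src.begin(), src.end());
  std::partial_sort(dst->begin(), dst->begin() + beam_size, dst->end(),
                    std::greater<float>());
  dst->resize(beam_size);
}
// Pairwise tree reduction over n beams; the odd-count fixup runs first
// each round, mirroring the kernel above.
static void ReduceBeams(std::vector<Beam>* beams) {
  size_t n = beams->size();
  while (n > 1) {
    if (n & 1) Merge(&(*beams)[0], (*beams)[n - 1]);
    n >>= 1;
    for (size_t i = 0; i < n; ++i) Merge(&(*beams)[i], (*beams)[i + n]);
  }
}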
......@@ -184,6 +184,9 @@ class Blas {
template <typename T>
void VINV(int n, const T* a, T* y) const;
template <typename T>
void VMERF(int n, const T* a, T* y, int64_t mode) const;
private:
const DeviceContext& context_;
};
......@@ -290,6 +293,11 @@ class BlasT : private Blas<DeviceContext> {
Base()->template VINV<T>(args...);
}
template <typename... ARGS>
void VMERF(ARGS... args) const {
Base()->template VMERF<T>(args...);
}
private:
const Blas<DeviceContext>* Base() const {
return static_cast<const Blas<DeviceContext>*>(this);
......
......@@ -123,6 +123,11 @@ struct CBlas<float> {
static void VINV(ARGS... args) {
platform::dynload::vsInv(args...);
}
template <typename... ARGS>
static void VMERF(ARGS... args) {
platform::dynload::vmsErf(args...);
}
};
template <>
......@@ -223,6 +228,11 @@ struct CBlas<double> {
static void VINV(ARGS... args) {
platform::dynload::vdInv(args...);
}
template <typename... ARGS>
static void VMERF(ARGS... args) {
platform::dynload::vmdErf(args...);
}
};
#else
......@@ -625,6 +635,19 @@ void Blas<DeviceContext>::VINV(int n, const T *a, T *y) const {
#endif
}
template <>
template <typename T>
void Blas<platform::CPUDeviceContext>::VMERF(int n, const T *a, T *y,
int64_t mode) const {
#ifdef PADDLE_WITH_MKLML
CBlas<T>::VMERF(n, a, y, mode);
#else
for (int i = 0; i < n; ++i) {
y[i] = std::erf(a[i]);
}
#endif
}
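A hedged usage sketch (GetBlas is the usual accessor for this class; VML_HA is MKL's high-accuracy mode constant and assumes MKL headers, while the fallback above ignores the mode argument):
auto blas = paddle::operators::math::GetBlas<
    paddle::platform::CPUDeviceContext, float>(dev_ctx);
std::vector<float> x = {-1.f, 0.f, 1.f}, y(3);
blas.VMERF(3, x.data(), y.data(), VML_HA);
// y ≈ {-0.8427f, 0.f, 0.8427f}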
} // namespace math
} // namespace operators
} // namespace paddle
......@@ -96,8 +96,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
std::vector<int> src_tz = framework::vectorize2int(x->dims());
auto src_format =
src_tz.size() == 2 ? mkldnn::memory::format::nc : x->format();
auto src_format = x->format();
const std::string key = gethash(src_tz, algorithm);
const std::string key_src_data =
......@@ -127,10 +126,8 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
if (p_fwd == nullptr) {
// create mkldnn memory for input X
auto src_md = platform::MKLDNNMemDesc(
src_tz, platform::MKLDNNGetDataType<T>(), src_format);
auto src_memory = std::shared_ptr<memory>(
new memory({src_md, mkldnn_engine}, to_void_cast(x_data)));
new memory(x->get_mkldnn_prim_desc(), to_void_cast(x_data)));
// save src_memory so it can be referred to in the backward path
dev_ctx.SetBlob(key_src_mem, src_memory);
......@@ -177,8 +174,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
pipeline.push_back(*p_fwd);
stream(stream::kind::eager).submit(pipeline).wait();
y->set_layout(DataLayout::kMKLDNN);
y->set_format(GetMKLDNNFormat(*dst_memory));
y->set_mkldnn_prim_desc(dst_memory->get_primitive_desc());
}
template <typename T>
......@@ -196,9 +192,6 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
std::vector<int> diff_dst_tz = framework::vectorize2int(diff_y->dims());
auto diff_y_format =
diff_dst_tz.size() == 2 ? mkldnn::memory::format::nc : diff_y->format();
const std::string key = gethash(diff_dst_tz, algorithm);
const std::string key_src_data =
key + ctx.op().Input("Out") + "@eltwise_fwd_src_data";
......@@ -210,8 +203,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
key + std::to_string(*p_src_layout) + "@eltwise_fwd_src_mem";
const std::string key_fwd_pd =
key + std::to_string(*p_src_layout) + "@eltwise_fwd_pd";
const std::string key_with_layouts =
key + std::to_string(*p_src_layout) + "-" + std::to_string(diff_y_format);
const std::string key_with_layouts = key + std::to_string(*p_src_layout) +
"-" + std::to_string(diff_y->format());
const std::string key_diff_src_mem =
key_with_layouts + "@eltwise_diff_src_mem";
const std::string key_diff_dst_mem =
......@@ -234,10 +227,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
if (p_grad == nullptr) {
// create mkldnn memory for input diff_y
auto diff_dst_md = platform::MKLDNNMemDesc(
diff_dst_tz, platform::MKLDNNGetDataType<T>(), diff_y_format);
auto diff_dst_memory = std::shared_ptr<memory>(
new memory({diff_dst_md, mkldnn_engine}, to_void_cast(diff_y_data)));
new memory(diff_y->get_mkldnn_prim_desc(), to_void_cast(diff_y_data)));
dev_ctx.SetBlob(key_diff_dst_mem, diff_dst_memory);
// retrieve eltwise primitive desc from device context
......@@ -281,8 +272,7 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
pipeline.push_back(*p_grad);
stream(stream::kind::eager).submit(pipeline).wait();
diff_x->set_layout(DataLayout::kMKLDNN);
diff_x->set_format(GetMKLDNNFormat(*diff_src_memory));
diff_x->set_mkldnn_prim_desc(diff_src_memory->get_primitive_desc());
}
template <typename T, mkldnn::algorithm algorithm>
......
......@@ -206,17 +206,14 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
if (fuse_with_relu) flags |= mkldnn::fuse_bn_relu;
// create mkldnn memory from input x tensor
mkldnn::memory::format input_format =
platform::MKLDNNFormatForSize(src_tz.size(), x->format());
// keys for backward pass
const std::string key = BatchNormMKLDNNHandler::GetHash(
src_tz, epsilon, flags, global_stats, input_format,
src_tz, epsilon, flags, global_stats, x->format(),
ctx.op().Output("SavedMean"));
const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd";
auto user_src_md = platform::MKLDNNMemDesc(
{src_tz}, platform::MKLDNNGetDataType<T>(), input_format);
auto user_src_md = x->get_mkldnn_prim_desc().desc();
// create primitive descriptor for batch norm forward
using bn_fwd_types = bn_type_traits<mkldnn::batch_normalization_forward>;
......@@ -230,8 +227,8 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
BatchNormMKLDNNHandler handler(batch_norm_fwd_pd, dev_ctx, mkldnn_engine,
key);
auto src_memory =
handler.AcquireSrcMemory(user_src_md, to_void_cast(x_data));
auto src_memory = handler.AcquireSrcMemory(x->get_mkldnn_prim_desc(),
to_void_cast(x_data));
// crate mkldnn memory for weights(scale/shift)
auto scaleshift_memory =
......@@ -265,8 +262,7 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
variance_memory, false);
}
y->set_layout(DataLayout::kMKLDNN);
y->set_format(platform::GetMKLDNNFormat(*dst_memory));
y->set_mkldnn_prim_desc(dst_memory->get_primitive_desc());
std::vector<mkldnn::primitive> pipeline;
pipeline.push_back(*batch_norm_p);
......@@ -336,9 +332,6 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
using bn_bwd_types = bn_type_traits<mkldnn::batch_normalization_backward>;
mkldnn::memory::format dst_format =
platform::MKLDNNFormatForSize(src_tz.size(), diff_y->format());
mkldnn::memory::format input_format =
platform::MKLDNNFormatForSize(src_tz.size(), x->format());
......@@ -346,14 +339,14 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
// keys from forward pass
const std::string key = BatchNormMKLDNNHandler::GetHash(
src_tz, epsilon, flags, false, input_format,
src_tz, epsilon, flags, false, x->format(),
ctx.op().Input("SavedMean"));
const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd";
// keys for primitives reuse
const std::string key_with_hash =
key + BatchNormMKLDNNHandler::GetHash(src_tz, epsilon, flags, false,
input_format);
x->format());
const std::string key_batch_norm_bwd_p =
key_with_hash + "@batch_norm_bwd_p";
const std::string key_batch_norm_src_mem_p =
......@@ -373,9 +366,8 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
primitive reorder_diff_dst;
bool is_diff_dst_reordered = false;
auto user_diff_dst_memory = memory(
{{{diff_dst_tz}, memory::data_type::f32, dst_format}, mkldnn_engine},
to_void_cast(diff_y_data));
auto user_diff_dst_memory =
memory(diff_y->get_mkldnn_prim_desc(), to_void_cast(diff_y_data));
// MKLDNN requires a single piece of memory for scale and shift/bias data
const size_t scaleshift_size = 2 * ic;
......@@ -459,10 +451,7 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
dev_ctx.SetBlob(key_batch_norm_diff_dst_mem_p, diff_dst_memory);
// set layout/format of output tensors
diff_x->set_layout(DataLayout::kMKLDNN);
diff_x->set_format((memory::format)diff_src_memory->get_primitive_desc()
.desc()
.data.format);
diff_x->set_mkldnn_prim_desc(diff_src_memory->get_primitive_desc());
} else {
// primitives already exist
UpdateMemoryData(dev_ctx, key_batch_norm_src_mem_p, to_void_cast(x_data));
......@@ -487,10 +476,7 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
}
// set layout/format of output tensors
diff_x->set_layout(DataLayout::kMKLDNN);
diff_x->set_format((memory::format)diff_src_memory->get_primitive_desc()
.desc()
.data.format);
diff_x->set_mkldnn_prim_desc(diff_src_memory->get_primitive_desc());
}
// execute optional reorder and batch_norm backward primitive
......
......@@ -47,11 +47,6 @@ static memory::primitive_desc CreateMemPrimDesc(const Tensor& input,
return mem_prim_desc;
}
static mkldnn::memory::format GetDstMemFormat(
const concat::primitive_desc& concat_pd) {
return (memory::format)concat_pd.dst_primitive_desc().desc().data.format;
}
static platform::CPUPlace GetCpuPlace(
const paddle::framework::ExecutionContext& ctx) {
auto place = ctx.GetPlace();
......@@ -139,8 +134,7 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto concat = prim_creator.CreateConcatPrimitive(concat_pd, output, place);
stream(stream::kind::eager).submit({concat}).wait();
output->set_layout(DataLayout::kMKLDNN);
output->set_format(GetDstMemFormat(concat_pd));
output->set_mkldnn_prim_desc(concat_pd.dst_primitive_desc());
}
};
} // namespace operators
......
......@@ -282,8 +282,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
pipeline.push_back(*conv_p);
stream(stream::kind::eager).submit(pipeline).wait();
auto dst_mpd = dst_memory_p->get_primitive_desc();
output->set_mkldnn_prim_desc(dst_mpd);
output->set_mkldnn_prim_desc(dst_memory_p->get_primitive_desc());
}
void ComputeINT8(const paddle::framework::ExecutionContext& ctx) const {
const bool is_test = ctx.Attr<bool>("is_test");
......@@ -972,8 +971,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
pipeline.push_back(*conv_bwd_data_p);
input_grad->set_layout(DataLayout::kMKLDNN);
input_grad->set_format(GetMKLDNNFormat(*diff_src_memory_p));
input_grad->set_mkldnn_prim_desc(diff_src_memory_p->get_primitive_desc());
}
stream(stream::kind::eager).submit(pipeline).wait();
}
......@@ -991,12 +989,12 @@ REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN,
REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN,
::paddle::platform::CPUPlace, U8,
ops::kConvMKLDNNFP32,
ops::kConvMKLDNNINT8,
ops::ConvMKLDNNOpKernel<uint8_t, float>);
REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN,
::paddle::platform::CPUPlace, S8,
ops::kConvMKLDNNFP32,
ops::kConvMKLDNNINT8,
ops::ConvMKLDNNOpKernel<int8_t, float>);
REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d_grad, MKLDNN,
......
......@@ -221,8 +221,7 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
pipeline.push_back(*conv_p);
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
output->set_layout(DataLayout::kMKLDNN);
output->set_format(platform::GetMKLDNNFormat(*dst_memory_p));
output->set_mkldnn_prim_desc(dst_memory_p->get_primitive_desc());
}
private:
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/dequantize_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
namespace paddle {
namespace operators {
......@@ -30,6 +31,18 @@ using framework::DataLayout;
using mkldnn::stream;
using platform::GetMKLDNNFormat;
std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
const mkldnn::memory::data_type& src_dt,
const std::vector<int>& src_tz, const float scale_data) {
std::string key;
key.reserve(platform::MKLDNNHandler::MaxKeyLength);
platform::MKLDNNHandler::AppendKey(&key, std::to_string(src_dt));
platform::MKLDNNHandler::AppendKeyDims(&key, src_tz);
platform::MKLDNNHandler::AppendKey(&key, std::to_string(scale_data));
platform::MKLDNNHandler::AppendKey(&key, ctx.op().Output("Output"));
return key;
}
template <typename T>
class DeQuantOpKernel : public framework::OpKernel<T> {
public:
......@@ -51,31 +64,55 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
mkldnn::memory::data_type src_dt =
paddle::framework::ToMKLDNNDataType(input->type());
mkldnn::memory::format src_fmt = input->format();
std::string key = CreateKey(ctx, src_dt, src_tz, reorder_scale[0]);
const std::string key_prim = key + "@reorder_p";
const std::string key_src_mem = key + "@src_mem";
const std::string key_dst_mem = key + "@dst_mem";
std::shared_ptr<mkldnn::memory> src_memory;
std::shared_ptr<mkldnn::memory> dst_memory;
std::shared_ptr<reorder> reorder_p;
reorder_p = std::static_pointer_cast<reorder>(dev_ctx.GetBlob(key_prim));
if (reorder_p == nullptr) {
mkldnn::primitive_attr attri;
int mask = 0;
attri.set_output_scales(mask, reorder_scale);
auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
src_memory =
std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
std::shared_ptr<primitive::at> src_memory_p =
std::shared_ptr<primitive::at>(new primitive::at(*src_memory));
auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::f32,
memory::format::nchw);
auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine);
dst_memory = std::make_shared<mkldnn::memory>(
dst_pd, to_void_cast<float>(output_data));
auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
new reorder::primitive_desc(src_pd, dst_pd, attri));
reorder_p = std::shared_ptr<reorder>(
new reorder(*reorder_pd, *src_memory_p, *dst_memory));
dev_ctx.SetBlob(key_prim, reorder_p);
dev_ctx.SetBlob(key_src_mem, src_memory);
dev_ctx.SetBlob(key_dst_mem, dst_memory);
} else {
src_memory = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(key_src_mem));
src_memory->set_data_handle(to_void_cast<T>(input_data));
dst_memory = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(key_dst_mem));
dst_memory->set_data_handle(output->mutable_data<float>(ctx.GetPlace()));
}
mkldnn::primitive_attr attri;
int mask = 0;
attri.set_output_scales(mask, reorder_scale);
auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
auto src_memory =
std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
std::shared_ptr<primitive::at> src_memory_p =
std::shared_ptr<primitive::at>(new primitive::at(*src_memory));
auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::f32,
memory::format::nchw);
auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine);
auto dst_memory = mkldnn::memory(dst_pd, to_void_cast<float>(output_data));
auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
new reorder::primitive_desc(src_pd, dst_pd, attri));
auto reorder_p = std::shared_ptr<reorder>(
new reorder(*reorder_pd, *src_memory_p, dst_memory));
pipeline.push_back(*reorder_p);
stream(stream::kind::eager).submit(pipeline).wait();
output->set_format(GetMKLDNNFormat(dst_memory));
output->set_format(GetMKLDNNFormat(*dst_memory));
}
};
......
......@@ -81,10 +81,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto e_mid = framework::EigenTensor<T, 4>::From(*mid);
e_mid = e_mid.constant(k);
auto dims = paddle::framework::vectorize2int(x->dims());
auto src_md = paddle::platform::MKLDNNMemDesc(
dims, mkldnn::memory::data_type::f32, x->format());
auto src_md = x->get_mkldnn_prim_desc().desc();
auto forward_desc = mkldnn::lrn_forward::desc{mkldnn::prop_kind::forward,
mkldnn::lrn_across_channels,
......@@ -94,7 +91,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
beta,
k};
auto src_memory_pd = mkldnn::memory::primitive_desc{src_md, mkldnn_engine};
auto src_memory_pd = x->get_mkldnn_prim_desc();
if (!is_test) {
const std::string key = ctx.op().Output("Out");
......@@ -111,16 +108,15 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
src_memory->set_data_handle(
static_cast<void*>(const_cast<T*>(input_data)));
auto dst_memory = mkldnn::memory(forward_pd->dst_primitive_desc(),
static_cast<void*>(output_data));
auto dst_memory_pd = forward_pd->dst_primitive_desc();
auto dst_memory =
mkldnn::memory(dst_memory_pd, static_cast<void*>(output_data));
auto workspace_memory = insert_to_context<mkldnn::memory>(
key_workspace_memory, dev_ctx,
forward_pd->workspace_primitive_desc());
run_primitive(*forward_pd, *src_memory, *workspace_memory, dst_memory);
out->set_layout(framework::DataLayout::kMKLDNN);
out->set_format(platform::GetMKLDNNFormat(dst_memory));
out->set_mkldnn_prim_desc(dst_memory_pd);
} else {
auto forward_pd =
mkldnn::lrn_forward::primitive_desc{forward_desc, mkldnn_engine};
......@@ -128,13 +124,12 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
src_memory_pd, static_cast<void*>(const_cast<T*>(input_data))};
auto workspace_memory =
mkldnn::memory{forward_pd.workspace_primitive_desc()};
auto dst_memory_pd = forward_pd.dst_primitive_desc();
auto dst_memory = mkldnn::memory(forward_pd.dst_primitive_desc(),
static_cast<void*>(output_data));
run_primitive(forward_pd, src_memory, workspace_memory, dst_memory);
out->set_layout(framework::DataLayout::kMKLDNN);
out->set_format(platform::GetMKLDNNFormat(dst_memory));
out->set_mkldnn_prim_desc(dst_memory_pd);
}
}
};
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/operators/pool_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
namespace paddle {
namespace operators {
......@@ -29,23 +30,23 @@ using mkldnn::stream;
using platform::to_void_cast;
// Generate keys for storing/retrieving primitives for this operator
// TODO(jczaja): Make the hashing function more optimal
static std::string gethash(const memory::dims& input_dims,
const std::string& pooling_type,
const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const memory::data_type& dt,
const std::string& suffix) {
auto dims2str = [](const memory::dims& operand_dims) {
std::string dstr = "";
for (size_t i = 0; i < operand_dims.size(); ++i) {
dstr += std::to_string(operand_dims[i]) + "-";
}
return dstr;
};
return dims2str(input_dims) + dims2str(ksize) + dims2str(strides) +
dims2str(paddings) + std::to_string(dt) + pooling_type + suffix;
std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
const memory::dims& input_dims,
const std::string& pooling_type,
const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const memory::data_type& dt, const std::string& suffix) {
std::string key;
key.reserve(platform::MKLDNNHandler::MaxKeyLength);
platform::MKLDNNHandler::AppendKeyDims(&key, input_dims);
platform::MKLDNNHandler::AppendKey(&key, pooling_type);
platform::MKLDNNHandler::AppendKeyVec(&key, ksize);
platform::MKLDNNHandler::AppendKeyVec(&key, strides);
platform::MKLDNNHandler::AppendKeyVec(&key, paddings);
platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt));
platform::MKLDNNHandler::AppendKey(&key, suffix);
return key;
}
static inline int ComputeCeiledOutput(int input_size, int kernel_size,
......@@ -114,8 +115,8 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
mkldnn::memory::data_type dt =
paddle::framework::ToMKLDNNDataType(input->type());
const std::string key = gethash(src_tz, pooling_type, ksize, strides,
paddings, dt, ctx.op().Output("Out"));
const std::string key = CreateKey(ctx, src_tz, pooling_type, ksize, strides,
paddings, dt, ctx.op().Output("Out"));
const std::string key_pool_p = key + "@pool_p";
const std::string key_pool_pd = key + "@pool_pd";
const std::string key_pool_src_mem_p = key + "@pool_src_mem_p";
......@@ -294,8 +295,8 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
// Get a unique name from the "argument" name of the "Out" variable
// This name will be used as the key when retrieving info from the device context
const std::string key =
gethash(diff_src_tz, pooling_type, ksize, strides, paddings,
memory::data_type::f32, ctx.op().Input("Out"));
CreateKey(ctx, diff_src_tz, pooling_type, ksize, strides, paddings,
memory::data_type::f32, ctx.op().Input("Out"));
const std::string key_pool_bwd_p = key + "@pool_bwd_p";
const std::string key_pool_diff_src_mem_p = key + "@pool_diff_src_mem_p";
const std::string key_pool_diff_dst_mem_p = key + "@pool_diff_dst_mem_p";
......
......@@ -30,6 +30,18 @@ using framework::DataLayout;
using mkldnn::stream;
using platform::GetMKLDNNFormat;
std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
const std::vector<int>& src_tz, const float scale_data,
const bool is_negative) {
std::string key;
key.reserve(platform::MKLDNNHandler::MaxKeyLength);
platform::MKLDNNHandler::AppendKeyDims(&key, src_tz);
platform::MKLDNNHandler::AppendKey(&key, std::to_string(scale_data));
platform::MKLDNNHandler::AppendKey(&key, std::to_string(is_negative));
platform::MKLDNNHandler::AppendKey(&key, ctx.op().Output("Output"));
return key;
}
template <typename T>
class QuantOpKernel : public framework::OpKernel<T> {
public:
......@@ -47,32 +59,61 @@ class QuantOpKernel : public framework::OpKernel<T> {
const T* input_data = input->data<T>();
mkldnn::primitive_attr attri;
int mask = 0;
attri.set_output_scales(mask, {scale_data});
auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32,
input->format());
auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
auto src_memory =
std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
std::shared_ptr<primitive::at> src_memory_p =
std::shared_ptr<primitive::at>(new primitive::at(*src_memory));
bool is_negative = ctx.Attr<bool>("is_negative_input");
std::shared_ptr<mkldnn::memory::primitive_desc> dst_pd;
std::string key = CreateKey(ctx, src_tz, scale_data, is_negative);
const std::string key_prim = key + "@reorder_p";
const std::string key_src_mem = key + "@src_mem";
const std::string key_dst_mem = key + "@dst_mem";
std::shared_ptr<mkldnn::memory> src_memory;
std::shared_ptr<mkldnn::memory> dst_memory;
if (is_negative) {
platform::ConvMKLDNNHandler::SetDstMemory<int8_t>(
ctx, output, dst_tz, engine, dst_pd, dst_memory);
std::shared_ptr<reorder> reorder_p;
reorder_p = std::static_pointer_cast<reorder>(dev_ctx.GetBlob(key_prim));
if (reorder_p == nullptr) {
mkldnn::primitive_attr attri;
int mask = 0;
attri.set_output_scales(mask, {scale_data});
auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32,
input->format());
auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
src_memory =
std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
std::shared_ptr<primitive::at> src_memory_p =
std::shared_ptr<primitive::at>(new primitive::at(*src_memory));
std::shared_ptr<mkldnn::memory::primitive_desc> dst_pd;
if (is_negative) {
platform::ConvMKLDNNHandler::SetDstMemory<int8_t>(
ctx, output, dst_tz, engine, dst_pd, dst_memory);
} else {
platform::ConvMKLDNNHandler::SetDstMemory<uint8_t>(
ctx, output, dst_tz, engine, dst_pd, dst_memory);
}
auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
new reorder::primitive_desc(src_pd, *dst_pd, attri));
reorder_p = std::shared_ptr<reorder>(
new reorder(*reorder_pd, *src_memory_p, *dst_memory));
dev_ctx.SetBlob(key_prim, reorder_p);
dev_ctx.SetBlob(key_src_mem, src_memory);
dev_ctx.SetBlob(key_dst_mem, dst_memory);
} else {
platform::ConvMKLDNNHandler::SetDstMemory<uint8_t>(
ctx, output, dst_tz, engine, dst_pd, dst_memory);
src_memory = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(key_src_mem));
src_memory->set_data_handle(to_void_cast<T>(input_data));
dst_memory = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(key_dst_mem));
auto place = ctx.GetPlace();
if (is_negative) {
dst_memory->set_data_handle(output->mutable_data<int8_t>(place));
} else {
dst_memory->set_data_handle(output->mutable_data<uint8_t>(place));
}
}
auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
new reorder::primitive_desc(src_pd, *dst_pd, attri));
auto reorder_p = std::shared_ptr<reorder>(
new reorder(*reorder_pd, *src_memory_p, *dst_memory));
pipeline.push_back(*reorder_p);
stream(stream::kind::eager).submit(pipeline).wait();
output->set_layout(DataLayout::kMKLDNN);
......
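The branch above is a get-or-create idiom: look the reorder primitive up by key in the device context's blob map, build and store it on a miss, and only refresh the data handles on a hit. A hedged generic sketch, with a plain std::unordered_map standing in for the MKLDNNDeviceContext blob store:

#include <memory>
#include <string>
#include <unordered_map>

template <typename T, typename Factory>
std::shared_ptr<T> GetOrCreateBlob(
    std::unordered_map<std::string, std::shared_ptr<void>>* blobs,
    const std::string& key, Factory make) {
  auto it = blobs->find(key);
  if (it != blobs->end()) {
    // Cache hit: reuse the object; the caller only swaps data handles.
    return std::static_pointer_cast<T>(it->second);
  }
  std::shared_ptr<T> obj = make();  // cache miss: construct once
  (*blobs)[key] = obj;
  return obj;
}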
......@@ -158,6 +158,14 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
auto softmax_p =
handler.AcquireSoftmax(softmax_dst_memory_p, softmax_src_memory_p);
// We cannot use softmax_dst_memory_p to get prim desc as
// it contains flattened dims (2D) while output tensor can
// have 2,3,4+ dims
auto output_mem_pd = paddle::platform::create_prim_desc_from_dims(
paddle::framework::vectorize2int(output->dims()),
mkldnn::memory::format::blocked);
output->set_mkldnn_prim_desc(output_mem_pd);
std::vector<primitive> pipeline{
*(static_cast<softmax_forward::primitive*>(softmax_p.get()))};
stream(stream::kind::eager).submit(pipeline).wait();
......
......@@ -79,15 +79,6 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
memory::format input_format = input0.format();
if (src_tz.size() == 1 && (input_format == memory::format::nchw ||
input_format == memory::format::nhwc)) {
input_format = memory::format::x;
}
if (src_tz.size() == 2 && (input_format == memory::format::nchw ||
input_format == memory::format::nhwc)) {
input_format = memory::format::nc;
}
for (int i = 0; i < N; i++) {
PADDLE_ENFORCE(in_vars[i]->IsType<LoDTensor>(),
"all inputs must be all LoDTensors");
......@@ -115,12 +106,12 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
memory::desc(dst_tz, memory::data_type::f32, memory::format::any);
auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd);
auto dst_mem_pd = sum_pd.dst_primitive_desc();
std::shared_ptr<memory> dst_mem;
if (in_place) {
dst_mem.reset(new memory(sum_pd.dst_primitive_desc()));
dst_mem.reset(new memory(dst_mem_pd));
} else {
dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data));
dst_mem.reset(new memory(dst_mem_pd, output_data));
}
std::vector<mkldnn::primitive::at> inputs;
for (size_t i = 0; i < srcs_mem.size(); ++i) {
......@@ -145,107 +136,11 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
if (in_place) pipeline.push_back(reorder_prim);
stream(stream::kind::eager).submit(pipeline).wait();
output->set_layout(DataLayout::kMKLDNN);
output->set_format(output_format);
} else if (out_var->IsType<framework::SelectedRows>()) {
// TODO(@mozga-intel) Add MKLDNN SelectedRows support
std::unique_ptr<framework::SelectedRows> in0;
if (in_place) {
// If is in_place, we store the input[0] to in0
auto& in_sel0 = in_vars[0]->Get<SelectedRows>();
auto& rows = in_sel0.rows();
in0.reset(new framework::SelectedRows(rows, in_sel0.height()));
in0->mutable_value()->ShareDataWith(in_sel0.value());
}
auto get_selected_row = [&](size_t i) -> const SelectedRows& {
if (i == 0 && in0) {
return *in0;
} else {
return in_vars[i]->Get<SelectedRows>();
}
};
auto* out = ctx.Output<SelectedRows>("Out");
out->mutable_rows()->clear();
auto* out_value = out->mutable_value();
// Runtime InferShape
size_t first_dim = 0;
for (int i = 0; i < N; i++) {
auto& sel_row = get_selected_row(i);
first_dim += sel_row.rows().size();
}
std::vector<int64_t> in_dim;
for (int i = 0; i < N; i++) {
auto& sel_row = get_selected_row(i);
if (sel_row.rows().size() > 0) {
in_dim = framework::vectorize(sel_row.value().dims());
break;
}
}
if (in_dim.empty()) {
VLOG(3) << "WARNING: all the inputs are empty";
in_dim = framework::vectorize(get_selected_row(N - 1).value().dims());
} else {
in_dim[0] = static_cast<int64_t>(first_dim);
}
in_dim[0] = static_cast<int64_t>(first_dim);
out_value->Resize(framework::make_ddim(in_dim));
out_value->mutable_data<T>(ctx.GetPlace());
// if all the input sparse vars are empty, no need to
// merge these vars.
if (first_dim == 0UL) {
return;
}
math::SelectedRowsAddTo<CPUDeviceContext, T> functor;
int64_t offset = 0;
for (int i = 0; i < N; i++) {
auto& sel_row = get_selected_row(i);
if (sel_row.rows().size() == 0) {
continue;
}
PADDLE_ENFORCE_EQ(out->height(), sel_row.height());
functor(ctx.template device_context<CPUDeviceContext>(), sel_row,
offset, out);
offset += sel_row.value().numel();
}
} else if (out_var->IsType<framework::LoDTensorArray>()) {
// TODO(@mozga-intel) Add MKLDNN LoDTensorArray support
auto& out_array = *out_var->GetMutable<framework::LoDTensorArray>();
for (size_t i = in_place ? 1 : 0; i < in_vars.size(); ++i) {
PADDLE_ENFORCE(in_vars[i]->IsType<framework::LoDTensorArray>(),
"Only support all inputs are TensorArray");
auto& in_array = in_vars[i]->Get<framework::LoDTensorArray>();
for (size_t i = 0; i < in_array.size(); ++i) {
if (in_array[i].numel() != 0) {
if (i >= out_array.size()) {
out_array.resize(i + 1);
}
if (out_array[i].numel() == 0) {
framework::TensorCopy(in_array[i], in_array[i].place(),
ctx.device_context(), &out_array[i]);
out_array[i].set_lod(in_array[i].lod());
} else {
PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod());
auto in = EigenVector<T>::Flatten(in_array[i]);
auto result = EigenVector<T>::Flatten(out_array[i]);
result.device(*ctx.template device_context<MKLDNNDeviceContext>()
.eigen_device()) = result + in;
}
}
}
}
} else {
PADDLE_THROW("Unexpected branch, output variable type is %s",
framework::ToTypeName(out_var->Type()));
output->set_mkldnn_prim_desc(dst_mem_pd);
} else { // Fallback to naive version
// TODO(@mozga-intel) Add MKLDNN SelectedRows & LoDTensorArray support
SumKernel<CPUDeviceContext, T> reference_kernel;
reference_kernel.Compute(ctx);
}
}
};
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include <algorithm>
#include <functional>
#include <memory>
#include <vector>
#include "ngraph/ngraph.hpp"
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
......
......@@ -16,7 +16,10 @@ limitations under the License. */
#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/block_desc.h"
......@@ -483,7 +486,8 @@ void NgraphEngine::Run(const framework::Scope& scope,
}
}
backend_->call(backend_->compile(ngraph_function_), t_out, t_in);
auto handle = backend_->compile(ngraph_function_);
handle->call_with_validate(t_out, t_in);
} // NgraphEngine::Run
} // namespace operators
} // namespace paddle
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
......
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
......
/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
#include "paddle/fluid/platform/ngraph_helper.h"
namespace paddle {
namespace operators {
namespace ngraphs {
void BuildAdamNode(
const std::shared_ptr<framework::OperatorBase>& op,
std::shared_ptr<
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
ngb_node_map) {
auto op_attrs = framework::AttrReader(op->Attrs());
auto beta1pow = platform::GetInputNode(op, "Beta1Pow", ngb_node_map);
auto beta2pow = platform::GetInputNode(op, "Beta2Pow", ngb_node_map);
auto grad = platform::GetInputNode(op, "Grad", ngb_node_map);
auto learning_rate = platform::GetInputNode(op, "LearningRate", ngb_node_map);
auto moment1 = platform::GetInputNode(op, "Moment1", ngb_node_map);
auto moment2 = platform::GetInputNode(op, "Moment2", ngb_node_map);
auto param = platform::GetInputNode(op, "Param", ngb_node_map);
auto epsilon = op_attrs.Get<float>("epsilon");
auto beta2 = op_attrs.Get<float>("beta2");
auto beta1 = op_attrs.Get<float>("beta1");
auto moment1_shape = moment1->get_shape();
auto grad_shape = grad->get_shape();
auto moment1out = std::make_shared<ngraph::op::Add>(
ElementwiseScalar<ngraph::op::Multiply>(beta1, moment1),
ElementwiseScalar<ngraph::op::Multiply>(1. - beta1, grad));
auto grad_square = std::make_shared<ngraph::op::Multiply>(grad, grad);
auto moment2out = std::make_shared<ngraph::op::Add>(
ElementwiseScalar<ngraph::op::Multiply>(beta2, moment2),
ElementwiseScalar<ngraph::op::Multiply>(1. - beta2, grad_square));
auto node_sqrt = std::make_shared<ngraph::op::Sqrt>(
ElementwiseScalar<ngraph::op::Subtract>(1., beta2pow));
auto lr = std::make_shared<ngraph::op::Divide>(
node_sqrt, ElementwiseScalar<ngraph::op::Subtract>(1., beta1pow));
auto updated_lr = std::make_shared<ngraph::op::Multiply>(learning_rate, lr);
auto moment2_sqrt = std::make_shared<ngraph::op::Sqrt>(moment2out);
auto param_grad = std::make_shared<ngraph::op::Divide>(
moment1out, ElementwiseScalar<ngraph::op::Add>(epsilon, moment2_sqrt));
auto delta = ElementwiseScalar<ngraph::op::Multiply>(updated_lr, param_grad);
auto param_out = std::make_shared<ngraph::op::Subtract>(param, delta);
platform::SetOutputNode(op, "Moment1Out", moment1out, ngb_node_map);
platform::SetOutputNode(op, "Moment2Out", moment2out, ngb_node_map);
platform::SetOutputNode(op, "ParamOut", param_out, ngb_node_map);
}
} // namespace ngraphs
} // namespace operators
} // namespace paddle
REGISTER_NG_OP(adam, BuildAdamNode);
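For reference, the node graph assembled above encodes the standard Adam step, using the attributes read from the op:
moment1_out = beta1 * moment1 + (1 - beta1) * grad
moment2_out = beta2 * moment2 + (1 - beta2) * grad * grad
lr_t = learning_rate * sqrt(1 - beta2pow) / (1 - beta1pow)
param_out = param - lr_t * moment1_out / (sqrt(moment2_out) + epsilon)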
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "ngraph/ngraph.hpp"
......
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
#include "paddle/fluid/platform/ngraph_helper.h"
......
/*Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
#include "paddle/fluid/platform/ngraph_helper.h"
namespace paddle {
namespace operators {
namespace ngraphs {
void BuildConcatNode(
const std::shared_ptr<framework::OperatorBase>& op,
std::shared_ptr<
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
ngb_node_map) {
std::vector<std::shared_ptr<ngraph::Node>> args;
for (auto& var_name_item : op->Inputs()) {
for (auto& var_name : var_name_item.second) {
auto& node0 = ngb_node_map->at(var_name);
args.push_back(node0);
}
}
auto op_attrs = framework::AttrReader(op->Attrs());
const size_t axis = op_attrs.Get<int>("axis");
auto out = std::make_shared<ngraph::op::Concat>(args, axis);
platform::SetOutputNode(op, "Out", out, ngb_node_map);
}
} // namespace ngraphs
} // namespace operators
} // namespace paddle
REGISTER_NG_OP(concat, BuildConcatNode);
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
......
......@@ -15,7 +15,9 @@ limitations under the License. */
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
......
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "ngraph/ngraph.hpp"
......
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
......
......@@ -15,7 +15,9 @@ limitations under the License. */
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h"
......
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
......
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
#include "paddle/fluid/platform/ngraph_helper.h"
......
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "ngraph/ngraph.hpp"
......
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
......
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h"
......
......@@ -18,6 +18,7 @@ limitations under the License. */
#include <vector>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
#include "paddle/fluid/platform/ngraph_helper.h"
namespace paddle {
......@@ -53,3 +54,5 @@ void BuildSumNode(
} // namespace ngraphs
} // namespace operators
} // namespace paddle
REGISTER_NG_OP(sum, BuildSumNode);
......@@ -14,7 +14,9 @@ limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
#include "paddle/fluid/platform/ngraph_helper.h"
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/jit/kernels.h"
namespace paddle {
namespace operators {
......@@ -32,53 +33,57 @@ class SGDOpKernel : public framework::OpKernel<T> {
if (param_var->IsType<framework::LoDTensor>()) {
const auto *param = ctx.Input<framework::Tensor>("Param");
auto *param_out = ctx.Output<framework::Tensor>("ParamOut");
// Actually, all tensors are LoDTensor except SelectedRows.
if (grad_var->IsType<framework::LoDTensor>()) {
param_out->mutable_data<T>(ctx.GetPlace());
const auto *grad = ctx.Input<framework::Tensor>("Grad");
auto p = framework::EigenVector<T>::Flatten(*param);
auto g = framework::EigenVector<T>::Flatten(*grad);
auto o = framework::EigenVector<T>::Flatten(*param_out);
auto *lr = learning_rate->data<T>();
o = p - lr[0] * g;
auto sz = param_out->numel();
PADDLE_ENFORCE_EQ(param->numel(), sz);
PADDLE_ENFORCE_EQ(grad->numel(), sz);
jit::sgd_attr_t attr(1, sz, 1, sz, 1);
const T *lr = learning_rate->data<T>();
const T *param_data = param->data<T>();
const T *grad_data = grad->data<T>();
int64_t rows_idx = 0;
T *out_data = param_out->mutable_data<T>(ctx.GetPlace());
auto sgd =
jit::Get<jit::kSgd, jit::SgdTuples<T>, platform::CPUPlace>(attr);
sgd(lr, param_data, grad_data, &rows_idx, out_data, &attr);
} else if (grad_var->IsType<framework::SelectedRows>()) {
// TODO(qijun): In Sparse SGD operator, in-place update is enforced.
// This manual optimization brings difficulty to track data dependency.
// It's better to find a more elegant solution.
PADDLE_ENFORCE_EQ(param, param_out);
const auto *grad = ctx.Input<framework::SelectedRows>("Grad");
auto &grad_rows = grad->rows();
// for distributed training, a sparse var may be empty,
// just skip updating.
if (grad->rows().size() == 0) {
if (grad_rows.size() == 0) {
return;
}
auto grad_height = grad->height();
auto out_dims = param_out->dims();
PADDLE_ENFORCE_EQ(grad_height, out_dims[0]);
PADDLE_ENFORCE_EQ(grad->height(), out_dims[0]);
auto &grad_value = grad->value();
auto &grad_rows = grad->rows();
size_t grad_row_numel = grad_value.numel() / grad_rows.size();
PADDLE_ENFORCE_EQ(static_cast<int64_t>(grad_row_numel),
param_out->numel() / grad_height);
auto *grad_data = grad_value.data<T>();
auto *out_data = param_out->data<T>();
auto *lr = learning_rate->data<T>();
for (size_t i = 0; i < grad_rows.size(); i++) {
PADDLE_ENFORCE(grad_rows[i] < grad_height,
"Input rows index should less than height");
for (size_t j = 0; j < grad_row_numel; j++) {
out_data[grad_rows[i] * grad_row_numel + j] -=
lr[0] * grad_data[i * grad_row_numel + j];
}
}
const T *param_data = param->data<T>();
const T *grad_data = grad_value.data<T>();
const T *lr = learning_rate->data<T>();
const int64_t *rows_data = grad_rows.data();
T *out_data = param_out->mutable_data<T>(ctx.GetPlace());
jit::sgd_attr_t attr;
attr.param_height = out_dims[0];
attr.param_width = param_out->numel() / attr.param_height;
attr.grad_height = grad_rows.size(); // note: it is not grad->height()
attr.grad_width = grad_value.numel() / attr.grad_height;
attr.selected_rows_size = grad_rows.size();
PADDLE_ENFORCE_EQ(attr.grad_width, attr.param_width);
auto sgd =
jit::Get<jit::kSgd, jit::SgdTuples<T>, platform::CPUPlace>(attr);
sgd(lr, param_data, grad_data, rows_data, out_data, &attr);
} else {
PADDLE_THROW("Unsupported Variable Type of Grad");
}
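For intuition, a hedged scalar reference of the selected-rows update the jitted kernel performs (illustrative names, not the jit implementation; the dense case is the same loop with a single implicit row):

#include <cstdint>

void SgdSelectedRowsRef(const float* lr, const float* param,
                        const float* grad, const int64_t* rows,
                        float* out, int64_t selected_rows_size,
                        int64_t row_width) {
  for (int64_t i = 0; i < selected_rows_size; ++i) {
    const int64_t r = rows[i];  // destination row in the parameter
    for (int64_t j = 0; j < row_width; ++j) {
      out[r * row_width + j] =
          param[r * row_width + j] - lr[0] * grad[i * row_width + j];
    }
  }
}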
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/operators/reader/buffered_reader.h"
#include <memory>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
......
......@@ -22,6 +22,9 @@ class SequenceEnumerateOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
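// Note: at runtime the kernels set the output shape and LoD themselves
// (see the out->Resize(...) / out->set_lod(...) additions below), so the
// static checks are skipped here.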
if (ctx->IsRuntime()) {
return;
}
PADDLE_ENFORCE(
ctx->HasInput("X"),
"Input(X) of SequecceEnumerate operator should not be null.");
......@@ -33,9 +36,9 @@ class SequenceEnumerateOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(
x_dims.size(), 2,
"Input(X) of SequenceEnumerate operator's rank should be 2.");
PADDLE_ENFORCE_EQ(
x_dims[1], 1,
"Input(X) of SequenceEnumerate operator's 2nd dimension should be 1.");
PADDLE_ENFORCE_EQ(x_dims[1], 1,
"Input(X) of SequenceEnumerate operator's 2nd "
"dimension should be 1.");
const auto win_size = ctx->Attrs().Get<int>("win_size");
ctx->SetOutputDim("Out", {x_dims[0], win_size});
......
......@@ -65,6 +65,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel<T> {
auto lod0 = in_lod[0];
auto in_len = in->numel();
auto in_data = in->data<T>();
out->Resize({in_dims[0], win_size});
auto out_data = out->mutable_data<T>(context.GetPlace());
// Copy LoD to GPU
const size_t* dev_in_lod_ptr = lod0.CUDAData(context.GetPlace());
......@@ -72,6 +73,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel<T> {
CalcOutPut<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1,
PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
in_data, dev_in_lod_ptr, lod0.size(), win_size, pad_value, out_data);
out->set_lod(in->lod());
}
};
......
......@@ -39,6 +39,7 @@ class SequenceEnumerateKernel : public framework::OpKernel<T> {
// Generate enumerate sequence set
auto lod0 = in_lod[0];
auto in_data = in->data<T>();
out->Resize({in_dims[0], win_size});
auto out_data = out->mutable_data<T>(context.GetPlace());
for (size_t i = 0; i < lod0.size() - 1; ++i) {
for (size_t idx = lod0[i]; idx < lod0[i + 1]; ++idx) {
......@@ -49,6 +50,7 @@ class SequenceEnumerateKernel : public framework::OpKernel<T> {
}
}
}
out->set_lod(in->lod());
}
};
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/operators/sequence_ops/sequence_expand_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
......@@ -88,6 +89,49 @@ void GetOutputOffset(const framework::Vector<size_t>& x_lod,
}
}
template <typename T>
static int ExpandByMemoryCopy(const platform::CUDADeviceContext& context,
const LoDTensor& x, LoDTensor* out,
const framework::Vector<size_t>& x_lod,
const framework::Vector<size_t>& ref_lod,
bool do_copy) {
auto out_data = out->data<T>();
auto x_data = x.data<T>();
auto& gpu_place = boost::get<platform::CUDAPlace>(context.GetPlace());
int x_item_length = x.numel() / x.dims()[0];
int out_offset = 0;
int num_copys = 0;
for (size_t i = 1; i < ref_lod.size(); ++i) {
int repeat_num = ref_lod[i] - ref_lod[i - 1];
int x_start = x_lod[i - 1];
int x_end = x_lod[i];
int x_seq_len = x_end - x_start;
if (repeat_num > 0) {
if (do_copy) {
int out_start = out_offset;
if (out->lod().size() == 1) {
out_start = out->lod()[0][out_offset];
}
for (int j = 0; j < repeat_num; j++) {
for (int k = 0; k < x_seq_len; k++) {
memory::Copy(
gpu_place,
out_data + (out_start + j * x_seq_len + k) * x_item_length,
gpu_place, x_data + (x_start + k) * x_item_length,
sizeof(T) * x_item_length, context.stream());
}
}
} else {
num_copys += repeat_num * x_seq_len;
}
}
out_offset += repeat_num;
}
return num_copys;
}
template <typename T>
struct SequenceExpandFunctor<platform::CUDADeviceContext, T> {
void operator()(
......@@ -95,22 +139,40 @@ struct SequenceExpandFunctor<platform::CUDADeviceContext, T> {
const framework::Vector<size_t>& x_lod, /*expand source lod*/
const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
LoDTensor* out) {
int x_item_length = x.numel() / x.dims()[0];
framework::Vector<size_t> out_offset(x_lod.size());
GetOutputOffset(x_lod, ref_lod, &out_offset);
int thread_x = std::min(32, std::max(static_cast<int>(ref_lod.size()), 16));
int thread_y = 16;
int thread_z = 1024 / thread_x / thread_y;
int block_x = static_cast<int>(ref_lod.size());
dim3 block_size(thread_x, thread_y, thread_z);
dim3 grid_size(block_x, 1);
int num_copys =
ExpandByMemoryCopy<T>(context, x, out, x_lod, ref_lod, false);
// Sometimes direct copies will be faster; this may need deeper analysis.
if (num_copys < 5) {
ExpandByMemoryCopy<T>(context, x, out, x_lod, ref_lod, true);
} else {
int x_item_length = x.numel() / x.dims()[0];
size_t x_lod_size = x_lod.size();
framework::Vector<size_t> out_offset(x_lod_size * 2 + ref_lod.size());
GetOutputOffset(x_lod, ref_lod, &out_offset);
for (size_t i = 0; i < x_lod_size; ++i) {
out_offset[x_lod_size + i] = x_lod[i];
}
for (size_t i = 0; i < ref_lod.size(); ++i) {
out_offset[2 * x_lod_size + i] = ref_lod[i];
}
sequence_expand_kernel<<<grid_size, block_size, 0, context.stream()>>>(
x.data<T>(), x_lod.CUDAData(context.GetPlace()),
ref_lod.CUDAData(context.GetPlace()),
out_offset.CUDAData(context.GetPlace()), x_lod.size(), x_item_length,
out->mutable_data<T>(context.GetPlace()));
const size_t* out_offset_data = out_offset.CUDAData(context.GetPlace());
const size_t* x_lod_data = out_offset_data + x_lod_size;
const size_t* ref_lod_data = out_offset_data + 2 * x_lod_size;
int thread_x =
std::min(32, std::max(static_cast<int>(ref_lod.size()), 16));
int thread_y = 16;
int thread_z = 1024 / thread_x / thread_y;
int block_x = static_cast<int>(ref_lod.size());
dim3 block_size(thread_x, thread_y, thread_z);
dim3 grid_size(block_x, 1);
sequence_expand_kernel<<<grid_size, block_size, 0, context.stream()>>>(
x.data<T>(), x_lod_data, ref_lod_data, out_offset_data, x_lod_size,
x_item_length, out->mutable_data<T>(context.GetPlace()));
}
}
};
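The dispatch above is a two-pass pattern: run the expansion once in counting mode, then pick the cheaper strategy. A minimal sketch of that shape (names illustrative):

#include <functional>

// expand(false) only counts copies; expand(true) actually performs them.
void DispatchExpand(const std::function<int(bool)>& expand,
                    const std::function<void()>& launch_kernel) {
  const int num_copies = expand(false);  // dry run
  if (num_copies < 5) {
    expand(true);     // few copies: direct memory::Copy path is cheaper
  } else {
    launch_kernel();  // many copies: the batched CUDA kernel wins
  }
}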
......
......@@ -46,10 +46,10 @@ class SoftmaxWithCrossEntropyOpMaker
.SetDefault(false);
AddAttr<bool>(
"numeric_stable_mode",
"(bool, default: false), A flag to indicate whether to use more "
"(bool, default: true), A flag to indicate whether to use more "
"numerically stable algorithm. This flag is only valid when "
"soft_label is false and GPU is used.")
.SetDefault(false);
.SetDefault(true);
AddAttr<int>(
"ignore_index",
"(int, default -100), Specifies a target value that is ignored and"
......
......@@ -117,11 +117,11 @@ class TeacherStudentSigmoidLossOpMaker
"[N x 1]. The teacher student sigmoid loss.");
AddAttr<float>(
"soft_max_up_bound",
"fp32, if input > soft_max_up_bound, will be bound, default 15.0")
"fp32, if input > soft_max_up_bound, input will be bound, default 15.0")
.SetDefault(15.0);
AddAttr<float>(
"soft_max_lower_bound",
"fp32, if input < soft_max_lower_bound, will be bound, default -15.0")
AddAttr<float>("soft_max_lower_bound",
"fp32, if input < soft_max_lower_bound, input will be "
"bound, default -15.0")
.SetDefault(-15.0);
AddComment(R"DOC(
TeacherStudentSigmoidLoss Operator.
......@@ -134,7 +134,7 @@ we add another label(z') to original.
label = {-2, -1, [0, 2]}
when z' does not exist, clk = 0 : label = -2;
when z' does not exist, clk = 1 : label = -1;
when z' exists, clk = 0 : label = 0 + z';
when z' exists, clk = 1 : label = 1 + z';
)DOC");
......
......@@ -82,6 +82,7 @@ nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_
cc_test(init_test SRCS init_test.cc DEPS device_context)
nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
nv_test(cudnn_desc_test SRCS cudnn_desc_test.cc DEPS dynload_cuda)
nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context)
cc_library(timer SRCS timer.cc)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <functional>
#include <iostream>
#include <iterator>
#include <memory>
#include <numeric>
#include <string>
#include <vector>
#include "paddle/fluid/platform/cudnn_helper.h"
namespace paddle {
namespace platform {
using framework::Tensor;
template <typename T>
cudnnDataType_t ToCudnnDataType(const T& t) {
auto type = framework::ToDataType(t);
return ToCudnnDataType(type);
}
template <>
cudnnDataType_t ToCudnnDataType(const framework::proto::VarType::Type& t) {
cudnnDataType_t type = CUDNN_DATA_FLOAT;
switch (t) {
case framework::proto::VarType::FP16:
type = CUDNN_DATA_HALF;
break;
case framework::proto::VarType::FP32:
type = CUDNN_DATA_FLOAT;
break;
case framework::proto::VarType::FP64:
type = CUDNN_DATA_DOUBLE;
break;
default:
break;
}
return type;
}
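// The wrappers below own the raw cuDNN handle in a std::unique_ptr whose
// custom Deleter calls the matching cudnnDestroy*Descriptor, so the
// descriptor is released exactly once when the wrapper goes out of scope.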
class ActivationDescriptor {
public:
using T = cudnnActivationStruct;
struct Deleter {
void operator()(T* t) {
if (t != nullptr) {
PADDLE_ENFORCE(dynload::cudnnDestroyActivationDescriptor(t));
t = nullptr;
}
}
};
ActivationDescriptor() {
T* raw_ptr;
PADDLE_ENFORCE(dynload::cudnnCreateActivationDescriptor(&raw_ptr));
desc_.reset(raw_ptr);
}
template <typename T>
void set(cudnnActivationMode_t mode, const T& coef) {
CUDNN_ENFORCE(dynload::cudnnSetActivationDescriptor(
desc_.get(), mode, CUDNN_NOT_PROPAGATE_NAN, static_cast<double>(coef)));
}
T* desc() { return desc_.get(); }
T* desc() const { return desc_.get(); }
private:
std::unique_ptr<T, Deleter> desc_;
};
class TensorDescriptor {
public:
using T = cudnnTensorStruct;
struct Deleter {
void operator()(T* t) {
if (t != nullptr) {
PADDLE_ENFORCE(dynload::cudnnDestroyTensorDescriptor(t));
t = nullptr;
}
}
};
TensorDescriptor() {
T* raw_ptr;
PADDLE_ENFORCE(dynload::cudnnCreateTensorDescriptor(&raw_ptr));
desc_.reset(raw_ptr);
}
T* desc() { return desc_.get(); }
T* desc() const { return desc_.get(); }
void set(const Tensor& tensor, const int groups = 1) {
auto dims = framework::vectorize2int(tensor.dims());
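// Compute packed row-major strides from dims; e.g. dims {2, 3, 4}
// give strides {12, 4, 1}.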
std::vector<int> strides(dims.size());
strides[dims.size() - 1] = 1;
for (int i = dims.size() - 2; i >= 0; i--) {
strides[i] = dims[i + 1] * strides[i + 1];
}
std::vector<int> dims_with_group(dims.begin(), dims.end());
if (groups > 1) {
dims_with_group[1] = dims_with_group[1] / groups;
}
PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor(
desc_.get(), ToCudnnDataType(tensor.type()), dims_with_group.size(),
dims_with_group.data(), strides.data()));
}
private:
std::unique_ptr<T, Deleter> desc_;
};
} // namespace platform
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/cudnn_desc.h"
#include <gtest/gtest.h>
namespace paddle {
namespace platform {
TEST(TensorDescriptor, Empty) {
ActivationDescriptor a;
TensorDescriptor t;
TensorDescriptor t1;
TensorDescriptor *t11 = new TensorDescriptor();
delete t11;
std::unique_ptr<TensorDescriptor> tt(new TensorDescriptor());
}
TEST(TensorDescriptor, Normal) {
framework::Tensor tt;
tt.Resize({2, 3, 4});
tt.mutable_data<float>(platform::CPUPlace());
TensorDescriptor desc;
desc.set(tt);
EXPECT_TRUE(desc.desc() != nullptr);
}
} // namespace platform
} // namespace paddle
......@@ -99,6 +99,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
__macro(cudnnDestroy); \
__macro(cudnnSetStream); \
__macro(cudnnActivationForward); \
__macro(cudnnActivationBackward); \
__macro(cudnnConvolutionForward); \
__macro(cudnnConvolutionBackwardBias); \
__macro(cudnnGetConvolutionForwardWorkspaceSize); \
......
......@@ -86,6 +86,8 @@ extern void* mklml_dso_handle;
__macro(vdPowx); \
__macro(vsInv); \
__macro(vdInv); \
__macro(vmsErf); \
__macro(vmdErf); \
__macro(MKL_Set_Num_Threads)
MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
......
......@@ -14,6 +14,9 @@ limitations under the License. */
#pragma once
#include <string>
#ifdef PADDLE_WITH_CUDA
#include <cuda_runtime.h>
#endif
namespace paddle {
namespace platform {
......
......@@ -271,7 +271,6 @@ class MKLDNNHandler {
AppendKey(key, suffix);
}
protected:
static void AppendKeyDims(std::string* key,
const mkldnn::memory::dims& dims) {
for (unsigned int i = 0; i < dims.size(); i++) {
......@@ -289,6 +288,7 @@ class MKLDNNHandler {
key->append(s);
}
protected:
static std::string dims2str(const mkldnn::memory::dims& operand_dims) {
std::string dstr = "";
for (size_t i = 0; i < operand_dims.size(); ++i) {
......@@ -302,6 +302,9 @@ class MKLDNNHandler {
mkldnn::engine engine_;
std::string key_;
bool is_reusing_;
public:
static constexpr int MaxKeyLength = 256;
};
class TransposeMKLDNNHandler : public MKLDNNHandler {
......
......@@ -33,7 +33,7 @@ class Layer : public imperative::Layer {
}
};
class PyOpBase : public imperative::OpBase {
class PYBIND11_HIDDEN PyOpBase : public imperative::OpBase {
public:
using imperative::OpBase::OpBase; // Inherit constructors
};
......
......@@ -14,6 +14,7 @@
#include "paddle/fluid/pybind/ir.h"
#include <algorithm>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
......@@ -116,7 +117,7 @@ void BindNode(py::module *m) {
.def("is_var", &Node::IsVar)
.def("is_ctrl_var", &Node::IsCtrlVar)
.def("clear_inputs", [](Node &self) { self.inputs.clear(); })
.def("inputs_remove",
.def("remove_input",
[](Node &self, int node_id) {
auto pos = std::find_if(
self.inputs.begin(), self.inputs.end(),
......@@ -125,7 +126,7 @@ void BindNode(py::module *m) {
self.inputs.erase(pos);
}
})
.def("inputs_remove",
.def("remove_input",
[](Node &self, Node &node) {
auto pos =
std::find(self.inputs.begin(), self.inputs.end(), &node);
......@@ -133,10 +134,10 @@ void BindNode(py::module *m) {
self.inputs.erase(pos);
}
})
.def("inputs_append",
.def("append_input",
[](Node &self, Node &node) { self.inputs.push_back(&node); })
.def("clear_outputs", [](Node &self) { self.outputs.clear(); })
.def("outputs_remove",
.def("remove_output",
[](Node &self, int node_id) {
auto pos = std::find_if(
self.outputs.begin(), self.outputs.end(),
......@@ -145,7 +146,7 @@ void BindNode(py::module *m) {
self.outputs.erase(pos);
}
})
.def("outputs_remove",
.def("remove_output",
[](Node &self, Node &node) {
auto pos =
std::find(self.outputs.begin(), self.outputs.end(), &node);
......@@ -153,7 +154,7 @@ void BindNode(py::module *m) {
self.outputs.erase(pos);
}
})
.def("outputs_append",
.def("append_output",
[](Node &self, Node &node) { self.outputs.push_back(&node); })
.def_readwrite("inputs", &Node::inputs)
.def_readwrite("outputs", &Node::outputs);
......
......@@ -189,8 +189,6 @@ void BindBlockDesc(pybind11::module *m) {
return self.HasVar(name);
},
pybind11::return_value_policy::reference)
.def("_clear_block", [](pd::BlockDesc &self) { return self.Clear(); },
pybind11::return_value_policy::reference)
.def("_rename_var",
[](pd::BlockDesc &self, const pybind11::bytes &byte_name,
const pybind11::bytes &byte_name_new) {
......
......@@ -86,6 +86,14 @@ bool IsCompiledWithCUDA() {
#endif
}
bool IsCompiledWithMKLDNN() {
#ifndef PADDLE_WITH_MKLDNN
return false;
#else
return true;
#endif
}
bool IsCompiledWithBrpc() {
#ifndef PADDLE_WITH_DISTRIBUTE
return false;
......@@ -169,6 +177,23 @@ PYBIND11_MODULE(core, m) {
py::return_value_policy::take_ownership)
.def("value", [](const imperative::VarBase &self) { return self.var_; },
py::return_value_policy::reference)
.def_property("name",
[](const imperative::VarBase &self) { return self.name_; },
[](imperative::VarBase &self, const std::string &name) {
self.name_ = name;
})
.def_property("block",
[](const imperative::VarBase &self) { return self.block_; },
[](imperative::VarBase &self, framework::BlockDesc *block) {
self.block_ = block;
},
py::return_value_policy::reference)
.def_property(
"persistable",
[](const imperative::VarBase &self) { return self.persistable_; },
[](imperative::VarBase &self, const bool persistable) {
self.persistable_ = persistable;
})
.def_property(
"desc",
[](const imperative::VarBase &self) { return self.var_desc_; },
......@@ -185,6 +210,10 @@ PYBIND11_MODULE(core, m) {
py::class_<imperative::OpBase, PyOpBase>(m, "OpBase", R"DOC()DOC")
.def(py::init<>())
.def("register_backward_hooks",
[](imperative::OpBase &self, const py::object &callable) {
self.RegisterBackwardHooks(callable);
})
.def_property(
"desc", [](const imperative::OpBase &self) { return self.op_desc_; },
[](imperative::OpBase &self, framework::OpDesc *op_desc) {
......@@ -193,6 +222,16 @@ PYBIND11_MODULE(core, m) {
}
},
py::return_value_policy::reference)
.def_property("_trace_id",
[](const imperative::OpBase &self) {
pybind11::gil_scoped_release release;
return self.trace_id_;
},
[](imperative::OpBase &self, int trace_id) {
pybind11::gil_scoped_release release;
self.trace_id_ = trace_id;
},
py::return_value_policy::reference)
.def_property(
"forward_id",
[](const imperative::OpBase &self) { return self.forward_id_; },
......@@ -405,11 +444,11 @@ PYBIND11_MODULE(core, m) {
Set LoD of the LoDTensor according to recursive sequence length.
For example, if recursive_sequence_lengths=[[2, 3]], meaning that
there are two sequences with length 2 and 3 respectively, the
corresponding lod would be [[0, 2, 2+3]], i.e., [[0, 2, 5]].
Args:
recursive_sequence_lengths (List[List[int]]): sequence lengths.
)DOC")
.def("lod",
[](LoDTensor &self) -> std::vector<std::vector<size_t>> {
......@@ -440,7 +479,7 @@ PYBIND11_MODULE(core, m) {
Return the sequence length of the LoDTensor corresponding to LoD.
Returns:
out (List[List[int]]): the sequence lengths.
)DOC")
.def("has_valid_recursive_sequence_lengths",
[](LoDTensor &self) -> bool {
......@@ -592,29 +631,29 @@ All parameter, weight, gradient are variables in Paddle.
},
py::arg("name"),
R"DOC(
Find or create variable named :code:`name` in the current scope.
If the variable named :code:`name` does not exist in the
current scope, the variable would be created. Otherwise,
return the existing variable.
Args:
name (str): the variable name.
Returns:
out (core.Variable): the found or created variable.
)DOC",
py::return_value_policy::reference)
.def("find_var", &Scope::FindVar, py::arg("name"),
R"DOC(
Find variable named :code:`name` in the current scope or
its parent scope. Return None if not found.
Args:
name (str): the variable name.
Returns:
out (core.Variable|None): the found variable or None.
)DOC",
py::return_value_policy::reference)
.def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); },
......@@ -638,7 +677,7 @@ All parameter, weight, gradient are variables in Paddle.
},
R"DOC(
Create a new scope.
Returns:
out (core._Scope): the created scope.
)DOC",
......@@ -849,6 +888,7 @@ All parameter, weight, gradient are variables in Paddle.
[](bool init_p2p) { framework::InitDevices(init_p2p); });
m.def("is_compiled_with_cuda", IsCompiledWithCUDA);
m.def("is_compiled_with_mkldnn", IsCompiledWithMKLDNN);
m.def("is_compiled_with_brpc", IsCompiledWithBrpc);
m.def("is_compiled_with_dist", IsCompiledWithDIST);
#ifdef PADDLE_WITH_CUDA
......
因为 它太大了无法显示 source diff 。你可以改为 查看blob
......@@ -259,6 +259,7 @@ function check_style() {
eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
fi
pip install cpplint
# set up go environment for running gometalinter
mkdir -p $GOPATH/src/github.com/PaddlePaddle/
ln -sf ${PADDLE_ROOT} $GOPATH/src/github.com/PaddlePaddle/Paddle
......@@ -414,10 +415,11 @@ function assert_api_not_changed() {
source .env/bin/activate
pip install ${PADDLE_ROOT}/build/python/dist/*whl
python ${PADDLE_ROOT}/tools/print_signatures.py paddle.fluid,paddle.reader > new.spec
if [ "$1" == "cp35-cp35m" ] || [ "$1" == "cp36-cp36m" ] || [ "$1" == "cp37-cp37m" ]; then
# Use sed to make the python2 and python3 specs the same
sed -i 's/arg0: str/arg0: unicode/g' new.spec
sed -i "s/\(.*Transpiler.*\).__init__ ArgSpec(args=\['self'].*/\1.__init__ /g" new.spec
sed -i "s/\(.*Transpiler.*\).__init__ (ArgSpec(args=\['self'].*/\1.__init__ /g" new.spec
fi
# ComposeNotAligned has significant difference between py2 and py3
sed -i '/.*ComposeNotAligned.*/d' new.spec
......@@ -431,8 +433,8 @@ function assert_api_spec_approvals() {
BRANCH="develop"
fi
API_FILES=("cmake/external"
"paddle/fluid/API.spec"
API_FILES=("paddle/fluid/API.spec"
"python/paddle/fluid/parallel_executor.py"
"paddle/fluid/framework/operator.h"
"paddle/fluid/framework/tensor.h"
"paddle/fluid/framework/lod_tensor.h"
......@@ -451,12 +453,21 @@ function assert_api_spec_approvals() {
echo "checking ${API_FILE} change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}"
if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then
# NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable.
APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803`
if [ "$API_FILE" == "paddle/fluid/API.spec" ];then
APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 2887803 35982308`
else
APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803`
fi
echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
if [ "${APPROVALS}" == "FALSE" ]; then
if [ "$API_FILE" == "paddle/fluid/API.spec" ];then
echo "You must have panyx0718 and shanyi15 approval for the api change! ${API_FILE}"
else
echo "You must have panyx0718 approval for the api change! ${API_FILE}"
exit 1
fi
exit 1
fi
fi
done
......@@ -471,19 +482,6 @@ function assert_api_spec_approvals() {
exit 1
fi
fi
pip install ${PADDLE_ROOT}/build/opt/paddle/share/wheels/*.whl
CHECK_DOCK_MD5=`python ${PADDLE_ROOT}/tools/check_doc_approval.py`
if [ "True" != ${CHECK_DOCK_MD5} ]; then
APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 35982308`
echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
if [ "${APPROVALS}" == "FALSE" ]; then
echo "You must have shanyi15 approval for the api doc change! "
exit 1
fi
echo ${CHECK_DOCK_MD5} >/root/.cache/doc_md5.txt
fi
}
......
......@@ -125,14 +125,13 @@ def __bootstrap__():
os.environ['OMP_NUM_THREADS'] = str(num_threads)
sysstr = platform.system()
read_env_flags = [
'check_nan_inf', 'benchmark', 'eager_delete_scope', 'use_mkldnn',
'use_ngraph', 'initial_cpu_memory_in_mb', 'init_allocated_mem',
'free_idle_memory', 'paddle_num_threads', "dist_threadpool_size",
'eager_delete_tensor_gb', 'fast_eager_deletion_mode',
'allocator_strategy', 'reader_queue_speed_test_mode',
'print_sub_graph_dir', 'pe_profile_fname', 'warpctc_dir',
'inner_op_parallelism', 'enable_parallel_graph',
'multiple_of_cupti_buffer_size'
'check_nan_inf', 'benchmark', 'eager_delete_scope', 'use_ngraph',
'initial_cpu_memory_in_mb', 'init_allocated_mem', 'free_idle_memory',
'paddle_num_threads', "dist_threadpool_size", 'eager_delete_tensor_gb',
'fast_eager_deletion_mode', 'allocator_strategy',
'reader_queue_speed_test_mode', 'print_sub_graph_dir',
'pe_profile_fname', 'warpctc_dir', 'inner_op_parallelism',
'enable_parallel_graph', 'multiple_of_cupti_buffer_size'
]
if 'Darwin' not in sysstr:
read_env_flags.append('use_pinned_memory')
......@@ -140,6 +139,9 @@ def __bootstrap__():
if os.name != 'nt':
read_env_flags.append('cpu_deterministic')
if core.is_compiled_with_mkldnn():
read_env_flags.append('use_mkldnn')
if core.is_compiled_with_dist():
read_env_flags.append('rpc_deadline')
read_env_flags.append('rpc_server_profile_path')
......
......@@ -17,7 +17,6 @@ import os
import six
import sys
from .. import compat as cpt
from . import framework
from . import core
from . import framework
......@@ -36,6 +35,30 @@ def _place_obj(place):
return p
def _is_pserver_mode(main_program):
main = main_program if main_program \
else default_main_program()
for op in main.global_block().ops:
if op.type in ["send", "recv"]:
return True
return False
def get_available_places(use_cuda):
if use_cuda:
gpus_env = os.getenv("FLAGS_selected_gpus")
if gpus_env:
gpus = [int(s) for s in gpus_env.split(",")]
else:
gpus = [i for i in six.moves.range(core.get_cuda_device_count())]
places = [core.CUDAPlace(i) for i in gpus]
else:
cpu_num = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
places = [core.CPUPlace() for _ in six.moves.range(cpu_num)]
assert places, "no place for execution"
return places
class CompiledProgram(object):
"""
Compiles to Graph for execution.
......@@ -127,8 +150,7 @@ class CompiledProgram(object):
self._exec_strategy = ExecutionStrategy()
if self._build_strategy is None:
self._build_strategy = BuildStrategy()
self._build_strategy.is_distribution = framework.is_pserver_mode(
self._program)
self._build_strategy.is_distribution = _is_pserver_mode(self._program)
return self
def with_inference_optimize(self, config):
......@@ -153,9 +175,9 @@ class CompiledProgram(object):
def _with_distributed(self):
raise NotImplementedError()
def _compile_data_parallel(self):
def _compile_data_parallel(self, use_cuda=False, scope=None):
if self._share_vars_from:
if self._scope:
if scope:
sys.stderr.write("share_vars_from is set, scope is ignored.\n")
if not self._share_vars_from._is_data_parallel:
raise ValueError("share_vars_from is not data parallel. Cannot "
......@@ -166,23 +188,11 @@ class CompiledProgram(object):
"var to share.")
self._local_scopes = self._share_vars_from._executor.local_scopes()
else:
assert scope is not None, ""
self._local_scopes = []
self._exec_strategy.use_cuda = isinstance(self._place, core.CUDAPlace)
if self._exec_strategy.use_cuda:
gpus_env = os.getenv("FLAGS_selected_gpus")
if gpus_env:
gpus = [int(s) for s in gpus_env.split(",")]
else:
gpus = [
i for i in six.moves.range(core.get_cuda_device_count())
]
self._places = [core.CUDAPlace(i) for i in gpus]
else:
cpu_num = int(
os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
self._places = [core.CPUPlace() for _ in six.moves.range(cpu_num)]
assert self._places, "no place for execution"
self._exec_strategy.use_cuda = use_cuda
self._places = get_available_places(self._exec_strategy.use_cuda)
if self._exec_strategy.num_threads == 0:
if self._exec_strategy.use_cuda:
......@@ -197,9 +207,11 @@ class CompiledProgram(object):
# FIXME(dzhwinter): enable_inplace should be after memory_optimize
# if turn on python memory optimize, turn off the inplace_pass.
if self._build_strategy.memory_optimize is None:
self._build_strategy.memory_optimize = False if self._program and self._program._is_mem_optimized else True
self._build_strategy.memory_optimize = False \
if self._program and self._program._is_mem_optimized else True
if self._build_strategy.enable_inplace is None:
self._build_strategy.enable_inplace = False if self._program and self._program._is_mem_optimized else True
self._build_strategy.enable_inplace = False \
if self._program and self._program._is_mem_optimized else True
# TODO(wuyi): trainer endpoings should be passed in through
# build_strategy, not program.xxx.
......@@ -221,12 +233,12 @@ class CompiledProgram(object):
places = list(map(_place_obj, self._places))
return core.ParallelExecutor(
places,
set(self._persistable_vars),
cpt.to_text(self._loss_name)
if self._loss_name else six.u(''), self._scope, self._local_scopes,
self._exec_strategy, self._build_strategy, self._graph)
return core.ParallelExecutor(places,
set(self._persistable_vars),
cpt.to_text(self._loss_name)
if self._loss_name else six.u(''), scope,
self._local_scopes, self._exec_strategy,
self._build_strategy, self._graph)
def _compile_inference(self):
return core.create_paddle_predictor(self._infer_config)
......@@ -253,7 +265,9 @@ class CompiledProgram(object):
self._scope = scope
self._place = place
if self._is_data_parallel:
self._executor = self._compile_data_parallel()
self._executor = self._compile_data_parallel(
use_cuda=isinstance(self._place, core.CUDAPlace),
scope=self._scope)
elif self._is_inference:
self._executor = self._compile_inference()
else:
......
......@@ -17,7 +17,9 @@ import numpy as np
import six
from ..... import compat as cpt
from .... import core
from .... import Executor
from ....framework import IrGraph
from ....framework import IrNode
from ....framework import Program
from ....initializer import Constant
from .... import unique_name
......@@ -31,7 +33,7 @@ __all__ = [
class QuantizationTransformPass(object):
def __init__(self,
scope=None,
program_exe=None,
place=None,
weight_bits=8,
activation_bits=8,
activation_quantize_type='abs_max',
......@@ -45,7 +47,7 @@ class QuantizationTransformPass(object):
scope(fluid.Scope): When activation use 'range_abs_max' as the quantize
type, this pass will create some new parameters. The scope is used to
initialize these new parameters.
program_exe(fluid.Executor): program_exe is used to initialize new
place(fluid.CPUPlace|fluid.CUDAPlace): place is used to initialize new
parameters described above.
weight_bits (int): quantization bit number for weights,
the bias is not quantized.
......@@ -71,13 +73,13 @@ class QuantizationTransformPass(object):
from paddle.fluid import core
graph = IrGraph(core.Graph(program.desc), for_test=False)
exe = fluid.Executor(fluid.CPUPlace())
place = fluid.CPUPlace()
transform_pass = QuantizationTransformPass(fluid.global_scope(),
exe)
place)
transform_pass.apply(graph)
"""
self._scope = scope
self._program_exe = program_exe
self._place = place
self._weight_bits = weight_bits
self._activation_bits = activation_bits
......@@ -118,7 +120,7 @@ class QuantizationTransformPass(object):
self._is_test = graph.is_test()
# marked the variable which has been dequantized.
dequantized_vars = collections.OrderedDict()
persistable_vars = [p.name() for p in graph.all_persistable_vars()]
persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
def _transform_forward(graph, op):
for var_node in op.inputs:
......@@ -149,7 +151,7 @@ class QuantizationTransformPass(object):
if not self._is_test:
self._create_global_step(graph)
ops = graph.all_ops()
ops = graph.all_op_nodes()
# The process of _transform_forward and _transform_backward is needed in two for loops.
# The loop for transforming the forward graph:
for op in ops:
......@@ -163,8 +165,8 @@ class QuantizationTransformPass(object):
if len(self._need_initialized) > 0:
assert self._scope is not None, \
'The scope cannot be set None when activation_quantize_type equals to range_abs_max.'
assert self._program_exe is not None, \
'The program_exe cannot be set None when activation_quantize_type equals to range_abs_max.'
assert self._place is not None, \
'The place cannot be set None when activation_quantize_type equals to range_abs_max.'
init_program = Program()
for var_desc, initializer in six.iteritems(self._need_initialized):
var = init_program.global_block().create_var(
......@@ -175,7 +177,8 @@ class QuantizationTransformPass(object):
lod_level=var_desc.lod_level(),
persistable=var_desc.persistable())
initializer(var, init_program.global_block())
self._program_exe.run(program=init_program, scope=self._scope)
exe = Executor(self._place)
exe.run(program=init_program, scope=self._scope)
return graph
......@@ -183,11 +186,11 @@ class QuantizationTransformPass(object):
if self._weight_quantize_type == 'range_abs_max' or \
self._activation_quantize_type == 'range_abs_max':
counter_name = cpt.to_text('@STEP_COUNTER@')
for node in graph.all_vars():
for node in graph.all_var_nodes():
if node.name() == counter_name:
self._global_step = node
if self._global_step is None:
global_step_in = graph.create_param_node(
global_step_in = graph.create_persistable_node(
name=counter_name,
var_type=core.VarDesc.VarType.LOD_TENSOR,
shape=[1],
......@@ -228,14 +231,14 @@ class QuantizationTransformPass(object):
quant_var_node = graph.create_var_node(
name=self._quantized_var_name(var_node.name()),
var_type=var_node.var().type(),
shape=var_node.var().shape(),
var_dtype=var_node.var().dtype())
var_type=var_node.type(),
shape=var_node.shape(),
var_dtype=var_node.dtype())
scale_var_node = graph.create_var_node(
name=self._quantized_scale_name(var_node.name()),
var_type=var_node.var().type(),
shape=var_node.var().shape(),
var_dtype=var_node.var().dtype())
var_type=var_node.type(),
shape=var_node.shape(),
var_dtype=var_node.dtype())
quant_op_node = graph.create_op_node(
op_type='fake_quantize_abs_max',
attrs={
......@@ -258,15 +261,15 @@ class QuantizationTransformPass(object):
quant_var_node = graph.create_var_node(
name=self._quantized_var_name(var_node.name()),
var_type=var_node.var().type(),
shape=var_node.var().shape(),
var_dtype=var_node.var().dtype())
var_type=var_node.type(),
shape=var_node.shape(),
var_dtype=var_node.dtype())
scale_in_node = graph.create_param_node(
scale_in_node = graph.create_persistable_node(
name=self._quantized_scale_name(var_node.name()),
var_type=core.VarDesc.VarType.LOD_TENSOR,
shape=[1],
var_dtype=var_node.var().dtype())
var_dtype=var_node.dtype())
self._need_initialized[scale_in_node.var()] = Constant(value=0.001)
scale_out_node = graph.create_var_node_from_desc(scale_in_node.var())
......@@ -275,11 +278,11 @@ class QuantizationTransformPass(object):
if not self._is_test:
# The name of scales_var_node may be 'scales_0', 'scales_1', etc.
scales_node = graph.create_param_node(
scales_node = graph.create_persistable_node(
name=unique_name.generate('scales'),
var_type=core.VarDesc.VarType.LOD_TENSOR,
shape=[self._window_size],
var_dtype=var_node.var().dtype())
var_dtype=var_node.dtype())
self._need_initialized[scales_node.var()] = Constant(value=0)
inputs['Iter'] = self._global_step
outputs['OutScales'] = scales_node
......@@ -314,9 +317,9 @@ class QuantizationTransformPass(object):
dequant_var_node = graph.create_var_node(
name=self._dequantized_var_name(var_node.name()),
var_type=var_node.var().type(),
shape=var_node.var().shape(),
var_dtype=var_node.var().dtype())
var_type=var_node.type(),
shape=var_node.shape(),
var_dtype=var_node.dtype())
max_range = (1 << (quant_bits - 1)) - 1
dequant_op_node = graph.create_op_node(
op_type='fake_dequantize_max_abs',
......@@ -400,22 +403,22 @@ class QuantizationFreezePass(object):
Args:
graph(IrGraph): the applied graph.
"""
persistable_vars = [p.name() for p in graph.all_persistable_vars()]
ops = graph.all_ops()
persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
ops = graph.all_op_nodes()
for op_node in ops:
op_name = op_node.name()
if op_name in self._fake_quant_op_names:
input_arg_name = op_node.op().input('X')[0]
input_arg_name = op_node.input('X')[0]
if input_arg_name in persistable_vars:
if self._weight_quantize_type == 'abs_max':
param = self._load_var(input_arg_name)
scale_v = np.max(np.abs(param))
else:
scale_v = self._load_var(op_node.op().output('OutScale')
[0])[0]
scale_v = self._load_var(
op_node.output('OutScale')[0])[0]
self._var_scale_map[input_arg_name] = scale_v
else:
scale_v = graph.var_node(op_node.op().output('OutScale')[0])
scale_v = graph.var_node(op_node.output('OutScale')[0])
self._var_scale_map[input_arg_name] = scale_v
if input_arg_name in persistable_vars:
self._remove_fake_quant_and_dequant_op(graph, op_node)
......@@ -425,13 +428,13 @@ class QuantizationFreezePass(object):
self._weight_bits)
self._restore_var(input_arg_name, quantized_param_v)
ops = graph.all_ops()
ops = graph.all_op_nodes()
for op_node in ops:
op_name = op_node.name()
if op_name in self._fake_dequant_op_names:
self._remove_fake_quant_and_dequant_op(graph, op_node)
ops = graph.all_ops()
ops = graph.all_op_nodes()
for op_node in ops:
op_name = op_node.name()
if op_name in self._quantizable_ops:
......@@ -451,8 +454,8 @@ class QuantizationFreezePass(object):
return graph
def _remove_fake_quant_and_dequant_op(self, graph, op_node):
k = op_node.op().output('Out')[0]
v = op_node.op().input('X')[0]
k = op_node.output('Out')[0]
v = op_node.input('X')[0]
if v not in self._op_input_rename_map:
self._op_input_rename_map[k] = v
else:
......@@ -462,7 +465,7 @@ class QuantizationFreezePass(object):
def _insert_post_dequant_op(self, graph, op_node):
max_range = None
scale_var_node = None
persistable_vars = [p.name() for p in graph.all_persistable_vars()]
persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
for var_node in op_node.inputs:
name = var_node.name()
if name in self._op_input_rename_map:
......@@ -480,7 +483,7 @@ class QuantizationFreezePass(object):
original_var_name)
max_range = param_range * act_range / scale_v
else:
assert isinstance(scale_v, core.Node)
assert isinstance(scale_v, IrNode)
scale_var_node = self._var_scale_map[original_var_name]
if len(op_node.outputs) != 1:
......@@ -490,9 +493,9 @@ class QuantizationFreezePass(object):
output_var_node = op_node.outputs[0]
dequant_var_node = graph.create_var_node(
name=self._dequantized_var_name(output_var_node.name()),
var_type=output_var_node.var().type(),
shape=output_var_node.var().shape(),
var_dtype=output_var_node.var().dtype())
var_type=output_var_node.type(),
shape=output_var_node.shape(),
var_dtype=output_var_node.dtype())
dequant_op_node = graph.create_op_node(
op_type='fake_dequantize_max_abs',
attrs={
......@@ -517,14 +520,19 @@ class QuantizationFreezePass(object):
def _remove_unused_var_nodes(self, graph):
all_used_vars = set()
ops = graph.all_ops()
ops = graph.all_op_nodes()
for op_node in ops:
for input_node in op_node.inputs:
all_used_vars.add(input_node)
for output_node in op_node.outputs:
all_used_vars.add(output_node)
all_unused_vars = graph.all_vars() - all_used_vars
all_used_vars = {n.node for n in all_used_vars}
all_unused_vars = {
n
for n in filter(lambda node: node.node not in all_used_vars,
graph.all_var_nodes())
}
graph.safe_remove_nodes(all_unused_vars)
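The comprehension above compares the underlying core.Node objects rather than the IrNode wrappers themselves: each call to all_var_nodes() builds fresh wrapper objects, so set difference on wrappers would remove nothing. A self-contained toy (not Paddle code) showing the pitfall:

    # Toy stand-in for IrNode: a thin wrapper without __eq__/__hash__,
    # so two wrappers around the same raw node compare unequal.
    class Wrapper(object):
        def __init__(self, node):
            self.node = node

    raw = object()                        # stands in for a core.Node
    used = {Wrapper(raw)}                 # wrappers built in one pass
    all_nodes = {Wrapper(raw)}            # fresh wrappers built in another pass
    assert all_nodes - used == all_nodes  # naive set difference removes nothing
    used_raw = {w.node for w in used}
    unused = {w for w in all_nodes if w.node not in used_raw}
    assert unused == set()                # comparing .node gives the right answer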
def _original_var_name(self, var_name):
......@@ -583,8 +591,8 @@ class ConvertToInt8Pass(object):
Args:
graph(IrGraph): the applied graph.
"""
persistable_vars = [p.name() for p in graph.all_persistable_vars()]
ops = graph.all_ops()
persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
ops = graph.all_op_nodes()
input_map = {}
for op_node in ops:
op_name = op_node.name()
......@@ -605,10 +613,10 @@ class ConvertToInt8Pass(object):
def _convert_to_int8(self, graph, var_node):
int8_var_node_name = var_node.name() + ".int8"
int8_var_node = graph.create_param_node(
int8_var_node = graph.create_persistable_node(
name=cpt.to_text(int8_var_node_name),
var_type=var_node.var().type(),
shape=var_node.var().shape(),
var_type=var_node.type(),
shape=var_node.shape(),
var_dtype=core.VarDesc.VarType.INT8)
array = self._load_var(var_node.name())
self._scope.var(int8_var_node_name)
......@@ -624,14 +632,19 @@ class ConvertToInt8Pass(object):
def _remove_unused_var_nodes(self, graph):
all_used_vars = set()
ops = graph.all_ops()
ops = graph.all_op_nodes()
for op_node in ops:
for input_node in op_node.inputs:
all_used_vars.add(input_node)
for output_node in op_node.outputs:
all_used_vars.add(output_node)
all_unused_vars = graph.all_vars() - all_used_vars
all_used_vars = {n.node for n in all_used_vars}
all_unused_vars = {
n
for n in filter(lambda node: node.node not in all_used_vars,
graph.all_var_nodes())
}
graph.safe_remove_nodes(all_unused_vars)
......@@ -655,11 +668,11 @@ class TransformForMobilePass(object):
Args:
graph(IrGraph): the graph will be transformed.
"""
ops = graph.all_ops()
ops = graph.all_op_nodes()
for op_node in ops:
name = op_node.name()
if name in self._fake_quant_op_names:
op_node.op().set_type('quantize')
op_node.set_type('quantize')
quant_node = graph.create_op_node_from_desc(op_node.op())
for input_node in op_node.inputs:
graph.link_to(input_node, quant_node)
......@@ -667,7 +680,7 @@ class TransformForMobilePass(object):
graph.link_to(quant_node, output_node)
graph.safe_remove_nodes(op_node)
if name in self._fake_dequant_op_names:
op_node.op().set_type('dequantize')
op_node.set_type('dequantize')
dequant_node = graph.create_op_node_from_desc(op_node.op())
for input_node in op_node.inputs:
graph.link_to(input_node, dequant_node)
......
......@@ -61,16 +61,16 @@ class TestGraph(unittest.TestCase):
opt.minimize(loss)
graph = IrGraph(core.Graph(main.desc), for_test=False)
marked_nodes = set()
for op in graph.all_ops():
for op in graph.all_op_nodes():
if op.name().find('conv2d') > -1:
marked_nodes.add(op)
graph.draw('.', 'residual', marked_nodes)
self.assertFalse(graph.has_circle())
self.assertEqual(graph.graph_num(), 1)
nodes = graph.topology_sort()
self.assertEqual(len(nodes), len(graph.all_ops()))
self.assertEqual(len(nodes), len(graph.all_op_nodes()))
nodes_map = graph.build_adjacency_list()
self.assertEqual(len(nodes_map), len(graph.all_ops()))
self.assertEqual(len(nodes_map), len(graph.all_op_nodes()))
nodes_num = len(graph.all_nodes())
graph.safe_remove_nodes(marked_nodes)
self.assertEqual(len(graph.all_nodes()), nodes_num - len(marked_nodes))
......
......@@ -130,15 +130,16 @@ class TestQuantizationTransformPass(unittest.TestCase):
loss = linear_fc(3)
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
exe = fluid.Executor(fluid.CPUPlace())
place = fluid.CPUPlace()
exe = fluid.Executor(place)
graph = IrGraph(core.Graph(main.desc), for_test=False)
transform_pass = QuantizationTransformPass(
scope=fluid.global_scope(),
program_exe=exe,
place=place,
activation_quantize_type=quant_type)
transform_pass.apply(graph)
marked_nodes = set()
for op in graph.all_ops():
for op in graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
graph.draw('.', 'quantize_fc_' + quant_type, marked_nodes)
......@@ -146,7 +147,7 @@ class TestQuantizationTransformPass(unittest.TestCase):
self.check_program(transform_pass, program)
val_graph = IrGraph(core.Graph(program.desc), for_test=False)
val_marked_nodes = set()
for op in val_graph.all_ops():
for op in val_graph.all_op_nodes():
if op.name().find('quantize') > -1:
val_marked_nodes.add(op)
val_graph.draw('.', 'val_fc_' + quant_type, val_marked_nodes)
......@@ -166,15 +167,16 @@ class TestQuantizationTransformPass(unittest.TestCase):
loss = residual_block(2)
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
exe = fluid.Executor(fluid.CPUPlace())
place = fluid.CPUPlace()
exe = fluid.Executor(place)
graph = IrGraph(core.Graph(main.desc), for_test=False)
transform_pass = QuantizationTransformPass(
scope=fluid.global_scope(),
program_exe=exe,
place=place,
activation_quantize_type=quant_type)
transform_pass.apply(graph)
marked_nodes = set()
for op in graph.all_ops():
for op in graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
graph.draw('.', 'quantize_residual_' + quant_type, marked_nodes)
......@@ -182,7 +184,7 @@ class TestQuantizationTransformPass(unittest.TestCase):
self.check_program(transform_pass, program)
val_graph = IrGraph(core.Graph(program.desc), for_test=False)
val_marked_nodes = set()
for op in val_graph.all_ops():
for op in val_graph.all_op_nodes():
if op.name().find('quantize') > -1:
val_marked_nodes.add(op)
val_graph.draw('.', 'val_residual_' + quant_type, val_marked_nodes)
......@@ -231,17 +233,17 @@ class TestQuantizationFreezePass(unittest.TestCase):
with fluid.scope_guard(scope):
exe.run(startup)
transform_pass = QuantizationTransformPass(
scope=scope, program_exe=exe, activation_quantize_type=quant_type)
scope=scope, place=place, activation_quantize_type=quant_type)
transform_pass.apply(main_graph)
transform_pass.apply(test_graph)
dev_name = '_gpu_' if use_cuda else '_cpu_'
marked_nodes = set()
for op in main_graph.all_ops():
for op in main_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
main_graph.draw('.', 'main' + dev_name + quant_type, marked_nodes)
marked_nodes = set()
for op in test_graph.all_ops():
for op in test_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
test_graph.draw('.', 'test' + dev_name + quant_type, marked_nodes)
......@@ -251,11 +253,6 @@ class TestQuantizationFreezePass(unittest.TestCase):
iters = 5
batch_size = 8
#train_exe = fluid.ParallelExecutor(
# main_program=quantized_main_program,
# use_cuda=bool(use_cuda),
# loss_name=loss.name,
# scope=scope)
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=500),
......@@ -269,9 +266,7 @@ class TestQuantizationFreezePass(unittest.TestCase):
loss_v = exe.run(program=quantized_main_program,
feed=feeder.feed(data),
fetch_list=[loss])
#loss_v = train_exe.run(feed=feeder.feed(data),
# fetch_list=[loss.name])
#print('{}: {}'.format('loss' + dev_name + quant_type, loss_v))
print('{}: {}'.format('loss' + dev_name + quant_type, loss_v))
test_data = next(test_reader())
with fluid.program_guard(quantized_test_program):
......@@ -287,7 +282,7 @@ class TestQuantizationFreezePass(unittest.TestCase):
freeze_pass = QuantizationFreezePass(scope=scope, place=place)
freeze_pass.apply(test_graph)
marked_nodes = set()
for op in test_graph.all_ops():
for op in test_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
test_graph.draw('.', 'test_freeze' + dev_name + quant_type,
......@@ -299,21 +294,21 @@ class TestQuantizationFreezePass(unittest.TestCase):
feed=feeder.feed(test_data),
fetch_list=[loss])
self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
#print('{}: {}'.format('test_loss1' + dev_name + quant_type, test_loss1))
#print('{}: {}'.format('test_loss2' + dev_name + quant_type, test_loss2))
print('{}: {}'.format('test_loss1' + dev_name + quant_type, test_loss1))
print('{}: {}'.format('test_loss2' + dev_name + quant_type, test_loss2))
w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
# May fail due to limited calculation precision
# self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
#print('{}: {}'.format('w_freeze' + dev_name + quant_type,
# np.sum(w_freeze)))
#print('{}: {}'.format('w_quant' + dev_name + quant_type,
# np.sum(w_quant)))
print('{}: {}'.format('w_freeze' + dev_name + quant_type,
np.sum(w_freeze)))
print('{}: {}'.format('w_quant' + dev_name + quant_type,
np.sum(w_quant)))
# Convert parameter to 8-bit.
convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
convert_int8_pass.apply(test_graph)
marked_nodes = set()
for op in test_graph.all_ops():
for op in test_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
test_graph.draw('.', 'test_int8' + dev_name + quant_type, marked_nodes)
......@@ -330,14 +325,14 @@ class TestQuantizationFreezePass(unittest.TestCase):
w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
self.assertEqual(w_8bit.dtype, np.int8)
self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
#print('{}: {}'.format('w_8bit' + dev_name + quant_type, np.sum(w_8bit)))
#print('{}: {}'.format('w_freeze' + dev_name + quant_type,
# np.sum(w_freeze)))
print('{}: {}'.format('w_8bit' + dev_name + quant_type, np.sum(w_8bit)))
print('{}: {}'.format('w_freeze' + dev_name + quant_type,
np.sum(w_freeze)))
mobile_pass = TransformForMobilePass()
mobile_pass.apply(test_graph)
marked_nodes = set()
for op in test_graph.all_ops():
for op in test_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
test_graph.draw('.', 'test_mobile' + dev_name + quant_type,
......
......@@ -261,45 +261,42 @@ def _as_lodtensor(data, place):
class Executor(object):
"""
An Executor in Python, only support the single-GPU running. For multi-cards, please refer to
ParallelExecutor.
Python executor takes a program, add feed operators and fetch operators to this program according
An Executor in Python, supports single/multiple-GPU running, and single/multiple-CPU running.
Python executor takes a program, adds feed operators and fetch operators to this program according
to feed map and fetch_list. Feed map provides input data for the program. fetch_list provides
the variables(or names) that user want to get after program run. Note: the executor will run all
the variables(or names) that the user wants to get after the program runs. Note: the executor will run all
operators in the program, not only the operators that the fetch_list depends on.
It store the global variables into the global scope, and create a local scope for the temporary
variables. The local scope contents will be discarded after every minibatch forward/backward finished.
But the global scope variables will be persistent through different runs.
All of ops in program will be running in sequence.
It stores the global variables into the global scope, and creates a local scope for the temporary
variables. The contents of the local scope may be discarded after every minibatch forward/backward
pass finishes, but the global scope variables persist across different runs.
Example:
.. code-block:: python
# First create the Executor.
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
# Run the startup program once and only once.
# Not need to optimize/compile the startup program.
exe.run(fluid.default_startup_program())
# Run the main program directly without compile.
loss, = exe.run(fluid.default_main_program(),
feed=feed_dict,
fetch_list=[loss.name])
# Or, compiled the program and run. See `CompiledProgram` for more detail.
compiled_prog = compiler.CompiledProgram(
fluid.default_main_program()).with_data_parallel(
loss_name=loss.name)
loss, = exe.run(compiled_prog,
feed=feed_dict,
fetch_list=[loss.name])
.. code-block:: python
# First create the Executor.
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
# Run the startup program once and only once.
# No need to optimize/compile the startup program.
exe.run(fluid.default_startup_program())
# Run the main program directly without compile.
loss, = exe.run(fluid.default_main_program(),
feed=feed_dict,
fetch_list=[loss.name])
# Or, compile the program and run. See `CompiledProgram` for more detail.
compiled_prog = compiler.CompiledProgram(
fluid.default_main_program()).with_data_parallel(
loss_name=loss.name)
loss, = exe.run(compiled_prog,
feed=feed_dict,
fetch_list=[loss.name])
Args:
place(core.CPUPlace|core.CUDAPlace(n)): indicates on which device the executor runs
Note: For debugging a complicated network on parallel GPUs, you can test it on the executor.
They have exactly the same arguments and are expected to give the same results.
"""
def __init__(self, place):
......@@ -382,6 +379,12 @@ class Executor(object):
]
return outs
'''
TODO(typhoonzero): Define "no longer use" meaning? Can user create
a new Executor for the same program and run?
TODO(panyx0718): Why ParallelExecutor doesn't have close?
'''
def close(self):
"""
Close this executor.
......@@ -389,9 +392,6 @@ class Executor(object):
You can no longer use this executor after calling this method.
For distributed training, this method frees the resources on PServers related to
the current Trainer.
TODO(typhoonzero): Define "no longer use" meaning? Can user create
a new Executor for the same program and run?
TODO(panyx0718): Why ParallelExecutor doesn't have close?
Example:
>>> cpu = core.CPUPlace()
......
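A short sketch of the close() flow described above (public API only; the distributed effect applies when the program contains PServer communication ops):

    import paddle.fluid as fluid

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    # ... train ...
    exe.close()  # frees trainer-related resources on PServers in distributed
                 # training; this executor must not be reused afterwards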
......@@ -87,15 +87,6 @@ def _current_expected_place():
return _imperative_current_expected_place_
def is_pserver_mode(main_program):
main = main_program if main_program \
else default_main_program()
for op in main.global_block().ops:
if op.type in ["send", "recv"]:
return True
return False
class NameScope(object):
def __init__(self, name="", parent=None):
self._children = dict()
......@@ -393,6 +384,9 @@ class Variable(object):
if not self._ivar:
self._ivar = core.VarBase(stop_gradient)
self._ivar.desc = self.desc
self._ivar.block = block.desc
self._ivar.name = name
self._ivar.persistable = persistable
if persistable:
self.block.vars[name] = self
else:
......@@ -721,7 +715,9 @@ class Operator(object):
out_arg_names = []
for arg in out_args:
out_arg_names.append(cpt.to_text(arg.name))
arg.op = self
# TODO(minqiyang): could we remove variable's op in static mode?
if not _in_imperative_mode():
arg.op = self
self.desc.set_output(out_proto.name, out_arg_names)
if op_attrs is not None:
......@@ -1200,15 +1196,6 @@ class Block(object):
else:
raise ValueError("Var {0} is not found recursively".format(name))
def _clear_block(self):
# TODO(minqiyang): move this to backward_hooks
self.desc._clear_block()
for name in self.vars.keys():
assert self.vars[name].persistable
del self.ops[:]
def all_parameters(self):
return list(self.iter_parameters())
......@@ -1345,26 +1332,13 @@ class Block(object):
#
# TODO(minqiyang): add op stop_gradient support in static mode too.
# currently, we only support stop_gradient in imperative mode.
self._trace_op(op, kwargs.get("stop_gradient", False))
self.ops.append(op)
_imperative_tracer().trace_op(op,
kwargs.get("stop_gradient", False))
else:
self.ops.append(op)
return op
def _trace_op(self, op, stop_gradient=False):
backward_refs = _imperative_tracer().trace(
op.iop, op.inputs, op.outputs, self.desc,
_imperative_current_expected_place_, stop_gradient)
# TODO(minqiyang): support backward_hooks to eager remove backward_refs
op.backward_refs = defaultdict(list)
for k, v in six.iteritems(op.inputs):
if k in backward_refs:
op.backward_refs[k] = op.inputs[k]
for k, v in six.iteritems(op.outputs):
if k in backward_refs:
op.backward_refs[k] = op.outputs[k]
def _insert_op(self, index, *args, **kwargs):
"""
Insert an Operator according to the given arguments.
......@@ -1417,9 +1391,11 @@ class Block(object):
inputs=kwargs.get("inputs", None),
outputs=kwargs.get("outputs", None),
attrs=kwargs.get("attrs", None))
self.ops.insert(0, op)
if _in_imperative_mode():
self._trace_op(op, kwargs.get("stop_gradient", False))
_imperative_tracer().trace_op(op,
kwargs.get("stop_gradient", False))
else:
self.ops.insert(0, op)
return op
def _sync_with_cpp(self):
......@@ -1566,10 +1542,397 @@ class Block(object):
return ret_var
class IrNode(object):
"""
Python IrNode. Beneath it is a core.Node, which is used for Ir Pass.
"""
def __init__(self, node):
"""
Construct an IrNode using core.Node.
Args:
node(core.Node): C++ Node.
"""
assert isinstance(node,
core.Node), 'node must be an instance of core.Node.'
self.node = node
def name(self):
"""
Return the node name.
Returns:
str: node name.
"""
return self.node.name()
def node_type(self):
"""
Return the node type.
Returns:
core.Node.Type: node type(core.Node.Type.Operation or core.Node.Type.Variable).
"""
return self.node.node_type()
def var(self):
"""
Return the node variable description.
Returns:
core.VarDesc: node variable description.
"""
return self.node.var()
def op(self):
"""
Return the node operator description.
Returns:
core.OpDesc: node operator description.
"""
return self.node.op()
def id(self):
"""
Return the node id.
Returns:
int: node id.
"""
return self.node.id()
def is_op(self):
"""
If the node is an operator, then return true.
Returns:
bool: indicate whether the node is an operator.
"""
return self.node.is_op()
def is_var(self):
"""
If the node is a variable, then return true.
Returns:
bool: indicate whether the node is a variable.
"""
return self.node.is_var()
def is_ctrl_var(self):
"""
If the node is a control dependence variable, then return true.
Returns:
bool: indicate whether the node is a control dependence variable.
"""
return self.node.is_ctrl_var()
def clear_inputs(self):
"""
Clear the node inputs. After executing the `clear_inputs` function,
the node inputs will be empty.
"""
self.node.clear_inputs()
def remove_input_by_id(self, node_id):
"""
Remove a node from inputs by the given node id.
Args:
node_id(int): the given node id.
"""
self.node.remove_input(node_id)
def remove_input(self, node):
"""
Remove a node from inputs.
Args:
node(IrNode): the node being removed.
"""
self.node.remove_input(node.node)
def append_input(self, node):
"""
Append a node in inputs.
Args:
node(IrNode): the node being appended.
"""
self.node.append_input(node.node)
def clear_outputs(self):
"""
Clear the node outputs. After executing the `clear_outputs` function,
the node outputs will be empty.
"""
self.node.clear_outputs()
def remove_output_by_id(self, node_id):
"""
Remove a node from outputs by the given node id.
Args:
node_id(int): the given node id.
"""
self.node.remove_output(node_id)
def remove_output(self, node):
"""
Remove a node from outputs.
Args:
node(IrNode): the node being removed.
"""
self.node.remove_output(node.node)
def append_output(self, node):
"""
Append a node in outputs.
Args:
node(IrNode): the node being appended.
"""
self.node.append_output(node.node)
@property
def inputs(self):
"""
Return the node inputs.
Returns:
list(IrNode): node inputs wrapped by IrNode.
"""
return [IrNode(n) for n in self.node.inputs]
@property
def outputs(self):
"""
Return the node outputs.
Returns:
list(IrNode): node outputs wrapped by IrNode.
"""
return [IrNode(n) for n in self.node.outputs]
class IrVarNode(IrNode):
"""
Python IrVarNode. Beneath it is a core.Node, it inherits from IrNode.
"""
def __init__(self, node):
"""
Construct an IrVarNode using core.Node.
Args:
node(core.Node): C++ Node.
"""
assert isinstance(node, core.Node) and node.is_var(), \
'node must be an instance of core.Node and it must be a variable node.'
super(IrVarNode, self).__init__(node)
self.node = node
def set_shape(self, shape):
"""
Set the node variable shape.
Args:
shape(list): shape to be set.
"""
assert self.node.var() is not None, \
"The node variable description cannot be None."
self.node.var().set_shape(shape)
def persistable(self):
"""
If the variable node is a persistable variable, then return true.
Returns:
bool: indicate whether the variable is persistable.
"""
assert self.node.var() is not None, \
"The node variable description cannot be None."
return self.node.var().persistable()
def type(self):
"""
Return the variable type.
Returns:
core.VarDesc.VarType: the variable type.
"""
assert self.node.var() is not None, \
"The node variable description cannot be None."
return self.node.var().type()
def dtype(self):
"""
Return the variable data type.
Returns:
core.VarDesc.VarType: the variable data type.
"""
assert self.node.var() is not None, \
"The node variable description cannot be None."
return self.node.var().dtype()
def shape(self):
"""
Return the variable shape.
Returns:
list: the variable shape.
"""
assert self.node.var() is not None, \
"The node variable description cannot be None."
return self.node.var().shape()
@property
def inputs(self):
"""
Return the node inputs.
Returns:
list(IrOpNode): node inputs wrapped by IrOpNode.
"""
return [IrOpNode(n) for n in self.node.inputs]
@property
def outputs(self):
"""
Return the node outputs.
Returns:
list(IrOpNode): node outputs wrapped by IrOpNode.
"""
return [IrOpNode(n) for n in self.node.outputs]
class IrOpNode(IrNode):
"""
Python IrOpNode. Beneath it is a core.Node, it inherits from IrNode.
"""
def __init__(self, node):
"""
Construct an IrOpNode using core.Node.
Args:
node(core.Node): C++ Node.
"""
assert isinstance(node, core.Node) and node.is_op(), \
'node must be an instance of core.Node and it must be an operator node.'
super(IrOpNode, self).__init__(node)
self.node = node
def rename_input(self, old_input_name, new_input_name):
"""
Rename the input of this node.
Args:
old_input_name(str): the old input name.
new_input_name(str): the new input name.
"""
assert self.node.op() is not None, \
"The node operator description cannot be None."
self.node.op()._rename_input(old_input_name, new_input_name)
def input(self, name):
"""
Get the list of input argument names for the given parameter name.
Args:
name(str): the parameter name.
Returns:
list(str): the argument name list.
"""
assert self.node.op() is not None, \
"The node operator description cannot be None."
return self.node.op().input(name)
def output(self, name):
"""
Get the list of output argument names for the given parameter name.
Args:
name(str): the parameter name.
Returns:
list(str): the argument name list.
"""
assert self.node.op() is not None, \
"The node operator description cannot be None."
return self.node.op().output(name)
def set_type(self, new_type):
"""
Change the operator type into new type.
Args:
new_type(str): new operator type to be set.
"""
assert self.node.op() is not None, \
"The node operator description cannot be None."
return self.node.op().set_type(new_type)
def set_attr(self, name, val):
"""
Set the value of attribute by attribute's name.
Args:
name(str): the attribute name.
val(bool|int|str|float|list): the value of the attribute.
"""
self._update_desc_attr(name, val)
def _update_desc_attr(self, name, val):
"""
Update the value of the op desc's attribute by attribute's name.
"""
assert self.node.op() is not None, \
"The node operator description cannot be None."
desc = self.node.op()
if isinstance(val, Block):
desc.set_block_attr(name, val.desc)
elif isinstance(val, list) and val and \
all(isinstance(v, Block) for v in val):
desc.set_blocks_attr(name, [v.desc for v in val])
elif isinstance(val, core.BlockDesc) or \
isinstance(val, core.ProgramDesc):
desc.set_serialized_attr(name, val.serialize_to_string())
else:
desc._set_attr(name, val)
@property
def inputs(self):
"""
Return the node inputs.
Returns:
list(IrVarNode): node inputs wrapped by IrVarNode.
"""
return [IrVarNode(n) for n in self.node.inputs]
@property
def outputs(self):
"""
Return the node outputs.
Returns:
list(IrVarNode): node outputs wrapped by IrVarNode.
"""
return [IrVarNode(n) for n in self.node.outputs]
class IrGraph(object):
"""
Python IrGraph. Beneath it is a core.Graph, which is used for
create a c++ Ir Pass Graph. An IrGraph is just a graph view of
creating a c++ Ir Pass Graph. An IrGraph is just a graph view of
a Program. In an IrGraph, both Variables and Operators are graph
nodes.
"""
......@@ -1597,15 +1960,15 @@ class IrGraph(object):
"""
Return all nodes included in the graph as a set.
"""
return {node for node in self.graph.nodes()}
return {IrNode(node) for node in self.graph.nodes()}
def all_vars(self):
def all_var_nodes(self):
"""
Return all variable nodes included in the graph as a set.
"""
return {node for node in self.graph.nodes() if node.is_var()}
return {IrVarNode(node) for node in self.graph.nodes() if node.is_var()}
def all_persistable_vars(self):
def all_persistable_nodes(self):
"""
Return all persistable variable nodes included in the graph as a set.
"""
......@@ -1614,13 +1977,13 @@ class IrGraph(object):
if node.is_var() and node.var() is not None and node.var(
).persistable():
persistable_nodes.add(node)
return persistable_nodes
return {IrVarNode(p) for p in persistable_nodes}
def all_ops(self):
def all_op_nodes(self):
"""
Return all operator nodes included in the graph as a set.
"""
return {node for node in self.graph.nodes() if node.is_op()}
return {IrOpNode(node) for node in self.graph.nodes() if node.is_op()}
def var_node(self, name):
"""
......@@ -1634,14 +1997,14 @@ class IrGraph(object):
doesn't have a variable with the given name.
Returns:
core.Node: the variable node with the giving name.
IrVarNode: the variable node with the given name.
"""
if not isinstance(name, six.string_types):
raise TypeError(
"var require string as parameter, but get %s instead." %
(type(name)))
target_var_node = None
var_nodes = self.all_vars()
var_nodes = self.all_var_nodes()
for var_node in var_nodes:
if var_node.name() == name:
target_var_node = var_node
......@@ -1649,7 +2012,7 @@ class IrGraph(object):
raise ValueError("var_node %s not in this graph" % name)
return target_var_node
def create_param_node(self, name, var_type, shape, var_dtype):
def create_persistable_node(self, name, var_type, shape, var_dtype):
"""
Create a persistable variable node in the graph. In IrGraph,
persistable variables and parameters cannot be distinguished.
......@@ -1661,14 +2024,14 @@ class IrGraph(object):
var_dtype(core.VarDesc.VarType): the data type of the persistable variable node.
Returns:
core.Node: the created persistable variable node.
IrVarNode: the created persistable variable node.
"""
var_desc = core.VarDesc(name)
var_desc.set_type(var_type)
var_desc.set_shape(shape)
var_desc.set_dtype(var_dtype)
var_desc.set_persistable(True)
return self.graph.create_var_node(var_desc)
return IrVarNode(self.graph.create_var_node(var_desc))
def create_var_node(self, name, var_type, shape, var_dtype):
"""
......@@ -1682,14 +2045,14 @@ class IrGraph(object):
var_dtype(core.VarDesc.VarType): the data type of the variable node.
Returns:
core.Node: the created variable node.
IrVarNode: the created variable node.
"""
var_desc = core.VarDesc(name)
var_desc.set_type(var_type)
var_desc.set_shape(shape)
var_desc.set_dtype(var_dtype)
return self.graph.create_var_node(var_desc)
return IrVarNode(self.graph.create_var_node(var_desc))
def create_var_node_from_desc(self, var_desc):
"""
......@@ -1700,9 +2063,9 @@ class IrGraph(object):
var_desc(core.VarDesc): the given variable description.
Returns:
core.Node: the created variable node.
IrVarNode: the created variable node.
"""
return self.graph.create_var_node(var_desc)
return IrVarNode(self.graph.create_var_node(var_desc))
def create_op_node(self, op_type, attrs, inputs, outputs):
"""
......@@ -1715,7 +2078,7 @@ class IrGraph(object):
outputs(dict): the outputs of the operator node.
Returns:
core.Node: the created operator node.
IrOpNode: the created operator node.
"""
op_desc = core.OpDesc()
op_desc.set_type(op_type)
......@@ -1731,7 +2094,7 @@ class IrGraph(object):
var_nodes = [var_nodes]
op_desc.set_output(output_name,
[var_node.name() for var_node in var_nodes])
return self.graph.create_op_node(op_desc)
return IrOpNode(self.graph.create_op_node(op_desc))
def create_op_node_from_desc(self, op_desc):
"""
......@@ -1741,40 +2104,40 @@ class IrGraph(object):
op_desc(core.OpDesc): the given operator description.
Returns:
core.Node: the created operator node.
IrOpNode: the created operator node.
"""
return self.graph.create_op_node(op_desc)
return IrOpNode(self.graph.create_op_node(op_desc))
def update_input_link(self, old_input_node, new_input_node, op_node):
"""
Update the input's link of a operator node.
Args:
old_input_node(core.Node): the old input node of the giving op_node.
new_input_node(core.Node): the new input node of the giving op_node.
op_node(core.Node): the operator node that is needed to update input's link.
old_input_node(IrNode): the old input node of the given op_node.
new_input_node(IrNode): the new input node of the given op_node.
op_node(IrOpNode): the operator node whose input link needs to be updated.
"""
assert old_input_node in self.graph.nodes() and new_input_node in \
self.graph.nodes() and op_node in self.graph.nodes(), \
assert old_input_node.node in self.graph.nodes() and new_input_node.node in \
self.graph.nodes() and op_node.node in self.graph.nodes(), \
'The three arguments(old_input_node&new_input_node&op_node) must be in the graph nodes.'
old_input_node.outputs_remove(op_node)
op_node.inputs_remove(old_input_node)
new_input_node.outputs_append(op_node)
op_node.inputs_append(new_input_node)
op_node.op()._rename_input(old_input_node.name(), new_input_node.name())
old_input_node.remove_output(op_node)
op_node.remove_input(old_input_node)
new_input_node.append_output(op_node)
op_node.append_input(new_input_node)
op_node.rename_input(old_input_node.name(), new_input_node.name())
def link_to(self, node_in, node_out):
"""
Connect two nodes.
Args:
node_in(core.Node): the input node.
node_out(core.Node): the output node.
node_in(IrNode): the input node.
node_out(IrNode): the output node.
"""
assert node_in in self.graph.nodes() and node_out in self.graph.nodes(), \
assert node_in.node in self.graph.nodes() and node_out.node in self.graph.nodes(), \
'The two arguments(node_in&node_out) must be in the graph nodes.'
node_in.outputs_append(node_out)
node_out.inputs_append(node_in)
node_in.append_output(node_out)
node_out.append_input(node_in)
def safe_remove_nodes(self, remove_nodes):
"""
......@@ -1789,7 +2152,8 @@ class IrGraph(object):
remove_nodes = set(remove_nodes)
else:
remove_nodes = {remove_nodes}
core.graph_safe_remove_nodes(self.graph, remove_nodes)
original_nodes = {n.node for n in remove_nodes}
core.graph_safe_remove_nodes(self.graph, original_nodes)
def has_circle(self):
"""
......@@ -1816,18 +2180,23 @@ class IrGraph(object):
Notes: the `graph` cannot contain a circle.
Returns:
set(core.Node): nodes in topology order.
set(IrNode): nodes in topology order.
"""
return core.topology_sort(self.graph)
ordered_nodes = core.topology_sort(self.graph)
return {IrNode(n) for n in ordered_nodes}
def build_adjacency_list(self):
"""
Build an adjacency list of operations for the `graph`.
Returns:
dict{core.Node: set(core.Node)}: the adjacency list.
dict{IrNode: set(IrNode)}: the adjacency list.
"""
return core.build_adjacency_list(self.graph)
adj_list = core.build_adjacency_list(self.graph)
wrapped_adj_list = dict()
for k, v in six.iteritems(adj_list):
wrapped_adj_list[IrNode(k)] = {IrNode(n) for n in v}
return wrapped_adj_list
def draw(self, save_path, name, marked_nodes=None, remove_ctr_var=True):
"""
......@@ -1837,7 +2206,7 @@ class IrGraph(object):
Args:
save_path(str): the save path of drawn graph.
name(str): the name of drawn graph.
marked_nodes(set(core.Node)): nodes that are needed to be marked.
marked_nodes(set(IrNode)): nodes that need to be marked.
Default value is None.
remove_ctr_var(bool): If it is set True, all control variable nodes
in the graph will be removed. Default value is True.
......@@ -1852,20 +2221,22 @@ class IrGraph(object):
print('The {} is saved as the dot filetype.'.format(
dot_file_path))
remove_ctr_vars = set()
if remove_ctr_var:
remove_ctr_vars = set()
for node in self.graph.nodes():
for node in self.all_var_nodes():
if node.is_ctrl_var():
remove_ctr_vars.add(node)
self.safe_remove_nodes(remove_ctr_vars)
ops_num = 0
for node in self.graph.nodes():
if node.is_op():
ops_num += 1
print('Total ops num = {}.'.format(ops_num))
print('Total ops num = {}.'.format(len(self.all_op_nodes())))
if marked_nodes is not None:
if not isinstance(marked_nodes, set):
marked_nodes = set(marked_nodes)
if isinstance(marked_nodes, Iterable):
marked_nodes = set(marked_nodes)
else:
marked_nodes = {marked_nodes}
marked_nodes = {n.node for n in marked_nodes}
remove_ctr_vars = {n.node for n in remove_ctr_vars}
marked_nodes = marked_nodes - remove_ctr_vars
if self.graph.has('__graphviz__marked_node__'):
self.graph.erase('__graphviz__marked_node__')
......
......@@ -23,7 +23,11 @@ from .layers import *
from . import nn
from .nn import *
from . import tracer
from .tracer import *
__all__ = []
__all__ += layers.__all__
__all__ += base.__all__
__all__ += nn.__all__
__all__ += tracer.__all__
......@@ -16,6 +16,7 @@ import numpy as np
from paddle.fluid import core
from paddle.fluid import framework
from .tracer import Tracer
__all__ = ['enabled', 'guard', 'to_variable']
......@@ -28,7 +29,7 @@ def enabled():
def guard(place=None):
train = framework.Program()
startup = framework.Program()
tracer = core.Tracer(train.current_block().desc)
tracer = Tracer(train.current_block().desc)
if place is None:
if core.is_compiled_with_cuda():
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import six
from collections import defaultdict
from paddle.fluid import core
from paddle.fluid import framework
__all__ = ['Tracer']
def release_op(op):
del framework._imperative_tracer()._ops[op._trace_id]
class Tracer(core.Tracer):
"""
Python wrapper of imperative tracer
"""
def __init__(self, block):
super(Tracer, self).__init__(block)
self._ops = defaultdict()
self._trace_id = 0
def trace_op(self, op, stop_gradient=False):
# record op's trace id
op.iop._trace_id = self._trace_id
# trace op and save it
backward_refs = self.trace(op.iop, op.inputs, op.outputs, op.block.desc,
framework._current_expected_place(),
stop_gradient)
if not stop_gradient:
self._trace_id += 1
self._ops[op.iop._trace_id] = op
# register backward hooks and variables if needed
if len(backward_refs) > 0:
op.iop.register_backward_hooks(release_op)
# TODO(minqiyang): remove all inputs and outputs after seperate
# var and grad
op.backward_refs = defaultdict(list)
for k, v in six.iteritems(op.inputs):
if k in backward_refs:
op.backward_refs[k] = op.inputs[k]
for k, v in six.iteritems(op.outputs):
if k in backward_refs:
op.backward_refs[k] = op.outputs[k]
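A minimal forward-only sketch of how this Tracer is exercised through fluid.imperative.guard(), assuming the imperative API as of this commit:

    import numpy as np
    import paddle.fluid as fluid

    # forward-only sketch; guard() installs the Python Tracer defined above
    with fluid.imperative.guard():
        x = fluid.imperative.to_variable(np.ones([2, 2], dtype='float32'))
        y = fluid.layers.relu(x)  # append_op routes through Tracer.trace_op eagerly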
......@@ -19,6 +19,7 @@ import numpy as np
from .wrapped_decorator import signature_safe_contextmanager
from .core import VarDesc
from . import unique_name
from .imperative import base as imperative_base
__all__ = [
'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier', 'Bilinear',
......@@ -165,7 +166,8 @@ class ConstantInitializer(Initializer):
'force_cpu': self._force_cpu or force_init_on_cpu()
},
stop_gradient=True)
var.op = op
if not imperative_base.enabled():
var.op = op
return op
......@@ -244,7 +246,8 @@ class UniformInitializer(Initializer):
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
var.op = op
if not imperative_base.enabled():
var.op = op
return op
......@@ -322,7 +325,8 @@ class NormalInitializer(Initializer):
outputs={"Out": var},
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
var.op = op
if not imperative_base.enabled():
var.op = op
return op
......@@ -400,7 +404,8 @@ class TruncatedNormalInitializer(Initializer):
outputs={"Out": var},
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
var.op = op
if not imperative_base.enabled():
var.op = op
return op
......@@ -505,7 +510,8 @@ class XavierInitializer(Initializer):
"seed": self._seed
},
stop_gradient=True)
var.op = op
if not imperative_base.enabled():
var.op = op
return op
......@@ -605,7 +611,8 @@ class MSRAInitializer(Initializer):
"seed": self._seed
},
stop_gradient=True)
var.op = op
if not imperative_base.enabled():
var.op = op
return op
......@@ -703,7 +710,8 @@ class BilinearInitializer(Initializer):
'shape': list(shape),
value_name: values
})
var.op = op
if not imperative_base.enabled():
var.op = op
return op
......@@ -761,7 +769,8 @@ class NumpyArrayInitializer(Initializer):
value_name: values
},
stop_gradient=True)
var.op = op
if not imperative_base.enabled():
var.op = op
return op
......
......@@ -468,9 +468,10 @@ def save_persistables(executor, dirname, main_program=None, filename=None):
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
# `prog` can be a program defined by the user
prog = fluid.default_main_program()
fluid.io.save_persistables(executor=exe, dirname=param_path,
main_program=None)
main_program=prog)
"""
if main_program and main_program._is_distributed:
......
......@@ -28,10 +28,12 @@ from . import ops
from . import tensor
from ..initializer import init_on_cpu
from ..framework import default_main_program, Parameter, unique_name, name_scope
import math
__all__ = [
'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS'
'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS',
'cosine_decay'
]
......@@ -307,6 +309,41 @@ def piecewise_decay(boundaries, values):
return lr
def cosine_decay(learning_rate, step_each_epoch, epochs):
"""
Applies cosine decay to the learning rate.
When training a model, it is often recommended to lower the learning rate as the
training progresses. By using this function, the learning rate will be decayed
following the cosine decay strategy.
decayed_lr = learning_rate * 0.5 * (math.cos(epoch * math.pi / epochs) + 1)
Args:
learning_rate(Variable|float): The initial learning rate.
step_each_epoch(int): the number of steps in an epoch.
epochs(int): the number of epochs.
Returns:
Variable: The decayed learning rate.
Examples:
.. code-block:: python
base_lr = 0.1
lr = fluid.layers.cosine_decay(
learning_rate = base_lr, step_each_epoch=10000, epochs=120)
"""
with default_main_program()._lr_schedule_guard():
global_step = _decay_step_counter()
cur_epoch = ops.floor(global_step / step_each_epoch)
decayed_lr = learning_rate * 0.5 * (
ops.cos(cur_epoch * math.pi / epochs) + 1)
return decayed_lr
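A quick numeric check of the schedule above in plain Python, using the base_lr and epochs from the docstring example:

    import math

    base_lr, epochs = 0.1, 120
    for epoch in (0, 60, 120):
        lr = base_lr * 0.5 * (math.cos(epoch * math.pi / epochs) + 1)
        print(epoch, round(lr, 4))  # 0 -> 0.1, 60 -> 0.05, 120 -> 0.0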
def append_LARS(params_grads, learning_rate, weight_decay):
"""
Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
......
......@@ -1767,7 +1767,7 @@ def sequence_softmax(input, use_cudnn=False, name=None):
return softmax_out
def softmax(input, use_cudnn=True, name=None):
def softmax(input, use_cudnn=False, name=None):
"""
The input of the softmax operator is a tensor of any rank. The output tensor
has the same shape as the input.
......@@ -1795,7 +1795,8 @@ def softmax(input, use_cudnn=True, name=None):
Args:
input (Variable): The input variable.
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \
library is installed.
library is installed. use_cudnn is set to False by default to improve \
numerical stability. Default: False
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically. Default: None.
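With the default flipped to use_cudnn=False, callers who still want the cuDNN kernel must opt in explicitly; a minimal sketch:

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[10], dtype='float32')
    y_default = fluid.layers.softmax(x)                # new default: use_cudnn=False
    y_cudnn = fluid.layers.softmax(x, use_cudnn=True)  # opt back in to cuDNN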
......@@ -3041,7 +3042,6 @@ def data_norm(input,
param_attr=None,
data_layout='NCHW',
in_place=False,
use_mkldnn=False,
name=None,
moving_mean_name=None,
moving_variance_name=None,
......@@ -3075,7 +3075,6 @@ def data_norm(input,
param_attr(ParamAttr): The parameter attribute for Parameter `scale`.
data_layout(string, default NCHW): NCHW|NHWC
in_place(bool, Default False): Make the input and output of batch norm reuse memory.
use_mkldnn(bool, Default false): ${use_mkldnn_comment}
name(string, Default None): A name for this layer(optional). If set None, the layer
will be named automatically.
moving_mean_name(string, Default None): The name of moving_mean which store the global Mean.
......@@ -3156,8 +3155,7 @@ def data_norm(input,
outputs={"Y": data_norm_out,
"Means": means,
"Scales": scales},
attrs={"epsilon": epsilon,
"use_mkldnn": use_mkldnn})
attrs={"epsilon": epsilon})
return helper.append_activation(data_norm_out)
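After this cleanup the Python signature no longer accepts use_mkldnn; a call sketch (passing use_mkldnn=... would now raise a TypeError):

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[32], dtype='float32')
    out = fluid.layers.data_norm(input=x)  # use_mkldnn is gone from the signature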
......@@ -5756,7 +5754,7 @@ def softmax_with_cross_entropy(logits,
label,
soft_label=False,
ignore_index=kIgnoreIndex,
numeric_stable_mode=False,
numeric_stable_mode=True,
return_softmax=False):
"""
**Softmax With Cross Entropy Operator.**
......@@ -5820,7 +5818,7 @@ def softmax_with_cross_entropy(logits,
When soft_label is True or CPU is used,
the algorithm is always numerically stable.
Note that the speed may be slower when using the
stable algorithm. Default: False
stable algorithm. Default: True
return_softmax (bool): A flag indicating whether to return the softmax
along with the cross entropy loss. Default: False
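The numerically stable path is now the default; a minimal sketch for callers who need the previous behaviour:

    import paddle.fluid as fluid

    logits = fluid.layers.data(name='logits', shape=[10], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    loss = fluid.layers.softmax_with_cross_entropy(
        logits, label, numeric_stable_mode=False)  # the pre-change default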
......@@ -6846,56 +6844,58 @@ def image_resize(input,
Example:
For scale:
if align_corners = True && out_size > 1 :
.. code-block:: text
scale_factor = (in_size-1.0)/(out_size-1.0)
else:
For scale:
if align_corners = True && out_size > 1 :
scale_factor = (in_size-1.0)/(out_size-1.0)
else:
scale_factor = float(in_size/out_size)
scale_factor = float(in_size/out_size)
Nearest neighbor interpolation:
if:
align_corners = False
Nearest neighbor interpolation:
if:
align_corners = False
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = \left \lfloor {H_{in} * scale_{}factor}} \right \rfloor
W_out = \left \lfloor {W_{in} * scale_{}factor}} \right \rfloor
H_out = floor (H_{in} * scale_{factor})
W_out = floor (W_{in} * scale_{factor})
else:
align_corners = True
else:
align_corners = True
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = round(H_{in} * scale_{factor})
W_out = round(W_{in} * scale_{factor})
H_out = round(H_{in} * scale_{factor})
W_out = round(W_{in} * scale_{factor})
Bilinear interpolation:
Bilinear interpolation:
if:
align_corners = False , align_mode = 0
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = (H_{in}+0.5) * scale_{factor} - 0.5
W_out = (W_{in}+0.5) * scale_{factor} - 0.5
if:
align_corners = False , align_mode = 0
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = (H_{in}+0.5) * scale_{factor} - 0.5
W_out = (W_{in}+0.5) * scale_{factor} - 0.5
else:
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
else:
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = H_{in} * scale_{factor}
W_out = W_{in} * scale_{factor}
H_out = H_{in} * scale_{factor}
W_out = W_{in} * scale_{factor}
For details of nearest neighbor interpolation, please refer to Wikipedia:
https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation.
......@@ -7050,41 +7050,39 @@ def resize_bilinear(input,
Align_corners and align_mode are optinal parameters,the calculation
method of interpolation can be selected by them.
Align_corners and align_mode are optional parameters; the calculation method
of interpolation can be selected by them.
Example:
For scale:
if align_corners = True && out_size > 1 :
.. code-block:: text
scale_factor = (in_size-1.0)/(out_size-1.0)
else:
For scale:
scale_factor = float(in_size/out_size)
if align_corners = True && out_size > 1 :
Bilinear interpolation:
scale_factor = (in_size-1.0)/(out_size-1.0)
else:
scale_factor = float(in_size/out_size)
if:
align_corners = False , align_mode = 0
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = (H_{in}+0.5) * scale_{factor} - 0.5
W_out = (W_{in}+0.5) * scale_{factor} - 0.5
Bilinear interpolation:
if:
align_corners = False , align_mode = 0
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = (H_{in}+0.5) * scale_{factor} - 0.5
W_out = (W_{in}+0.5) * scale_{factor} - 0.5
else:
else:
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = H_{in} * scale_{factor}
W_out = W_{in} * scale_{factor}
H_out = H_{in} * scale_{factor}
W_out = W_{in} * scale_{factor}
......@@ -7136,42 +7134,44 @@ def resize_nearest(input,
align_corners=True):
"""
Resize input by performing nearest neighbor interpolation in both the
3rd dimention(in height direction) and the 4th dimention(in width
direction) based on given output shape which specified by actual_shape,
3rd dimension(in height direction) and the 4th dimension(in width
direction) based on given output shape which is specified by actual_shape,
out_shape and scale in priority order.
Example:
For scale:
if align_corners = True && out_size > 1 :
.. code-block:: text
scale_factor = (in_size-1.0)/(out_size-1.0)
else:
For scale:
scale_factor = float(in_size/out_size)
Nearest neighbor interpolation:
if:
align_corners = False
if align_corners = True && out_size > 1 :
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
scale_factor = (in_size-1.0)/(out_size-1.0)
else:
scale_factor = float(in_size/out_size)
Nearest neighbor interpolation:
if:
align_corners = False
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = \left \lfloor {H_{in} * scale_{}factor}} \right \rfloor
W_out = \left \lfloor {W_{in} * scale_{}factor}} \right \rfloor
H_out = floor(H_{in} * scale_{factor})
W_out = floor(W_{in} * scale_{factor})
else:
align_corners = True
else:
align_corners = True
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
input : (N,C,H_in,W_in)
output: (N,C,H_out,W_out) where:
H_out = round(H_{in} * scale_{factor})
W_out = round(W_{in} * scale_{factor})
H_out = round(H_{in} * scale_{factor})
W_out = round(W_{in} * scale_{factor})
For details of nearest neighbor interpolation, please refer to Wikipedia:
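A quick numeric check of the two scale_factor rules above, in plain Python:

    in_size, out_size = 32, 64
    print((in_size - 1.0) / (out_size - 1.0))  # align_corners=True  -> ~0.4921
    print(float(in_size) / out_size)           # align_corners=False -> 0.5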
......@@ -9945,6 +9945,7 @@ def teacher_student_sigmoid_loss(input,
Examples:
.. code-block:: python
cost = fluid.layers.teacher_student_sigmoid_loss(input=similarity, label=label)
"""
helper = LayerHelper('teacher_student_sigmoid_loss', **locals())
......
......@@ -13,15 +13,11 @@
# limitations under the License.
from __future__ import print_function
import multiprocessing
from . import core
from . import framework
from . import executor
from .. import compat as cpt
import warnings
from . import compiler
import sys
import six
import os
__all__ = ['ParallelExecutor']
......@@ -92,104 +88,31 @@ class ParallelExecutor(object):
num_trainers=1,
trainer_id=0,
scope=None):
# step1: get places, the places are used in run too.
self._places = []
if use_cuda:
gpus_env = os.getenv("FLAGS_selected_gpus")
if gpus_env:
gpus = [int(s) for s in gpus_env.split(",")]
else:
gpus = [
i for i in six.moves.range(core.get_cuda_device_count())
]
self._places = [core.CUDAPlace(i) for i in gpus]
else:
cpu_num = int(
os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
self._places = [core.CPUPlace() for _ in six.moves.range(cpu_num)]
assert self._places, "no place for execution"
sys.stderr.write(
'ParallelExecutor is deprecated. '
'Please use CompiledProgram and Executor. CompiledProgram '
'is a central place for optimization and Executor is the '
'unified executor. Example can be found in compiler.py.\n')
# step2: init exec_strategy
if exec_strategy is None:
exec_strategy = ExecutionStrategy()
exec_strategy.use_cuda = use_cuda
if exec_strategy.num_threads == 0:
if use_cuda:
# Experiments on se-resnext shows that too many threads hurt
# performance. Worth tunning for other models in the future.
exec_strategy.num_threads = len(self._places) * 4
else:
cpu_num = int(
os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
exec_strategy.num_threads = cpu_num * 2
# step3: init build_strategy
if build_strategy is None:
build_strategy = BuildStrategy()
build_strategy.num_trainers = num_trainers
build_strategy.trainer_id = trainer_id
# FIXME(zcd): is_distribution_ is a temporary field, because in pserver mode,
# num_trainers is 1, so the current fields of build_strategy doesn't tell if
# it's distributed model.
build_strategy.is_distribution = framework.is_pserver_mode(
main_program) or num_trainers > 1
# step4: get main_program, scope, local_scopes
main = main_program if main_program \
else framework.default_main_program()
# FIXME(dzhwinter): enable_inplace should be after memory_optimize
# if turn on python memory optimize, turn off the inplace_pass.
if build_strategy.memory_optimize is None:
build_strategy.memory_optimize = False if main._is_mem_optimized else True
if build_strategy.enable_inplace is None:
build_strategy.enable_inplace = False if main._is_mem_optimized else True
scope = scope if scope is not None else executor.global_scope()
if share_vars_from and not isinstance(share_vars_from,
ParallelExecutor):
raise TypeError("share_vars_from must be ParallelExecutor.")
local_scopes = share_vars_from.executor.local_scopes()\
if share_vars_from else []
# step5: check trainers_endpoints, it is used for distribution.
trainers_endpoints = main._trainers_endpoints
if num_trainers > 1 and trainers_endpoints:
assert num_trainers == len(
trainers_endpoints), "num_trainers == len(endpoints)"
build_strategy.trainers_endpoints = trainers_endpoints
# step6: get persistable_vars, places. persistable_vars
# need be broadcast to other local_scope.
persistable_vars = set([
cpt.to_text(v.name) for v in [
var for var in main.list_vars()
if var.persistable and var.type != core.VarDesc.VarType.RAW
]
])
def place_obj(place):
p = core.Place()
p.set_place(place)
return p
places = list(map(place_obj, self._places))
# step7: init ParallelExecutor
# ParallelExecutor API will be deprecated, don't support parallel graph.
self._graphs = []
if build_strategy.async_mode:
for _ in range(cpu_num):
self._graphs.append(core.Graph(main.desc))
else:
self._graphs.append(core.Graph(main.desc))
self._places = compiler.get_available_places(use_cuda)
self._scope = scope if scope is not None else executor.global_scope()
self.executor = core.ParallelExecutor(
places, persistable_vars,
cpt.to_text(loss_name) if loss_name else six.u(''), scope,
local_scopes, exec_strategy, build_strategy, self._graphs)
self.scope = scope
main_program = main_program if main_program is not None \
else framework.default_main_program()
self._compiled_program = compiler.CompiledProgram(main_program)
self._compiled_program.with_data_parallel(
loss_name=loss_name,
build_strategy=build_strategy,
exec_strategy=exec_strategy,
share_vars_from=share_vars_from)
self._place = core.CUDAPlace(0) if use_cuda else core.CPUPlace()
self._executor = executor.Executor(self._place)
self._compiled_program._compile(place=self._place, scope=self._scope)
def run(self, fetch_list, feed=None, feed_dict=None, return_numpy=True):
"""
......@@ -256,56 +179,11 @@ class ParallelExecutor(object):
loss = pe.run(feed=feeder.feed(cur_batch),
fetch_list=[avg_cost.name]))
"""
if feed is None and feed_dict is not None:
feed = feed_dict
print(
"`feed_dict` is deprecated. Please use `feed=`",
file=sys.stderr)
if isinstance(feed, dict):
feed_tensor_dict = dict()
for feed_name in feed:
feed_tensor = feed[feed_name]
if not isinstance(feed_tensor, core.LoDTensor):
feed_tensor = core.LoDTensor()
# always set to CPU place, since the tensor need to be splitted
# it is fast in CPU
feed_tensor.set(feed[feed_name], core.CPUPlace())
feed_tensor_dict[feed_name] = feed_tensor
self.executor.feed_and_split_tensor_into_local_scopes(
feed_tensor_dict)
elif isinstance(feed, list) or isinstance(feed, tuple):
if len(feed) != len(self._places):
raise ValueError(
"Feed a list of tensor, the list should be the same size as places"
)
res = list()
for i, each in enumerate(feed):
if not isinstance(each, dict):
raise TypeError(
"Each element of feed list should be a dict")
res_dict = dict()
for feed_name in each:
tensor = each[feed_name]
if not isinstance(tensor, core.LoDTensor):
tmp = core.LoDTensor()
tmp.set(tensor, self._places[i])
tensor = tmp
res_dict[feed_name] = tensor
res.append(res_dict)
self.executor.feed_tensors_into_local_scopes(res)
fetch_var_name = 'fetch'
self.executor.run(fetch_list, fetch_var_name)
arr = self.scope.find_var(fetch_var_name).get_lod_tensor_array()
if return_numpy:
return executor.as_numpy(arr)
return [arr[i] for i in range(len(arr))]
return self._executor.run(program=self._compiled_program,
scope=self._scope,
feed=feed,
fetch_list=fetch_list,
return_numpy=return_numpy)
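# Example (hypothetical feed names), feeding one dict per device:
#   pe.run(fetch_list=[avg_cost.name],
#          feed=[{'words': x0, 'label': y0}, {'words': x1, 'label': y1}])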
@property
def device_count(self):
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import six
import unittest
import time
import math
import multiprocessing
import numpy as np
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid import compiler
# open eager delete mode
os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0'
os.environ['FLAGS_fast_eager_deletion_mode'] = 'true'
os.environ['CPU_NUM'] = '2'
class BuildIrMemOptBase(unittest.TestCase):
def check_network_convergence(self,
network,
use_cuda=True,
memory_opt=True,
use_ir_memory_optimize=True,
enable_inplace=True,
iter=5):
if use_cuda and not core.is_compiled_with_cuda():
print('Skip use_cuda=True because Paddle is not compiled with CUDA')
return
if os.name == 'nt':
print(
'Skip use_parallel_executor=True because Paddle does not support parallel execution on Windows'
)
return
fluid.default_startup_program().random_seed = 100
fluid.default_main_program().random_seed = 100
batch_size = 32
batch_size *= fluid.core.get_cuda_device_count() if use_cuda else int(
os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
# build network
word_dict = paddle.dataset.imdb.word_dict()
train_reader = paddle.batch(
paddle.dataset.imdb.train(word_dict), batch_size=batch_size)
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
cost = network(data, label, len(word_dict))
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
optimizer.minimize(cost)
if memory_opt:
fluid.memory_optimize(fluid.default_main_program())
# execution
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
reader = feeder.decorate_reader(train_reader, multi_devices=True)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
train_cp = compiler.CompiledProgram(fluid.default_main_program())
train_cp = train_cp.with_data_parallel(loss_name=cost.name)
fetch_list = [cost.name]
begin = time.time()
first_loss, last_loss = None, None
step_id = 0
custom_iter = getattr(self, "iter", None)
if custom_iter is not None:
iter = custom_iter
for data in reader():
ret = exe.run(train_cp, feed=data, fetch_list=fetch_list)
print(ret)
step_id += 1
if step_id == 1:
first_loss = ret[0]
if step_id == iter:
last_loss = ret[0]
break
end = time.time()
print("%.4f Instance per second" % (
(batch_size * iter) / (end - begin)))
print(first_loss, last_loss)
avg_last_loss_val = np.array(last_loss).mean()
avg_first_loss_val = np.array(first_loss).mean()
if math.isnan(float(avg_last_loss_val)) or math.isnan(
float(avg_first_loss_val)):
sys.exit("got NaN loss, training failed.")
return first_loss, last_loss
class TestIrMemOptBase(BuildIrMemOptBase):
def setUp(self):
self.network = None
def test_network(self):
if self.network is None or not core.is_compiled_with_cuda():
return
baseline_first_loss, baseline_last_loss = None, None
for use_cuda in [True]:
for use_python_mem_opt in [True, False]:
print(
'network: {}, use_cuda: {}, use_python_mem_opt: {}, use_ir_mem_opt : {}'.
format(self.network.__name__, use_cuda, use_python_mem_opt,
not use_python_mem_opt))
with fluid.program_guard(fluid.Program(), fluid.Program()):
with fluid.scope_guard(core.Scope()):
if use_cuda is True and use_python_mem_opt is True:
baseline_first_loss, baseline_last_loss = self.check_network_convergence(
self.network,
use_cuda=use_cuda,
memory_opt=use_python_mem_opt)
else:
cur_first_loss, cur_last_loss = self.check_network_convergence(
self.network,
use_cuda=use_cuda,
memory_opt=use_python_mem_opt)
self.assertAlmostEqual(
np.mean(baseline_last_loss),
np.mean(cur_last_loss),
delta=1e-2)
self.assertAlmostEqual(
np.mean(baseline_first_loss),
np.mean(cur_first_loss),
delta=1e-2)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import paddle.fluid.core as core
import paddle.fluid as fluid
def check_if_mkldnn_primitives_exist_in_bwd(test_case, op_type, x, out,
out_grad, x_grad):
def __assert_close(tensor, np_array, msg, atol=1e-4):
test_case.assertTrue(
np.allclose(
np.array(tensor), np_array, atol=atol), msg)
place = core.CPUPlace()
var_dict = {'x': x, 'out': out, 'out@GRAD': out_grad, 'x@GRAD': x_grad}
var_names = list(var_dict.keys())
ground_truth = {name: var_dict[name] for name in var_names}
program = fluid.Program()
with fluid.program_guard(program):
block = program.global_block()
for name in ground_truth:
block.create_var(
name=name, dtype=np.float32, shape=ground_truth[name].shape)
op = block.append_op(
type=op_type,
inputs={'X': block.var('x'), },
outputs={'Out': block.var('out')},
attrs={'use_mkldnn': True})
# Generate backward op_desc
grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(op.desc,
set(), [])
grad_op_desc = grad_op_desc_list[0]
new_op_desc = block.desc.append_op()
new_op_desc.copy_from(grad_op_desc)
for var_name in grad_op_desc.output_arg_names():
block.desc.var(var_name.encode('ascii'))
grad_op_desc.infer_var_type(block.desc)
grad_op_desc.infer_shape(block.desc)
for arg in grad_op_desc.output_arg_names():
grad_var = block.desc.find_var(arg.encode('ascii'))
grad_var.set_dtype(core.VarDesc.VarType.FP32)
exe = fluid.Executor(place)
# Do at least 2 iterations
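# the second pass should reuse the MKL-DNN primitives created during the
# first one, which is what this helper is meant to verify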
for i in range(2):
out = exe.run(
program,
feed={name: var_dict[name]
for name in ['x', 'out@GRAD']},
fetch_list=['x@GRAD', 'out'])
__assert_close(x_grad, out[0], 'x@GRAD')
......@@ -19,7 +19,7 @@ import numpy as np
import paddle.fluid.core as core
from paddle.fluid.tests.unittests.op_test import OpTest
from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs
import paddle.fluid as fluid
from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd
class TestMKLDNNReluDim2(TestRelu):
......@@ -98,62 +98,24 @@ class TestMKLDNNAbsDim4(TestAbs):
# Check if primitives already exist in backward
class TestMKLDNNReluPrimitivesAlreadyExist(unittest.TestCase):
def __assert_close(self, tensor, np_array, msg, atol=1e-4):
self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg)
def test_check_forward_backward(self):
place = core.CPUPlace()
class TestMKLDNNAbsPrimitivesAlreadyExist(unittest.TestCase):
def setUp(self):
super(TestMKLDNNAbsPrimitivesAlreadyExist, self).setUp()
np.random.seed(123)
x = np.random.uniform(-1, 1, [2, 2]).astype(np.float32)
out = np.abs(x)
out_grad = np.random.random_sample(x.shape).astype(np.float32)
x_grad = out_grad * np.sign(x) # Abs grad calculation
var_dict = {'x': x, 'out': out, 'out@GRAD': out_grad, 'x@GRAD': x_grad}
var_names = list(var_dict.keys())
ground_truth = {name: var_dict[name] for name in var_names}
program = fluid.Program()
with fluid.program_guard(program):
block = program.global_block()
for name in ground_truth:
block.create_var(
name=name, dtype='float32', shape=ground_truth[name].shape)
relu_op = block.append_op(
type="abs",
inputs={"X": block.var('x'), },
outputs={"Out": block.var('out')},
attrs={"use_mkldnn": True})
# Generate backward op_desc
grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
relu_op.desc, set(), [])
grad_op_desc = grad_op_desc_list[0]
new_op_desc = block.desc.append_op()
new_op_desc.copy_from(grad_op_desc)
for var_name in grad_op_desc.output_arg_names():
block.desc.var(var_name.encode("ascii"))
grad_op_desc.infer_var_type(block.desc)
grad_op_desc.infer_shape(block.desc)
for arg in grad_op_desc.output_arg_names():
grad_var = block.desc.find_var(arg.encode("ascii"))
grad_var.set_dtype(core.VarDesc.VarType.FP32)
exe = fluid.Executor(place)
# Do at least 2 iterations
for i in range(2):
out = exe.run(
program,
feed={name: var_dict[name]
for name in ['x', 'out@GRAD']},
fetch_list=['x@GRAD'])
self.__assert_close(x_grad, out[0], "x@GRAD")
self.op_type = 'abs'
self.x = np.random.uniform(-1, 1, [2, 2]).astype(np.float32)
self.out = np.abs(self.x)
self.out_grad = np.random.random_sample(self.x.shape).astype(np.float32)
self.x_grad = self.__abs_bwd(self.x, self.out_grad)
# Abs grad calculation: d|x|/dx = sign(x), so x_grad = out_grad * sign(x)
def __abs_bwd(self, x, out_grad):
return out_grad * np.sign(x)
def test_check(self):
check_if_mkldnn_primitives_exist_in_bwd(
self, self.op_type, self.x, self.out, self.out_grad, self.x_grad)
if __name__ == '__main__':
......
......@@ -15,44 +15,139 @@
from __future__ import print_function
import unittest
import numpy as np
from paddle.fluid.tests.unittests.test_conv2d_op import TestConv2dOp, TestWithPad, TestWithStride, TestWithGroup, TestWith1x1, TestWithInput1x1Filter1x1
import paddle.fluid.core as core
from paddle.fluid.tests.unittests.op_test import OpTest
from paddle.fluid.tests.unittests.test_conv2d_op import TestConv2dOp
class TestMKLDNN(TestConv2dOp):
def init_kernel_type(self):
self.use_mkldnn = True
self.data_format = "NCHW"
def conv2d_bias_naive(out, bias):
_, out_c, _, _ = out.shape
for l in range(out_c):
out[:, l, :, :] = out[:, l, :, :] + bias[l]
return out
class TestMKLDNNWithPad(TestWithPad):
def init_kernel_type(self):
self.use_mkldnn = True
self.data_format = "NCHW"
def conv2d_residual_naive(out, residual):
assert out.shape == residual.shape
out = np.add(out, residual)
return out
class TestMKLDNNWithStride(TestWithStride):
def init_kernel_type(self):
self.use_mkldnn = True
self.data_format = "NCHW"
class TestConv2dMKLDNNOp(TestConv2dOp):
def init_group(self):
self.groups = 1
class TestMKLDNNWithGroup(TestWithGroup):
def init_kernel_type(self):
self.use_mkldnn = True
self.data_format = "NCHW"
self.use_mkldnn = True
self._cpu_only = True
def init_test_case(self):
self.pad = [0, 0]
self.stride = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 3, 3]
class TestMKLDNNWith1x1(TestWith1x1):
def init_kernel_type(self):
self.use_mkldnn = True
self.data_format = "NCHW"
def setUp(self):
self.fuse_bias = False
self.bias_size = None
self.fuse_relu = False
self.fuse_residual_connection = False
self.input_residual_size = None
TestConv2dOp.setUp(self)
output = self.outputs['Output']
class TestMKLDNNWithInput1x1Filter1x1(TestWithInput1x1Filter1x1):
def init_kernel_type(self):
self.use_mkldnn = True
self.data_format = "NCHW"
# MKLDNN only supports either conv-sum-relu or conv-relu.
if self.fuse_bias and self.bias_size is not None:
bias = np.random.random(self.bias_size).astype(self.dtype)
output = conv2d_bias_naive(output, bias)
output = output.astype(self.dtype)
self.attrs['fuse_bias'] = self.fuse_bias
self.inputs['Bias'] = OpTest.np_dtype_to_fluid_dtype(bias)
if self.fuse_residual_connection and self.input_residual_size is not None:
input_residual = np.random.random(self.input_residual_size).astype(
self.dtype)
output = conv2d_residual_naive(output, input_residual)
self.attrs[
'fuse_residual_connection'] = self.fuse_residual_connection
self.inputs['ResidualData'] = OpTest.np_dtype_to_fluid_dtype(
input_residual)
if self.fuse_relu:
output = np.maximum(output, 0).astype(self.dtype)
output = output.astype(self.dtype)
self.attrs['fuse_bias'] = self.fuse_bias
self.attrs['fuse_relu'] = self.fuse_relu
self.attrs['fuse_residual_connection'] = self.fuse_residual_connection
self.outputs['Output'] = output
class TestWithFuse(TestConv2dMKLDNNOp):
def init_test_case(self):
TestConv2dMKLDNNOp.init_test_case(self)
self.pad = [1, 1]
self.fuse_bias = True
self.bias_size = [6]
self.fuse_residual_connection = True
self.input_residual_size = [2, 6, 5, 5]
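# Gradient checks are skipped below: the fused conv path is presumably
# forward-only, so there is no backward kernel to verify.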
def test_check_grad(self):
pass
def test_check_grad_no_filter(self):
pass
def test_check_grad_no_input(self):
pass
class TestWithPadWithBias(TestConv2dMKLDNNOp):
def init_test_case(self):
TestConv2dMKLDNNOp.init_test_case(self)
self.pad = [1, 1]
self.input_size = [2, 3, 6, 6]
class TestWithStride(TestConv2dMKLDNNOp):
def init_test_case(self):
TestConv2dMKLDNNOp.init_test_case(self)
self.pad = [1, 1]
self.stride = [2, 2]
self.input_size = [2, 3, 6, 6]
class TestWithGroup(TestConv2dMKLDNNOp):
def init_group(self):
self.groups = 3
class TestWith1x1(TestConv2dMKLDNNOp):
def init_test_case(self):
TestConv2dMKLDNNOp.init_test_case(self)
self.filter_size = [6, 3, 1, 1]
class TestWithInput1x1Filter1x1(TestConv2dMKLDNNOp):
def init_test_case(self):
TestConv2dMKLDNNOp.init_test_case(self)
self.input_size = [2, 3, 1, 1] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] // self.groups
self.filter_size = [6, f_c, 1, 1]
def init_group(self):
self.groups = 3
if __name__ == '__main__':
......
......@@ -18,6 +18,24 @@ import unittest
from paddle.fluid.tests.unittests.test_pool2d_op import TestPool2D_Op, TestCase1, TestCase2, TestCase3, TestCase4, TestCase5
def create_test_mkldnn_use_ceil_class(parent):
class TestMKLDNNPool2DUseCeilCase(parent):
def init_kernel_type(self):
self.use_mkldnn = True
def init_ceil_mode(self):
self.ceil_mode = True
cls_name = "{0}_{1}".format(parent.__name__, "MKLDNNCeilModeCast")
TestMKLDNNPool2DUseCeilCase.__name__ = cls_name
globals()[cls_name] = TestMKLDNNPool2DUseCeilCase
create_test_mkldnn_use_ceil_class(TestPool2D_Op)
create_test_mkldnn_use_ceil_class(TestCase1)
create_test_mkldnn_use_ceil_class(TestCase2)
def create_test_mkldnn_class(parent):
class TestMKLDNNCase(parent):
def init_kernel_type(self):
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from paddle.fluid.tests.unittests.op_test import OpTest
import paddle.fluid.core as core
from paddle.fluid.tests.unittests.test_softmax_op import TestSoftmaxOp, stable_softmax
from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd
class TestSoftmaxMKLDNNOp(TestSoftmaxOp):
def init_kernel_type(self):
self.use_mkldnn = True
class TestSoftmaxMKLDNNOp2(TestSoftmaxMKLDNNOp):
def get_x_shape(self):
return [2, 3, 4, 5]
# Check if primitives already exist in backward
class TestSoftmaxMKLDNNPrimitivesAlreadyExist(unittest.TestCase):
def setUp(self):
super(TestSoftmaxMKLDNNPrimitivesAlreadyExist, self).setUp()
np.random.seed(123)
self.op_type = 'softmax'
self.x = np.random.uniform(-1, 1, 2).astype(np.float32)
self.out = stable_softmax(self.x)
self.out_grad = np.random.random_sample(self.x.shape).astype(np.float32)
self.x_grad = self.__softmax_bwd(self.out, self.out_grad)
# Softmax grad calculation: dx = out * (out_grad - dot(out, out_grad)) for a 1-D input
def __softmax_bwd(self, out, out_grad):
return out * (out_grad - np.dot(out, out_grad))
def test_check(self):
check_if_mkldnn_primitives_exist_in_bwd(
self, self.op_type, self.x, self.out, self.out_grad, self.x_grad)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
from paddle.fluid.tests.unittests.test_adam_op import TestAdamOp1, TestAdamOp2, TestAdamOpMultipleSteps, TestSparseAdamOp
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
from paddle.fluid.tests.unittests.test_concat_op import TestConcatOp, TestConcatOp2, TestConcatOp3
if __name__ == '__main__':
unittest.main()
......@@ -26,6 +26,7 @@ class TestActivation(OpTest):
self.op_type = "exp"
self.dtype = np.float32
self.init_dtype()
self.init_kernel_type()
x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
out = np.exp(x)
......@@ -44,6 +45,9 @@ class TestActivation(OpTest):
def init_dtype(self):
self.dtype = np.float32
def init_kernel_type(self):
pass
class TestSigmoid(TestActivation):
def setUp(self):
......@@ -601,6 +605,25 @@ class TestSwish(TestActivation):
self.check_grad(['X'], 'Out', max_relative_error=0.008)
#------------------ Test Cudnn Activation----------------------
def create_test_act_cudnn_class(parent, atol=1e-3, grad_atol=1e-3):
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestActCudnn(parent):
def init_kernel_type(self):
self.attrs = {"use_cudnn": True}
cls_name = "{0}_{1}".format(parent.__name__, "cudnn")
TestActCudnn.__name__ = cls_name
globals()[cls_name] = TestActCudnn
create_test_act_cudnn_class(TestRelu)
create_test_act_cudnn_class(TestRelu6)
create_test_act_cudnn_class(TestSigmoid)
create_test_act_cudnn_class(TestTanh)
#------------------ Test Fp16 ----------------------
def create_test_act_fp16_class(parent,
atol=1e-3,
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from op_test import OpTest
class TestAllocContinuousSpace(OpTest):
def setUp(self):
self.op_type = "alloc_continuous_space"
self.dtype = np.float32
attrs = self.init_attr()
self.copy_data = attrs["copy_data"]
self.constant = attrs["constant"]
self.set_constant = attrs["set_constant"]
self.Inputs = self.init_input()
self.FusedOutput = self.init_output(self.Inputs, self.set_constant,
self.constant)
self.inputs = {'Input': self.Inputs}
self.attrs = attrs
self.outputs = {'Output': self.Inputs, 'FusedOutput': self.FusedOutput}
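# alloc_continuous_space flattens and concatenates the inputs into one
# contiguous FusedOutput buffer; with set_constant it instead fills the
# buffer with `constant` (see init_output below).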
def init_dtype(self):
self.dtype = np.float32
def init_input(self):
inputs = []
inputs.append(("x1", np.random.random([20, 3]).astype(self.dtype)))
inputs.append(("x2", np.random.random([20]).astype(self.dtype)))
inputs.append(("x3", np.random.random([1]).astype(self.dtype)))
inputs.append(("x4", np.random.random([200, 30]).astype(self.dtype)))
inputs.append(("x5", np.random.random([30]).astype(self.dtype)))
inputs.append(("x6", np.random.random([1]).astype(self.dtype)))
return inputs
def init_attr(self):
return {"copy_data": True, "set_constant": False, "constant": 0.0}
def init_output(self, input_list, set_constant, constant):
inputs = [input[1].flatten() for input in input_list]
output = np.concatenate(inputs)
if set_constant:
output = np.ones((len(output))) * constant
return output
def test_check_output(self):
self.check_output()
class TestAllocContinuousSpace2(TestAllocContinuousSpace):
def init_attr(self):
return {"copy_data": False, "set_constant": True, "constant": 0.5}
def test_check_output(self):
self.check_output(no_check_set=["Output"])
if __name__ == '__main__':
unittest.main()
......@@ -56,6 +56,8 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2):
train_reader, multi_devices=use_parallel_executor)
exe = fluid.Executor(place)
fluid.default_startup_program().random_seed = 1
fluid.default_main_program().random_seed = 1
exe.run(fluid.default_startup_program())
train_cp = compiler.CompiledProgram(fluid.default_main_program())
......
......@@ -35,7 +35,7 @@ class TestFakeQuantizeOp(OpTest):
self.check_output()
class TestFakeQuantizeOp(OpTest):
class TestFakeQuantizeRangeAbsMaxOp(OpTest):
def setUp(self):
self.op_type = "fake_quantize_range_abs_max"
self.attrs = {
......@@ -43,8 +43,10 @@ class TestFakeQuantizeOp(OpTest):
'window_size': int(1),
'is_test': False
}
x = (np.random.random((8, 16, 7, 7)) - 0.5) * 10
x = x.astype("float32")
self.inputs = {
'X': np.random.random((8, 16, 7, 7)).astype("float32"),
'X': x,
'Iter': np.zeros(1).astype("int64"),
'InScale': np.zeros(1).astype("float32")
}
......@@ -62,5 +64,36 @@ class TestFakeQuantizeOp(OpTest):
self.check_output()
class TestFakeQuantizeRangeAbsMaxOp2(OpTest):
def setUp(self):
self.op_type = "fake_quantize_range_abs_max"
self.attrs = {
'bit_length': int(8),
'window_size': int(1),
'is_test': True
}
x = (np.random.random((8, 16, 7, 7)) - 0.5) * 10
x = x.astype("float32")
scale = np.max(np.abs(x)).astype("float32") - 1.0
out_scales = np.zeros(self.attrs['window_size']).astype("float32")
out_scales[0] = scale
self.inputs = {
'X': x,
'Iter': np.zeros(1).astype("int64"),
'InScale': scale.astype("float32")
}
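# Reference computation: saturate x to [-scale, scale], then map it
# linearly onto the signed integer grid, e.g. [-127, 127] for
# bit_length == 8.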
xs = np.clip(x, -scale, scale)
qs = np.round(xs / scale * ((1 << (self.attrs['bit_length'] - 1)) - 1))
self.outputs = {
'Out': qs,
'OutScale': scale.astype("float32"),
'OutScales': out_scales,
}
def test_check_output(self):
self.check_output(no_check_set=set(['OutScale', 'OutScales']))
if __name__ == "__main__":
unittest.main()
......@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import contextlib
import unittest
import numpy as np
......@@ -142,8 +144,6 @@ class TestImperativeMnist(unittest.TestCase):
sgd.minimize(avg_loss)
mnist.clear_gradients()
fluid.default_main_program().global_block()._clear_block()
dy_param_value = {}
for param in mnist.parameters():
dy_param_value[param.name] = param._numpy()
......
......@@ -243,7 +243,9 @@ class TestImperativePtbRnn(unittest.TestCase):
dy_loss = None
last_hidden = None
last_cell = None
for i in range(2):
batch_num = 50
for i in range(batch_num):
x_data = np.arange(12).reshape(4, 3).astype('int64')
y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
x_data = x_data.reshape((-1, num_steps, 1))
......@@ -302,7 +304,7 @@ class TestImperativePtbRnn(unittest.TestCase):
static_loss_value = None
static_last_cell_value = None
static_last_hidden_value = None
for i in range(2):
for i in range(batch_num):
x_data = np.arange(12).reshape(4, 3).astype('int64')
y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
x_data = x_data.reshape((-1, num_steps, 1))
......
......@@ -231,7 +231,7 @@ class TestImperativeResnet(unittest.TestCase):
seed = 90
batch_size = train_parameters["batch_size"]
batch_num = 2
batch_num = 20
with fluid.imperative.guard():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
......@@ -286,8 +286,6 @@ class TestImperativeResnet(unittest.TestCase):
optimizer.minimize(avg_loss)
resnet.clear_gradients()
fluid.default_main_program().global_block()._clear_block()
dy_param_value = {}
for param in resnet.parameters():
dy_param_value[param.name] = param._numpy()
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NLP model: a stack of ops operating on LoD. It's a classic test case for the optimization pass.
from __future__ import print_function
import paddle.fluid as fluid
import unittest
from ir_memory_optimize_net_base import TestIrMemOptBase
def lstm_net(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2,
emb_lr=30.0):
emb = fluid.layers.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr))
fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
lstm_h, c = fluid.layers.dynamic_lstm(
input=fc0, size=hid_dim * 4, is_reverse=False)
lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
lstm_max_tanh = fluid.layers.tanh(lstm_max)
fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
return avg_cost
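# A classic text-classification stack: embedding -> fc -> dynamic LSTM ->
# max pooling over time -> tanh -> fc -> softmax, returning the mean
# cross-entropy cost.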
class TestIrMemOptRNN(TestIrMemOptBase):
def setUp(self):
self.network = lstm_net
if __name__ == "__main__":
unittest.main()
......@@ -13,21 +13,44 @@
# limitations under the License.
import os
import sys
import unittest
from timeit import default_timer as timer
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.dataset.wmt16 as wmt16
os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0"
os.environ[
'RECORDIO_FILENAME'] = '/tmp/ir_memory_optimize_transformer.wmt16.recordio'
from test_parallel_executor_transformer import TestTransformer
from test_parallel_executor_transformer import transformer
from test_parallel_executor_transformer import transformer, ModelHyperParams, transformer_model, prepare_batch_input
from parallel_executor_test_base import TestParallelExecutorBase
# NOTE(dzhwinter): test different strategy collisions.
# open the eager delete tensor strategy by default.
class TestTransformerWithIR(TestTransformer):
class TestTransformerWithIR(TestParallelExecutorBase):
@classmethod
def setUpClass(cls):
os.environ['CPU_NUM'] = str(4)
reader = paddle.batch(
wmt16.train(ModelHyperParams.src_vocab_size,
ModelHyperParams.trg_vocab_size),
batch_size=transformer_model.batch_size)
with fluid.recordio_writer.create_recordio_writer(
os.environ.get("RECORDIO_FILENAME")) as writer:
for batch in reader():
for tensor in prepare_batch_input(
batch, ModelHyperParams.src_pad_idx,
ModelHyperParams.trg_pad_idx, ModelHyperParams.n_head):
t = fluid.LoDTensor()
t.set(tensor, fluid.CPUPlace())
writer.append_tensor(t)
writer.complete_append_tensor()
def test_main(self):
if core.is_compiled_with_cuda():
# check python transpiler
......@@ -35,13 +58,15 @@ class TestTransformerWithIR(TestTransformer):
transformer,
use_cuda=True,
memory_opt=True,
use_ir_memory_optimize=False)
use_ir_memory_optimize=False,
iter=2)
# check IR memory optimize
self.check_network_convergence(
transformer,
use_cuda=True,
memory_opt=False,
use_ir_memory_optimize=True)
use_ir_memory_optimize=True,
iter=2)
if __name__ == '__main__':
......
......@@ -82,6 +82,13 @@ def piecewise_decay(global_step, boundaries, values):
return values[len(values) - 1]
def cosine_decay(global_step, learning_rate, step_each_epoch, epochs):
cur_epoch = math.floor(global_step / step_each_epoch)
decayed_lr = learning_rate * 0.5 * (
math.cos(cur_epoch * math.pi / epochs) + 1)
return decayed_lr
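# Worked example: with learning_rate=0.1 and epochs=120, the decayed lr is
# 0.1 at epoch 0, 0.05 at epoch 60 (cos(pi/2) == 0), and reaches ~0.0 at
# epoch 120 (cos(pi) == -1).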
class TestLearningRateDecay(unittest.TestCase):
def check_decay(self, python_decay_fn, fluid_decay_fn, kwargs):
places = [fluid.CPUPlace()]
......@@ -149,6 +156,11 @@ class TestLearningRateDecay(unittest.TestCase):
"boundaries": [3, 6, 9],
"values": [0.1, 0.2, 0.3, 0.4]
}),
(cosine_decay, layers.cosine_decay, {
"learning_rate": 0.1,
"step_each_epoch": 100,
"epochs": 120
}),
]
for py_decay_fn, fluid_decay_fn, kwargs in decay_fns:
......
......@@ -24,17 +24,28 @@ from op_test import OpTest
class TestSGDOp(OpTest):
def setUp(self):
self.op_type = "sgd"
w = np.random.random((102, 105)).astype("float32")
g = np.random.random((102, 105)).astype("float32")
self.conf()
w = np.random.random((self.h, self.w)).astype("float32")
g = np.random.random((self.h, self.w)).astype("float32")
lr = np.array([0.1]).astype("float32")
self.inputs = {'Param': w, 'Grad': g, 'LearningRate': lr}
self.outputs = {'ParamOut': w - lr * g}
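# Vanilla SGD update, computed in NumPy as the ground truth:
# param_out = param - learning_rate * grad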
def conf(self):
self.h = 102
self.w = 105
def test_check_output(self):
self.check_output()
class TestSGDOpCase8X(TestSGDOp):
def conf(self):
self.h = 10
self.w = 64
class TestSparseSGDOp(unittest.TestCase):
def check_with_place(self, place):
scope = core.Scope()
......@@ -42,12 +53,12 @@ class TestSparseSGDOp(unittest.TestCase):
# create and initialize Grad Variable
height = 10
rows = [0, 4, 7]
row_numel = 12
self.conf()
grad_selected_rows = scope.var('Grad').get_selected_rows()
grad_selected_rows.set_height(height)
grad_selected_rows.set_rows(rows)
np_array = np.ones((len(rows), row_numel)).astype("float32")
np_array = np.ones((len(rows), self.row_numel)).astype("float32")
np_array[0, 0] = 2.0
np_array[2, 8] = 4.0
......@@ -56,7 +67,7 @@ class TestSparseSGDOp(unittest.TestCase):
# create and initialize Param Variable
param = scope.var('Param').get_tensor()
param_array = np.full((height, row_numel), 5.0).astype("float32")
param_array = np.full((height, self.row_numel), 5.0).astype("float32")
param.set(param_array, place)
# create and initialize LearningRate Variable
......@@ -98,6 +109,14 @@ class TestSparseSGDOp(unittest.TestCase):
for place in places:
self.check_with_place(place)
def conf(self):
self.row_numel = 12
class TestSparseSGDOpCase8X(TestSparseSGDOp):
def conf(self):
self.row_numel = 16
class TestSGDOpOptimizeSelectedRows(unittest.TestCase):
def check_with_place(self, place):
......
......@@ -144,15 +144,5 @@ class TestSoftmaxFP16CUDNNOp2(TestSoftmaxFP16CUDNNOp):
return [2, 3, 4, 5]
class TestSoftmaxMKLDNNOp(TestSoftmaxOp):
def init_kernel_type(self):
self.use_mkldnn = True
class TestSoftmaxMKLDNNOp2(TestSoftmaxMKLDNNOp):
def get_x_shape(self):
return [2, 3, 4, 5]
if __name__ == "__main__":
unittest.main()
......@@ -15,6 +15,7 @@
from __future__ import print_function
import os
import sys
import numpy as np
from .. import core
from ..framework import Program
......@@ -50,6 +51,9 @@ class InferenceTranspiler(object):
place (Place): inference place
scope (Scope|None): inference Scope
'''
sys.stderr.write("InferenceTranspiler is deprecated since it's not "
"safe. Users should be "
"responsible for constructing the inference program\n")
if not isinstance(program, Program):
raise TypeError("program should be as Program type")
if not isinstance(place, core.CPUPlace) and not isinstance(
......
......@@ -15,6 +15,7 @@
from __future__ import print_function
import six
import sys
from collections import defaultdict, MutableSet
from .. import core
from ... import compat as cpt
......@@ -509,6 +510,8 @@ def memory_optimize(input_program,
Returns:
None
"""
sys.stderr.write('memory_optimize is deprecated. '
'Use CompiledProgram and Executor\n')
def to_name_str(var):
if isinstance(var, Variable):
......
......@@ -13,6 +13,7 @@
# limitations under the License.
import os
import six
class PlotData(object):
......@@ -60,9 +61,9 @@ class Ploter(object):
def append(self, title, step, value):
"""
Feed data
Args:
Feed data
Args:
title: assign the group data to this subtitle.
step: the x_axis of data.
value: the y_axis of data.
......@@ -71,9 +72,9 @@ class Ploter(object):
.. code-block:: python
plot_curve = Ploter("Curve 1","Curve 2")
plot_curve.append(title="Curve 1",step=1,value=1)
"""
assert isinstance(title, basestring)
assert self.__plot_data__.has_key(title)
"""
assert isinstance(title, six.string_types)
assert title in self.__plot_data__
data = self.__plot_data__[title]
assert isinstance(data, PlotData)
data.append(step, value)
......@@ -89,7 +90,7 @@ class Ploter(object):
.. code-block:: python
plot_curve = Ploter()
plot_cure.plot()
"""
"""
if self.__plot_is_disabled__():
return
......
......@@ -122,7 +122,7 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
def create_dataset_from_list(self, path):
data = []
label_set = []
for line in open(file_list):
for line in open(path):
items = line.rstrip().split()
image_path = items[0]
label_name = items[1]
......@@ -141,7 +141,7 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
path: the path of the image dataset.
"""
if self.from_list:
return create_dataset_from_list(path)
return self.create_dataset_from_list(path)
label_set = preprocess_util.get_label_set_from_dir(path)
data = []
for l_name in list(label_set.keys()):
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import ast
import hashlib
import importlib
import paddle.fluid
files = [
"paddle.fluid", "paddle.fluid.average", "paddle.fluid.backward",
"paddle.fluid.clip", "paddle.fluid.data_feeder", "paddle.fluid.executor",
"paddle.fluid.initializer", "paddle.fluid.io", "paddle.fluid.layers",
"paddle.fluid.metrics", "paddle.fluid.nets", "paddle.fluid.optimizer",
"paddle.fluid.profiler", "paddle.fluid.recordio_writer",
"paddle.fluid.regularizer", "paddle.fluid.transpiler"
]
def md5(doc):
hash = hashlib.md5()
hash.update(str(doc).encode('utf-8'))
return hash.hexdigest()
def get_module():
for fi in files:
fi_lib = importlib.import_module(fi)
doc_function = getattr(fi_lib, "__all__")
for api in doc_function:
api_name = fi + "." + api
try:
doc_module = getattr(eval(api_name), "__doc__")
except:
# skip this API if its docstring cannot be resolved
continue
doc_md5_code = md5(doc_module)
doc_dict[api_name] = doc_md5_code
def doc_md5_dict(doc_md5_path):
with open(doc_md5_path, "rb") as f:
doc_md5 = f.read()
doc_md5_dict = ast.literal_eval(doc_md5)
return doc_md5_dict
def check_doc_md5():
for k, v in doc_dict.items():
try:
if doc_ci_dict[k] != v:
return doc_dict
except:
return doc_dict
return True
if __name__ == "__main__":
doc_dict = {}
doc_ci_dict = {}
doc_md5_file = "/root/.cache/doc_md5.txt"
if not os.path.exists(doc_md5_file):
os.mknod(doc_md5_file)
else:
doc_ci_dict = doc_md5_dict(doc_md5_file)
get_module()
if not os.path.getsize(doc_md5_file):
with open(doc_md5_file, 'w') as f:
f.write(str(doc_dict))
check_dic = True
print(check_dic)
else:
check_dic = check_doc_md5()
print(check_dic)
#!/bin/bash
TOTAL_ERRORS=0
if [[ ! $TRAVIS_BRANCH ]]; then
# install cpplint on local machine.
if [[ ! $(which cpplint) ]]; then
pip install cpplint
fi
# diff files on local machine.
files=$(git diff --cached --name-status | awk '$1 != "D" {print $2}')
else
# diff files between PR and latest commit on Travis CI.
branch_ref=$(git rev-parse "$TRAVIS_BRANCH")
head_ref=$(git rev-parse HEAD)
files=$(git diff --name-status $branch_ref $head_ref | awk '$1 != "D" {print $2}')
fi
# The trick to remove deleted files: https://stackoverflow.com/a/2413151
for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'); do
if [[ $file =~ ^(paddle/legacy/api/.*|paddle/legacy/capi/.*|paddle/contrib/.*|paddle/legacy/cuda/.*|paddle/legacy/function/.*|paddle/legacy/gserver/.*|paddle/legacy/math/.*|paddle/legacy/optimizer/.*|paddle/legacy/parameter/.*|paddle/legacy/pserver/.*|paddle/legacy/trainer/.*|paddle/legacy/utils/.*|paddle/testing/TestUtil.*|patches/grpc/.*) ]]; then
for file in $files; do
if [[ $file =~ ^(patches/grpc/.*) ]]; then
continue;
else
cpplint --filter=-readability/fn_size $file;
......@@ -13,4 +25,3 @@ for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'); do
done
exit $TOTAL_ERRORS
......@@ -26,4 +26,10 @@ for each_diff in result:
print(each_diff)
if error:
print(
'''If you modify/add/delete the API files, including code and comments, please follow these steps in order to pass the CI:
1. cd ${paddle_path} and compile Paddle;
2. pip install build/python/dist/(the built whl package);
3. run "python tools/print_signatures.py paddle.fluid, paddle.reader > paddle/fluid/API.spec"'''
)
sys.exit(1)
......@@ -24,12 +24,19 @@ import inspect
import collections
import sys
import pydoc
import hashlib
member_dict = collections.OrderedDict()
experimental_namespace = {"paddle.fluid.imperative"}
def md5(doc):
hash = hashlib.md5()
hash.update(str(doc).encode('utf-8'))
return hash.hexdigest()
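# Hashing the docstring gives API.spec a stable fingerprint of each API's
# documentation without embedding the full text.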
def visit_member(parent_name, member):
cur_name = ".".join([parent_name, member.__name__])
if inspect.isclass(member):
......@@ -39,7 +46,10 @@ def visit_member(parent_name, member):
visit_member(cur_name, value)
elif callable(member):
try:
member_dict[cur_name] = inspect.getargspec(member)
doc = ('document', md5(member.__doc__))
args = inspect.getargspec(member)
all = (args, doc)
member_dict[cur_name] = all
except TypeError: # special for PyBind method
member_dict[cur_name] = " ".join([
line.strip() for line in pydoc.render_doc(member).split('\n')
......