diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e718b32cb6c48d11e73600509a17db107f438708..d8112837dc9627bc2e501940b8e97c89e97c45ff 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,12 +42,6 @@ repos:
         entry: bash ./tools/codestyle/pylint_pre_commit.hook
         language: system
         files: \.(py)$
--   repo: https://github.com/PaddlePaddle/pre-commit-golang
-    sha: 8337620115c25ff8333f1b1a493bd031049bd7c0
-    hooks:
-    -   id: go-fmt
-        types:
-        - go
 -   repo: local
     hooks:
     -   id: copyright_checker
diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake
index 54826cedb871690a82b535ae3ed102600277c622..ae2679db4aed7a77ad407f881c4482fd3914ac27 100644
--- a/cmake/external/mklml.cmake
+++ b/cmake/external/mklml.cmake
@@ -39,8 +39,10 @@ IF(WIN32)
     SET(MKLML_IOMP_LIB            ${MKLML_LIB_DIR}/libiomp5md.lib)
     SET(MKLML_SHARED_LIB          ${MKLML_LIB_DIR}/mklml.dll)
     SET(MKLML_SHARED_IOMP_LIB     ${MKLML_LIB_DIR}/libiomp5md.dll)
-ELSE()  
-    SET(MKLML_VER "mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
+ELSE()
+    #TODO(intel-huying):
+    #  The Erf function is enabled in the MKLML library temporarily; this will be updated to the official version later.
+    SET(MKLML_VER "Glibc225_vsErf_mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
     SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
     SET(MKLML_LIB                 ${MKLML_LIB_DIR}/libmklml_intel.so)
     SET(MKLML_IOMP_LIB            ${MKLML_LIB_DIR}/libiomp5.so)
diff --git a/cmake/external/ngraph.cmake b/cmake/external/ngraph.cmake
index 5812a61f0ddc3a3233ff212710fc1b16aa140724..e7fb69dbbc872c813b2eba16a5b1098eebfeedd8 100644
--- a/cmake/external/ngraph.cmake
+++ b/cmake/external/ngraph.cmake
@@ -37,7 +37,7 @@ INCLUDE(GNUInstallDirs)
 INCLUDE(ExternalProject)
 
 SET(NGRAPH_PROJECT         "extern_ngraph")
-SET(NGRAPH_GIT_TAG         "20bd8bbc79ae3a81c57313846a2be7313e5d1dab")
+SET(NGRAPH_GIT_TAG         "a444f7a959b7d87f2c117c9b57a4c387759e481e")
 SET(NGRAPH_SOURCES_DIR     ${THIRD_PARTY_PATH}/ngraph)
 SET(NGRAPH_INSTALL_DIR     ${THIRD_PARTY_PATH}/install/ngraph)
 SET(NGRAPH_INC_DIR         ${NGRAPH_INSTALL_DIR}/include)
@@ -69,7 +69,7 @@ ExternalProject_Add(
     CMAKE_ARGS          -DNGRAPH_DEX_ONLY=TRUE
     CMAKE_ARGS          -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
     CMAKE_ARGS          -DMKLDNN_INCLUDE_DIR=${MKLDNN_INC_DIR}
-    CMAKE_ARGS          -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/lib
+    CMAKE_ARGS          -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}
     CMAKE_ARGS          -DMKLML_LIB_DIR=${MKLML_INSTALL_DIR}/lib
 )
 
diff --git a/cmake/operators.cmake b/cmake/operators.cmake
index c2d04828564e69d7ac965881057f185194aa0475..11a5b1b4554e7899c3ee7092a9295234743750d7 100644
--- a/cmake/operators.cmake
+++ b/cmake/operators.cmake
@@ -153,7 +153,11 @@ function(op_library TARGET)
     # pybind USE_OP_DEVICE_KERNEL for CUDNN
     list(LENGTH cudnn_cu_cc_srcs cudnn_cu_cc_srcs_len)
     if (WITH_GPU AND ${cudnn_cu_cc_srcs_len} GREATER 0)
+      if(${TARGET} STREQUAL "activation")
+        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n")
+      else()
         file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
+      endif()
     endif()
 
     # pybind USE_OP_DEVICE_KERNEL for MIOPEN
@@ -168,6 +172,9 @@ function(op_library TARGET)
         file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, MKLDNN);\n")
       elseif(${MKLDNN_FILE} STREQUAL "conv_mkldnn_op")
         file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);\n")
+        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, S8);\n")
+        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, U8);\n")
+        
       else()
         file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MKLDNN);\n")
       endif()
diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 1c3192b0193b8ba2452e959c983b23df458a652a..0b5e83efef6efc60f9f0476747aa107994c64051 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -1,474 +1,475 @@
-paddle.fluid.Program.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.Program.block ArgSpec(args=['self', 'index'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.Program.clone ArgSpec(args=['self', 'for_test'], varargs=None, keywords=None, defaults=(False,))
-paddle.fluid.Program.current_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.Program.global_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.Program.list_vars ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.Program.parse_from_string ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.Program.to_string ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,))
-paddle.fluid.default_startup_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
-paddle.fluid.default_main_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
-paddle.fluid.program_guard ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.name_scope ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.Executor.__init__ ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.Executor.close ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.Executor.run ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False))
-paddle.fluid.global_scope ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
-paddle.fluid.scope_guard ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
-paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
-paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False))
-paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.Program.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.Program.block (ArgSpec(args=['self', 'index'], varargs=None, keywords=None, defaults=None), ('document', 'af5346376065ff4cf6832a8ac0ae0945'))
+paddle.fluid.Program.clone (ArgSpec(args=['self', 'for_test'], varargs=None, keywords=None, defaults=(False,)), ('document', 'ebb7765b2962bd2be041d19720e49d0f'))
+paddle.fluid.Program.current_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '5e162d3bf8dd625703463d9e4be36adb'))
+paddle.fluid.Program.global_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'cfb7e05a002b2e64650778cabde7301c'))
+paddle.fluid.Program.list_vars (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '1c8647b14fe57c7824b1c9562394dd3c'))
+paddle.fluid.Program.parse_from_string (ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None), ('document', 'b6a7ffb239a30bf2ce58cfaca8d8b8d5'))
+paddle.fluid.Program.to_string (ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)), ('document', 'faec17e5a04af28e3776160e34504d15'))
+paddle.fluid.default_startup_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '99e5d53d92d82797093332719c9e3ccd'))
+paddle.fluid.default_main_program (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '5430f54ab4895f9f47db6bebbaf71659'))
+paddle.fluid.program_guard (ArgSpec(args=['main_program', 'startup_program'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b54f403e57825a1592aece03afe3afb6'))
+paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, defaults=(None,)), ('document', '0ef753f5cec69fef9ae6ad8b867b33a2'))
+paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '78e512cabeda9c7f42cb7c7e88967ae7'))
+paddle.fluid.Executor.run (ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)), ('document', 'aba8093edebf2d5c869b735b92811e45'))
+paddle.fluid.global_scope (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'e148d3ab1ed8edf3e928212a375959c0'))
+paddle.fluid.scope_guard (ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None), ('document', 'b94d1f6bcc29c4fb58fc0058561250c2'))
+paddle.fluid.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.DistributeTranspiler.get_pserver_program (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '292ab72977afbe58e6a3bde175452680'))
+paddle.fluid.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '78f4949aedf317666a89ca74b3748ba8'))
+paddle.fluid.DistributeTranspiler.get_startup_program (ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'd796fc0c8d51503b556fcf6dc15c4f0c'))
+paddle.fluid.DistributeTranspiler.get_trainer_program (ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)), ('document', '736330e31a7a54abccc0c7fd9119d9ff'))
+paddle.fluid.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '06ce55338dfe96311ad1078235ab3bf4'))
+paddle.fluid.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', 'eda17d0f1639bc6ca215cecf87f588a4'))
+paddle.fluid.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ac4114d3df16264f1946deb3a8434a6f'))
 paddle.fluid.DistributeTranspilerConfig.__init__ 
-paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
-paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True))
-paddle.fluid.create_lod_tensor ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.create_random_int_lodtensor ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.DataFeedDesc.__init__ ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.DataFeedDesc.desc ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.DataFeedDesc.set_batch_size ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.DataFeedDesc.set_dense_slots ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.DataFeedDesc.set_use_slots ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.AsyncExecutor.__init__ ArgSpec(args=['self', 'place', 'run_mode'], varargs=None, keywords=None, defaults=(None, ''))
-paddle.fluid.AsyncExecutor.config_distributed_nodes ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.AsyncExecutor.download_data ArgSpec(args=['self', 'afs_path', 'local_path', 'fs_default_name', 'ugi', 'file_cnt', 'hadoop_home', 'process_num'], varargs=None, keywords=None, defaults=('$HADOOP_HOME', 12))
-paddle.fluid.AsyncExecutor.get_instance ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.AsyncExecutor.init_model ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.AsyncExecutor.init_server ArgSpec(args=['self', 'dist_desc'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.AsyncExecutor.init_worker ArgSpec(args=['self', 'dist_desc', 'startup_program'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.AsyncExecutor.run ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'mode', 'debug'], varargs=None, keywords=None, defaults=('', False))
-paddle.fluid.AsyncExecutor.save_model ArgSpec(args=['self', 'save_path'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.AsyncExecutor.stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.CompiledProgram.__init__ ArgSpec(args=['self', 'program_or_graph'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.CompiledProgram.with_data_parallel ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.CompiledProgram.with_inference_optimize ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.ParallelExecutor.__init__ (ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.ParallelExecutor.run (ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '2cb4bd74481861345c70228a0f57620c'))
+paddle.fluid.create_lod_tensor (ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None), ('document', '8e7bb21e83ff4604f5b379672e285b94'))
+paddle.fluid.create_random_int_lodtensor (ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None), ('document', '368f638b99f1dfe59e9b02aa6f077752'))
+paddle.fluid.DataFeedDesc.__init__ (ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.DataFeedDesc.desc (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '4294493e31c4bc9fc4bd48753044235f'))
+paddle.fluid.DataFeedDesc.set_batch_size (ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '8d9f44601e0a99dd431f14fd9250cd21'))
+paddle.fluid.DataFeedDesc.set_dense_slots (ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None), ('document', 'eb894b464bbcd1b4bc8038398954f766'))
+paddle.fluid.DataFeedDesc.set_use_slots (ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None), ('document', '415c56600ce4e198c071cad01409a690'))
+paddle.fluid.AsyncExecutor.__init__ (ArgSpec(args=['self', 'place', 'run_mode'], varargs=None, keywords=None, defaults=(None, '')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.AsyncExecutor.config_distributed_nodes (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '4810dbe1870452f16b3c60b6c5fd1459'))
+paddle.fluid.AsyncExecutor.download_data (ArgSpec(args=['self', 'afs_path', 'local_path', 'fs_default_name', 'ugi', 'file_cnt', 'hadoop_home', 'process_num'], varargs=None, keywords=None, defaults=('$HADOOP_HOME', 12)), ('document', '799a2066cc26819f1ed31f47c15ad083'))
+paddle.fluid.AsyncExecutor.get_instance (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f8688f76a2db1243c7097a60c507b182'))
+paddle.fluid.AsyncExecutor.init_model (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '504f39be2007404a17e5cabea1256c7d'))
+paddle.fluid.AsyncExecutor.init_server (ArgSpec(args=['self', 'dist_desc'], varargs=None, keywords=None, defaults=None), ('document', 'c403ab46c5d3ef25c0f7e94ae75dcb68'))
+paddle.fluid.AsyncExecutor.init_worker (ArgSpec(args=['self', 'dist_desc', 'startup_program'], varargs=None, keywords=None, defaults=None), ('document', 'dcf08f4bf2f3282acf11391f5d39c536'))
+paddle.fluid.AsyncExecutor.run (ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'mode', 'debug'], varargs=None, keywords=None, defaults=('', False)), ('document', '848fc53484e8326f6325feea87fe955c'))
+paddle.fluid.AsyncExecutor.save_model (ArgSpec(args=['self', 'save_path'], varargs=None, keywords=None, defaults=None), ('document', 'c8ac0dfcb3b187aba25d03af7fea56b2'))
+paddle.fluid.AsyncExecutor.stop (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '5f23d043607bb5d55e466ec3f578e093'))
+paddle.fluid.CompiledProgram.__init__ (ArgSpec(args=['self', 'program_or_graph'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.CompiledProgram.with_data_parallel (ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'e1af7fd53cf868554f312779fc803864'))
+paddle.fluid.CompiledProgram.with_inference_optimize (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=None), ('document', '9e5b009d850191a010e859189c127fd8'))
 paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.ExecutionStrategy) -> None
 paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.GradientScaleStrategy, arg0: int) -> None
 paddle.fluid.BuildStrategy.ReduceStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.ReduceStrategy, arg0: int) -> None
 paddle.fluid.BuildStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy) -> None
-paddle.fluid.io.save_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.io.save_params ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.io.save_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.io.load_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.io.load_params ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.io.load_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.io.save_inference_model ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True))
-paddle.fluid.io.load_inference_model ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.initializer.ConstantInitializer.__init__ ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False))
-paddle.fluid.initializer.UniformInitializer.__init__ ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0))
-paddle.fluid.initializer.NormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0))
-paddle.fluid.initializer.TruncatedNormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0))
-paddle.fluid.initializer.XavierInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'fan_out', 'seed'], varargs=None, keywords=None, defaults=(True, None, None, 0))
-paddle.fluid.initializer.BilinearInitializer.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.initializer.MSRAInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'seed'], varargs=None, keywords=None, defaults=(True, None, 0))
-paddle.fluid.initializer.force_init_on_cpu ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
-paddle.fluid.initializer.init_on_cpu ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
-paddle.fluid.initializer.NumpyArrayInitializer.__init__ ArgSpec(args=['self', 'value'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.fc ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', 'act', 'is_test', 'name'], varargs=None, keywords=None, defaults=(1, None, None, None, False, None))
-paddle.fluid.layers.embedding ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32'))
-paddle.fluid.layers.dynamic_lstm ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None))
-paddle.fluid.layers.dynamic_lstmp ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None, None, None, None, None))
-paddle.fluid.layers.dynamic_gru ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False))
-paddle.fluid.layers.gru_unit ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False))
-paddle.fluid.layers.linear_chain_crf ArgSpec(args=['input', 'label', 'param_attr'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.crf_decoding ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.cos_sim ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.cross_entropy ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100))
-paddle.fluid.layers.bpr_loss ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.square_error_cost ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.chunk_eval ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.sequence_conv ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None, None))
-paddle.fluid.layers.conv2d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None))
-paddle.fluid.layers.conv3d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None))
-paddle.fluid.layers.sequence_pool ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,))
-paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None))
-paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None))
-paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
-paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
-paddle.fluid.layers.adaptive_pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None))
-paddle.fluid.layers.adaptive_pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None))
-paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False))
-paddle.fluid.layers.data_norm ArgSpec(args=['input', 'act', 'epsilon', 'param_attr', 'data_layout', 'in_place', 'use_mkldnn', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var'], varargs=None, keywords=None, defaults=(None, 1e-05, None, 'NCHW', False, False, None, None, None, False))
-paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
-paddle.fluid.layers.conv3d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
-paddle.fluid.layers.sequence_expand ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None))
-paddle.fluid.layers.sequence_expand_as ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.sequence_pad ArgSpec(args=['x', 'pad_value', 'maxlen', 'name'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.layers.sequence_unpad ArgSpec(args=['x', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.lstm_unit ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None))
-paddle.fluid.layers.reduce_sum ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
-paddle.fluid.layers.reduce_mean ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
-paddle.fluid.layers.reduce_max ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
-paddle.fluid.layers.reduce_min ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
-paddle.fluid.layers.reduce_prod ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
-paddle.fluid.layers.sequence_first_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.sequence_last_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.sequence_slice ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.dropout ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 'downgrade_in_infer'))
-paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None))
-paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.edit_distance ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None))
-paddle.fluid.layers.l2_normalize ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None))
-paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None))
-paddle.fluid.layers.topk ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, False, False))
-paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.transpose ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.im2sequence ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None))
-paddle.fluid.layers.nce ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False))
-paddle.fluid.layers.sampled_softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'num_samples', 'num_true', 'remove_accidental_hits', 'use_customized_samples', 'customized_samples', 'customized_probabilities', 'seed'], varargs=None, keywords=None, defaults=(1, True, False, None, None, 0))
-paddle.fluid.layers.hsigmoid ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False))
-paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False))
-paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None))
-paddle.fluid.layers.group_norm ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None))
-paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, False, False))
-paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.autoincreased_step_counter ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1))
-paddle.fluid.layers.reshape ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, False, None))
-paddle.fluid.layers.squeeze ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.unsqueeze ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.lod_reset ArgSpec(args=['x', 'y', 'target_lod'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.layers.lrn ArgSpec(args=['input', 'n', 'k', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(5, 1.0, 0.0001, 0.75, None))
-paddle.fluid.layers.pad ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None))
-paddle.fluid.layers.pad_constant_like ArgSpec(args=['x', 'y', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None))
-paddle.fluid.layers.label_smooth ArgSpec(args=['label', 'prior_dist', 'epsilon', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 0.1, 'float32', None))
-paddle.fluid.layers.roi_pool ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0))
-paddle.fluid.layers.roi_align ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None))
-paddle.fluid.layers.dice_loss ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,))
-paddle.fluid.layers.image_resize ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1))
-paddle.fluid.layers.image_resize_short ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',))
-paddle.fluid.layers.resize_bilinear ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1))
-paddle.fluid.layers.resize_nearest ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners'], varargs=None, keywords=None, defaults=(None, None, None, None, True))
-paddle.fluid.layers.gather ArgSpec(args=['input', 'index'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.scatter ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.sequence_scatter ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.random_crop ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.mean_iou ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.relu ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.selu ArgSpec(args=['x', 'scale', 'alpha', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.layers.log ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.margin_rank_loss ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None))
-paddle.fluid.layers.elu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
-paddle.fluid.layers.relu6 ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None))
-paddle.fluid.layers.pow ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
-paddle.fluid.layers.stanh ArgSpec(args=['x', 'scale_a', 'scale_b', 'name'], varargs=None, keywords=None, defaults=(0.6666666666666666, 1.7159, None))
-paddle.fluid.layers.hard_sigmoid ArgSpec(args=['x', 'slope', 'offset', 'name'], varargs=None, keywords=None, defaults=(0.2, 0.5, None))
-paddle.fluid.layers.swish ArgSpec(args=['x', 'beta', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
-paddle.fluid.layers.prelu ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.layers.brelu ArgSpec(args=['x', 't_min', 't_max', 'name'], varargs=None, keywords=None, defaults=(0.0, 24.0, None))
-paddle.fluid.layers.leaky_relu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(0.02, None))
-paddle.fluid.layers.soft_relu ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(40.0, None))
-paddle.fluid.layers.flatten ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None))
-paddle.fluid.layers.sequence_mask ArgSpec(args=['x', 'maxlen', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 'int64', None))
-paddle.fluid.layers.stack ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,))
-paddle.fluid.layers.pad2d ArgSpec(args=['input', 'paddings', 'mode', 'pad_value', 'data_format', 'name'], varargs=None, keywords=None, defaults=([0, 0, 0, 0], 'constant', 0.0, 'NCHW', None))
-paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None))
-paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
-paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None))
-paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None))
-paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None))
-paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None))
-paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None))
-paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None))
-paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None))
-paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None))
-paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0))
-paddle.fluid.layers.gaussian_random ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32'))
-paddle.fluid.layers.sampling_id ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32'))
-paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32'))
-paddle.fluid.layers.sum ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.slice ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.shape ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.logical_and ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.layers.logical_or ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.layers.logical_xor ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.layers.logical_not ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.layers.clip ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.clip_by_norm ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None))
-paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'ignore_index', 'name', 'normalize'], varargs=None, keywords=None, defaults=(-100, None, False))
-paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.space_to_depth ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.affine_grid ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.sequence_reverse ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None))
-paddle.fluid.layers.similarity_focus ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None))
-paddle.fluid.layers.grid_sampler ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.log_loss ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None))
-paddle.fluid.layers.add_position_encoding ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.bilinear_tensor_product ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.layers.merge_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.get_tensor_from_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.lstm ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1))
-paddle.fluid.layers.shuffle_channel ArgSpec(args=['x', 'group', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.py_func ArgSpec(args=['func', 'x', 'out', 'backward_func', 'skip_vars_in_backward_input'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.layers.psroi_pool ArgSpec(args=['input', 'rois', 'output_channels', 'spatial_scale', 'pooled_height', 'pooled_width', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.teacher_student_sigmoid_loss ArgSpec(args=['input', 'label', 'soft_max_up_bound', 'soft_max_lower_bound'], varargs=None, keywords=None, defaults=(15.0, -15.0))
-paddle.fluid.layers.huber_loss ArgSpec(args=['input', 'label', 'delta'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.tree_conv ArgSpec(args=['nodes_vector', 'edge_set', 'output_size', 'num_filters', 'max_depth', 'act', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 2, 'tanh', None, None, None))
-paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
-paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
-paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.shuffle ArgSpec(args=['reader', 'buffer_size'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.batch ArgSpec(args=['reader', 'batch_size'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.double_buffer ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.layers.random_data_generator ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,))
-paddle.fluid.layers.py_reader ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True))
-paddle.fluid.layers.create_py_reader_by_data ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True))
-paddle.fluid.layers.Preprocessor.__init__ ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.Preprocessor.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.Preprocessor.inputs ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.Preprocessor.outputs ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None)
-paddle.fluid.layers.load ArgSpec(args=['out', 'file_path', 'load_as_fp16'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.create_tensor ArgSpec(args=['dtype', 'name', 'persistable'], varargs=None, keywords=None, defaults=(None, False))
-paddle.fluid.layers.create_parameter ArgSpec(args=['shape', 'dtype', 'name', 'attr', 'is_bias', 'default_initializer'], varargs=None, keywords=None, defaults=(None, None, False, None))
-paddle.fluid.layers.create_global_var ArgSpec(args=['shape', 'value', 'dtype', 'persistable', 'force_cpu', 'name'], varargs=None, keywords=None, defaults=(False, False, None))
-paddle.fluid.layers.cast ArgSpec(args=['x', 'dtype'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.tensor_array_to_tensor ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None))
-paddle.fluid.layers.concat ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(0, None))
-paddle.fluid.layers.sums ArgSpec(args=['input', 'out'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.assign ArgSpec(args=['input', 'output'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.fill_constant_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'value', 'input_dim_idx', 'output_dim_idx'], varargs=None, keywords=None, defaults=(0, 0))
-paddle.fluid.layers.fill_constant ArgSpec(args=['shape', 'dtype', 'value', 'force_cpu', 'out'], varargs=None, keywords=None, defaults=(False, None))
-paddle.fluid.layers.argmin ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,))
-paddle.fluid.layers.argmax ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,))
-paddle.fluid.layers.argsort ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(-1, None))
-paddle.fluid.layers.ones ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
-paddle.fluid.layers.zeros ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
-paddle.fluid.layers.reverse ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.has_inf ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.has_nan ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.isfinite ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None))
-paddle.fluid.layers.While.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.Switch.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.Switch.case ArgSpec(args=['self', 'condition'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.Switch.default ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.increment ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True))
-paddle.fluid.layers.array_write ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.create_array ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.less_than ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None))
-paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.array_read ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.array_length ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.IfElse.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.IfElse.false_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.IfElse.input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.IfElse.output ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None)
-paddle.fluid.layers.IfElse.true_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.DynamicRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.DynamicRNN.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.DynamicRNN.memory ArgSpec(args=['self', 'init', 'shape', 'value', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, False, 'float32'))
-paddle.fluid.layers.DynamicRNN.output ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None)
-paddle.fluid.layers.DynamicRNN.static_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.DynamicRNN.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.DynamicRNN.update_memory ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.StaticRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.StaticRNN.memory ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1))
-paddle.fluid.layers.StaticRNN.output ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None)
-paddle.fluid.layers.StaticRNN.step ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.StaticRNN.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.StaticRNN.step_output ArgSpec(args=['self', 'o'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.StaticRNN.update_memory ArgSpec(args=['self', 'mem', 'var'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.reorder_lod_tensor_by_rank ArgSpec(args=['x', 'rank_table'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.Print ArgSpec(args=['input', 'first_n', 'message', 'summarize', 'print_tensor_name', 'print_tensor_type', 'print_tensor_shape', 'print_tensor_lod', 'print_phase'], varargs=None, keywords=None, defaults=(-1, None, -1, True, True, True, True, 'both'))
-paddle.fluid.layers.is_empty ArgSpec(args=['x', 'cond'], varargs=None, keywords='ignored', defaults=(None,))
-paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.exp ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.tanh ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.tanh_shrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.softshrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.sqrt ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.abs ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.ceil ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.floor ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.cos ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.sin ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.round ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.reciprocal ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.square ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.softplus ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.softsign ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.uniform_random ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', -1.0, 1.0, 0))
-paddle.fluid.layers.hard_shrink ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.cumsum ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.layers.thresholded_relu ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.prior_box ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False))
-paddle.fluid.layers.density_prior_box ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None))
-paddle.fluid.layers.multi_box_head ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False))
-paddle.fluid.layers.bipartite_match ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.layers.target_assign ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.layers.detection_output ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0))
-paddle.fluid.layers.ssd_loss ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None))
-paddle.fluid.layers.detection_map ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral'))
-paddle.fluid.layers.rpn_target_assign ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True))
-paddle.fluid.layers.anchor_generator ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None))
-paddle.fluid.layers.roi_perspective_transform ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,))
-paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True))
-paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None))
-paddle.fluid.layers.generate_mask_labels ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0))
-paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.box_clip ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.multiclass_nms ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None))
-paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
-paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1))
-paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
-paddle.fluid.layers.natural_exp_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
-paddle.fluid.layers.inverse_time_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
-paddle.fluid.layers.polynomial_decay ArgSpec(args=['learning_rate', 'decay_steps', 'end_learning_rate', 'power', 'cycle'], varargs=None, keywords=None, defaults=(0.0001, 1.0, False))
-paddle.fluid.layers.piecewise_decay ArgSpec(args=['boundaries', 'values'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.noam_decay ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.layers.append_LARS ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.InitState.__init__ ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32'))
-paddle.fluid.contrib.StateCell.__init__ ArgSpec(args=['self', 'inputs', 'states', 'out_state', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.contrib.StateCell.compute_state ArgSpec(args=['self', 'inputs'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.StateCell.get_input ArgSpec(args=['self', 'input_name'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.StateCell.get_state ArgSpec(args=['self', 'state_name'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.StateCell.out_state ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.StateCell.set_state ArgSpec(args=['self', 'state_name', 'state_value'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.StateCell.state_updater ArgSpec(args=['self', 'updater'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.StateCell.update_states ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.TrainingDecoder.__init__ ArgSpec(args=['self', 'state_cell', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.contrib.TrainingDecoder.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.TrainingDecoder.output ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None)
-paddle.fluid.contrib.TrainingDecoder.static_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.TrainingDecoder.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.BeamSearchDecoder.__init__ ArgSpec(args=['self', 'state_cell', 'init_ids', 'init_scores', 'target_dict_dim', 'word_dim', 'input_var_dict', 'topk_size', 'sparse_emb', 'max_len', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=({}, 50, True, 100, 1, 1, None))
-paddle.fluid.contrib.BeamSearchDecoder.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.BeamSearchDecoder.decode ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.BeamSearchDecoder.early_stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init', 'is_ids', 'is_scores'], varargs=None, keywords=None, defaults=(False, False))
-paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.op_freq_statistic ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000))
-paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None))
-paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.contrib.Calibrator.__init__ ArgSpec(args=['self'], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.contrib.Calibrator.sample_data ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.Calibrator.save_int8_model ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.reader.ctr_reader.ctr_reader ArgSpec(args=['feed_dict', 'file_type', 'file_format', 'dense_slot_index', 'sparse_slot_index', 'capacity', 'thread_num', 'batch_size', 'file_list', 'slots', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.contrib.build_compressor ArgSpec(args=['place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'config'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None))
-paddle.fluid.contrib.CompressPass.__init__ ArgSpec(args=['self', 'place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'program_exe'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None))
-paddle.fluid.contrib.CompressPass.add_strategy ArgSpec(args=['self', 'strategy'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.CompressPass.apply ArgSpec(args=['self', 'graph'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.ImitationGraph.__init__ ArgSpec(args=['self', 'program'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.contrib.ImitationGraph.all_parameters ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.SensitivePruneStrategy.__init__ ArgSpec(args=['self', 'pruner', 'start_epoch', 'end_epoch', 'delta_rate', 'acc_loss_threshold', 'sensitivities'], varargs=None, keywords=None, defaults=(None, 0, 10, 0.2, 0.2, None))
-paddle.fluid.contrib.SensitivePruneStrategy.on_batch_begin ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.SensitivePruneStrategy.on_batch_end ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.SensitivePruneStrategy.on_compress_begin ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.SensitivePruneStrategy.on_compress_end ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_begin ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_end ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.MagnitudePruner.__init__ ArgSpec(args=['self', 'threshold'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.MagnitudePruner.prune ArgSpec(args=['self', 'param', 'threshold'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.contrib.RatioPruner.__init__ ArgSpec(args=['self', 'ratios'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.contrib.RatioPruner.prune ArgSpec(args=['self', 'param', 'ratio'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.contrib.load_persistables_for_increment ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.load_persistables_for_inference ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.convert_dist_to_sparse_program ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.HDFSClient.__init__ ArgSpec(args=['self', 'hadoop_home', 'configs'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.HDFSClient.delete ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.HDFSClient.download ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'unzip'], varargs=None, keywords=None, defaults=(False, False))
-paddle.fluid.contrib.HDFSClient.is_dir ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.contrib.HDFSClient.is_exist ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.contrib.HDFSClient.ls ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.HDFSClient.lsr ArgSpec(args=['self', 'hdfs_path', 'only_file', 'sort'], varargs=None, keywords=None, defaults=(True, True))
-paddle.fluid.contrib.HDFSClient.make_local_dirs ArgSpec(args=['local_path'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.HDFSClient.makedirs ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.contrib.HDFSClient.rename ArgSpec(args=['self', 'hdfs_src_path', 'hdfs_dst_path', 'overwrite'], varargs=None, keywords=None, defaults=(False,))
-paddle.fluid.contrib.HDFSClient.upload ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'retry_times'], varargs=None, keywords=None, defaults=(False, 5))
-paddle.fluid.contrib.multi_download ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,))
-paddle.fluid.contrib.multi_upload ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True))
-paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
-paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
-paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False))
-paddle.fluid.transpiler.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.transpiler.HashName.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.transpiler.HashName.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.transpiler.HashName.reset ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.transpiler.RoundRobin.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.transpiler.RoundRobin.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.transpiler.RoundRobin.reset ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.io.save_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'b55d6193a1d4198d45b013fc5779e1f2'))
+paddle.fluid.io.save_params (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '3a7a99abac3e1bf898871fe609354218'))
+paddle.fluid.io.save_persistables (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '9141bb5f32caf7975eb3fd88c8a1b2da'))
+paddle.fluid.io.load_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '0a5308f496632ab1ec3ba1f1377e6f95'))
+paddle.fluid.io.load_params (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '41779819cef32f2246e83aebc5a002e2'))
+paddle.fluid.io.load_persistables (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '28df5bfe26ca7a077f91156abb0fe6d2'))
+paddle.fluid.io.save_inference_model (ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True)), ('document', '582d87b8df75a5a639a107db8ff86f9c'))
+paddle.fluid.io.load_inference_model (ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '7a5255386075dac3c75b7058254fcdcb'))
+paddle.fluid.initializer.ConstantInitializer.__init__ (ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.initializer.UniformInitializer.__init__ (ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.initializer.NormalInitializer.__init__ (ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.initializer.TruncatedNormalInitializer.__init__ (ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.initializer.XavierInitializer.__init__ (ArgSpec(args=['self', 'uniform', 'fan_in', 'fan_out', 'seed'], varargs=None, keywords=None, defaults=(True, None, None, 0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.initializer.BilinearInitializer.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'd389912dc079cbef432335a00017cec0'))
+paddle.fluid.initializer.MSRAInitializer.__init__ (ArgSpec(args=['self', 'uniform', 'fan_in', 'seed'], varargs=None, keywords=None, defaults=(True, None, 0)), ('document', '53c757bed9345f2ad3361902531e7cf5'))
+paddle.fluid.initializer.force_init_on_cpu (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '6d0f3e22c90d9d500d36ff57daf056ee'))
+paddle.fluid.initializer.init_on_cpu (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'a6d7011ca3d8c0d454dac3a56eae0c29'))
+paddle.fluid.initializer.NumpyArrayInitializer.__init__ (ArgSpec(args=['self', 'value'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.fc (ArgSpec(args=['input', 'size', 'num_flatten_dims', 'param_attr', 'bias_attr', 'act', 'is_test', 'name'], varargs=None, keywords=None, defaults=(1, None, None, None, False, None)), ('document', '1929058262994f212620599c63aea6bd'))
+paddle.fluid.layers.embedding (ArgSpec(args=['input', 'size', 'is_sparse', 'is_distributed', 'padding_idx', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, False, None, None, 'float32')), ('document', '89c2c55a0b0656b106064048e068e77a'))
+paddle.fluid.layers.dynamic_lstm (ArgSpec(args=['input', 'size', 'h_0', 'c_0', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'float32', None)), ('document', 'dfbb624f85015df29e994ca6999e8ff6'))
+paddle.fluid.layers.dynamic_lstmp (ArgSpec(args=['input', 'size', 'proj_size', 'param_attr', 'bias_attr', 'use_peepholes', 'is_reverse', 'gate_activation', 'cell_activation', 'candidate_activation', 'proj_activation', 'dtype', 'name', 'h_0', 'c_0', 'cell_clip', 'proj_clip'], varargs=None, keywords=None, defaults=(None, None, True, False, 'sigmoid', 'tanh', 'tanh', 'tanh', 'float32', None, None, None, None, None)), ('document', 'b4b608b986eb9617aa0525e1be21d32d'))
+paddle.fluid.layers.dynamic_gru (ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False)), ('document', '4ec4845fd7d991bcac822f8b0dfc101f'))
+paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False)), ('document', 'e0e2439f7af069b57badca18a6ba60b8'))
+paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr'], varargs=None, keywords=None, defaults=(None,)), ('document', '7c49ef4bbf0adfd4b9a1d98e2e5f3fea'))
+paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,)), ('document', '7642373ab65d3fc3b96d16d10fef1538'))
+paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', 'd740824aa7316b807c4b4a3c6c8c0bbe'))
+paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', '025b364dafb4b7975c801eb33e7831a1'))
+paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '30add751a0f99347a6257634c03ff254'))
+paddle.fluid.layers.square_error_cost (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', '44b6eef4a0f2bc15f7d9745782406736'))
+paddle.fluid.layers.chunk_eval (ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ee152a7ba3036e7b9ede9184545179b4'))
+paddle.fluid.layers.sequence_conv (ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None, None)), ('document', 'b6543768e1afaa2ecb869709d6e9c7e2'))
+paddle.fluid.layers.conv2d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '8ca6121acd6d23cd8806a93f493c2e17'))
+paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)), ('document', '37042620f9bd3a2da6e5d3138b2f724b'))
+paddle.fluid.layers.sequence_pool (ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,)), ('document', 'a194fb80614023f543df3949fbd0d0b8'))
+paddle.fluid.layers.sequence_softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '19ef6f9cdd27feac8a1ae060f19c10b4'))
+paddle.fluid.layers.softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', 'f19dd380864e61134ce3814e4be0de4b'))
+paddle.fluid.layers.pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', 'bbd84e855e660cd1084bb71a2fd0cdaa'))
+paddle.fluid.layers.pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', '043de7333b79ee0ac55053c14ed81625'))
+paddle.fluid.layers.adaptive_pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '859b887174d06f361658f69cb7c06d95'))
+paddle.fluid.layers.adaptive_pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '120f4323a3d7ed9c0916f15a59f0e497'))
+paddle.fluid.layers.batch_norm (ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)), ('document', 'c527b71b8a4c60dca8df8a745c2b598d'))
+paddle.fluid.layers.data_norm (ArgSpec(args=['input', 'act', 'epsilon', 'param_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var'], varargs=None, keywords=None, defaults=(None, 1e-05, None, 'NCHW', False, None, None, None, False)), ('document', 'e45e09e65a2658e07cad987222f0d9ab'))
+paddle.fluid.layers.beam_search_decode (ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b0b8d53821716cd50c42e09b593f3feb'))
+paddle.fluid.layers.conv2d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)), ('document', '03993955ab1e6d3044c44e6f17fc85e9'))
+paddle.fluid.layers.conv3d_transpose (ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)), ('document', 'ec113c6a3686ac94f8fccd1a7953d445'))
+paddle.fluid.layers.sequence_expand (ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '79c375214fa427faac504043d162dae9'))
+paddle.fluid.layers.sequence_expand_as (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '9d2611f84ab364c5da545e6a82f1770a'))
+paddle.fluid.layers.sequence_pad (ArgSpec(args=['x', 'pad_value', 'maxlen', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6a1adf3067b20f6e4bcb354d71c19184'))
+paddle.fluid.layers.sequence_unpad (ArgSpec(args=['x', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd12803c903c99aa36ec03aaac5f0cc5b'))
+paddle.fluid.layers.lstm_unit (ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None)), ('document', '027723966f3ef0d7bc598f22287a96cc'))
+paddle.fluid.layers.reduce_sum (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'b69998ce3ff4980fb21da0df05565f1b'))
+paddle.fluid.layers.reduce_mean (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'd4d80dd98a1a5839f41eeb3a0f85f370'))
+paddle.fluid.layers.reduce_max (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', '66a622db727551761ce4eb73eaa7f6a4'))
+paddle.fluid.layers.reduce_min (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'd50ac552b5d131468ed466d08bb2d38c'))
+paddle.fluid.layers.reduce_prod (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'fcd8301a0ce15f219c7a4bcd0c1e8eca'))
+paddle.fluid.layers.sequence_first_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '2b290d3d77882bfe9bb8d331cac8cdd3'))
+paddle.fluid.layers.sequence_last_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', 'c16a892f44f7fe71bfa5afc32d3f34ce'))
+paddle.fluid.layers.sequence_slice (ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'fdcea0e8b5bc7d8d4b1b072c521014e6'))
+paddle.fluid.layers.dropout (ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 'downgrade_in_infer')), ('document', 'dc7042734c6d8b8ce97321f017f01d6f'))
+paddle.fluid.layers.split (ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '652625345c2acb900029c78cc75f8aa6'))
+paddle.fluid.layers.ctc_greedy_decoder (ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ebbf2adbd79683dc93db03454dfa18c2'))
+paddle.fluid.layers.edit_distance (ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None)), ('document', '97f0262f97602644c83142789d784571'))
+paddle.fluid.layers.l2_normalize (ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None)), ('document', '6e428384ce6a77207fa2c70d9f011990'))
+paddle.fluid.layers.matmul (ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None)), ('document', 'b4cbe1ac451005df6dad12e9ffdccca9'))
+paddle.fluid.layers.topk (ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd3570c02f71bcd78e60b3f31dc8f5b32'))
+paddle.fluid.layers.warpctc (ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, False, False)), ('document', 'aaba49c038ba927f0a8e45c0c9a686ab'))
+paddle.fluid.layers.sequence_reshape (ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None), ('document', 'a10ab9bf88d4a7e328882d411abb6fd1'))
+paddle.fluid.layers.transpose (ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a1feac48b843d679db82312dc85885f4'))
+paddle.fluid.layers.im2sequence (ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None)), ('document', '3ce01160ede80b1c26f776f8fef9340f'))
+paddle.fluid.layers.nce (ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False)), ('document', 'fddad4896dee5193e1cdf70882c2a347'))
+paddle.fluid.layers.sampled_softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'num_samples', 'num_true', 'remove_accidental_hits', 'use_customized_samples', 'customized_samples', 'customized_probabilities', 'seed'], varargs=None, keywords=None, defaults=(1, True, False, None, None, 0)), ('document', '5db30b8a74e8c93687943a3e8d221da0'))
+paddle.fluid.layers.hsigmoid (ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False)), ('document', '80641ee6810b1cdc3fd6e14fc89ecc9d'))
+paddle.fluid.layers.beam_search (ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False)), ('document', 'b350b9a30a18e7efd7e1bb740eef6996'))
+paddle.fluid.layers.row_conv (ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)), ('document', '17485788fffe4e2d36dc58c2ac8d174e'))
+paddle.fluid.layers.multiplex (ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None), ('document', '013795af319e2e86d3506741941078ee'))
+paddle.fluid.layers.layer_norm (ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)), ('document', 'de6a906950bae9f3c245cb744d22b94e'))
+paddle.fluid.layers.group_norm (ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None)), ('document', '419c3a24a83cc89219a029cf4092788b'))
+paddle.fluid.layers.softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, True, False)), ('document', 'bce1b75e3d95b75cacd1099655cbb3c3'))
+paddle.fluid.layers.smooth_l1 (ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'c6b175d253c55baf4b9c0eca9b1dda88'))
+paddle.fluid.layers.one_hot (ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None), ('document', '6148b6a555cbfb62fdcd030d8982c18c'))
+paddle.fluid.layers.autoincreased_step_counter (ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1)), ('document', '3f6c828594720c9b2da89c464be94478'))
+paddle.fluid.layers.reshape (ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, False, None)), ('document', '323c019f257e55ddea4a824a362de62f'))
+paddle.fluid.layers.squeeze (ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '3229d06517f794e86ca3da14c38b1465'))
+paddle.fluid.layers.unsqueeze (ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'bbd62da391b1df984a1909d069a759b2'))
+paddle.fluid.layers.lod_reset (ArgSpec(args=['x', 'y', 'target_lod'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'f122194c562bd674f6ecdccf33785f99'))
+paddle.fluid.layers.lrn (ArgSpec(args=['input', 'n', 'k', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(5, 1.0, 0.0001, 0.75, None)), ('document', '0795e9940e42dcd62953514ff7e09f77'))
+paddle.fluid.layers.pad (ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)), ('document', '2f28153bdd2d5ea6f7bad5867bd03eeb'))
+paddle.fluid.layers.pad_constant_like (ArgSpec(args=['x', 'y', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)), ('document', 'd2e1f45fef51b2c214e3f2aa8976c46c'))
+paddle.fluid.layers.label_smooth (ArgSpec(args=['label', 'prior_dist', 'epsilon', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 0.1, 'float32', None)), ('document', '70c113658102a11cc5d8e3d45145737a'))
+paddle.fluid.layers.roi_pool (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0)), ('document', 'c317aa595deb31649083c8faa91cdb97'))
+paddle.fluid.layers.roi_align (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None)), ('document', '12c5bbb8b38c42e623fbc47611d766e1'))
+paddle.fluid.layers.dice_loss (ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,)), ('document', '1ba0508d573f65feecf3564dce22aa1d'))
+paddle.fluid.layers.image_resize (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1)), ('document', 'b3ecb819454832885c1f0f3ab9a5b938'))
+paddle.fluid.layers.image_resize_short (ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',)), ('document', '06211aefc50c5a3e940d7204d859cdf7'))
+paddle.fluid.layers.resize_bilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1)), ('document', 'e4fb4ed511b2293b8f04f7e872afbfd7'))
+paddle.fluid.layers.resize_nearest (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners'], varargs=None, keywords=None, defaults=(None, None, None, None, True)), ('document', '735fa9758a6d7ff3b47d7b827f961c1d'))
+paddle.fluid.layers.gather (ArgSpec(args=['input', 'index'], varargs=None, keywords=None, defaults=None), ('document', '98f1c86716b9b7f4dda83f20e2adeee2'))
+paddle.fluid.layers.scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '65f8e9d8ddfd0b412f940579c4faa342'))
+paddle.fluid.layers.sequence_scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '15b522457dfef103f0c20ca9d397678b'))
+paddle.fluid.layers.random_crop (ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c9ab9e460ef0a1823249935a30e82c66'))
+paddle.fluid.layers.mean_iou (ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None), ('document', '35cbbdfa585d027bb490707c95a176b9'))
+paddle.fluid.layers.relu (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '866ffa1cc93f29e23662b526a7596537'))
+paddle.fluid.layers.selu (ArgSpec(args=['x', 'scale', 'alpha', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '9044c7fe667b76cb2d9264f2db11f417'))
+paddle.fluid.layers.log (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '98247c59d1c9b40af6730001b2aea73d'))
+paddle.fluid.layers.crop (ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '883104791204d3127e24234bb630b2e7'))
+paddle.fluid.layers.rank_loss (ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c542e39ac6add24a6bef6e79bf5617e2'))
+paddle.fluid.layers.margin_rank_loss (ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None)), ('document', '6d19dcc19917080b7ff3e03bde451bc8'))
+paddle.fluid.layers.elu (ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', '463258ee9f8b60760eb1e26357cc9bfa'))
+paddle.fluid.layers.relu6 (ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None)), ('document', '6f367339caf6c7124bc262fe1475df70'))
+paddle.fluid.layers.pow (ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', 'a5117c1eb84aca2ac0b0abab337a4799'))
+paddle.fluid.layers.stanh (ArgSpec(args=['x', 'scale_a', 'scale_b', 'name'], varargs=None, keywords=None, defaults=(0.6666666666666666, 1.7159, None)), ('document', '959936a477efc6c1447a9c8bf8ce94bb'))
+paddle.fluid.layers.hard_sigmoid (ArgSpec(args=['x', 'slope', 'offset', 'name'], varargs=None, keywords=None, defaults=(0.2, 0.5, None)), ('document', 'c82059b6fea1aa730f9aac911807b756'))
+paddle.fluid.layers.swish (ArgSpec(args=['x', 'beta', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', 'ef745e55a48763ee7b46b21a81dc7e84'))
+paddle.fluid.layers.prelu (ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'f6acef7ff7d887e49ff499fbb1dad4a9'))
+paddle.fluid.layers.brelu (ArgSpec(args=['x', 't_min', 't_max', 'name'], varargs=None, keywords=None, defaults=(0.0, 24.0, None)), ('document', '3db337c195e156e6ef2b8b4a57113600'))
+paddle.fluid.layers.leaky_relu (ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(0.02, None)), ('document', 'f878486c82b576938151daad0de995a0'))
+paddle.fluid.layers.soft_relu (ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(40.0, None)), ('document', '869adce548c342d6cc1bd88a948d83c9'))
+paddle.fluid.layers.flatten (ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'cb295c13cb957db85cd9609269d7784d'))
+paddle.fluid.layers.sequence_mask (ArgSpec(args=['x', 'maxlen', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 'int64', None)), ('document', 'f0dd6eddd3bff015a3c05269d82fcbd8'))
+paddle.fluid.layers.stack (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)), ('document', '367cfbb642839beacb5d117e2d2b4041'))
+paddle.fluid.layers.pad2d (ArgSpec(args=['input', 'paddings', 'mode', 'pad_value', 'data_format', 'name'], varargs=None, keywords=None, defaults=([0, 0, 0, 0], 'constant', 0.0, 'NCHW', None)), ('document', '7f4d46320cc077ca2e8db600c35f4030'))
+paddle.fluid.layers.unstack (ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None)), ('document', '98eb9d633116efcfc6f90c114bd44fd6'))
+paddle.fluid.layers.sequence_enumerate (ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None)), ('document', 'f6028537085dc296103bbbd85fa7763d'))
+paddle.fluid.layers.expand (ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '117d3607d1ffa0571835bbaebc7857ff'))
+paddle.fluid.layers.sequence_concat (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '3a1d155dd1bf6e72a0a3e3e1519591d1'))
+paddle.fluid.layers.scale (ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None)), ('document', '30190413b2fa442e7466d6cf2ce5ea07'))
+paddle.fluid.layers.elementwise_add (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '6bfbe72cbadc95ac7ab88c05ed5bf9f0'))
+paddle.fluid.layers.elementwise_div (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', 'cc6e6cc1cb942a152dde3ef08d5f165c'))
+paddle.fluid.layers.elementwise_sub (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', 'a12abdab09c3e57af5a6e1e9f138684a'))
+paddle.fluid.layers.elementwise_mul (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '422c77dbfcff355a57b5fdd4ec876daa'))
+paddle.fluid.layers.elementwise_max (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', 'f0bb0b2c454541cfafa761021a5cc776'))
+paddle.fluid.layers.elementwise_min (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '8a9cdefefbccbf9f6b0991c0946a21e9'))
+paddle.fluid.layers.elementwise_pow (ArgSpec(args=['x', 'y', 'axis', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, None, None)), ('document', '1aea4e197c552a284f83888a3c67a32e'))
+paddle.fluid.layers.uniform_random_batch_size_like (ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0)), ('document', '129e0a3257f1d532a948eedf9d5bf671'))
+paddle.fluid.layers.gaussian_random (ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')), ('document', '389dafe36e099841b6a7fb18d11f1b4c'))
+paddle.fluid.layers.sampling_id (ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')), ('document', '840fdac643d1341c1cae218d4511dbb9'))
+paddle.fluid.layers.gaussian_random_batch_size_like (ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32')), ('document', '840026b4766613c5705e06563cd103b6'))
+paddle.fluid.layers.sum (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'a418e3ccb5e2ac21bd60f5cc221d5860'))
+paddle.fluid.layers.slice (ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None), ('document', '01dbb91e7c74cb11336cd531013de51a'))
+paddle.fluid.layers.shape (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '17db0f814eb7bb5a3fac1ca6e60e16d8'))
+paddle.fluid.layers.logical_and (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'cdcf20c494c92060d10feb9374532f42'))
+paddle.fluid.layers.logical_or (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '0eae3f726a4afe590757552fa3ced012'))
+paddle.fluid.layers.logical_xor (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'b0daaa3fa4a0aa62f9b58c43d959eb25'))
+paddle.fluid.layers.logical_not (ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'cd1c8cf31e040427d4e05711044caeb6'))
+paddle.fluid.layers.clip (ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b020b7aab59719be98a4ae229a76deba'))
+paddle.fluid.layers.clip_by_norm (ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a1ea0bc5a926f427458c4254ca022749'))
+paddle.fluid.layers.mean (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd638d915195ce86a8d7963b81110d4c8'))
+paddle.fluid.layers.mul (ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None)), ('document', 'ccd37fa6b53f074adbfb732d738c4c2d'))
+paddle.fluid.layers.sigmoid_cross_entropy_with_logits (ArgSpec(args=['x', 'label', 'ignore_index', 'name', 'normalize'], varargs=None, keywords=None, defaults=(-100, None, False)), ('document', '180c284317ea45ef89a460d8d79c0b72'))
+paddle.fluid.layers.maxout (ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '891870d069a6aea746d34cc53b61690c'))
+paddle.fluid.layers.space_to_depth (ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5f207ae10589ebe38a63575ef6ff8e1e'))
+paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '51def402b8910e163cbace9d0c0526ed'))
+paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '77a6d80aa5551ca70324fc975c44507f'))
+paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None)), ('document', '2f46f1ff39a13ab00857e7b9f44b2fa7'))
+paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '70e3b5182a18b40b47ecabd7c8490a35'))
+paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', '9bb77f8dc002dd2ce75d4769eaaf5007'))
+paddle.fluid.layers.grid_sampler (ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd256cba1c41a5ed92ce3f31e24a2ca6d'))
+paddle.fluid.layers.log_loss (ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)), ('document', '4b5a2341023afe63157a066c14254f98'))
+paddle.fluid.layers.add_position_encoding (ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '4b9c2e8af5817937d831820874b5aa77'))
+paddle.fluid.layers.bilinear_tensor_product (ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'aa7540a0fa73ff69a02e11b4091aab75'))
+paddle.fluid.layers.merge_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'dc63315b84f591ac79ecca0c3632027a'))
+paddle.fluid.layers.get_tensor_from_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '7ffc849e71f31dfe29030ff94e662de6'))
+paddle.fluid.layers.lstm (ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1)), ('document', 'd5e6c494ac35100e2ed4d4bd9a1ed932'))
+paddle.fluid.layers.shuffle_channel (ArgSpec(args=['x', 'group', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2fa6782d43d02ae64482d21235a82949'))
+paddle.fluid.layers.py_func (ArgSpec(args=['func', 'x', 'out', 'backward_func', 'skip_vars_in_backward_input'], varargs=None, keywords=None, defaults=(None, None)), ('document', '8404e472ac12b4a30a505d3d3a3e5fdb'))
+paddle.fluid.layers.psroi_pool (ArgSpec(args=['input', 'rois', 'output_channels', 'spatial_scale', 'pooled_height', 'pooled_width', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '1546136806fef5c08f6918544bd9151d'))
+paddle.fluid.layers.teacher_student_sigmoid_loss (ArgSpec(args=['input', 'label', 'soft_max_up_bound', 'soft_max_lower_bound'], varargs=None, keywords=None, defaults=(15.0, -15.0)), ('document', '2f6ff96864054a31aa4bb659c6722c99'))
+paddle.fluid.layers.huber_loss (ArgSpec(args=['input', 'label', 'delta'], varargs=None, keywords=None, defaults=None), ('document', '431a4301c35032166ec029f7432c80a7'))
+paddle.fluid.layers.tree_conv (ArgSpec(args=['nodes_vector', 'edge_set', 'output_size', 'num_filters', 'max_depth', 'act', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 2, 'tanh', None, None, None)), ('document', '34ea12ac9f10a65dccbc50100d12e607'))
+paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '33bbd42027d872b3818b3d64ec52e139'))
+paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'b1ae2e1cc0750e58726374061ea90ecc'))
+paddle.fluid.layers.read_file (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', 'b0a1c2fc51c27a106da28f3308c41f5e'))
+paddle.fluid.layers.shuffle (ArgSpec(args=['reader', 'buffer_size'], varargs=None, keywords=None, defaults=None), ('document', 'f967a73426db26f970bc70bfb03cffca'))
+paddle.fluid.layers.batch (ArgSpec(args=['reader', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', 'f563d376d35e1a4c4db100fd11b381a0'))
+paddle.fluid.layers.double_buffer (ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '07e5b796674796eb1ef3fee9c10d24e3'))
+paddle.fluid.layers.random_data_generator (ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,)), ('document', '9b7f0f86ec24bbc97643cadcb6499cff'))
+paddle.fluid.layers.py_reader (ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True)), ('document', '13dabc57863f62ab3141586784ee356b'))
+paddle.fluid.layers.create_py_reader_by_data (ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True)), ('document', '350f74d93fab9adb2ac4950f1c26416b'))
+paddle.fluid.layers.Preprocessor.__init__ (ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.Preprocessor.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.Preprocessor.inputs (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.Preprocessor.outputs (ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.load (ArgSpec(args=['out', 'file_path', 'load_as_fp16'], varargs=None, keywords=None, defaults=(None,)), ('document', '9d1a4bc97bbce9fa1d4f7a4200a771ff'))
+paddle.fluid.layers.create_tensor (ArgSpec(args=['dtype', 'name', 'persistable'], varargs=None, keywords=None, defaults=(None, False)), ('document', 'c0c3d0194f83fff8ea99ce0820657dae'))
+paddle.fluid.layers.create_parameter (ArgSpec(args=['shape', 'dtype', 'name', 'attr', 'is_bias', 'default_initializer'], varargs=None, keywords=None, defaults=(None, None, False, None)), ('document', 'd62b866c899bc1fedb5385f95b88e1f8'))
+paddle.fluid.layers.create_global_var (ArgSpec(args=['shape', 'value', 'dtype', 'persistable', 'force_cpu', 'name'], varargs=None, keywords=None, defaults=(False, False, None)), ('document', 'ab914fac893607e29ac6e52bbdbea1a4'))
+paddle.fluid.layers.cast (ArgSpec(args=['x', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '60cb8f843d625abf33f8bf12455b8f99'))
+paddle.fluid.layers.tensor_array_to_tensor (ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'b12717d3d4567e6119589f7f655b0cbb'))
+paddle.fluid.layers.concat (ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(0, None)), ('document', 'b19b79be4f05e85d1d6cec642c9fb535'))
+paddle.fluid.layers.sums (ArgSpec(args=['input', 'out'], varargs=None, keywords=None, defaults=(None,)), ('document', '42912092418620b4be07f36af31e7816'))
+paddle.fluid.layers.assign (ArgSpec(args=['input', 'output'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b690184f3537df5501e4d9d8f31152a5'))
+paddle.fluid.layers.fill_constant_batch_size_like (ArgSpec(args=['input', 'shape', 'dtype', 'value', 'input_dim_idx', 'output_dim_idx'], varargs=None, keywords=None, defaults=(0, 0)), ('document', 'd4059a2f5763036b07018d76429f9acb'))
+paddle.fluid.layers.fill_constant (ArgSpec(args=['shape', 'dtype', 'value', 'force_cpu', 'out'], varargs=None, keywords=None, defaults=(False, None)), ('document', '1d8b14729639fa38509c79b9784740fa'))
+paddle.fluid.layers.argmin (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)), ('document', '2778a1d34be49263a51211885599ea37'))
+paddle.fluid.layers.argmax (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)), ('document', '04114996cfb98994ba222804a1a6109f'))
+paddle.fluid.layers.argsort (ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '68ec45c6fb6b93e47de9c9a0945fb98e'))
+paddle.fluid.layers.ones (ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)), ('document', 'b402489c62e668df42e7daceb63c142b'))
+paddle.fluid.layers.zeros (ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)), ('document', 'c155e2efc56ffa5ed4658cca0272e491'))
+paddle.fluid.layers.reverse (ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None), ('document', '8ee7cb6ca639e7460e825f953b65d94d'))
+paddle.fluid.layers.has_inf (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '8f8c0306117ea441f20dcbbdba1f0ecc'))
+paddle.fluid.layers.has_nan (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '2e53e83127dbfd86e7098bdfe9a549e8'))
+paddle.fluid.layers.isfinite (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '0a437011c3906079fd8947ed3e52d292'))
+paddle.fluid.layers.While.__init__ (ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.While.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.Switch.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.Switch.case (ArgSpec(args=['self', 'condition'], varargs=None, keywords=None, defaults=None), ('document', 'f7c7160014c1b46cfeda9dd5808d1789'))
+paddle.fluid.layers.Switch.default (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '50853ae884df03d9c36703bb46d9ef07'))
+paddle.fluid.layers.increment (ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True)), ('document', '73bb96ec4783ec1a11e760e8851b0e77'))
+paddle.fluid.layers.array_write (ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,)), ('document', '40b6d15f4c86b2b09df340d7778ad713'))
+paddle.fluid.layers.create_array (ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None), ('document', '2d4f20087080ba5105b55205ad5c5b6a'))
+paddle.fluid.layers.less_than (ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None)), ('document', '067bbc799c66289ca8b8924c26b6673f'))
+paddle.fluid.layers.equal (ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=None, defaults=(None,)), ('document', '80c29b1dc64718f0116de90d1ac88a77'))
+paddle.fluid.layers.array_read (ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None), ('document', '0275133f1dde2aed528b4d3230edf823'))
+paddle.fluid.layers.array_length (ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None), ('document', 'ffb8b9578ec66db565b223d313aa82a2'))
+paddle.fluid.layers.IfElse.__init__ (ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.IfElse.false_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.IfElse.input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.IfElse.output (ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.IfElse.true_block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.DynamicRNN.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.DynamicRNN.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6d3e0a5d9aa519a9773a36e1620ea9b7'))
+paddle.fluid.layers.DynamicRNN.memory (ArgSpec(args=['self', 'init', 'shape', 'value', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, False, 'float32')), ('document', 'b9174d4e91505b0c8ecc193eb51e248d'))
+paddle.fluid.layers.DynamicRNN.output (ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None), ('document', 'b439a176a3328de8a75bdc5c08eece4a'))
+paddle.fluid.layers.DynamicRNN.static_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', 'f29ad2478b6b2ad4f413d2936a331ea0'))
+paddle.fluid.layers.DynamicRNN.step_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '169d694d2224f62b4f3afdc3dbc19e95'))
+paddle.fluid.layers.DynamicRNN.update_memory (ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None), ('document', '5d83987da13b98363d6a807a52d8024f'))
+paddle.fluid.layers.StaticRNN.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.StaticRNN.memory (ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1)), ('document', 'c24e368e23afac1ed91a78a639d7a9c7'))
+paddle.fluid.layers.StaticRNN.output (ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.StaticRNN.step (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.StaticRNN.step_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.StaticRNN.step_output (ArgSpec(args=['self', 'o'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.StaticRNN.update_memory (ArgSpec(args=['self', 'mem', 'var'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.layers.reorder_lod_tensor_by_rank (ArgSpec(args=['x', 'rank_table'], varargs=None, keywords=None, defaults=None), ('document', '3545f529ef04e8f6ecb76b47fa3df01a'))
+paddle.fluid.layers.Print (ArgSpec(args=['input', 'first_n', 'message', 'summarize', 'print_tensor_name', 'print_tensor_type', 'print_tensor_shape', 'print_tensor_lod', 'print_phase'], varargs=None, keywords=None, defaults=(-1, None, -1, True, True, True, True, 'both')), ('document', '5fef91b0e21c93610785f2b1f7161732'))
+paddle.fluid.layers.is_empty (ArgSpec(args=['x', 'cond'], varargs=None, keywords='ignored', defaults=(None,)), ('document', 'bbe578dbb49ad13e15b014e98c22b519'))
+paddle.fluid.layers.sigmoid (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '29a25ba78de79152076cacfc5443137d'))
+paddle.fluid.layers.logsigmoid (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '81ccb7acafd06c7728e11581f5d342e3'))
+paddle.fluid.layers.exp (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e6b3e769413d96aab4176f96db25984b'))
+paddle.fluid.layers.tanh (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e9d586a0b5bd05f67ee78048f9d503b6'))
+paddle.fluid.layers.tanh_shrink (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '1e521554b9fdda9061ec6d306f0709b7'))
+paddle.fluid.layers.softshrink (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '9eef31597bbafa2bd49691e072296e13'))
+paddle.fluid.layers.sqrt (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '072a8541e0f632366bba10f67cb0db27'))
+paddle.fluid.layers.abs (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '64650ac42cf82e9920cb0b172b1d29fd'))
+paddle.fluid.layers.ceil (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c75d67dc5fe28f68e4cfffead4f698ad'))
+paddle.fluid.layers.floor (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '647b16c5da5ef909649ae02abb434973'))
+paddle.fluid.layers.cos (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '485f2686bcc2fe37a4bd893769c8a3e2'))
+paddle.fluid.layers.sin (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '01f1766aa76eff1df30147505b59f7c4'))
+paddle.fluid.layers.round (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b47f5da13913d3e56bdb1e612a73f3f2'))
+paddle.fluid.layers.reciprocal (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'cc6ac2f14f03c52aaa83a59bf83b8d26'))
+paddle.fluid.layers.square (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '48dfb45d773dbc30126c3a7f777de5ee'))
+paddle.fluid.layers.softplus (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '459c5781e9d1dd88283b7c5769d7872a'))
+paddle.fluid.layers.softsign (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '80846bcd4bd457207457a6d5411f4148'))
+paddle.fluid.layers.uniform_random (ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', -1.0, 1.0, 0)), ('document', '308b619af849caa82bbc31e897f5e641'))
+paddle.fluid.layers.hard_shrink (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c142f5884f3255e0d6075c286bbd531e'))
+paddle.fluid.layers.cumsum (ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '944d7c03057f5fc88bc78acd4d82f926'))
+paddle.fluid.layers.thresholded_relu (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '90566ea449ea4c681435546e2f70610a'))
+paddle.fluid.layers.prior_box (ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False)), ('document', '14cac0ee643fa6e026ad82aeeee75bd8'))
+paddle.fluid.layers.density_prior_box (ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None)), ('document', 'a0d762bb08de9ce93bc780aa57cd5cd9'))
+paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)), ('document', 'a6ab47a2fe681e52fabb7057ddf0efdd'))
+paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '3ddb9b966f193900193a95a3df77c3c1'))
+paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'c0b334f917828f95056f6ebe10907b1c'))
+paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0)), ('document', 'c33093a82a46e3091e789e5572588db1'))
+paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '6d5028fd09d01ab82d296adc0ea95aee'))
+paddle.fluid.layers.detection_map (ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral')), ('document', '1467d91b50c22cd52103b4aa1ee9d0a1'))
+paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', '1dddef3eb4b3cbd4df8e03ac480dbf97'))
+paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)), ('document', '23337cc57bbf5be73884b6bd0f849603'))
+paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,)), ('document', '5761f9ed83654314416e24372b33bb84'))
+paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True)), ('document', '87863717edeb7fe87a1268976cbc015d'))
+paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', '57ab49f3f324f310b7eed322e7c1057a'))
+paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', 'f73706a65468e9ca3e0bee4a31521b0a'))
+paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '587845f60c5d97ffdf2dfd21da52eca1'))
+paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '032d0f4b7d8f6235ee5d91e473344f0e'))
+paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '0e5ac2507723a0b5adec473f9556799b'))
+paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '991e934c3e09abf0edec7c9c978b4691'))
+paddle.fluid.layers.box_clip (ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '397e9e02b451d99c56e20f268fa03f2e'))
+paddle.fluid.layers.multiclass_nms (ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)), ('document', 'ca7d1107b6c5d2d6d8221039a220fde0'))
+paddle.fluid.layers.accuracy (ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)), ('document', '9808534c12c5e739a10f73ebb0b4eafd'))
+paddle.fluid.layers.auc (ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1)), ('document', 'e0e95334fce92d16c2d9db6e7caffc47'))
+paddle.fluid.layers.exponential_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)), ('document', '98a5050bee8522fcea81aa795adaba51'))
+paddle.fluid.layers.natural_exp_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)), ('document', '676a7bc2a218691db50bca233903d21e'))
+paddle.fluid.layers.inverse_time_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)), ('document', 'd07e767d59c4a5e6c930f3e6756d3f82'))
+paddle.fluid.layers.polynomial_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'end_learning_rate', 'power', 'cycle'], varargs=None, keywords=None, defaults=(0.0001, 1.0, False)), ('document', '882634f420f626642f0874481263da40'))
+paddle.fluid.layers.piecewise_decay (ArgSpec(args=['boundaries', 'values'], varargs=None, keywords=None, defaults=None), ('document', 'c717d9d1d78a53c809d01b8bc56f3cae'))
+paddle.fluid.layers.noam_decay (ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None), ('document', 'd9a95746353fd574be36dc28d8726c28'))
+paddle.fluid.layers.append_LARS (ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None), ('document', 'd24fa1e7d62ac8a534fc6a86002f84f8'))
+paddle.fluid.layers.cosine_decay (ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None), ('document', '9588c64c26ffaef3c466e404a6af9d9b'))
+paddle.fluid.contrib.InitState.__init__ (ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.StateCell.__init__ (ArgSpec(args=['self', 'inputs', 'states', 'out_state', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.StateCell.compute_state (ArgSpec(args=['self', 'inputs'], varargs=None, keywords=None, defaults=None), ('document', '92973b3f222081a1d17069c683cf4a99'))
+paddle.fluid.contrib.StateCell.get_input (ArgSpec(args=['self', 'input_name'], varargs=None, keywords=None, defaults=None), ('document', '6f24a007cfa184e32f01a960703bfd70'))
+paddle.fluid.contrib.StateCell.get_state (ArgSpec(args=['self', 'state_name'], varargs=None, keywords=None, defaults=None), ('document', '630a4945cfe659ea4f307598fbbce5d2'))
+paddle.fluid.contrib.StateCell.out_state (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '7ad681dff0393ddf13a724194e720f28'))
+paddle.fluid.contrib.StateCell.set_state (ArgSpec(args=['self', 'state_name', 'state_value'], varargs=None, keywords=None, defaults=None), ('document', 'd4e0e08cd5d9d9a571cbc52d114f5ae9'))
+paddle.fluid.contrib.StateCell.state_updater (ArgSpec(args=['self', 'updater'], varargs=None, keywords=None, defaults=None), ('document', 'd5afe1b7665d94fb023b15cf913ca510'))
+paddle.fluid.contrib.StateCell.update_states (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'fe0b0f1338723516a35a30247899c81b'))
+paddle.fluid.contrib.TrainingDecoder.__init__ (ArgSpec(args=['self', 'state_cell', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.TrainingDecoder.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '98d88fa1c989748410a12517c6a585bf'))
+paddle.fluid.contrib.TrainingDecoder.output (ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None), ('document', 'f0a457dee586559036202087ce2eff69'))
+paddle.fluid.contrib.TrainingDecoder.static_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', 'a024c72664fe815068423ba630b7658a'))
+paddle.fluid.contrib.TrainingDecoder.step_input (ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None), ('document', '4659db7a888a2495e71c1838a0483909'))
+paddle.fluid.contrib.BeamSearchDecoder.__init__ (ArgSpec(args=['self', 'state_cell', 'init_ids', 'init_scores', 'target_dict_dim', 'word_dim', 'input_var_dict', 'topk_size', 'sparse_emb', 'max_len', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=({}, 50, True, 100, 1, 1, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.BeamSearchDecoder.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '98d88fa1c989748410a12517c6a585bf'))
+paddle.fluid.contrib.BeamSearchDecoder.decode (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '1e47c60f080c1343ebb6ceaef89656b2'))
+paddle.fluid.contrib.BeamSearchDecoder.early_stop (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3a84a7454ed6707f79b9e954d92a7575'))
+paddle.fluid.contrib.BeamSearchDecoder.read_array (ArgSpec(args=['self', 'init', 'is_ids', 'is_scores'], varargs=None, keywords=None, defaults=(False, False)), ('document', 'aa89eb8fd5e4cabaf5cc1bcae14665a4'))
+paddle.fluid.contrib.BeamSearchDecoder.update_array (ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None), ('document', '5754e9b3212b7c09497151516a0de5a7'))
+paddle.fluid.contrib.memory_usage (ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '8fcb2f93bb743693baa8d4860a5ccc47'))
+paddle.fluid.contrib.op_freq_statistic (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '4d43687113c4bf5b29d15aee2f4e4afa'))
+paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000)), ('document', '14b39f1fcd5667ff556b1aad94357d1d'))
+paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.QuantizeTranspiler.freeze_program (ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)), ('document', '909675a1ab055c69b436a7893fcae4fd'))
+paddle.fluid.contrib.QuantizeTranspiler.training_transpile (ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6dd9909f10b283ba2892a99058a72884'))
+paddle.fluid.contrib.Calibrator.__init__ (ArgSpec(args=['self'], varargs='args', keywords='kwargs', defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.Calibrator.sample_data (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3b8c85ca1e2cf753cc8c90a6c6992958'))
+paddle.fluid.contrib.Calibrator.save_int8_model (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.reader.ctr_reader.ctr_reader (ArgSpec(args=['feed_dict', 'file_type', 'file_format', 'dense_slot_index', 'sparse_slot_index', 'capacity', 'thread_num', 'batch_size', 'file_list', 'slots', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b2ebf3de2a6ef1af2c3b88d2db7591ab'))
+paddle.fluid.contrib.build_compressor (ArgSpec(args=['place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'config'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.CompressPass.__init__ (ArgSpec(args=['self', 'place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'program_exe'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.CompressPass.add_strategy (ArgSpec(args=['self', 'strategy'], varargs=None, keywords=None, defaults=None), ('document', '3bf6010b6f47d3c86df0ec8957be95e0'))
+paddle.fluid.contrib.CompressPass.apply (ArgSpec(args=['self', 'graph'], varargs=None, keywords=None, defaults=None), ('document', 'a92bf85d4b59bd4f2ac1706d7c4899a6'))
+paddle.fluid.contrib.ImitationGraph.__init__ (ArgSpec(args=['self', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.ImitationGraph.all_parameters (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.SensitivePruneStrategy.__init__ (ArgSpec(args=['self', 'pruner', 'start_epoch', 'end_epoch', 'delta_rate', 'acc_loss_threshold', 'sensitivities'], varargs=None, keywords=None, defaults=(None, 0, 10, 0.2, 0.2, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.SensitivePruneStrategy.on_batch_begin (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.SensitivePruneStrategy.on_batch_end (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.SensitivePruneStrategy.on_compress_begin (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.SensitivePruneStrategy.on_compress_end (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_begin (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_end (ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.MagnitudePruner.__init__ (ArgSpec(args=['self', 'threshold'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.MagnitudePruner.prune (ArgSpec(args=['self', 'param', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.RatioPruner.__init__ (ArgSpec(args=['self', 'ratios'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e7a81a325b296a9ca502ee5adb4fc85d'))
+paddle.fluid.contrib.RatioPruner.prune (ArgSpec(args=['self', 'param', 'ratio'], varargs=None, keywords=None, defaults=(None,)), ('document', '358cbf2978c91028fb96a195a9884645'))
+paddle.fluid.contrib.load_persistables_for_increment (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None), ('document', '11fbf7e8dd2289805de291b453a33ee7'))
+paddle.fluid.contrib.load_persistables_for_inference (ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None), ('document', '5b5577bb3d24070da819674255d16196'))
+paddle.fluid.contrib.convert_dist_to_sparse_program (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '4efbd93876832d4d35497cdbc7a1e6d8'))
+paddle.fluid.contrib.HDFSClient.__init__ (ArgSpec(args=['self', 'hadoop_home', 'configs'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.contrib.HDFSClient.delete (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', 'c3721aa2d4d9ef5a857dd47b2681c03e'))
+paddle.fluid.contrib.HDFSClient.download (ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'unzip'], varargs=None, keywords=None, defaults=(False, False)), ('document', 'ca55bde92184d3fd0f9f5c963b25e634'))
+paddle.fluid.contrib.HDFSClient.is_dir (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,)), ('document', '45bde1bae02605a205c8245b58b9156d'))
+paddle.fluid.contrib.HDFSClient.is_exist (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,)), ('document', 'be9c94bccff7ba0c1d95883ac62b5864'))
+paddle.fluid.contrib.HDFSClient.ls (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', '808acac504870c7e46594b95674f8a86'))
+paddle.fluid.contrib.HDFSClient.lsr (ArgSpec(args=['self', 'hdfs_path', 'only_file', 'sort'], varargs=None, keywords=None, defaults=(True, True)), ('document', 'fae835aa3354eb6a0434c0f9ba3c2747'))
+paddle.fluid.contrib.HDFSClient.make_local_dirs (ArgSpec(args=['local_path'], varargs=None, keywords=None, defaults=None), ('document', 'e76b89c8e7f019b5da576c0026fcf689'))
+paddle.fluid.contrib.HDFSClient.makedirs (ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None), ('document', '44d9972aae390aedf40aaea731a37e4b'))
+paddle.fluid.contrib.HDFSClient.rename (ArgSpec(args=['self', 'hdfs_src_path', 'hdfs_dst_path', 'overwrite'], varargs=None, keywords=None, defaults=(False,)), ('document', '0eb133644d9a9f4da45bb39261ff0955'))
+paddle.fluid.contrib.HDFSClient.upload (ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'retry_times'], varargs=None, keywords=None, defaults=(False, 5)), ('document', '7d053b4bfd6dcfdd2c9dda0e0dbd9665'))
+paddle.fluid.contrib.multi_download (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,)), ('document', '100927be598ed8f9eaa1f3ef1b23568a'))
+paddle.fluid.contrib.multi_upload (ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True)), ('document', '183f34c83d30dbe16e09e8716c41958a'))
+paddle.fluid.transpiler.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '292ab72977afbe58e6a3bde175452680'))
+paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs (ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None), ('document', '78f4949aedf317666a89ca74b3748ba8'))
+paddle.fluid.transpiler.DistributeTranspiler.get_startup_program (ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'd796fc0c8d51503b556fcf6dc15c4f0c'))
+paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program (ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)), ('document', '736330e31a7a54abccc0c7fd9119d9ff'))
+paddle.fluid.transpiler.DistributeTranspiler.transpile (ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')), ('document', '06ce55338dfe96311ad1078235ab3bf4'))
+paddle.fluid.transpiler.memory_optimize (ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level', 'skip_grads'], varargs=None, keywords=None, defaults=(None, False, 0, False)), ('document', 'eda17d0f1639bc6ca215cecf87f588a4'))
+paddle.fluid.transpiler.release_memory (ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ac4114d3df16264f1946deb3a8434a6f'))
+paddle.fluid.transpiler.HashName.__init__ (ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.transpiler.HashName.dispatch (ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.transpiler.HashName.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.transpiler.RoundRobin.__init__ (ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.transpiler.RoundRobin.dispatch (ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.transpiler.RoundRobin.reset (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.transpiler.DistributeTranspilerConfig.__init__ 
-paddle.fluid.nets.simple_img_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, True))
-paddle.fluid.nets.sequence_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type', 'bias_attr'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max', None))
-paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,))
-paddle.fluid.nets.scaled_dot_product_attention ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0))
-paddle.fluid.nets.img_conv_group ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True))
-paddle.fluid.optimizer.SGDOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.optimizer.SGDOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.optimizer.SGDOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.optimizer.SGDOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
-paddle.fluid.optimizer.MomentumOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.optimizer.MomentumOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.optimizer.MomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.AdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name', 'initial_accumulator_value'], varargs=None, keywords=None, defaults=(1e-06, None, None, 0.0))
-paddle.fluid.optimizer.AdagradOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.optimizer.AdagradOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.optimizer.AdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.AdamOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name', 'lazy_mode'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None, False))
-paddle.fluid.optimizer.AdamOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.optimizer.AdamOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.optimizer.AdamOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.AdamaxOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None))
-paddle.fluid.optimizer.AdamaxOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.optimizer.AdamaxOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.optimizer.AdamaxOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, None, None))
-paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.optimizer.DecayedAdagradOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.FtrlOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.0, 0.0, -0.5, None, None))
-paddle.fluid.optimizer.FtrlOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.optimizer.FtrlOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.optimizer.FtrlOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, 0.0, False, None, None))
-paddle.fluid.optimizer.RMSPropOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.optimizer.RMSPropOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.optimizer.RMSPropOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.AdadeltaOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None))
-paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.optimizer.AdadeltaOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.optimizer.AdadeltaOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.ModelAverage.__init__ ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window', 'regularization', 'name'], varargs=None, keywords=None, defaults=(10000, 10000, None, None))
-paddle.fluid.optimizer.ModelAverage.apply ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,))
-paddle.fluid.optimizer.ModelAverage.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.optimizer.ModelAverage.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.optimizer.ModelAverage.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.ModelAverage.restore ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.optimizer.LarsMomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'lars_coeff', 'lars_weight_decay', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.0005, None, None))
-paddle.fluid.optimizer.LarsMomentumOptimizer.apply_gradients ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.optimizer.LarsMomentumOptimizer.backward ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None))
-paddle.fluid.optimizer.LarsMomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.backward.append_backward ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.regularizer.L1DecayRegularizer.__init__ ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,))
-paddle.fluid.regularizer.L2DecayRegularizer.__init__ ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,))
+paddle.fluid.nets.simple_img_conv_pool (ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, True)), ('document', 'e0f67f35abf27f666f81003113b90244'))
+paddle.fluid.nets.sequence_conv_pool (ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type', 'bias_attr'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max', None)), ('document', '48c434dd7bb827f69d90e5135d77470f'))
+paddle.fluid.nets.glu (ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,)), ('document', '08c1c57e1db6b20bf87b264cb7cf3ca8'))
+paddle.fluid.nets.scaled_dot_product_attention (ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0)), ('document', '921714c9bfb351b41403418265393203'))
+paddle.fluid.nets.img_conv_group (ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True)), ('document', '3802be78fbfb206dae64a2d9f8480970'))
+paddle.fluid.optimizer.SGDOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.optimizer.SGDOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.SGDOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
+paddle.fluid.optimizer.SGDOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
+paddle.fluid.optimizer.MomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.optimizer.MomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.MomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
+paddle.fluid.optimizer.MomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
+paddle.fluid.optimizer.AdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name', 'initial_accumulator_value'], varargs=None, keywords=None, defaults=(1e-06, None, None, 0.0)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.optimizer.AdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.AdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
+paddle.fluid.optimizer.AdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
+paddle.fluid.optimizer.AdamOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name', 'lazy_mode'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.optimizer.AdamOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.AdamOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
+paddle.fluid.optimizer.AdamOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
+paddle.fluid.optimizer.AdamaxOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.optimizer.AdamaxOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.AdamaxOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
+paddle.fluid.optimizer.AdamaxOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
+paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.optimizer.DecayedAdagradOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.DecayedAdagradOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
+paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
+paddle.fluid.optimizer.FtrlOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.0, 0.0, -0.5, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.optimizer.FtrlOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.FtrlOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
+paddle.fluid.optimizer.FtrlOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
+paddle.fluid.optimizer.RMSPropOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, 0.0, False, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.optimizer.RMSPropOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.RMSPropOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
+paddle.fluid.optimizer.RMSPropOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
+paddle.fluid.optimizer.AdadeltaOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.AdadeltaOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
+paddle.fluid.optimizer.AdadeltaOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
+paddle.fluid.optimizer.ModelAverage.__init__ (ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window', 'regularization', 'name'], varargs=None, keywords=None, defaults=(10000, 10000, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.optimizer.ModelAverage.apply (ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,)), ('document', '46234a5470590feb336346f70a3db715'))
+paddle.fluid.optimizer.ModelAverage.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.ModelAverage.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
+paddle.fluid.optimizer.ModelAverage.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
+paddle.fluid.optimizer.ModelAverage.restore (ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None), ('document', '18db9c70be9c4dd466f9844457b21bfe'))
+paddle.fluid.optimizer.LarsMomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'lars_coeff', 'lars_weight_decay', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.0005, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.optimizer.LarsMomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
+paddle.fluid.optimizer.LarsMomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
+paddle.fluid.optimizer.LarsMomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
+paddle.fluid.backward.append_backward (ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '1a79bd7d10ae54ca763ec81bca36ba24'))
+paddle.fluid.regularizer.L1DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.regularizer.L2DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.LoDTensor.__init__ 1. __init__(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None  2. __init__(self: paddle.fluid.core.LoDTensor) -> None
 paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> bool
 paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core.LoDTensor) -> List[List[int]]
@@ -482,38 +483,38 @@ paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray
 paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> None
 paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None
 paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None
-paddle.fluid.ParamAttr.__init__ ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False))
-paddle.fluid.WeightNormParamAttr.__init__ ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False))
-paddle.fluid.DataFeeder.__init__ ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.DataFeeder.decorate_reader ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True))
-paddle.fluid.DataFeeder.feed ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.DataFeeder.feed_parallel ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.clip.ErrorClipByValue.__init__ ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.clip.GradientClipByValue.__init__ ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.clip.GradientClipByNorm.__init__ ArgSpec(args=['self', 'clip_norm'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.clip.GradientClipByGlobalNorm.__init__ ArgSpec(args=['self', 'clip_norm', 'group_name'], varargs=None, keywords=None, defaults=('default_group',))
-paddle.fluid.profiler.cuda_profiler ArgSpec(args=['output_file', 'output_mode', 'config'], varargs=None, keywords=None, defaults=(None, None))
-paddle.fluid.profiler.reset_profiler ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
-paddle.fluid.profiler.profiler ArgSpec(args=['state', 'sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile'))
-paddle.fluid.profiler.start_profiler ArgSpec(args=['state'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.profiler.stop_profiler ArgSpec(args=['sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile'))
-paddle.fluid.unique_name.generate ArgSpec(args=['key'], varargs=None, keywords=None, defaults=None)
-paddle.fluid.unique_name.switch ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.unique_name.guard ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.recordio_writer.convert_reader_to_recordio_file ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
-paddle.fluid.recordio_writer.convert_reader_to_recordio_files ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
+paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', '0eed2f198dc73c08a41b61edbc755753'))
+paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', '459e316301279dfd82001b46f0b8ffca'))
+paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '543863d1f9d4853758adb613b8659e85'))
+paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.clip.GradientClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.clip.GradientClipByNorm.__init__ (ArgSpec(args=['self', 'clip_norm'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.clip.GradientClipByGlobalNorm.__init__ (ArgSpec(args=['self', 'clip_norm', 'group_name'], varargs=None, keywords=None, defaults=('default_group',)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.profiler.cuda_profiler (ArgSpec(args=['output_file', 'output_mode', 'config'], varargs=None, keywords=None, defaults=(None, None)), ('document', '2e2fb1cfc469a67f19fb578a2ed6be79'))
+paddle.fluid.profiler.reset_profiler (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '397ce757fabbe5c622e0c3458c41fcd0'))
+paddle.fluid.profiler.profiler (ArgSpec(args=['state', 'sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile')), ('document', 'bd3a07eeb68e384f4d2d416cb2e28d86'))
+paddle.fluid.profiler.start_profiler (ArgSpec(args=['state'], varargs=None, keywords=None, defaults=None), ('document', '88da8fb6dbebaee2f7520188a09574f9'))
+paddle.fluid.profiler.stop_profiler (ArgSpec(args=['sorted_key', 'profile_path'], varargs=None, keywords=None, defaults=(None, '/tmp/profile')), ('document', 'a7500e39dd033f1e64f562e909333a8a'))
+paddle.fluid.unique_name.generate (ArgSpec(args=['key'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.unique_name.switch (ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.unique_name.guard (ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.fluid.recordio_writer.convert_reader_to_recordio_file (ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)), ('document', '65c7523e86f0c50bb729b01667f36310'))
+paddle.fluid.recordio_writer.convert_reader_to_recordio_files (ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)), ('document', 'bc643f0f5f1b9db57ff0d8a57d379bd7'))
 paddle.fluid.Scope Scope() -> paddle.fluid.core._Scope
-paddle.reader.map_readers ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None)
-paddle.reader.buffered ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None)
-paddle.reader.compose ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None)
-paddle.reader.chain ArgSpec(args=[], varargs='readers', keywords=None, defaults=None)
-paddle.reader.shuffle ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None)
-paddle.reader.firstn ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None)
-paddle.reader.xmap_readers ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,))
-paddle.reader.PipeReader.__init__ ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain'))
-paddle.reader.PipeReader.get_line ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n'))
-paddle.reader.multiprocess_reader ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000))
-paddle.reader.Fake.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
-paddle.reader.creator.np_array ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
-paddle.reader.creator.text_file ArgSpec(args=['path'], varargs=None, keywords=None, defaults=None)
-paddle.reader.creator.recordio ArgSpec(args=['paths', 'buf_size'], varargs=None, keywords=None, defaults=(100,))
+paddle.reader.map_readers (ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None), ('document', '77cbadb09df588e21e5cc0819b69c87d'))
+paddle.reader.buffered (ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None), ('document', '0d6186f109feceb99f60ec50a0a624cb'))
+paddle.reader.compose (ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None), ('document', '884291104e1c3f37f33aae44b7deeb0d'))
+paddle.reader.chain (ArgSpec(args=[], varargs='readers', keywords=None, defaults=None), ('document', 'd22c34e379a53901ae67a6bca7f4def4'))
+paddle.reader.shuffle (ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None), ('document', 'e42ea6fee23ce26b23cb142cd1d6522d'))
+paddle.reader.firstn (ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None), ('document', 'c5bb8f7dd4f917f1569a368aab5b8aad'))
+paddle.reader.xmap_readers (ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)), ('document', '283bc0b8a0e26ae186b8b9bee4aec560'))
+paddle.reader.PipeReader.__init__ (ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.reader.PipeReader.get_line (ArgSpec(args=['self', 'cut_lines', 'line_break'], varargs=None, keywords=None, defaults=(True, '\n')), ('document', '5f80a7ed70052f01665e4c74acccfa69'))
+paddle.reader.multiprocess_reader (ArgSpec(args=['readers', 'use_pipe', 'queue_size'], varargs=None, keywords=None, defaults=(True, 1000)), ('document', '7d8b3a96e592107c893d5d51ce968ba0'))
+paddle.reader.Fake.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
+paddle.reader.creator.np_array (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '28d457fbc9a71efa4ac91a3be179cada'))
+paddle.reader.creator.text_file (ArgSpec(args=['path'], varargs=None, keywords=None, defaults=None), ('document', '44fe286ab6175a5464d3a961a68c266a'))
+paddle.reader.creator.recordio (ArgSpec(args=['paths', 'buf_size'], varargs=None, keywords=None, defaults=(100,)), ('document', '11b3704ea42cfd537953387a7e58dae8'))
diff --git a/paddle/fluid/framework/block_desc.cc b/paddle/fluid/framework/block_desc.cc
index f4bb2f3e2fc2c8cf0376631d1996b395a8bc581a..0b7aaf11746d1931e10ad7e5368d9e053092500e 100644
--- a/paddle/fluid/framework/block_desc.cc
+++ b/paddle/fluid/framework/block_desc.cc
@@ -13,7 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/framework/block_desc.h"
+
 #include <queue>
+#include <unordered_set>
+#include <utility>
+
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 
@@ -155,6 +159,16 @@ void BlockDesc::RemoveOp(size_t s, size_t e) {
   ops_.erase(ops_.begin() + s, ops_.begin() + e);
 }
 
+void BlockDesc::RemoveOpInternal(const OpDesc *op_desc) {
+  // TODO(minqiyang): make this faster (linear scan, erases first match only)
+  for (auto it = ops_.begin(); it != ops_.end(); ++it) {
+    if (it->get() == op_desc) {
+      ops_.erase(it);
+      break;
+    }
+  }
+}
+
 std::vector<OpDesc *> BlockDesc::AllOps() const {
   std::vector<OpDesc *> res;
   for (const auto &op : ops_) {
@@ -163,20 +177,6 @@ std::vector<OpDesc *> BlockDesc::AllOps() const {
   return res;
 }
 
-void BlockDesc::Clear() {
-  // clear all ops
-  ops_.clear();
-
-  // clear all vars which are not persistable
-  for (auto it = vars_.begin(); it != vars_.end();) {
-    if (it->second->Persistable()) {
-      ++it;
-    } else {
-      vars_.erase(it++);
-    }
-  }
-}
-
 void BlockDesc::Flush() {
   for (auto &op_desc : ops_) {
     op_desc->Flush();
diff --git a/paddle/fluid/framework/block_desc.h b/paddle/fluid/framework/block_desc.h
index e192624a261e1291f1610e8e7e700d99a9d814d2..5c6e421516269a9b9865605400efa772f944a96f 100644
--- a/paddle/fluid/framework/block_desc.h
+++ b/paddle/fluid/framework/block_desc.h
@@ -93,12 +93,12 @@ class BlockDesc {
    */
   void RemoveOp(size_t s, size_t e);
 
+  void RemoveOpInternal(const OpDesc *op_desc);
+
   void RemoveVar(const std::string &name) { vars_.erase(name); }
 
   std::vector<OpDesc *> AllOps() const;
 
-  void Clear();
-
   size_t OpSize() const { return ops_.size(); }
 
   OpDesc *Op(int idx) const { return ops_.at(idx).get(); }
diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
index f03646705817b49d6d59e8beb3d91f625dc44bef..d4fbea9d95118666ababde811867e95c657c07de 100644
--- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
@@ -12,7 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h"
+#include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 #include "paddle/fluid/framework/details/fetch_op_handle.h"
 #include "paddle/fluid/framework/details/multi_devices_helper.h"
@@ -55,7 +57,7 @@ FeedFetchList FastThreadedSSAGraphExecutor::Run(
   std::vector<FetchOpHandle *> fetch_ops;
 
   for (auto &fetch_var_name : fetch_tensors) {
-    for (auto &var_map : graph_->Get<details::GraphVars>("vars")) {
+    for (auto &var_map : graph_->Get<details::GraphVars>(details::kGraphVars)) {
       auto it = var_map.find(fetch_var_name);
       if (it != var_map.end()) {
         fetched_vars[fetch_var_name].push_back(*it->second.rbegin());
diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc
index 4323883fa5cc9b26a68c2980f3b7a49eca610543..c31d0beec306fe165164837cd15c95b4efd76af0 100644
--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@@ -20,6 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor_array.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/reader.h"
+#include "paddle/fluid/framework/threadpool.h"
 #include "paddle/fluid/framework/transfer_scope_cache.h"
 #include "paddle/fluid/framework/variable_helper.h"
 #include "paddle/fluid/operators/distributed/distributed.h"
diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index 25d9afbcc8b2bc89ec47654f0dba4cb838be55b0..ca6b0229e906c0f8bfbf9ee6781013cb4ef7bbce 100644
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -105,4 +105,5 @@ if (WITH_MKLDNN)
     cc_test(test_conv_bias_mkldnn_fuse_pass SRCS mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc DEPS conv_bias_mkldnn_fuse_pass naive_executor)
     cc_test(test_conv_relu_mkldnn_fuse_pass SRCS mkldnn/conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
     cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass)
+    cc_test(test_mkldnn_placement_pass SRCS mkldnn/mkldnn_placement_pass_tester.cc DEPS mkldnn_placement_pass)
 endif ()
diff --git a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc
index 9ef5c298b8cddfec094e9544dc6da9afdcaf0dab..433d89d8d3f20b3f87cd94901ebbf79cd99de813 100644
--- a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc
@@ -44,10 +44,14 @@ struct TestIsReachable {
   using func = std::function<bool(const std::string&, const std::string&)>;
 
   auto operator()(const std::unique_ptr<ir::Graph>& graph) -> func {
-    auto find_node = [](const std::unique_ptr<ir::Graph>& graph,
-                        const std::string& name) -> Node* {
+    auto hash = [](const Node* node) -> std::string {
+      return node->Name() + std::to_string(node->id());
+    };
+
+    auto find_node = [&](const std::unique_ptr<ir::Graph>& graph,
+                         const std::string& name) -> Node* {
       for (auto& node : GraphTraits::DFS(*graph)) {
-        if (name == node.Name()) {
+        if (name == hash(&node)) {
           return &node;
         }
       }
@@ -55,13 +59,17 @@ struct TestIsReachable {
       return nullptr;
     };
 
-    return [&](std::string from, const std::string to) -> bool {
+    // Replace from/to with their hashed (name + id) keys during the scan below
+    return [&](std::string from, std::string to) -> bool {
       if (from == to) return true;
 
       std::map<std::string, bool> visited;
 
       for (auto& node : GraphTraits::DFS(*graph)) {
-        visited[node.Name()] = false;
+        auto hashed = hash(&node);
+        if (node.Name() == from) from = hashed;
+        if (node.Name() == to) to = hashed;
+        visited[hashed] = false;
       }
 
       visited[from] = true;
@@ -72,15 +80,15 @@ struct TestIsReachable {
       while (!queue.empty()) {
         auto cur = find_node(graph, queue.front());
         queue.pop_front();
-
         if (cur == nullptr) return false;
 
         for (auto n : cur->outputs) {
-          if (n->Name() == to) return true;
+          auto hashed_name = hash(n);
+          if (hashed_name == to) return true;
 
-          if (!visited[n->Name()]) {
-            visited[n->Name()] = true;
-            queue.push_back(n->Name());
+          if (!visited[hashed_name]) {
+            visited[hashed_name] = true;
+            queue.push_back(hashed_name);
           }
         }
       }
@@ -166,6 +174,28 @@ TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionAsYWithElementwiseAddRelu) {
   RunPassAndAssert(&prog, "a", "relu", 1);
 }
 
+TEST(ConvElementwiseAddMKLDNNFusePass,
+     ConvolutionProjectionAsYWithElementwiseAddRelu) {
+  auto prog = BuildProgramDesc({"a", "b", "c", "d", "e", "f"},
+                               {"bias", "weights", "bias2", "weights2"});
+
+  SetOp(&prog, "sigmoid", {{"X", "a"}}, {"Out", "b"});
+  // right branch: b (the sigmoid output) -> conv2d -> c
+  SetOp(&prog, "conv2d",
+        {{"Input", "b"}, {"Bias", "bias"}, {"Filter", "weights"}},
+        {"Output", "c"});
+
+  // left branch: a -> conv2d -> f
+  SetOp(&prog, "conv2d",
+        {{"Input", "a"}, {"Bias", "bias2"}, {"Filter", "weights2"}},
+        {"Output", "f"});
+  // both branches meet in elementwise_add (X = f, Y = c), followed by relu
+  SetOp(&prog, "elementwise_add", {{"X", "f"}, {"Y", "c"}}, {"Out", "d"});
+  SetOp(&prog, "relu", {{"X", "d"}}, {"Out", "e"});
+
+  RunPassAndAssert(&prog, "a", "relu", 2);
+}
+
 TEST(ConvElementwiseAddMKLDNNFusePass,
      ConvolutionAsYWithElementwiseAddReluNoBias) {
   auto prog = BuildProgramDesc({"a", "b", "c", "d", "e"}, {"weights"});
diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc
index 20e52410ffe3caa86450bc05bf3aabf5a5bce374..ccac65f3b3ad22d0f424ef9de9a7bd506e8ac862 100644
--- a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.cc
@@ -21,7 +21,7 @@ namespace ir {
 
 std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
     std::unique_ptr<ir::Graph> graph) const {
-  VLOG(3) << "Aplies MKL-DNN placement strategy.";
+  VLOG(3) << "Applies MKL-DNN placement strategy.";
   const auto& op_types_list =
       Get<std::unordered_set<std::string>>("mkldnn_enabled_op_types");
   for (const Node* n : graph->Nodes()) {
diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b6ec7e4d68b95125d630ce4a60635eb7b711e820
--- /dev/null
+++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc
@@ -0,0 +1,136 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h"
+
+#include <gtest/gtest.h>
+#include <boost/logic/tribool.hpp>
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
+           const std::vector<std::string>& inputs,
+           const std::vector<std::string>& outputs, boost::tribool use_mkldnn) {
+  // Appends a `type` op to block 0 of `prog`. use_mkldnn is only written when
+  // it is determinate; `name` is used by conv2d only; other types hit FAIL().
+  auto* new_op = prog->MutableBlock(0)->AppendOp();
+  new_op->SetType(type);
+  const bool has_mkldnn_flag = !boost::indeterminate(use_mkldnn);
+  if (has_mkldnn_flag) new_op->SetAttr("use_mkldnn", use_mkldnn);
+  if (type == "conv2d") {
+    new_op->SetAttr("name", name);
+    new_op->SetInput("Input", {inputs[0]});
+    new_op->SetInput("Filter", {inputs[1]});
+    new_op->SetInput("Bias", {inputs[2]});
+  } else if (type == "relu") {
+    new_op->SetInput("X", inputs);
+  } else if (type == "concat") {
+    new_op->SetAttr("axis", 1);
+    new_op->SetInput("X", {inputs[0], inputs[1]});
+  } else if (type == "pool2d") {
+    new_op->SetInput("X", {inputs[0]});
+  } else {
+    FAIL() << "Unexpected operator type.";
+  }
+  new_op->SetOutput("Out", {outputs[0]});
+}
+
+// Program under test; ops are appended in the order listed here.
+//
+// operator                      use_mkldnn
+// ---------------------------------------
+// (a,b)->concat->c              none
+// (c,weights,bias)->conv->f     none
+// f->relu->g                    false
+// g->pool->h                    false
+// (h,weights2,bias2)->conv->k   true
+// k->relu->l                    true
+ProgramDesc BuildProgramDesc() {
+  using Names = std::vector<std::string>;
+  ProgramDesc prog;
+  auto* block = prog.MutableBlock(0);
+
+  const Names var_names({"a", "b", "c", "weights", "bias", "f", "g", "h",
+                         "weights2", "bias2", "k", "l"});
+  for (const auto& var_name : var_names) {
+    auto* var = block->Var(var_name);
+    var->SetType(proto::VarType::SELECTED_ROWS);
+    // Only "weights" and "bias" are flagged persistable.
+    if (var_name == "weights" || var_name == "bias") {
+      var->SetPersistable(true);
+    }
+  }
+
+  SetOp(&prog, "concat", "concat1", Names({"a", "b"}), Names({"c"}),
+        boost::indeterminate);
+  SetOp(&prog, "conv2d", "conv1", Names({"c", "weights", "bias"}), Names({"f"}),
+        boost::indeterminate);
+  SetOp(&prog, "relu", "relu1", Names({"f"}), Names({"g"}), false);
+  SetOp(&prog, "pool2d", "pool1", Names({"g"}), Names({"h"}), false);
+  SetOp(&prog, "conv2d", "conv2", Names({"h", "weights2", "bias2"}),
+        Names({"k"}), true);
+  SetOp(&prog, "relu", "relu2", Names({"k"}), Names({"l"}), true);
+
+  return prog;
+}
+
+void MainTest(std::initializer_list<std::string> mkldnn_enabled_op_types,
+              unsigned expected_use_mkldnn_true_count) {
+  // Applies mkldnn_placement_pass to the test program with the given op-type
+  // list and counts the ops whose use_mkldnn attribute is true afterwards.
+  auto program = BuildProgramDesc();
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(program));
+
+  auto placement_pass = PassRegistry::Instance().Get("mkldnn_placement_pass");
+  auto* enabled_types =
+      new std::unordered_set<std::string>(mkldnn_enabled_op_types);
+  placement_pass->Set("mkldnn_enabled_op_types", enabled_types);
+
+  graph = placement_pass->Apply(std::move(graph));
+
+  unsigned use_mkldnn_true_count = 0;
+  for (auto* node : graph->Nodes()) {
+    if (!node->IsOp()) continue;
+    auto* op_desc = node->Op();
+    if (!op_desc->HasAttr("use_mkldnn")) continue;
+    if (boost::get<bool>(op_desc->GetAttr("use_mkldnn"))) {
+      ++use_mkldnn_true_count;
+    }
+  }
+
+  EXPECT_EQ(use_mkldnn_true_count, expected_use_mkldnn_true_count);
+}
+
+TEST(MKLDNNPlacementPass, enable_conv_relu) {
+  // expected: conv2 + relu2 (built with true) + relu1 (false -> true); no pool
+  MainTest({"conv2d", "relu"}, 3);
+}
+
+TEST(MKLDNNPlacementPass, enable_relu_pool) {
+  // expected: conv2 + relu2 (built with true) + relu1 and pool1 (false -> true)
+  MainTest({"relu", "pool2d"}, 4);
+}
+
+TEST(MKLDNNPlacementPass, enable_all) {
+  // expected: conv2, relu1, relu2, pool1; concat1/conv1 have no use_mkldnn attr
+  MainTest({}, 4);
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+USE_PASS(mkldnn_placement_pass);
diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h
index 2c1648c81fc999c6306d5b08bc243f3ad21fec04..a53a81c270aeec1b6ee4ed30e77526f4ea2e7977 100644
--- a/paddle/fluid/framework/op_registry.h
+++ b/paddle/fluid/framework/op_registry.h
@@ -290,7 +290,7 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
       "USE_OP_DEVICE_KERNEL must be in global namespace");                 \
   extern int                                                               \
       TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE##_##customized_name(); \
-  UNUSED static int use_op_kernel_##op_type##_##LIBRARY_TYPE##_##DEFAULT_TYPE##_ = /* NOLINT */ \
+  UNUSED static int use_op_kernel_##op_type##_##LIBRARY_TYPE##_##customized_name##_ = /* NOLINT */ \
       TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE##_##customized_name()
 
 #define USE_OP_DEVICE_KERNEL(op_type, LIBRARY_TYPE) \
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 64592d73e1741c2bc93a2c90b58b1824b2c887f9..5a874fe437d83e2ba795a0b063d7f1811afa04d8 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -882,7 +882,8 @@ class RuntimeInferShapeContext : public InferShapeContext {
   const RuntimeContext& ctx_;
 };
 
-static void CheckTensorNANOrInf(const std::string& name,
+static void CheckTensorNANOrInf(const std::string& op_type,
+                                const std::string& name,
                                 const framework::Tensor& tensor) {
   if (tensor.memory_size() == 0) {
     return;
@@ -892,9 +893,9 @@ static void CheckTensorNANOrInf(const std::string& name,
     return;
   }
   PADDLE_ENFORCE(!framework::TensorContainsInf(tensor),
-                 "Tensor %s contains Inf", name);
+                 "Operator %s output Tensor %s contains Inf", op_type, name);
   PADDLE_ENFORCE(!framework::TensorContainsNAN(tensor),
-                 "Tensor %s contains NAN", name);
+                 "Operator %s output Tensor %s contains NAN", op_type, name);
 }
 
 void OperatorWithKernel::RuntimeInferShape(const Scope& scope,
@@ -988,9 +989,10 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
       auto* var = exec_scope.FindVar(vname);
       if (var == nullptr) continue;
       if (var->IsType<framework::LoDTensor>()) {
-        CheckTensorNANOrInf(vname, var->Get<framework::LoDTensor>());
+        CheckTensorNANOrInf(type_, vname, var->Get<framework::LoDTensor>());
       } else if (var->IsType<framework::SelectedRows>()) {
-        CheckTensorNANOrInf(vname, var->Get<framework::SelectedRows>().value());
+        CheckTensorNANOrInf(type_, vname,
+                            var->Get<framework::SelectedRows>().value());
       }
     }
   }
diff --git a/paddle/fluid/framework/python_headers.h b/paddle/fluid/framework/python_headers.h
index 422af19a13683dc9ae6377cac1b1ab2c2ac8f96b..8f9e3fad57f7bb87e78e334e741be23751417a78 100644
--- a/paddle/fluid/framework/python_headers.h
+++ b/paddle/fluid/framework/python_headers.h
@@ -24,3 +24,11 @@ limitations under the License. */
 
 #pragma pop_macro("_XOPEN_SOURCE")
 #pragma pop_macro("_POSIX_C_SOURCE")
+
+#if !defined(PYBIND11_HIDDEN)  // provide a fallback definition
+#ifdef _WIN32
+#define PYBIND11_HIDDEN __declspec(dllexport)
+#else
+#define PYBIND11_HIDDEN __attribute__((visibility("hidden")))
+#endif  // _WIN32
+#endif  // !defined(PYBIND11_HIDDEN)
diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc
index 85d15c5d3faa5a3d021b12396f9f8ea7735f9148..89166bfd15f26e066d32a7191217a9b9a8977bda 100644
--- a/paddle/fluid/framework/tensor_util.cc
+++ b/paddle/fluid/framework/tensor_util.cc
@@ -14,6 +14,8 @@
 #include "paddle/fluid/framework/tensor_util.h"
 #include <algorithm>
 #include <limits>
+#include <memory>
+#include <utility>
 #include <vector>
 #include "paddle/fluid/framework/data_type.h"
 
diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index aff5cf24be7c41cf58929069768d4fdb34386ae6..012dfc1c7f66027bc5375794e0d70ed78e70e781 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -18,6 +18,7 @@
 #include <limits>
 #include <map>
 #include <random>
+#include <unordered_set>
 #include <utility>
 
 #include "paddle/fluid/framework/lod_tensor.h"
@@ -139,6 +140,8 @@ class Autograd {
           }
         }
       }
+
+      ready_op->InvokeBackwardHooks();
     }
   }
 
@@ -156,8 +159,10 @@ class Autograd {
       for (auto it : candidate->pre_ops_) {
         for (OpBase* pre_op : it.second) {
           if (!pre_op) continue;
-          VLOG(5) << "op dep " << candidate->op_desc_->Type() << " <---- "
-                  << it.first << " <---- " << pre_op->op_desc_->Type();
+          VLOG(5) << "op dep " << candidate->op_desc_->Type() << " trace id "
+                  << candidate->trace_id_ << " <---- " << it.first << " <---- "
+                  << pre_op->op_desc_->Type() << " trace id "
+                  << pre_op->trace_id_;
           if (visited.find(pre_op) == visited.end()) {
             visited.insert(pre_op);
             queue.push_back(pre_op);
@@ -211,6 +216,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
     return {};
   }
 
+  VLOG(3) << "apply op grad: " << op_desc_->Type();
   std::vector<framework::VariableValueMap> grad_outputs;
   if (backward_id_ > 0) {
     VLOG(3) << "py_layer_grad";
@@ -272,6 +278,22 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
   return input_vars_;
 }
 
+void OpBase::InvokeBackwardHooks() {
+  // Calls each registered hook, in registration order, with this op.
+  VLOG(3) << "call backward hooks, hooks num: " << backward_hooks_.size();
+
+  for (py::object& hook : backward_hooks_) {
+    hook(this);
+  }
+}
+
+void OpBase::RegisterBackwardHooks(const py::object& callable) {
+  // Appends `callable` to the hooks run by InvokeBackwardHooks().
+  VLOG(3) << "Register backward hooks " << trace_id_;
+  // TODO(minqiyang): check the callable format
+  backward_hooks_.emplace_back(callable);
+}
+
 void VarBase::RunBackward() {
   if (!pre_op_) return;
 
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index bbf614831ca817031b209fffec043495ea24d10f..7a9f33dc1e6cbc0c3ec1e649906fb0a8de047189 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -123,22 +123,32 @@ class VarBase {
 
  private:
   VarBase(framework::Variable* var, VarBase* grad, bool stop_gradient)
-      : var_desc_(nullptr),
+      : name_(),
+        var_desc_(nullptr),
         var_(var),
         grads_(grad),
+        block_(nullptr),
+        persistable_(false),
         stop_gradient_(stop_gradient),
         pre_op_(nullptr),
+        pre_op_out_name_(),
         pre_op_out_idx_(-1) {}
 
  public:
   virtual ~VarBase() {
+    // TODO(minqiyang): remove var desc from block desc
     if (var_) {
       delete var_;
+      var_ = nullptr;
     }
 
     if (grads_) {
       delete grads_;
+      grads_ = nullptr;
     }
+
+    pre_op_ = nullptr;
+    pre_op_out_idx_ = -1;
   }
 
   inline OpBase* PreOp() const { return pre_op_; }
@@ -151,6 +161,14 @@ class VarBase {
 
   void RunBackward();
 
+  inline void ResetPreOp(OpBase* op) {
+    // Drops the pre-op link, but only when `op` is the op recorded as pre-op.
+    if (op != pre_op_) return;
+
+    pre_op_ = nullptr;
+    pre_op_out_idx_ = -1;
+  }
+
   void TrackPreOp(OpBase* pre_op, const std::string& pre_op_out_name,
                   int pre_op_out_idx, bool pre_op_stop_gradient) {
     pre_op_ = pre_op;
@@ -184,11 +202,15 @@ class VarBase {
     return string::Sprintf("%s@IGrad", var_desc_->Name());
   }
 
+  std::string name_;
   framework::VarDesc* var_desc_;
 
   framework::Variable* var_;
   VarBase* grads_;
 
+  framework::BlockDesc* block_;
+  bool persistable_;
+
  private:
   bool stop_gradient_;
   OpBase* pre_op_;
@@ -199,15 +221,27 @@ class VarBase {
 /* The wrapper for OpDesc which holds a OpDesc and a OpDesc of its
  * gradient. This object should be managed totally by Python intepreter.
  */
-class OpBase {
+class PYBIND11_HIDDEN OpBase {
  public:
   OpBase()
       : op_desc_(nullptr),
         forward_id_(-1),
         backward_id_(-1),
-        place_(platform::CPUPlace()) {}
+        trace_id_(-1),
+        place_(platform::CPUPlace()),
+        backward_hooks_() {}
 
   virtual ~OpBase() {
+    // TODO(minqiyang): remove op_desc from block_desc in tracer
+    //
+    // reset all output vars' pre op
+    for (auto iter : output_vars_) {
+      for (VarBase* var : iter.second) {
+        var->ResetPreOp(this);
+      }
+    }
+
+    // release resource
     for (framework::OpDesc* desc : grad_op_descs_) {
       delete desc;
     }
@@ -215,6 +249,10 @@ class OpBase {
 
   std::map<std::string, std::vector<VarBase*>> ApplyGrad();
 
+  void RegisterBackwardHooks(const py::object& callable);
+
+  void InvokeBackwardHooks();
+
   // One of `op_desc_` or `forward_id_` is set, not both.
   // For pure python PyLayer, use `forward_id_`, otherwise, use op_desc_.
   framework::OpDesc* op_desc_;
@@ -225,6 +263,7 @@ class OpBase {
   // Note: each fwd op corresponds to a vector of bwd ops.
   std::vector<framework::OpDesc*> grad_op_descs_;
   int backward_id_;
+  int trace_id_;
 
   platform::Place place_;
 
@@ -239,6 +278,8 @@ class OpBase {
   std::vector<framework::VariableValueMap> grad_output_vars_;
 
   framework::BlockDesc* block_;
+
+  std::vector<py::object> backward_hooks_;
 };
 
 class Layer {
diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc
index 2993ab309027f9306c61023b55b1c061e0ebddc0..0cb1676372fdd35a762e897d269550f2d1e1ac36 100644
--- a/paddle/fluid/imperative/tracer.cc
+++ b/paddle/fluid/imperative/tracer.cc
@@ -14,15 +14,32 @@
 
 #include "paddle/fluid/imperative/tracer.h"
 
+#include <memory>
 #include <set>
+#include <unordered_map>
+#include <unordered_set>
 
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/enforce.h"
 
+#ifdef WITH_GPERFTOOLS
+#include "gperftools/profiler.h"
+#endif
+
+DEFINE_string(
+    tracer_profile_fname, "",
+    "Profiler filename for imperative tracer, which is generated by gperftools."
+    " Only valid when compiled `WITH_PROFILER=ON`. Empty if disabled.");
+
 namespace paddle {
 namespace imperative {
 
+static std::once_flag gTracerProfileOnce;  // used with std::call_once below
+#ifdef WITH_GPERFTOOLS
+static bool gTracerProfilerStarted = false;  // set after ProfilerStart()
+#endif  // WITH_GPERFTOOLS
+
 void CreateGradOp(const framework::OpDesc& op_desc,
                   const std::unordered_set<std::string>& no_grad_set,
                   const std::vector<framework::BlockDesc*>& grad_sub_block,
@@ -68,15 +85,36 @@ platform::Place GetExpectedPlace(platform::Place place, VarBasePtrMap inputs) {
   return result;
 }
 
+Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {
+  // Profiling is opt-in via FLAGS_tracer_profile_fname and set up only once.
+  if (FLAGS_tracer_profile_fname.empty()) return;
+  std::call_once(gTracerProfileOnce, [] {
+#ifdef WITH_GPERFTOOLS
+    ProfilerStart(FLAGS_tracer_profile_fname.c_str());
+    gTracerProfilerStarted = true;
+#else
+    LOG(WARNING) << "Paddle is not compiled with gperftools. "
+                    "FLAGS_tracer_profile_fname will be ignored";
+#endif
+  });
+}
+
 std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
                                     const VarBasePtrMap& outputs,
                                     framework::BlockDesc* block,
                                     const platform::Place expected_place,
                                     const bool stop_gradient) {
+#ifdef WITH_GPERFTOOLS
+  if (gTracerProfilerStarted) {
+    ProfilerFlush();
+  }
+#endif
+
   std::map<std::string, VarBase*> vars;
 
   framework::OpDesc* op_desc = op->op_desc_;
-  VLOG(3) << "tracer tracing " << op_desc->Type();
+  VLOG(3) << "tracer tracing " << op_desc->Type() << " trace id "
+          << op->trace_id_;
   op_desc->InferShape(*block);
   op_desc->InferVarType(block);
 
@@ -99,11 +137,13 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
       if (inp->PreOp() && !inp->IsStopGradient()) {
         op->pre_ops_[it.first].push_back(inp->PreOp());
         op->pre_ops_out_idx_[it.first].push_back(inp->PreOpOutIdx());
+        VLOG(3) << "add pre op " << inp->PreOp()->op_desc_->Type();
       } else {
         op->pre_ops_[it.first].push_back(nullptr);
       }
       VLOG(3) << "input vname " << inp->var_desc_->Name() << " "
-              << inp->var_->IsInitialized();
+              << inp->var_->IsInitialized() << " stop_gradient "
+              << inp->IsStopGradient();
     }
   }
 
@@ -155,6 +195,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
 
     op->grad_input_vars_.resize(op->grad_op_descs_.size());
     op->grad_output_vars_.resize(op->grad_op_descs_.size());
+
     for (size_t i = 0; i < op->grad_op_descs_.size(); ++i) {
       framework::OpDesc* grad_op_desc = op->grad_op_descs_[i];
       for (auto it : grad_op_desc->Inputs()) {
@@ -167,7 +208,6 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
             PADDLE_ENFORCE(fwd_var_it != vars.end());
             // Forward inputs or outputs.
             grad_in_vars.push_back(fwd_var_it->second->var_);
-            vars_saved_for_backward.insert(it.first);
           } else {
             VarBase* var = vars[var_it->second];
             if (!var->grads_->var_->IsInitialized()) {
@@ -177,6 +217,8 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
             // Douts.
             grad_in_vars.push_back(var->grads_->var_);
           }
+
+          vars_saved_for_backward.insert(it.first);
         }
       }
 
diff --git a/paddle/fluid/imperative/tracer.h b/paddle/fluid/imperative/tracer.h
index 98909e378f0e4188250fcb6efd9502dcc9740da4..8a0267c37f7c98a172fe0fa573955dc420952c0a 100644
--- a/paddle/fluid/imperative/tracer.h
+++ b/paddle/fluid/imperative/tracer.h
@@ -40,7 +40,7 @@ platform::Place GetExpectedPlace(platform::Place place, VarBasePtrMap inputs);
 
 class Tracer {
  public:
-  explicit Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {}
+  explicit Tracer(framework::BlockDesc* root_block);
 
   virtual ~Tracer() {}
 
diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt
index 157862016e3556902f6507e02417624363ed1029..762640d6d1ce12dff511fc7149e872efa834036c 100644
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -16,6 +16,7 @@ add_subdirectory(utils)
 if (TENSORRT_FOUND)
   add_subdirectory(tensorrt)
 endif()
+# add_subdirectory(anakin)
 
 get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
 get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)
diff --git a/paddle/fluid/inference/anakin/CMakeLists.txt b/paddle/fluid/inference/anakin/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b418af62f8cae4513bcca24f057d1fe100bbea25
--- /dev/null
+++ b/paddle/fluid/inference/anakin/CMakeLists.txt
@@ -0,0 +1,4 @@
+cc_library(anakin_engine SRCS engine.cc)
+target_link_libraries(anakin_engine anakin anakin_saber_common)
+cc_test(test_anakin_engine SRCS test_anakin_engine.cc DEPS anakin_engine)
+add_subdirectory(convert)  # convert/ builds anakin_op_converter on top of anakin_engine
diff --git a/paddle/fluid/inference/anakin/convert/CMakeLists.txt b/paddle/fluid/inference/anakin/convert/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f5bfee861f14877b5a67bc48aeb14b8213a27370
--- /dev/null
+++ b/paddle/fluid/inference/anakin/convert/CMakeLists.txt
@@ -0,0 +1,2 @@
+cc_library(anakin_op_converter SRCS fc.cc registrar.cc DEPS anakin_engine framework_proto scope)
+cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op)  # presumably needs mul_op because "mul" ops are what get converted as fc -- confirm
diff --git a/paddle/fluid/inference/anakin/convert/fc.cc b/paddle/fluid/inference/anakin/convert/fc.cc
new file mode 100644
index 0000000000000000000000000000000000000000..33a5aff1de2851ad55c2df83cc48ba86f8ded754
--- /dev/null
+++ b/paddle/fluid/inference/anakin/convert/fc.cc
@@ -0,0 +1,73 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/fc.h"
+#include <algorithm>
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::Precision;
+using anakin::saber::NV;
+using anakin::saber::X86;
+using anakin::saber::Shape;
+using anakin::PBlock;
+using anakin::PTuple;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+void FcOpConverter::operator()(const framework::proto::OpDesc &op,
+                               const framework::Scope &scope, bool test_mode) {
+  // Adds an Anakin "Dense" op (no bias) for an op with one X, one Y and one
+  // Out; Y is the weight and is read from `scope`. `test_mode` is unused here.
+  framework::OpDesc op_desc(op, nullptr);
+  PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
+  PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL);
+  PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);
+
+  auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
+  auto *y_v = scope.FindVar(op_desc.Input("Y").front());
+  PADDLE_ENFORCE_NOT_NULL(y_v);
+  auto *y_t = y_v->GetMutable<framework::LoDTensor>();
+  auto input_name = op_desc.Input("X").front();
+  auto output_name = op_desc.Output("Out").front();
+  auto weight_shape = framework::vectorize2int(y_t->dims());
+  // out_dim is taken from dimension 1, so the weight must have rank >= 2.
+  PADDLE_ENFORCE_GE(weight_shape.size(), 2UL);
+  engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
+  engine_->AddOpAttr(op_name, "bias_term", false);
+  engine_->AddOpAttr(op_name, "axis", 1);
+  int out_dim = weight_shape[1];
+  engine_->AddOpAttr(op_name, "out_dim", out_dim);
+
+  weight_shape.push_back(1);
+  weight_shape.push_back(1);
+  Shape anakin_shape(weight_shape);
+  // Copy the weight into a CPU tensor, then into the block's h_tensor/d_tensor.
+  framework::LoDTensor weight_tensor;
+  weight_tensor.Resize(y_t->dims());
+  TensorCopySync((*y_t), platform::CPUPlace(), &weight_tensor);
+  auto *weight1 =
+      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(anakin_shape);
+  float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
+  std::copy_n(weight_tensor.data<float>(), weight_tensor.numel(), cpu_data);
+  weight1->d_tensor().set_shape(anakin_shape);
+  weight1->d_tensor().copy_from(weight1->h_tensor());
+  engine_->AddOpAttr(op_name, "weight_1", *weight1);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/anakin/convert/fc.h b/paddle/fluid/inference/anakin/convert/fc.h
new file mode 100644
index 0000000000000000000000000000000000000000..b670486f12b36043a01ceb002da8756901ed01ce
--- /dev/null
+++ b/paddle/fluid/inference/anakin/convert/fc.h
@@ -0,0 +1,38 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+// Converter for ops handled as "fc" (see the Registrar below); operator() is
+// only declared here.
+class FcOpConverter : public AnakinOpConverter {
+ public:
+  FcOpConverter() = default;
+  ~FcOpConverter() override {}
+
+  // Overrides AnakinOpConverter::operator().
+  void operator()(const framework::proto::OpDesc &op,
+                  const framework::Scope &scope, bool test_mode) override;
+};
+
+static Registrar<FcOpConverter> register_fc_op_converter("fc");
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/anakin/convert/op_converter.h b/paddle/fluid/inference/anakin/convert/op_converter.h
new file mode 100644
index 0000000000000000000000000000000000000000..b9a221079dcec78fc86ebed7dfac0c59ec0f8540
--- /dev/null
+++ b/paddle/fluid/inference/anakin/convert/op_converter.h
@@ -0,0 +1,112 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include "framework/core/types.h"
+#include "paddle/fluid/framework/block_desc.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/inference/anakin/convert/registrar.h"
+#include "paddle/fluid/inference/anakin/engine.h"
+#include "paddle/fluid/inference/utils/singleton.h"
+#include "saber/saber_types.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+using AnakinNvEngine =
+    AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>;
+
+// Base class of the op converters. ConvertOp/ConvertBlock look the converter
+// for each op up in OpRegister and run it against an AnakinNvEngine.
+class AnakinOpConverter {
+ public:
+  AnakinOpConverter() = default;
+
+  // Converts a single op; the base implementation does nothing.
+  virtual void operator()(const framework::proto::OpDesc &op,
+                          const framework::Scope &scope, bool test_mode) {}
+  // Picks the converter for `op`, hands it `engine` and invokes it.
+  void ConvertOp(const framework::proto::OpDesc &op,
+                 const std::unordered_set<std::string> &parameters,
+                 const framework::Scope &scope, AnakinNvEngine *engine,
+                 bool test_mode = false) {
+    framework::OpDesc op_desc(op, nullptr);
+    std::string op_type = op_desc.Type();
+    std::shared_ptr<AnakinOpConverter> it{nullptr};
+
+    // A "mul" whose Y input is listed in `parameters` is converted as "fc".
+    if (op_type == "mul") {
+      PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL);
+      std::string Y = op_desc.Input("Y")[0];
+      if (parameters.count(Y)) it = OpRegister::instance()->Get("fc");
+    }
+    if (!it) it = OpRegister::instance()->Get(op_type);
+    PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", op_type);
+    it->SetEngine(engine);
+    (*it)(op, scope, test_mode);
+  }
+
+  // Converts the ops of `block` in index order while holding mutex_.
+  void ConvertBlock(const framework::proto::BlockDesc &block,
+                    const std::unordered_set<std::string> &parameters,
+                    const framework::Scope &scope, AnakinNvEngine *engine) {
+    std::unique_lock<std::mutex> lock(mutex_);
+    for (auto i = 0; i < block.ops_size(); i++) {
+      auto &op = block.ops(i);
+      ConvertOp(op, parameters, scope, engine);
+    }
+  }
+  void SetEngine(AnakinNvEngine *engine) { engine_ = engine; }
+  virtual ~AnakinOpConverter() {}
+
+ protected:
+  bool test_mode_{false};
+  AnakinNvEngine *engine_{nullptr};
+
+ private:
+  std::unordered_map<std::string, AnakinOpConverter *> converters_;
+  framework::Scope *scope_{nullptr};
+  std::mutex mutex_;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+// Defines a registrar struct, a global instance of it and a Touch function.
+#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__)                \
+  struct anakin_##op_type__##_converter                                     \
+      : public ::paddle::framework::Registrar {                             \
+    anakin_##op_type__##_converter() {                                      \
+      ::paddle::inference::                                                 \
+          Registry<paddle::inference::anakin::AnakinOpConverter>::Register< \
+              ::paddle::inference::anakin::Converter__>(#op_type__);        \
+    }                                                                       \
+  };                                                                        \
+  anakin_##op_type__##_converter anakin_##op_type__##_converter__;          \
+  int TouchConverterRegister_anakin_##op_type__() {                         \
+    anakin_##op_type__##_converter__.Touch();                               \
+    return 0;                                                               \
+  }
+// Declares TouchConverterRegister_anakin_<op_type__>() and calls it once.
+#define USE_ANAKIN_CONVERTER(op_type__)                                    \
+  extern int TouchConverterRegister_anakin_##op_type__();                  \
+  static int use_op_converter_anakin_##op_type__ __attribute__((unused)) = \
+      TouchConverterRegister_anakin_##op_type__();
diff --git a/paddle/fluid/inference/anakin/convert/registrar.cc b/paddle/fluid/inference/anakin/convert/registrar.cc
new file mode 100644
index 0000000000000000000000000000000000000000..701ebdb2d43cf524330f946ac56d32dfa884f42a
--- /dev/null
+++ b/paddle/fluid/inference/anakin/convert/registrar.cc
@@ -0,0 +1,34 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/convert/registrar.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+std::shared_ptr<AnakinOpConverter> OpRegister::Get(const std::string &name) {
+  // Unknown names yield nullptr; otherwise the stored factory is invoked.
+  const auto found = registry_.find(name);
+  return found == registry_.end() ? nullptr : found->second();
+}
+
+OpRegister *OpRegister::instance() {
+  static OpRegister singleton;  // function-local static, built on first call
+  return &singleton;
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/anakin/convert/registrar.h b/paddle/fluid/inference/anakin/convert/registrar.h
new file mode 100644
index 0000000000000000000000000000000000000000..afce66ca084143ae203af9a60089aa2f5d18a725
--- /dev/null
+++ b/paddle/fluid/inference/anakin/convert/registrar.h
@@ -0,0 +1,58 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <functional>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+class AnakinOpConverter;
+
+class OpRegister {  // Maps a converter name to the factory that yields it.
+  using RegisterFnType = std::function<std::shared_ptr<AnakinOpConverter>()>;
+
+ public:
+  OpRegister() = default;
+  std::shared_ptr<AnakinOpConverter> Get(const std::string &name);
+  static OpRegister *instance();
+  // Stores `fn` under `name`, replacing any factory already stored there.
+  void OpRegisterFn(const std::string &name, RegisterFnType fn) {
+    registry_[name] = std::move(fn);
+  }
+
+ private:
+  std::map<std::string, RegisterFnType> registry_;  // name -> factory
+};
+
+template <typename T, typename... Args>
+class Registrar {  // Constructing one registers a shared T under `name`.
+ public:
+  Registrar(const std::string &name, Args... args) {
+    std::shared_ptr<AnakinOpConverter> shared_converter =
+        std::make_shared<T>(std::move(args)...);
+    auto factory = [shared_converter]() { return shared_converter; };
+    OpRegister::instance()->OpRegisterFn(name, factory);
+  }
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/anakin/convert/test_fc_op.cc b/paddle/fluid/inference/anakin/convert/test_fc_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7b8ceefe28873f0ffb9cedbb04b832ba029b7de4
--- /dev/null
+++ b/paddle/fluid/inference/anakin/convert/test_fc_op.cc
@@ -0,0 +1,54 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/anakin/convert/fc.h"
+#include "paddle/fluid/inference/anakin/convert/op_converter.h"
+#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+TEST(fc_op, test) {
+  auto fc_converter = OpRegister::instance()->Get("fc");
+  ASSERT_TRUE(fc_converter != nullptr);
+  // The "fc" converter is only looked up here, never registered by this test;
+  // NOTE(review): presumably including fc.h performs the registration.
+
+  std::unordered_set<std::string> parameters({"mul_y"});
+  framework::Scope scope;
+  AnakinConvertValidation validator(parameters, scope);
+  validator.DeclInputVar("mul_x", {1, 1, 1, 1});
+  validator.DeclParamVar("mul_y", {1, 2});
+  validator.DeclOutputVar("mul_out", {1, 1, 1, 2});
+
+  // Prepare Op description
+  framework::OpDesc desc;
+  desc.SetType("mul");
+  desc.SetInput("X", {"mul_x"});
+  desc.SetInput("Y", {"mul_y"});
+  desc.SetOutput("Out", {"mul_out"});
+  int num_flatten_dims = 3;
+  desc.SetAttr("x_num_col_dims", num_flatten_dims);
+  validator.SetOp(*desc.Proto());
+
+  validator.Execute(10);
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
+
+USE_OP(mul);
diff --git a/paddle/fluid/inference/anakin/convert/ut_helper.h b/paddle/fluid/inference/anakin/convert/ut_helper.h
new file mode 100644
index 0000000000000000000000000000000000000000..38d8e596a738ac98c9f9870473f72dcc72b0e7aa
--- /dev/null
+++ b/paddle/fluid/inference/anakin/convert/ut_helper.h
@@ -0,0 +1,202 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <random>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/inference/anakin/engine.h"
+#include "paddle/fluid/inference/analysis/helper.h"
+#include "paddle/fluid/inference/utils/singleton.h"
+#include "paddle/fluid/platform/enforce.h"
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::Precision;
+using anakin::saber::NV;
+using anakin::saber::X86;
+using anakin::saber::Shape;
+using anakin::PBlock;
+using anakin::PTuple;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+/*
+ * Get a random float sampled by std::uniform_real_distribution over
+ * [low, high). Declared inline because it is defined in a header.
+ */
+inline float random(float low, float high) {
+  static std::random_device rd;
+  static std::mt19937 mt(rd());
+  std::uniform_real_distribution<double> dist(low, high);
+  return static_cast<float>(dist(mt));
+}
+
+inline void RandomizeTensor(framework::LoDTensor* tensor,
+                            const platform::Place& place,
+                            const platform::DeviceContext& ctx) {
+  // Fills `tensor` (at `place`) with floats from random(0., 1.).
+  auto dims = tensor->dims();
+  size_t num_elements = analysis::AccuDims(dims, dims.size());
+  PADDLE_ENFORCE_GT(num_elements, 0);
+  // Generate on the CPU first; `inline` keeps this header helper ODR-safe.
+  framework::LoDTensor temp_tensor;
+  temp_tensor.Resize(dims);
+  auto* temp_data = temp_tensor.mutable_data<float>(platform::CPUPlace());
+  for (size_t i = 0; i < num_elements; i++) {
+    temp_data[i] = random(0., 1.);
+  }
+  // Synchronous copy of the staged values into the destination tensor.
+  TensorCopySync(temp_tensor, place, tensor);
+}
+
+/*
+ * Test helper that runs a Fluid op and the Anakin layer converted from it on
+ * the same scope variables, then logs both outputs side by side. It only
+ * logs the values; it does not assert that they match.
+ */
+class AnakinConvertValidation {
+  using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>;
+
+ public:
+  AnakinConvertValidation() = delete;
+
+  // `scope` is written to (Var/GetMutable), so it must name a mutable object.
+  AnakinConvertValidation(const std::unordered_set<std::string>& parameters,
+                          const framework::Scope& scope)
+      : parameters_(parameters),
+        scope_(const_cast<framework::Scope&>(scope)),
+        place_(0) {
+    PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
+    engine_.reset(new AnakinEngine<NV, Precision::FP32>(true));
+  }
+
+  ~AnakinConvertValidation() { cudaStreamDestroy(stream_); }  // undo ctor
+
+  // Declare a Variable as input with random initialization.
+  void DeclInputVar(const std::string& name,
+                    const std::vector<int>& tensor_dims) {
+    DeclVar(name, tensor_dims);  // the anakin input should be declared here
+  }
+
+  void DeclParamVar(const std::string& name, const std::vector<int>& dim_vec) {
+    DeclVar(name, dim_vec);
+  }
+
+  void DeclOutputVar(const std::string& name, const std::vector<int>& dim_vec) {
+    DeclVar(name, dim_vec);  // the anakin output should be declared here
+  }
+
+  // Gets variable `name` from the scope, resizes it and randomizes it.
+  void DeclVar(const std::string& name, const std::vector<int>& dim_vec) {
+    platform::CUDADeviceContext ctx(place_);
+    auto* x = scope_.Var(name);
+    auto* x_tensor = x->GetMutable<framework::LoDTensor>();
+    x_tensor->Resize(framework::make_ddim(dim_vec));
+    RandomizeTensor(x_tensor, place_, ctx);
+  }
+
+  // Creates the Fluid op, converts it into the engine and prepares the graph.
+  void SetOp(const framework::proto::OpDesc& desc) {
+    op_ = framework::OpRegistry::CreateOp(desc);
+    op_desc_.reset(new framework::OpDesc(desc, nullptr));
+
+    Singleton<AnakinOpConverter>::Global().ConvertOp(
+        desc, parameters_, scope_, engine_.get(), true /*test_mode*/);
+    engine_->Freeze();
+    for (const auto& input : op_desc_->InputArgumentNames()) {
+      if (parameters_.count(input)) continue;
+      auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(scope_,
+                                                                        input);
+      auto t_shape = framework::vectorize2int(t.dims());
+      engine_->SetInputShape(input, t_shape);
+    }
+    engine_->Optimize();
+    engine_->InitGraph();
+  }
+
+  // Runs the Fluid op, then the Anakin engine, and logs both results.
+  // Outputs named in `neglected_output` are skipped: some ops, like batch
+  // norm, declare outputs that are only used during training.
+  void Execute(int batch_size,
+               std::unordered_set<std::string> neglected_output = {}) {
+    // Execute Fluid Op
+    platform::CUDADeviceContext ctx(place_);
+    op_->Run(scope_, place_);
+
+    std::map<std::string, framework::LoDTensor*> inputs;
+    for (const auto& input : op_desc_->InputArgumentNames()) {
+      if (parameters_.count(input)) continue;
+      auto* var = scope_.FindVar(input);
+      auto tensor = var->GetMutable<framework::LoDTensor>();
+      inputs.insert({input, tensor});
+    }
+
+    // Copy the Fluid results to the host first: the Anakin results are read
+    // back from these same output tensors after engine_->Execute().
+    std::map<std::string, framework::LoDTensor*> outputs;
+    std::vector<std::vector<float>> fluid_outputs;
+    for (const auto& output : op_desc_->OutputArgumentNames()) {
+      if (neglected_output.count(output)) continue;
+      std::vector<float> fluid_out;
+      auto* var = scope_.FindVar(output);
+      auto tensor = var->GetMutable<framework::LoDTensor>();
+      framework::TensorToVector(*tensor, ctx, &fluid_out);
+      fluid_outputs.push_back(fluid_out);
+      outputs.insert({output, tensor});
+    }
+
+    engine_->Execute(inputs, outputs);
+    int i_output = 0;
+    for (const auto& output : op_desc_->OutputArgumentNames()) {
+      if (neglected_output.count(output)) continue;
+      std::vector<float> anakin_out;
+      auto* var = scope_.FindVar(output);
+      auto tensor = var->GetMutable<framework::LoDTensor>();
+      framework::TensorToVector(*tensor, ctx, &anakin_out);
+
+      const auto& fluid_out = fluid_outputs[i_output++];
+      // Guard the indexing below against a mismatched element count.
+      PADDLE_ENFORCE_EQ(anakin_out.size(), fluid_out.size());
+      for (size_t i = 0; i < anakin_out.size(); i++) {
+        LOG(INFO) << "Output[" << i << "]: anakin[" << anakin_out[i] << "], "
+                  << "fluid[" << fluid_out[i] << "]";
+      }
+    }
+  }
+
+  framework::Scope& scope() { return scope_; }
+
+ private:
+  std::unique_ptr<AnakinNvEngineT> engine_{nullptr};
+  cudaStream_t stream_;
+  std::unique_ptr<framework::OperatorBase> op_;
+  std::unique_ptr<framework::OpDesc> op_desc_;
+  const std::unordered_set<std::string>& parameters_;
+  framework::Scope& scope_;
+  platform::CUDAPlace place_;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc
new file mode 100644
index 0000000000000000000000000000000000000000..6549991474f4834f0c3ef74c60d294cca6bebc91
--- /dev/null
+++ b/paddle/fluid/inference/anakin/engine.cc
@@ -0,0 +1,112 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/anakin/engine.h"
+#include <algorithm>
+#include <cstring>
+#include <map>
+#include <utility>
+#include "paddle/fluid/framework/ddim.h"
+
+using anakin::Precision;
+using anakin::OpRunType;
+using paddle::framework::LoDTensor;
+template <typename T, Precision P, OpRunType O>
+using AnakinNetT = anakin::Net<T, P, O>;
+
+template <typename T, Precision P>
+using AnakinGraphT = anakin::graph::Graph<T, P>;
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(bool need_summary)
+    : graph_(new AnakinGraphT<TargetT, PrecisionType>()),  // engine-owned
+      net_(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary)) {}
+
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+AnakinEngine<TargetT, PrecisionType, RunType>::~AnakinEngine() {}  // no-op
+
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+void AnakinEngine<TargetT, PrecisionType, RunType>::SetInputShape(
+    const std::string &name, std::vector<int> shape) {  // shape: moved below
+  graph_->AddOpAttr<::anakin::PTuple<int>>(name, "input_shape",
+                                           std::move(shape));
+}
+
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+void AnakinEngine<TargetT, PrecisionType, RunType>::InitGraph() {
+  net_->init(*graph_);  // initialize the net from this engine's graph
+}
+
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+void AnakinEngine<TargetT, PrecisionType, RunType>::AddOp(
+    const std::string &name, const std::string &type,
+    const std::vector<std::string> &inputs,
+    const std::vector<std::string> &outputs) {  // enforced: must succeed
+  PADDLE_ENFORCE(graph_->AddOp(name, type, inputs, outputs), "Add operation.");
+}
+
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
+    const std::map<std::string, framework::LoDTensor *> &inputs,
+    const std::map<std::string, framework::LoDTensor *> &outputs) {
+  for (const auto &input : inputs) {  // key: net input name, value: tensor
+    auto *tensor = input.second;
+    auto *data = tensor->data<float>();  // buffers are accessed as float
+    auto shape = framework::vectorize2int(tensor->dims());
+    ::anakin::saber::Shape anakin_shape(shape);
+    auto *anakin_input = net_->get_in(input.first);
+    ::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
+                                                       anakin_shape);
+    anakin_input->share_from(tmp_anakin_tensor);
+  }
+  // NOTE(review): presumably output tensors must be allocated before this.
+  for (const auto &output : outputs) {
+    auto *tensor = output.second;
+    auto *data = tensor->data<float>();
+    auto shape = framework::vectorize2int(tensor->dims());
+    ::anakin::saber::Shape anakin_shape(shape);
+    auto *anakin_output = net_->get_out(output.first);
+    ::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), 0,
+                                                       anakin_shape);
+    anakin_output->share_from(tmp_anakin_tensor);
+  }
+  net_->prediction();  // run the net once all inputs/outputs are bound
+}
+
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+void AnakinEngine<TargetT, PrecisionType, RunType>::Freeze() {
+  PADDLE_ENFORCE(graph_->Freeze(), "Freeze anakin subgraph.");  // must succeed
+}
+
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+void AnakinEngine<TargetT, PrecisionType, RunType>::Optimize() {
+  PADDLE_ENFORCE(graph_->Optimize(), "Graph optimization.");  // must succeed
+}
+
+template <typename TargetT, Precision PrecisionType, OpRunType RunType>
+std::unique_ptr<AnakinEngine<TargetT, PrecisionType, RunType>>
+AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {
+  std::unique_ptr<AnakinEngine> engine(new AnakinEngine());  // leak-safe
+  engine->net_ = net_->Clone();  // only the net is cloned; graph_ is fresh
+  return engine;
+}
+
+template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>;
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/anakin/engine.h b/paddle/fluid/inference/anakin/engine.h
new file mode 100644
index 0000000000000000000000000000000000000000..d8f32f57be5aabb91ba720c6457a03f15083db43
--- /dev/null
+++ b/paddle/fluid/inference/anakin/engine.h
@@ -0,0 +1,80 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/inference/engine.h"
+#include "paddle/fluid/inference/utils/singleton.h"
+
+#include "framework/core/net/net.h"
+#include "framework/core/types.h"
+#include "framework/graph/graph.h"
+#include "saber/saber_types.h"
+
+namespace anakin {
+
+template <typename, Precision, OpRunType>
+class Net;
+
+namespace graph {
+template <typename, Precision>
+class Graph;
+}  // namespace graph
+}  // namespace anakin
+
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+template <typename TargetT, ::anakin::Precision PrecisionType,
+          ::anakin::OpRunType RunType = ::anakin::OpRunType::ASYNC>
+class AnakinEngine {  // Owns one Anakin graph and the net initialized from it.
+ public:
+  explicit AnakinEngine(bool need_summary = false);
+  ~AnakinEngine();
+  void InitGraph();
+  void SetInputShape(const std::string &name, std::vector<int> shape);
+  void AddOp(const std::string &name, const std::string &type,
+             const std::vector<std::string> &inputs,
+             const std::vector<std::string> &outputs);
+  // Adds `attr_value` as attribute `attr_name` of op `op_name` in the graph.
+  template <typename T>
+  void AddOpAttr(const std::string &op_name, const std::string &attr_name,
+                 const T &attr_value) {
+    PADDLE_ENFORCE(graph_->AddOpAttr(op_name, attr_name, attr_value),
+                   "Add operation's attribution.");
+  }
+
+  std::unique_ptr<AnakinEngine> Clone();  // the definition copies net_ only
+  void Freeze();
+  void Optimize();
+  void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
+               const std::map<std::string, framework::LoDTensor *> &outputs);
+
+ private:
+  using NetT = ::anakin::Net<TargetT, PrecisionType, RunType>;
+  using GraphT = ::anakin::graph::Graph<TargetT, PrecisionType>;
+  std::unique_ptr<GraphT> graph_;
+  std::unique_ptr<NetT> net_;
+};
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/anakin/test_anakin_engine.cc b/paddle/fluid/inference/anakin/test_anakin_engine.cc
new file mode 100644
index 0000000000000000000000000000000000000000..571294d3e22fb9489686bfcb2f3a64198099f970
--- /dev/null
+++ b/paddle/fluid/inference/anakin/test_anakin_engine.cc
@@ -0,0 +1,96 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include <map>
+
+#include "framework/core/net/net.h"
+#include "framework/graph/graph.h"
+#include "framework/graph/graph_global_mem.h"
+#include "paddle/fluid/inference/anakin/engine.h"
+
+using anakin::graph::GraphGlobalMem;
+using anakin::AK_FLOAT;
+using anakin::Precision;
+using anakin::saber::NV;
+using anakin::saber::X86;
+using anakin::saber::Shape;
+using anakin::PBlock;
+using anakin::PTuple;
+namespace paddle {
+namespace inference {
+namespace anakin {
+
+class TestAnakinEngine : public ::testing::Test {
+ protected:
+  void SetUp() override;  // replaces engine_ with a newly built engine
+  void TearDown() override {}
+
+ protected:
+  using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>;
+  std::unique_ptr<AnakinNvEngineT> engine_{nullptr};
+};
+
+void TestAnakinEngine::SetUp() {
+  engine_.reset(new AnakinNvEngineT(true));  // true -> need_summary
+}
+
+TEST_F(TestAnakinEngine, Execute) {
+  engine_->AddOp("op1", "Dense", {"x"}, {"y"});
+  engine_->AddOpAttr("op1", "out_dim", 2);
+  engine_->AddOpAttr("op1", "bias_term", false);
+  engine_->AddOpAttr("op1", "axis", 1);
+  std::vector<int> shape = {1, 1, 1, 2};
+  Shape tmp_shape(shape);
+  // The weight block has two elements (shape {1, 1, 1, 2}); both are set so
+  // that no uninitialized host memory is copied to the device.
+  auto *weight1 =
+      GraphGlobalMem<NV>::Global().template new_block<AK_FLOAT>(tmp_shape);
+
+  float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
+  cpu_data[0] = 2.;
+  cpu_data[1] = 3.;
+  weight1->d_tensor().set_shape(tmp_shape);
+  weight1->d_tensor().copy_from(weight1->h_tensor());
+  engine_->AddOpAttr("op1", "weight_1", *weight1);
+
+  engine_->Freeze();
+  engine_->SetInputShape("x", {1, 1, 1, 1});
+  engine_->Optimize();
+  engine_->InitGraph();
+  framework::LoDTensor x;
+  framework::LoDTensor y;
+  x.Resize({1, 1, 1, 1});
+  y.Resize({1, 1, 1, 2});
+  auto *x_data = x.mutable_data<float>(platform::CUDAPlace());
+  float x_data_cpu[] = {1.};
+  ASSERT_EQ(cudaSuccess, cudaMemcpy(x_data, x_data_cpu, sizeof(float),
+                                    cudaMemcpyHostToDevice));
+
+  std::map<std::string, framework::LoDTensor *> inputs = {{"x", &x}};
+  auto *y_data = y.mutable_data<float>(platform::CUDAPlace());
+  std::map<std::string, framework::LoDTensor *> outputs = {{"y", &y}};
+
+  engine_->Execute(inputs, outputs);
+  float y_data_cpu[2];
+  ASSERT_EQ(cudaSuccess, cudaMemcpy(y_data_cpu, y_data, sizeof(float) * 2,
+                                    cudaMemcpyDeviceToHost));
+  LOG(INFO) << "output value: " << y_data_cpu[0] << ", " << y_data_cpu[1];
+}
+
+}  // namespace anakin
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index f9c13c2fa84b3b5d629297d3f44a6f5889a734f4..92c24647e87a096e7cfbbf69876b678fe48842a4 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -13,7 +13,9 @@
 // limitations under the License.
 
 #include "paddle/fluid/inference/api/paddle_pass_builder.h"
-
+#ifdef PADDLE_WITH_CUDA
+#include <cudnn.h>
+#endif
 #include <glog/logging.h>
 
 namespace paddle {
diff --git a/paddle/fluid/operators/activation_cudnn.cu.cc b/paddle/fluid/operators/activation_cudnn.cu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..494c02374a9faa22486644c9b9c7d586c86d41b0
--- /dev/null
+++ b/paddle/fluid/operators/activation_cudnn.cu.cc
@@ -0,0 +1,40 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/activation_op.h"
+#include "paddle/fluid/platform/cudnn_desc.h"
+
+namespace paddle {
+namespace operators {
+using framework::Tensor;
+using platform::ActivationDescriptor;
+using platform::TensorDescriptor;
+
+template <typename Functor>  // stub: builds tensor descriptors only
+class CudnnActivationKernel
+    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    const framework::Tensor* X = nullptr;
+    framework::Tensor* Out = nullptr;
+    ExtractActivationTensor(context, &X, &Out);  // out-params, so pass by ptr
+    TensorDescriptor x_desc, out_desc;
+    x_desc.set(detail::Ref(X));
+    out_desc.set(detail::Ref(Out));  // no cuDNN call is issued here yet
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/activation_cudnn_op.cu.cc b/paddle/fluid/operators/activation_cudnn_op.cu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a382414d5c473a9c36f92a9af56837da819e96a4
--- /dev/null
+++ b/paddle/fluid/operators/activation_cudnn_op.cu.cc
@@ -0,0 +1,175 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/activation_op.h"
+#include "paddle/fluid/platform/cudnn_desc.h"
+
+namespace paddle {
+namespace operators {
+using framework::Tensor;
+using platform::ActivationDescriptor;
+using platform::TensorDescriptor;
+using platform::CUDADeviceContext;
+
+template <typename T>  // forward activation through cudnnActivationForward
+struct CudnnActivationFunctor {
+  using ELEMENT_TYPE = T;
+  CudnnActivationFunctor(const CUDADeviceContext& ctx, const T& c,
+                         const cudnnActivationMode_t& m)
+      : ctx_(ctx), coef_(c), mode_(m) {}
+  void operator()(const Tensor& x, Tensor* out) {  // reads x, writes *out
+    ActivationDescriptor act_desc;
+    act_desc.set(mode_, coef_);
+    TensorDescriptor x_desc, out_desc;
+    x_desc.set(x);
+    out_desc.set(detail::Ref(out));
+    PADDLE_ENFORCE(platform::dynload::cudnnActivationForward(
+        ctx_.cudnn_handle(), act_desc.desc(),  // kOne/kZero: alpha and beta
+        platform::CudnnDataType<T>::kOne(), x_desc.desc(), x.data<T>(),
+        platform::CudnnDataType<T>::kZero(), out_desc.desc(),
+        out->mutable_data<T>(ctx_.GetPlace())));
+  }
+  const CUDADeviceContext& ctx_;  // held by reference, not owned
+  const T coef_;
+  const cudnnActivationMode_t mode_;
+};
+
+template <typename T>  // backward activation through cudnnActivationBackward
+struct CudnnActivationGradFunctor {
+  using ELEMENT_TYPE = T;
+  CudnnActivationGradFunctor(const CUDADeviceContext& ctx, const T& c,
+                             const cudnnActivationMode_t& m)
+      : ctx_(ctx), coef_(c), mode_(m) {}
+  void operator()(const Tensor& x, const Tensor& out, const Tensor& dout,
+                  Tensor* dx) {  // dout by reference: no Tensor copy per call
+    ActivationDescriptor act_desc;
+    act_desc.set(mode_, coef_);
+    TensorDescriptor x_desc, out_desc, dout_desc, dx_desc;
+    x_desc.set(x);
+    out_desc.set(out);
+    dout_desc.set(dout);
+    dx_desc.set(detail::Ref(dx));
+    PADDLE_ENFORCE(platform::dynload::cudnnActivationBackward(
+        ctx_.cudnn_handle(), act_desc.desc(),  // kOne/kZero: alpha and beta
+        platform::CudnnDataType<T>::kOne(), out_desc.desc(), out.data<T>(),
+        dout_desc.desc(), dout.data<T>(), x_desc.desc(), x.data<T>(),
+        platform::CudnnDataType<T>::kZero(), dx_desc.desc(),
+        dx->mutable_data<T>(ctx_.GetPlace())));
+  }
+  const CUDADeviceContext& ctx_;  // held by reference, not owned
+  const T coef_;
+  const cudnnActivationMode_t mode_;
+};
+
+template <typename T>  // forward, mode CUDNN_ACTIVATION_RELU, coef 0.0
+struct CudnnReluFunctor : public CudnnActivationFunctor<T> {
+  explicit CudnnReluFunctor(const CUDADeviceContext& ctx)
+      : CudnnActivationFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_RELU) {}
+};
+template <typename T>  // backward, mode CUDNN_ACTIVATION_RELU, coef 0.0
+struct CudnnReluGradFunctor : public CudnnActivationGradFunctor<T> {
+  explicit CudnnReluGradFunctor(const CUDADeviceContext& ctx)
+      : CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_RELU) {}
+};
+
+template <typename T>  // forward, mode CUDNN_ACTIVATION_CLIPPED_RELU, coef 6.0
+struct CudnnRelu6Functor : public CudnnActivationFunctor<T> {
+  explicit CudnnRelu6Functor(const CUDADeviceContext& ctx)
+      : CudnnActivationFunctor<T>(ctx, 6.0, CUDNN_ACTIVATION_CLIPPED_RELU) {}
+};
+template <typename T>  // backward, mode CUDNN_ACTIVATION_CLIPPED_RELU, coef 6.0
+struct CudnnRelu6GradFunctor : public CudnnActivationGradFunctor<T> {
+  explicit CudnnRelu6GradFunctor(const CUDADeviceContext& ctx)
+      : CudnnActivationGradFunctor<T>(ctx, 6.0, CUDNN_ACTIVATION_CLIPPED_RELU) {
+  }
+};
+
+template <typename T>  // forward, mode CUDNN_ACTIVATION_SIGMOID, coef 0.0
+struct CudnnSigmoidFunctor : public CudnnActivationFunctor<T> {
+  explicit CudnnSigmoidFunctor(const CUDADeviceContext& ctx)
+      : CudnnActivationFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_SIGMOID) {}
+};
+template <typename T>  // backward, mode CUDNN_ACTIVATION_SIGMOID, coef 0.0
+struct CudnnSigmoidGradFunctor : public CudnnActivationGradFunctor<T> {
+  explicit CudnnSigmoidGradFunctor(const CUDADeviceContext& ctx)
+      : CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_SIGMOID) {}
+};
+
+template <typename T>  // forward, mode CUDNN_ACTIVATION_TANH, coef 0.0
+struct CudnnTanhFunctor : public CudnnActivationFunctor<T> {
+  explicit CudnnTanhFunctor(const CUDADeviceContext& ctx)
+      : CudnnActivationFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_TANH) {}
+};
+template <typename T>  // backward, mode CUDNN_ACTIVATION_TANH, coef 0.0
+struct CudnnTanhGradFunctor : public CudnnActivationGradFunctor<T> {
+  explicit CudnnTanhGradFunctor(const CUDADeviceContext& ctx)
+      : CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_TANH) {}
+};
+
+template <typename Functor>  // Functor is built from a CUDADeviceContext
+class CudnnActivationKernel
+    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
+ public:
+  using T = typename Functor::ELEMENT_TYPE;
+  void Compute(const framework::ExecutionContext& context) const override {
+    const framework::Tensor* X = nullptr;
+    framework::Tensor* Out = nullptr;
+    ExtractActivationTensor(context, &X, &Out);  // expected to set X and Out
+    Out->mutable_data<T>(context.GetPlace());
+    auto& dev_ctx = context.template device_context<CUDADeviceContext>();
+    Functor functor(dev_ctx);
+    functor(detail::Ref(X), Out);  // forward pass: reads X, writes Out
+  }
+};
+
+template <typename Functor>  // Functor is built from a CUDADeviceContext
+class CudnnActivationGradKernel
+    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
+ public:
+  using T = typename Functor::ELEMENT_TYPE;
+  void Compute(const framework::ExecutionContext& context) const override {
+    const framework::Tensor *X, *Out, *dOut;
+    X = Out = dOut = nullptr;
+    framework::Tensor* dX = nullptr;
+    ExtractActivationGradTensor(context, &X, &Out, &dOut, &dX);  // out-params
+    dX->mutable_data<T>(context.GetPlace());
+    auto& dev_ctx = context.template device_context<CUDADeviceContext>();
+    Functor functor(dev_ctx);
+    functor(detail::Ref(X), detail::Ref(Out), detail::Ref(dOut), dX);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace plat = paddle::platform;
+namespace ops = paddle::operators;
+// Applies __macro(op, forward functor, backward functor) to each cuDNN op.
+#define FOR_EACH_CUDNN_OP_FUNCTOR(__macro)                        \
+  __macro(relu, CudnnReluFunctor, CudnnReluGradFunctor);          \
+  __macro(relu6, CudnnRelu6Functor, CudnnRelu6GradFunctor);       \
+  __macro(sigmoid, CudnnSigmoidFunctor, CudnnSigmoidGradFunctor); \
+  __macro(tanh, CudnnTanhFunctor, CudnnTanhGradFunctor)
+// Registers float and double CUDNN kernels for act_type and act_type##_grad.
+#define REGISTER_ACTIVATION_CUDNN_KERNEL(act_type, functor, grad_functor) \
+  REGISTER_OP_KERNEL(act_type, CUDNN, plat::CUDAPlace,                    \
+                     ops::CudnnActivationKernel<ops::functor<float>>,     \
+                     ops::CudnnActivationKernel<ops::functor<double>>);   \
+  REGISTER_OP_KERNEL(                                                     \
+      act_type##_grad, CUDNN, plat::CUDAPlace,                            \
+      ops::CudnnActivationGradKernel<ops::grad_functor<float>>,           \
+      ops::CudnnActivationGradKernel<ops::grad_functor<double>>);
+
+FOR_EACH_CUDNN_OP_FUNCTOR(REGISTER_ACTIVATION_CUDNN_KERNEL);
diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 65efe2966ce12e86ba7f4944eb57ae72cdf9796f..2feb8e4c4787440fd086c597fa2a7f97204e34ac 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -16,29 +16,36 @@ limitations under the License. */
 #include <string>
 #include "paddle/fluid/operators/mkldnn/mkldnn_activation_op.h"
 #include "paddle/fluid/platform/port.h"
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/fluid/platform/cudnn_helper.h"
+#endif
 
 namespace paddle {
 namespace operators {
 
 using paddle::framework::Tensor;
 
-#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT)                \
-  class OP_NAME##OpMaker                                                 \
-      : public ::paddle::framework::OpProtoAndCheckerMaker {             \
-   public:                                                               \
-    void Make() override {                                               \
-      AddInput("X", "Input of " #OP_NAME " operator");                   \
-      AddOutput("Out", "Output of " #OP_NAME " operator");               \
-      AddAttr<bool>("use_mkldnn",                                        \
-                    "(bool, default false) Only used in mkldnn kernel")  \
-          .SetDefault(false);                                            \
-      AddAttr<bool>(                                                     \
-          "is_test",                                                     \
-          "(bool, default false) Set to true for inference only, false " \
-          "for training. Some layers may run faster when this is true.") \
-          .SetDefault(false);                                            \
-      AddComment(OP_COMMENT);                                            \
-    }                                                                    \
+#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT)                    \
+  class OP_NAME##OpMaker                                                     \
+      : public ::paddle::framework::OpProtoAndCheckerMaker {                 \
+   public:                                                                   \
+    void Make() override {                                                   \
+      AddInput("X", "Input of " #OP_NAME " operator");                       \
+      AddOutput("Out", "Output of " #OP_NAME " operator");                   \
+      AddAttr<bool>("use_mkldnn",                                            \
+                    "(bool, default false) Only used in mkldnn kernel")      \
+          .SetDefault(false);                                                \
+      AddAttr<bool>("use_cudnn",                                             \
+                    "(bool, default false) Only used in cudnn kernel, need " \
+                    "install cudnn")                                         \
+          .SetDefault(false);                                                \
+      AddAttr<bool>(                                                         \
+          "is_test",                                                         \
+          "(bool, default false) Set to true for inference only, false "     \
+          "for training. Some layers may run faster when this is true.")     \
+          .SetDefault(false);                                                \
+      AddComment(OP_COMMENT);                                                \
+    }                                                                        \
   }
 
 #define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME, KERNEL_TYPE)              \
@@ -67,6 +74,12 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx,
                                       const std::string& name) {
   framework::LibraryType library{framework::LibraryType::kPlain};
   framework::DataLayout layout = framework::DataLayout::kAnyLayout;
+#ifdef PADDLE_WITH_CUDA
+  auto it1 = oper.Attrs().find("use_cudnn");
+  if (it1 != oper.Attrs().end() && platform::CanCUDNNBeUsed(ctx)) {
+    library = framework::LibraryType::kCUDNN;
+  }
+#endif
 #ifdef PADDLE_WITH_MKLDNN
   auto it = oper.Attrs().find("use_mkldnn");
   if (library == framework::LibraryType::kPlain && it != oper.Attrs().end() &&
diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index c7df3ea58a91579e35ff0d486516271a6daf054f..1f5ae7fb5cd2e1c14190602d2c35e6c3755cfd70 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -11,6 +11,7 @@ limitations under the License. */
 
 #pragma once
 #include <glog/logging.h>
+#include <algorithm>
 #include <string>
 #include <unordered_set>
 #include <utility>
@@ -24,6 +25,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/detail/safe_ref.h"
+#include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/platform/float16.h"
 
 #ifdef PADDLE_WITH_MKLDNN
@@ -41,53 +43,115 @@ static std::unordered_set<std::string> InplaceOpSet = {
     "floor",   "reciprocal", "relu6", "soft_relu", "hard_sigmoid",
 };
 
+// True if `op` (or, for "<name>_grad", its forward op) is in InplaceOpSet.
+static bool IsInplace(const std::string& op) {
+  static const std::string kGradSuffix = "_grad";
+  // compare() returns 0 on a match, so the result must be tested explicitly.
+  const bool is_grad = op.size() > kGradSuffix.size() &&
+                       op.compare(op.size() - kGradSuffix.size(),
+                                  kGradSuffix.size(), kGradSuffix) == 0;
+  const std::string fwd =
+      is_grad ? op.substr(0, op.size() - kGradSuffix.size()) : op;
+  return InplaceOpSet.count(fwd) > 0;
+}
+
 /* The following operator can be used to process SelectedRows, because the
  * output of those operator for zero is zero too.
  */
 static std::unordered_set<std::string> CanBeUsedBySelectedRows = {
     "abs", "abs_grad", "square", "square_grad", "sqrt", "sqrt_grad"};
 
-static bool IsInplace(std::string op) { return InplaceOpSet.count(op); }
-
-template <typename DeviceContext, typename Functor>
-class ActivationKernel
-    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
- public:
-  using T = typename Functor::ELEMENT_TYPE;
-
-  void Compute(const framework::ExecutionContext& context) const override {
+// Fetches X and Out for a forward activation; enforces that Out is non-null.
+inline void ExtractActivationTensor(const framework::ExecutionContext& context,
+                                    const framework::Tensor** X,
+                                    framework::Tensor** Out) {
+  const auto* x_var = context.InputVar("X");
+  auto* out_var = context.OutputVar("Out");
+  PADDLE_ENFORCE(x_var != nullptr,
+                 "Cannot get input Variable X, variable name = %s",
+                 context.op().Input("X"));
+  PADDLE_ENFORCE(out_var != nullptr,
+                 "Cannot get output Variable Out, variable name = %s",
+                 context.op().Output("Out"));
+  if (CanBeUsedBySelectedRows.count(context.op().Type()) == 0) {
+    *X = context.Input<framework::Tensor>("X");
+    *Out = context.Output<framework::Tensor>("Out");
+  } else {
+    *X = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var);
+    *Out = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
+        out_var);
+  }
+  PADDLE_ENFORCE(*Out != nullptr,
+                 "Cannot get output tensor Out, variable name = %s",
+                 context.op().Output("Out"));
+}
+
+inline void ExtractActivationGradTensor(
+    const framework::ExecutionContext& context, const framework::Tensor** X,
+    const framework::Tensor** Out, const framework::Tensor** dOut,
+    framework::Tensor** dX) {
+  auto out_var = context.InputVar("Out");
+  auto out_grad_var = context.InputVar(framework::GradVarName("Out"));
+  auto x_grad_var = context.OutputVar(framework::GradVarName("X"));
+  PADDLE_ENFORCE(out_var != nullptr,
+                 "Cannot get input Variable Out, variable name = %s",
+                 context.op().Input("Out"));
+  PADDLE_ENFORCE(out_grad_var != nullptr,
+                 "Cannot get input Variable %s, variable name = %s",
+                 framework::GradVarName("Out"),
+                 context.op().Input(framework::GradVarName("Out")));
+  PADDLE_ENFORCE(x_grad_var != nullptr,
+                 "Cannot get output Variable %s, variable name = %s",
+                 framework::GradVarName("X"),
+                 context.op().Output(framework::GradVarName("X")));
+
+  if (CanBeUsedBySelectedRows.count(context.op().Type())) {
+    *Out = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var);
+    *dOut = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(
+        *out_grad_var);
+    *dX = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
+        x_grad_var);
+  } else {
+    *Out = context.Input<framework::Tensor>("Out");
+    *dOut = context.Input<framework::Tensor>(framework::GradVarName("Out"));
+    *dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
+  }
+  PADDLE_ENFORCE(*dX != nullptr,
+                 "Cannot get output tensor %s, variable name = %s",
+                 framework::GradVarName("X"),
+                 context.op().Output(framework::GradVarName("X")));
+
+  bool inplace = IsInplace(context.op().Type());
+  if (!inplace) {
     auto x_var = context.InputVar("X");
-    auto out_var = context.OutputVar("Out");
     PADDLE_ENFORCE(x_var != nullptr,
-                   "Cannot get input Variable X, variable name = %s",
+                   "Cannot get input tensor X, variable name = %s",
                    context.op().Input("X"));
-    PADDLE_ENFORCE(out_var != nullptr,
-                   "Cannot get output Variable Out, variable name = %s",
-                   context.op().Output("Out"));
-
-    framework::Tensor X, *Out;
-
     if (CanBeUsedBySelectedRows.count(context.op().Type())) {
-      X = detail::Ref(
-          paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var),
-          "Cannot get input Tensor X, variable name = %s",
-          context.op().Input("X"));
-      Out = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
-          out_var);
+      *X = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var);
     } else {
-      X = detail::Ref(context.Input<framework::Tensor>("X"),
-                      "Cannot get input Tensor X, variable name = %s",
-                      context.op().Input("X"));
-      Out = context.Output<framework::Tensor>("Out");
+      *X = context.Input<framework::Tensor>("X");
     }
+  } else {
+    VLOG(10) << " Inplace activation of Op : " << context.op().Type();
+    *X = *dX;
+  }
+}
 
-    PADDLE_ENFORCE(Out != nullptr,
-                   "Cannot get output tensor Out, variable name = %s",
-                   context.op().Output("Out"));
+template <typename DeviceContext, typename Functor>
+class ActivationKernel
+    : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
+ public:
+  using T = typename Functor::ELEMENT_TYPE;
 
+  void Compute(const framework::ExecutionContext& context) const override {
+    const framework::Tensor* X = nullptr;
+    framework::Tensor* Out = nullptr;
+    ExtractActivationTensor(context, &X, &Out);
     Out->mutable_data<T>(context.GetPlace());
-    auto x = framework::EigenVector<T>::Flatten(X);
-    auto out = framework::EigenVector<T>::Flatten(*Out);
+
+    auto x = framework::EigenVector<T>::Flatten(detail::Ref(X));
+    auto out = framework::EigenVector<T>::Flatten(detail::Ref(Out));
     auto* place =
         context.template device_context<DeviceContext>().eigen_device();
     Functor functor;
@@ -106,55 +170,15 @@ class ActivationGradKernel
  public:
   using T = typename Functor::ELEMENT_TYPE;
   void Compute(const framework::ExecutionContext& context) const override {
-    auto out_var = context.InputVar("Out");
-    auto out_grad_var = context.InputVar(framework::GradVarName("Out"));
-    auto x_grad_var = context.OutputVar(framework::GradVarName("X"));
-    PADDLE_ENFORCE(out_var != nullptr,
-                   "Cannot get input Variable Out, variable name = %s",
-                   context.op().Input("Out"));
-    PADDLE_ENFORCE(out_grad_var != nullptr,
-                   "Cannot get input Variable %s, variable name = %s",
-                   framework::GradVarName("Out"),
-                   context.op().Input(framework::GradVarName("Out")));
-    PADDLE_ENFORCE(x_grad_var != nullptr,
-                   "Cannot get output Variable %s, variable name = %s",
-                   framework::GradVarName("X"),
-                   context.op().Output(framework::GradVarName("X")));
-
-    framework::Tensor Out, dOut, *dX;
-    if (CanBeUsedBySelectedRows.count(context.op().Type())) {
-      Out = detail::Ref(
-          paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*out_var),
-          "Cannot get input Tensor Out, variable name = %s",
-          context.op().Input("Out"));
-      dOut =
-          detail::Ref(paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(
-                          *out_grad_var),
-                      "Cannot get input Tensor %s, variable name = %s",
-                      framework::GradVarName("Out"),
-                      context.op().Input(framework::GradVarName("Out")));
-      dX = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(
-          x_grad_var);
-    } else {
-      Out = detail::Ref(context.Input<framework::Tensor>("Out"),
-                        "Cannot get input Tensor Out, variable name = %s",
-                        context.op().Input("Out"));
-      dOut = detail::Ref(
-          context.Input<framework::Tensor>(framework::GradVarName("Out")),
-          "Cannot get input Tensor %s, variable name = %s",
-          framework::GradVarName("Out"),
-          context.op().Input(framework::GradVarName("Out")));
-      dX = context.Output<framework::Tensor>(framework::GradVarName("X"));
-    }
-    PADDLE_ENFORCE(dX != nullptr,
-                   "Cannot get output tensor %s, variable name = %s",
-                   framework::GradVarName("X"),
-                   context.op().Output(framework::GradVarName("X")));
+    const framework::Tensor *X, *Out, *dOut;
+    framework::Tensor* dX = nullptr;
+    X = Out = dOut = nullptr;
+    ExtractActivationGradTensor(context, &X, &Out, &dOut, &dX);
     dX->mutable_data<T>(context.GetPlace());
-
-    auto dout = framework::EigenVector<T>::Flatten(dOut);
-    auto out = framework::EigenVector<T>::Flatten(Out);
-    auto dx = framework::EigenVector<T>::Flatten(*dX);
+    auto dout = framework::EigenVector<T>::Flatten(detail::Ref(dOut));
+    auto out = framework::EigenVector<T>::Flatten(detail::Ref(Out));
+    auto dx = framework::EigenVector<T>::Flatten(detail::Ref(dX));
+    auto x = framework::EigenVector<T>::Flatten(detail::Ref(X));
     auto* place =
         context.template device_context<DeviceContext>().eigen_device();
     Functor functor;
@@ -162,27 +186,7 @@ class ActivationGradKernel
     for (auto& attr : attrs) {
       *attr.second = context.Attr<float>(attr.first);
     }
-    bool inplace = functor.Inplace();
-    if (!inplace) {
-      auto x_var = context.InputVar("X");
-      PADDLE_ENFORCE(x_var != nullptr,
-                     "Cannot get input tensor X, variable name = %s",
-                     context.op().Input("X"));
-      framework::Tensor X;
-      if (CanBeUsedBySelectedRows.count(context.op().Type())) {
-        X = detail::Ref(
-            paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var));
-      } else {
-        X = detail::Ref(context.Input<framework::Tensor>("X"));
-      }
-
-      auto x = framework::EigenVector<T>::Flatten(X);
-      functor(*place, x, out, dout, dx);
-    } else {
-      VLOG(10) << " Inplace activation ";
-      auto x = framework::EigenVector<T>::Flatten(*dX);
-      functor(*place, x, out, dout, dx);
-    }
+    functor(*place, x, out, dout, dx);
   }
 };
 
@@ -214,7 +218,6 @@ struct SigmoidFunctor : public BaseActivationFunctor<T> {
 
 template <typename T>
 struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
-  bool Inplace() const { return IsInplace("sigmoid"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -269,7 +272,6 @@ struct ExpFunctor : public BaseActivationFunctor<T> {
 
 template <typename T>
 struct ExpGradFunctor : public BaseActivationFunctor<T> {
-  bool Inplace() const { return IsInplace("exp"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -288,7 +290,6 @@ struct ReluFunctor : public BaseActivationFunctor<T> {
 
 template <typename T>
 struct ReluGradFunctor : public BaseActivationFunctor<T> {
-  bool Inplace() const { return IsInplace("relu"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -301,8 +302,28 @@ template <typename T>
 struct GeluFunctor : public BaseActivationFunctor<T> {
   template <typename Device, typename X, typename Out>
   void operator()(Device d, X x, Out out) const {
+// Because the execution or device context cannot be delivered here, the
+// preprocessor guard below keeps the MKLML path out of CUDA (NVCC) builds.
+#if defined(PADDLE_WITH_MKLML) && !defined(_WIN32) && !defined(__APPLE__) && \
+    !defined(__OSX__) && !defined(PADDLE_WITH_CUDA)
+    auto x_data = x.data();
+    auto out_data = out.data();
+    int n = std::min(x.size(), out.size());
+
+    std::memset(out_data, 0, n * sizeof(T));
+    math::CBlas<T>::AXPY(n, static_cast<T>(M_SQRT1_2), x_data, 1, out_data, 1);
+    math::CBlas<T>::VMERF(n, out_data, out_data, VML_LA);
+    for (int i = 0; i < n; i++) {
+      out_data[i] += static_cast<T>(1);
+    }
+    math::CBlas<T>::VMUL(n, x_data, out_data, out_data);
+    for (int i = 0; i < n; i++) {
+      out_data[i] *= static_cast<T>(0.5);
+    }
+#else
     auto temp = (x * static_cast<T>(M_SQRT1_2)).erf();
     out.device(d) = x * static_cast<T>(0.5) * (static_cast<T>(1) + temp);
+#endif
   }
 };
 
@@ -331,7 +352,6 @@ struct TanhFunctor : public BaseActivationFunctor<T> {
 
 template <typename T>
 struct TanhGradFunctor : public BaseActivationFunctor<T> {
-  bool Inplace() const { return IsInplace("tanh"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -437,7 +457,6 @@ struct SqrtFunctor : public BaseActivationFunctor<T> {
 
 template <typename T>
 struct SqrtGradFunctor : public BaseActivationFunctor<T> {
-  bool Inplace() const { return IsInplace("sqrt"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -456,7 +475,6 @@ struct CeilFunctor : public BaseActivationFunctor<T> {
 
 template <typename T>
 struct ZeroGradFunctor : public BaseActivationFunctor<T> {
-  bool Inplace() const { return IsInplace("ceil"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -573,7 +591,6 @@ struct ReciprocalFunctor : public BaseActivationFunctor<T> {
 
 template <typename T>
 struct ReciprocalGradFunctor : public BaseActivationFunctor<T> {
-  bool Inplace() const { return IsInplace("reciprocal"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -673,7 +690,6 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
   typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
     return {{"threshold", &threshold}};
   }
-  bool Inplace() const { return IsInplace("relu6"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -755,7 +771,6 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
   typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
     return {{"threshold", &threshold}};
   }
-  bool Inplace() const { return IsInplace("soft_relu"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
@@ -936,7 +951,6 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor<T> {
   typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
     return {{"slope", &slope}, {"offset", &offset}};
   }
-  bool Inplace() { return IsInplace("hard_sigmoid"); }
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
diff --git a/paddle/fluid/operators/alloc_continuous_space_op.cc b/paddle/fluid/operators/alloc_continuous_space_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..df0e9911cf7186e952cfd7fbf7f43889e9098c84
--- /dev/null
+++ b/paddle/fluid/operators/alloc_continuous_space_op.cc
@@ -0,0 +1,211 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/var_type.h"
+#include "paddle/fluid/operators/math/math_function.h"
+
+namespace paddle {
+namespace operators {
+
+static framework::proto::VarType::Type kDefaultDtype =
+    framework::proto::VarType::Type::VarType_Type_BOOL;
+
+template <typename DeviceContext, typename T>
+class AllocContinuousSpaceKernel : public framework::OpKernel<T> {
+ public:
+  // Allocates FusedOutput and makes each Output share a slice of it.
+  void Compute(const framework::ExecutionContext &context) const override {
+    auto &in_var_names = context.Inputs("Input");
+    auto &out_var_names = context.Outputs("Output");
+    auto &in_vars = context.MultiInputVar("Input");
+    auto out_vars = context.MultiOutputVar("Output");
+    PADDLE_ENFORCE_GT(in_var_names.size(), static_cast<size_t>(0));
+    PADDLE_ENFORCE_EQ(in_var_names.size(), out_var_names.size());
+
+    for (size_t i = 0; i < in_var_names.size(); ++i) {
+      // Only support LoDTensor
+      PADDLE_ENFORCE_NOT_NULL(in_vars[i], "%s should not be nullptr,",
+                              in_var_names[i]);
+      PADDLE_ENFORCE_NOT_NULL(out_vars[i], "%s should not be nullptr,",
+                              out_var_names[i]);
+      PADDLE_ENFORCE(in_vars[i]->IsType<framework::LoDTensor>());
+      PADDLE_ENFORCE(out_vars[i]->IsType<framework::LoDTensor>());
+    }
+
+    auto in_tensors = context.MultiInput<framework::LoDTensor>("Input");
+
+    if (context.Attr<bool>("check_name")) {
+      for (size_t i = 0; i < in_var_names.size(); ++i) {
+        PADDLE_ENFORCE_EQ(in_var_names[i], out_var_names[i]);
+      }
+    } else {
+      // Init the output as input
+      for (size_t i = 0; i < in_tensors.size(); ++i) {
+        out_vars[i]->GetMutable<framework::LoDTensor>()->Resize(
+            in_tensors[i]->dims());
+      }
+    }
+
+    auto &dev_ctx = context.template device_context<DeviceContext>();
+
+    // Get numel and dtype
+    size_t numel = 0;
+    auto dtype = kDefaultDtype;
+    GetMemSizeAndDtype(in_tensors, in_var_names, &numel, &dtype);
+
+    // Alloc the continuous space
+    auto fused_tensor = context.Output<framework::LoDTensor>("FusedOutput");
+    fused_tensor->Resize(framework::make_ddim({static_cast<int64_t>(numel)}))
+        .mutable_data(context.GetPlace(), dtype);
+
+    // Init the continuous space: copy the Input values or fill a constant
+    auto out_tensors = context.MultiOutput<framework::LoDTensor>("Output");
+    int64_t offset = 0;
+    if (context.Attr<bool>("copy_data")) {
+      for (size_t i = 0; i < in_var_names.size(); ++i) {
+        int64_t len = in_tensors[i]->numel();
+        auto sub_tensor = fused_tensor->Slice(offset, offset + len);
+        offset += len;
+        framework::TensorCopy(*in_tensors[i], context.GetPlace(), dev_ctx,
+                              &sub_tensor);
+      }
+    } else if (context.Attr<bool>("set_constant")) {
+      math::SetConstant<DeviceContext, T> set_constant;
+      set_constant(dev_ctx, fused_tensor,
+                   static_cast<T>(context.Attr<float>("constant")));
+    }
+
+    // Make the outputs point to the continuous space.
+    offset = 0;
+    for (size_t i = 0; i < out_tensors.size(); ++i) {
+      int64_t len = out_tensors[i]->numel();
+      auto dim = out_tensors[i]->dims();
+      out_tensors[i]
+          ->ShareDataWith(fused_tensor->Slice(offset, offset + len))
+          .Resize(dim);
+      offset += len;
+      VLOG(10) << "alloc_space_for_vars: output(" << out_var_names[i]
+               << ") ,dim:(" << dim << ")"
+               << " Address: " << out_tensors[i]->data<void>();
+    }
+  }
+
+  // Sums the numel of `lod_tensors` and checks they all share one dtype.
+  void GetMemSizeAndDtype(
+      const std::vector<const framework::LoDTensor *> &lod_tensors,
+      const std::vector<std::string> &var_names, size_t *numel,
+      framework::proto::VarType::Type *dtype) const {
+    PADDLE_ENFORCE_EQ(lod_tensors.size(), var_names.size());
+    *numel = 0;
+    for (size_t i = 0; i < var_names.size(); ++i) {
+      PADDLE_ENFORCE(lod_tensors[i]->IsInitialized(), "%s is not initialized.",
+                     var_names[i]);
+      auto p_dtype = lod_tensors[i]->type();
+      if (*dtype == kDefaultDtype) {
+        PADDLE_ENFORCE_NE(p_dtype, kDefaultDtype, "%s's type should not be %s.",
+                          var_names[i], kDefaultDtype);
+        *dtype = p_dtype;
+      }
+      PADDLE_ENFORCE_EQ(p_dtype, *dtype, "Input vars is not equal.");
+
+      auto size = lod_tensors[i]->numel();
+      PADDLE_ENFORCE_GT(size, 0);
+      VLOG(10) << "alloc_space_for_vars: input(" << var_names[i] << ") ,dim:("
+               << lod_tensors[i]->dims() << ")";
+      *numel += size;
+    }
+  }
+};
+
+class AllocContinuousSpaceOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {}
+};
+
+class AllocContinuousSpaceOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("Input",
+             "(vector<LoDTensor>) The input tensors of"
+             " alloc_continuous_space operator.")
+        .AsDuplicable();
+    AddOutput("Output",
+              "(vector<LoDTensor>) The output "
+              "tensors of alloc_continuous_space operator. And the address "
+              "of output tensors are continuous, they are sliced from the "
+              "tensor of FusedOutput.")
+        .AsDuplicable();
+    AddOutput("FusedOutput",
+              "(LoDTensor) The output tensor "
+              "of alloc_continuous_space operator. And the tensors of"
+              " Output is sliced from the tensor of FusedOutput.");
+    AddAttr<bool>("copy_data", "Whether to copy the Input value to Output.")
+        .SetDefault(false);
+    AddAttr<bool>("set_constant",
+                  "Whether to set the Output with a constant value.")
+        .SetDefault(false);
+    AddAttr<float>("constant",
+                   "If set_constant is true, the constant value will be used "
+                   "to set the Output.")
+        .SetDefault(0.0);
+    AddAttr<bool>("check_name",
+                  "Whether to check the name of Input and Output to ensure "
+                  "they are the same separately.")
+        .SetDefault(false);
+    AddComment(R"DOC(
+AllocContinuousSpace Operator.
+
+alloc_continuous_space is used to make the address of Output
+continuous according to the Input. This Op will alloc a big tensor
+according to the tensors of Input, the dtype is the same with those input tensors,
+the size is the sum of those input tensors' numel, and the dim of the big
+tensor is {sum(numel)}. And the big tensor is stored in FusedOutput.
+The tensors of Output are sliced from the tensor of FusedOutput.
+Note that, the dtype of Input should be the same, and the dim of Input
+and Output should equal.
+The tensors of Input and Output could be the same or different. And
+alloc_continuous_space allows copying the value of Input to Output, or
+setting the Output with a constant value.
+
+)DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+REGISTER_OPERATOR(alloc_continuous_space,
+                  paddle::operators::AllocContinuousSpaceOp,
+                  paddle::operators::AllocContinuousSpaceOpMaker);
+namespace ops = paddle::operators;
+REGISTER_OP_CPU_KERNEL(
+    alloc_continuous_space,
+    ops::AllocContinuousSpaceKernel<paddle::platform::CPUDeviceContext, int>,
+    ops::AllocContinuousSpaceKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::AllocContinuousSpaceKernel<paddle::platform::CPUDeviceContext,
+                                    double>);
+
+#ifdef PADDLE_WITH_CUDA
+REGISTER_OP_CUDA_KERNEL(
+    alloc_continuous_space,
+    ops::AllocContinuousSpaceKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::AllocContinuousSpaceKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::AllocContinuousSpaceKernel<paddle::platform::CUDADeviceContext,
+                                    double>);
+#endif
diff --git a/paddle/fluid/operators/benchmark/op_tester.cc b/paddle/fluid/operators/benchmark/op_tester.cc
index e179de56cddc5fada2e5833086d351659a7cf540..064903c299d947df3c6b42d916fce8dcbd85eebb 100644
--- a/paddle/fluid/operators/benchmark/op_tester.cc
+++ b/paddle/fluid/operators/benchmark/op_tester.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/benchmark/op_tester.h"
+#include <fstream>
 #include "gflags/gflags.h"
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/op_info.h"
@@ -28,6 +29,7 @@ namespace operators {
 namespace benchmark {
 
 DEFINE_string(op_config_list, "", "Path of op config file.");
+DEFINE_int32(specified_config_id, -1, "Test the specified op config.");
 
 void OpTester::Init(const std::string &filename) {
   Init(OpTesterConfig(filename));
@@ -147,7 +149,7 @@ void OpTester::CreateInputVarDesc() {
     var->SetShape(input->dims);
 
     op_desc_.SetInput(name, {var_name});
-    inputs_.push_back(var_name);
+    input_lods_[var_name] = input->lod;
   }
 }
 
@@ -162,7 +164,6 @@ void OpTester::CreateOutputVarDesc() {
     var->SetDataType(framework::proto::VarType::FP32);
 
     op_desc_.SetOutput(name, {var_name});
-    outputs_.push_back(var_name);
   }
 }
 
@@ -218,16 +219,26 @@ void OpTester::CreateVariables(framework::Scope *scope) {
     }
   }
 
-  // Allocate memory for input tensor
-  for (auto &name : inputs_) {
-    VLOG(3) << "Allocate memory for tensor " << name;
-    auto &var_desc = vars_[name];
+  for (auto &item : input_lods_) {
+    // Allocate memory for input tensor
+    auto &var_name = item.first;
+    VLOG(3) << "Allocate memory for tensor " << var_name;
+
+    auto &var_desc = vars_[var_name];
     std::vector<int64_t> shape = var_desc->GetShape();
 
-    auto *var = scope->Var(name);
+    auto *var = scope->Var(var_name);
     auto *tensor = var->GetMutable<framework::LoDTensor>();
     SetupTensor<float>(tensor, shape, static_cast<float>(0.0),
                        static_cast<float>(1.0));
+
+    VLOG(3) << "Set lod for tensor " << var_name;
+    std::vector<std::vector<size_t>> &lod_vec = item.second;
+    framework::LoD lod;
+    for (size_t i = 0; i < lod_vec.size(); ++i) {
+      lod.push_back(lod_vec[i]);
+    }
+    tensor->set_lod(lod);
   }
 }
 
@@ -282,10 +293,32 @@ std::string OpTester::DebugString() {
 }
 
 TEST(op_tester, base) {
-  OpTester tester;
   if (!FLAGS_op_config_list.empty()) {
-    tester.Init(FLAGS_op_config_list);
+    std::ifstream fin(FLAGS_op_config_list, std::ios::in | std::ios::binary);
+    PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot open file %s",
+                   FLAGS_op_config_list.c_str());
+    std::vector<OpTesterConfig> op_configs;
+    while (!fin.eof()) {
+      OpTesterConfig config;
+      bool result = config.Init(fin);
+      if (result) {
+        op_configs.push_back(config);
+      }
+    }
+    if (FLAGS_specified_config_id >= 0 &&
+        FLAGS_specified_config_id < static_cast<int>(op_configs.size())) {
+      OpTester tester;
+      tester.Init(op_configs[FLAGS_specified_config_id]);
+      tester.Run();
+    } else {
+      for (size_t i = 0; i < op_configs.size(); ++i) {
+        OpTester tester;
+        tester.Init(op_configs[i]);
+        tester.Run();
+      }
+    }
   } else {
+    OpTester tester;
     OpTesterConfig config;
     config.op_type = "elementwise_add";
     config.inputs.resize(2);
@@ -294,8 +327,8 @@ TEST(op_tester, base) {
     config.inputs[1].name = "Y";
     config.inputs[1].dims = {64, 1};
     tester.Init(config);
+    tester.Run();
   }
-  tester.Run();
 }
 
 }  // namespace benchmark
diff --git a/paddle/fluid/operators/benchmark/op_tester.h b/paddle/fluid/operators/benchmark/op_tester.h
index 1723d46c47ed67199713e6d726c6245f34f7c224..8f150b23ad783acdfd203d471d578ab6aae71494 100644
--- a/paddle/fluid/operators/benchmark/op_tester.h
+++ b/paddle/fluid/operators/benchmark/op_tester.h
@@ -57,8 +57,7 @@ class OpTester {
   std::string type_;
   framework::OpDesc op_desc_;
   std::unordered_map<std::string, std::unique_ptr<framework::VarDesc>> vars_;
-  std::vector<std::string> inputs_;
-  std::vector<std::string> outputs_;
+  std::unordered_map<std::string, std::vector<std::vector<size_t>>> input_lods_;
   std::unique_ptr<framework::OperatorBase> op_;
   platform::Place place_;
   std::unique_ptr<framework::Scope> scope_;
diff --git a/paddle/fluid/operators/benchmark/op_tester_config.cc b/paddle/fluid/operators/benchmark/op_tester_config.cc
index 3db8de7f76801eb814b57859d6b95590761c96f3..8336804ec07d2b7b176f55ad4113452086296494 100644
--- a/paddle/fluid/operators/benchmark/op_tester_config.cc
+++ b/paddle/fluid/operators/benchmark/op_tester_config.cc
@@ -33,21 +33,64 @@ static bool EndWith(const std::string& str, const std::string& substr) {
   return str.rfind(substr) == (str.length() - substr.length());
 }
 
-static void EraseEndSep(std::string* str) {
-  std::string substr = kSepBetweenItems;
+static void EraseEndSep(std::string* str,
+                        std::string substr = kSepBetweenItems) {
   if (EndWith(*str, substr)) {
     str->erase(str->length() - substr.length(), str->length());
   }
 }
 
-static std::vector<int64_t> ParseDims(std::string dims_str) {
-  std::vector<int64_t> dims;
+void OpInputConfig::ParseDims(std::istream& is) {
+  std::string dims_str;
+  is >> dims_str;
+
+  dims.clear();
   std::string token;
   std::istringstream token_stream(dims_str);
   while (std::getline(token_stream, token, 'x')) {
     dims.push_back(std::stoi(token));
   }
-  return dims;
+}
+
+// Reads a brace-nested LoD literal from `is` and stores its levels in `lod`.
+void OpInputConfig::ParseLoD(std::istream& is) {
+  std::string lod_str;
+  std::string start_sep =
+      std::string(kStartSeparator) + std::string(kStartSeparator);
+  std::string end_sep = std::string(kEndSeparator) + std::string(kEndSeparator);
+  std::string sep;
+  is >> sep;
+  if (StartWith(sep, start_sep)) {
+    lod_str += sep;
+    // Stop on a failed read too, so a truncated config cannot loop forever.
+    while (!EndWith(sep, end_sep) && (is >> sep)) {
+      lod_str += sep;
+    }
+  }
+  EraseEndSep(&lod_str);
+  PADDLE_ENFORCE_GE(lod_str.length(), 4U);
+  VLOG(4) << "lod: " << lod_str << ", length: " << lod_str.length();
+
+  // Scan between the outermost braces; each inner "{...}" is one LoD level.
+  lod.clear();
+  for (size_t i = 1; i < lod_str.length() - 1;) {
+    if (lod_str[i] != '{') {
+      // Not a level start: always advance, or a ',' between levels would hang.
+      ++i;
+      continue;
+    }
+    std::vector<size_t> level;
+    while (lod_str[i] != '}') {
+      ++i;
+      std::string number;
+      while (lod_str[i] >= '0' && lod_str[i] <= '9') {
+        number += lod_str[i];
+        ++i;
+      }
+      level.push_back(atoi(number.c_str()));
+    }
+    lod.push_back(level);
+  }
+}
 
 OpInputConfig::OpInputConfig(std::istream& is) {
@@ -60,9 +103,9 @@ OpInputConfig::OpInputConfig(std::istream& is) {
         is >> name;
         EraseEndSep(&name);
       } else if (sep == "dims" || sep == "dims:") {
-        std::string dims_str;
-        is >> dims_str;
-        dims = ParseDims(dims_str);
+        ParseDims(is);
+      } else if (sep == "lod" || sep == "lod:") {
+        ParseLoD(is);
       }
     }
   }
@@ -76,7 +119,7 @@ OpTesterConfig::OpTesterConfig(const std::string& filename) {
   Init(fin);
 }
 
-void OpTesterConfig::Init(std::istream& is) {
+bool OpTesterConfig::Init(std::istream& is) {
   std::string sep;
   is >> sep;
   if (sep == kStartSeparator) {
@@ -95,9 +138,40 @@ void OpTesterConfig::Init(std::istream& is) {
       } else if (sep == "input" || sep == "input:") {
         OpInputConfig input_config(is);
         inputs.push_back(input_config);
+      } else if (sep == "attrs" || sep == "attrs:") {
+        ParseAttrs(is);
+      } else {
+        if (sep != kEndSeparator) {
+          return false;
+        }
       }
     }
+  } else {
+    return false;
+  }
+  return true;
+}
+
+// Reads "key: value" items between the start and end separators into attrs.
+bool OpTesterConfig::ParseAttrs(std::istream& is) {
+  std::string sep;
+  is >> sep;
+  if (sep == kStartSeparator) {
+    std::string key;
+    std::string value;
+    // A failed extraction ends the loop as well, so a config that lacks its
+    // end separator cannot spin forever on an exhausted stream.
+    while ((is >> key) && key != kEndSeparator) {
+      if (!(is >> value)) {
+        break;
+      }
+      // Drop the trailing ":" of the key and the item separator of the value.
+      EraseEndSep(&key, ":");
+      EraseEndSep(&value);
+      attrs[key] = value;
+    }
+  }
+  return true;
+}
 
 const OpInputConfig* OpTesterConfig::GetInput(const std::string& name) {
diff --git a/paddle/fluid/operators/benchmark/op_tester_config.h b/paddle/fluid/operators/benchmark/op_tester_config.h
index f7b62cb8ad03b410a2ea99fe4c2a8dc8a6bea7a7..c2ff6dafc053eb7202a686954d53ae6f3d62d02e 100644
--- a/paddle/fluid/operators/benchmark/op_tester_config.h
+++ b/paddle/fluid/operators/benchmark/op_tester_config.h
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include <istream>
 #include <string>
+#include <unordered_map>
 #include <vector>
 
 namespace paddle {
@@ -26,19 +27,27 @@ struct OpInputConfig {
   OpInputConfig() {}
   explicit OpInputConfig(std::istream& is);
 
+  void ParseDims(std::istream& is);
+  void ParseLoD(std::istream& is);
+
   std::string name;
   std::vector<int64_t> dims;
+  std::vector<std::vector<size_t>> lod;
 };
 
 struct OpTesterConfig {
   OpTesterConfig() {}
   explicit OpTesterConfig(const std::string& filename);
-  void Init(std::istream& is);
+
+  bool Init(std::istream& is);
+
+  bool ParseAttrs(std::istream& is);
 
   const OpInputConfig* GetInput(const std::string& name);
 
   std::string op_type;
   std::vector<OpInputConfig> inputs;
+  std::unordered_map<std::string, std::string> attrs;
   int device_id{-1};  // CPU: -1
   int repeat{1};
   int profile{0};
diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc
index a37c8d3ccd9c3bb8fae8a5f198bc4db714301b68..ca6bc4df0fe2c6cddaf548d3e708e777172a0841 100644
--- a/paddle/fluid/operators/conv_op.cc
+++ b/paddle/fluid/operators/conv_op.cc
@@ -81,6 +81,7 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
       framework::OpKernelType::kDefaultCustomizedTypeValue;
   framework::LibraryType library{framework::LibraryType::kPlain};
   // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
+  auto input_data_type = ctx.Input<Tensor>("Input")->type();
   std::string data_format = ctx.Attr<std::string>("data_format");
   framework::DataLayout layout = framework::StringToDataLayout(data_format);
 
@@ -94,11 +95,14 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
       platform::CanMKLDNNBeUsed(ctx)) {
     library = framework::LibraryType::kMKLDNN;
     layout = framework::DataLayout::kMKLDNN;
-    customized_type_value = kConvMKLDNNFP32;
+    customized_type_value =
+        (input_data_type == framework::DataTypeTrait<int8_t>::DataType ||
+         input_data_type == framework::DataTypeTrait<uint8_t>::DataType)
+            ? kConvMKLDNNINT8
+            : kConvMKLDNNFP32;
   }
 #endif
 
-  auto input_data_type = ctx.Input<Tensor>("Input")->type();
   if (input_data_type != framework::proto::VarType::INT8 &&
       input_data_type != framework::proto::VarType::UINT8) {
     auto filter_data_type = ctx.Input<Tensor>("Filter")->type();
diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc
index 1968e54b00601139e252f0480ca3ae1fc08904f4..3adc7baebddd06ced74afea1e77017beb57582e8 100644
--- a/paddle/fluid/operators/cross_entropy_op.cc
+++ b/paddle/fluid/operators/cross_entropy_op.cc
@@ -32,14 +32,23 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
     int rank = x_dims.size();
     PADDLE_ENFORCE_EQ(rank, label_dims.size(),
                       "Input(X) and Input(Label) shall have the same rank.");
-    PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
-                      framework::slice_ddim(label_dims, 0, rank - 1),
-                      "Input(X) and Input(Label) shall have the same shape "
-                      "except the last dimension.");
+    bool check = true;
+    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
+                                framework::product(label_dims) <= 0)) {
+      check = false;
+    }
+    if (check) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(label_dims, 0, rank - 1),
+                        "Input(X) and Input(Label) shall have the same shape "
+                        "except the last dimension.");
+    }
     if (ctx->Attrs().Get<bool>("soft_label")) {
-      PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
-                        "If Attr(soft_label) == true, the last dimension of "
-                        "Input(X) and Input(Label) should be equal.");
+      if (check) {
+        PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
+                          "If Attr(soft_label) == true, the last dimension of "
+                          "Input(X) and Input(Label) should be equal.");
+      }
     } else {
       PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL,
                         "If Attr(softLabel) == false, the last dimension of "
@@ -82,20 +91,32 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
                       "Input(Y@Grad) and Input(X) should have the same rank.");
     PADDLE_ENFORCE_EQ(label_dims.size(), rank,
                       "Input(Label) and Input(X) should have the same rank.");
-    PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
-                      framework::slice_ddim(label_dims, 0, rank - 1),
-                      "The Input(X) and Input(Label) should have the same "
-                      "shape except the last dimension.");
-    PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
-                      framework::slice_ddim(dy_dims, 0, rank - 1),
-                      "The Input(X) and Input(Y@Grad) should have the same "
-                      "shape except the last dimension.");
+
+    bool check = true;
+    if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 ||
+                                framework::product(label_dims) <= 0)) {
+      check = false;
+    }
+
+    if (check) {
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(label_dims, 0, rank - 1),
+                        "The Input(X) and Input(Label) should have the same "
+                        "shape except the last dimension.");
+      PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                        framework::slice_ddim(dy_dims, 0, rank - 1),
+                        "The Input(X) and Input(Y@Grad) should have the same "
+                        "shape except the last dimension.");
+    }
     PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1,
                       "The last dimension of Input(Y@Grad) should be 1.");
     if (ctx->Attrs().Get<bool>("soft_label")) {
-      PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
-                        "When Attr(soft_label) == true, the last dimension of "
-                        "Input(X) and Input(Label) should be equal.");
+      if (check) {
+        PADDLE_ENFORCE_EQ(
+            x_dims[rank - 1], label_dims[rank - 1],
+            "When Attr(soft_label) == true, the last dimension of "
+            "Input(X) and Input(Label) should be equal.");
+      }
     } else {
       PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1,
                         "When Attr(soft_label) == false, the last dimension of "
diff --git a/paddle/fluid/operators/data_norm_op.cc b/paddle/fluid/operators/data_norm_op.cc
index d5bc25d19cba4de6f059612e3e8c4a65b2edd0f9..45bce6e5203f8c1dbb744e0f954f7f0a71c53372 100644
--- a/paddle/fluid/operators/data_norm_op.cc
+++ b/paddle/fluid/operators/data_norm_op.cc
@@ -140,9 +140,6 @@ class DataNormOpMaker : public framework::OpProtoAndCheckerMaker {
               "Scales of the history data batch, "
               "will apply to output when training")
         .AsIntermediate();
-    AddAttr<bool>("use_mkldnn",
-                  "(bool, default false) Only used in mkldnn kernel")
-        .SetDefault(false);
     AddComment(R"DOC(
 Data Normalization.
 
diff --git a/paddle/fluid/operators/detection/prior_box_op.h b/paddle/fluid/operators/detection/prior_box_op.h
index f84405664596ebe25983e5acbbb82bfc18c38124..d3e26256b50f2d7010fee3738802d59173678b34 100644
--- a/paddle/fluid/operators/detection/prior_box_op.h
+++ b/paddle/fluid/operators/detection/prior_box_op.h
@@ -172,6 +172,10 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
         framework::make_ddim({1, static_cast<int>(variances.size())}),
         ctx.GetPlace());
     auto var_et = framework::EigenTensor<T, 2>::From(var_t);
+
+#ifdef PADDLE_WITH_MKLML
+#pragma omp parallel for
+#endif
     for (size_t i = 0; i < variances.size(); ++i) {
       var_et(0, i) = variances[i];
     }
@@ -181,8 +185,15 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
     vars->Resize({box_num, static_cast<int>(variances.size())});
 
     auto e_vars = framework::EigenMatrix<T, Eigen::RowMajor>::From(*vars);
-    e_vars = var_et.broadcast(Eigen::DSizes<int, 2>(box_num, 1));
 
+#ifdef PADDLE_WITH_MKLML
+#pragma omp parallel for collapse(2)
+#endif
+    for (int i = 0; i < box_num; ++i) {
+      for (int j = 0; j < variances.size(); ++j) {
+        e_vars(i, j) = variances[j];
+      }
+    }
     vars->Resize(var_dim);
   }
 };  // namespace operators
diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
index 6a6741d8fc54d22addca91b75dfabf5950c1a35a..7aaa607f1585c98fe2dd816e8d66e5c6fd171e80 100644
--- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
+++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
@@ -77,8 +77,7 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
       } else {
         functor.RunMidWise(n, pre, post);
       }
-      z->set_layout(DataLayout::kMKLDNN);
-      z->set_format(x->format());
+      z->set_mkldnn_prim_desc(x->get_mkldnn_prim_desc());
     } else {
       PADDLE_ENFORCE(x->layout() == DataLayout::kMKLDNN &&
                          x->format() != memory::format::format_undef,
@@ -116,7 +115,8 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
       auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_pd);
 
       // create mkldnn memory for dst
-      memory dst_memory = memory(sum_pd.dst_primitive_desc(), z_data);
+      auto dst_mem_pd = sum_pd.dst_primitive_desc();
+      memory dst_memory = memory(dst_mem_pd, z_data);
 
       std::vector<primitive::at> inputs;
       inputs.push_back(srcs[0]);
@@ -129,9 +129,7 @@ class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
       pipeline.push_back(sum_prim);
       stream(stream::kind::eager).submit(pipeline).wait();
 
-      z->set_layout(DataLayout::kMKLDNN);
-      z->set_format(
-          (memory::format)dst_memory.get_primitive_desc().desc().data.format);
+      z->set_mkldnn_prim_desc(dst_mem_pd);
     }
   }
 };
@@ -152,24 +150,19 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
     auto* out = dout;
     auto *x = dout, *y = dout;
 
-    auto set_mkldnn_format = [](Tensor* in, const Tensor* out) {
-      in->set_layout(DataLayout::kMKLDNN);
-      in->set_format(out->format());
-    };
-
     if (dx != nullptr && dy != nullptr && dx->dims() == dy->dims()) {
       if (dx->dims() == dy->dims()) {
         auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, T>(ctx);
         if (dx) {
           blas.VCOPY(dout->numel(), dout->data<T>(),
                      dx->mutable_data<T>(ctx.GetPlace()));
-          set_mkldnn_format(dx, dout);
+          dx->set_mkldnn_prim_desc(dout->get_mkldnn_prim_desc());
         }
 
         if (dy) {
           blas.VCOPY(dout->numel(), dout->data<T>(),
                      dy->mutable_data<T>(ctx.GetPlace()));
-          set_mkldnn_format(dy, dout);
+          dy->set_mkldnn_prim_desc(dout->get_mkldnn_prim_desc());
         }
       }
     } else {
diff --git a/paddle/fluid/operators/fake_quantize_op.cc b/paddle/fluid/operators/fake_quantize_op.cc
index d51eb054a96d27f6ce87ba4b4e717f49dcd8a588..3bb07d383548e6f4be810c96d2a916c0fe5e45f5 100644
--- a/paddle/fluid/operators/fake_quantize_op.cc
+++ b/paddle/fluid/operators/fake_quantize_op.cc
@@ -31,7 +31,7 @@ template <typename T>
 struct FindAbsMaxFunctor<platform::CPUDeviceContext, T> {
   void operator()(const platform::CPUDeviceContext& ctx, const T* in,
                   const int num, T* out) {
-    *out = *(std::max_element(in + 0, in + num, Compare<T>()));
+    *out = std::abs(*(std::max_element(in + 0, in + num, Compare<T>())));
   }
 };
 
@@ -46,10 +46,8 @@ struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> {
     platform::Transform<platform::CPUDeviceContext> trans;
     trans(ctx, in.data<T>(), in.data<T>() + in.numel(),
           out->mutable_data<T>(ctx.GetPlace()), ClipFunctor<T>(-s, s));
-    auto in_e = framework::EigenVector<T>::Flatten(in);
     auto out_e = framework::EigenVector<T>::Flatten(*out);
-
-    out_e.device(*ctx.eigen_device()) = (bin_cnt / s * in_e).round();
+    out_e.device(*ctx.eigen_device()) = (bin_cnt / s * out_e).round();
   }
 };
 
diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
index fe4c73f4723355d4b56d075423de29b45b9cd4e4..80caf70b08e65932d6ccb90a5293d072b2b2bc72 100644
--- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
+++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
@@ -23,6 +23,9 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
+    if (ctx->IsRuntime()) {
+      return;
+    }
     PADDLE_ENFORCE(ctx->HasInput("W"),
                    "Input W of FusedEmbeddingSeqPoolOp should not be null.");
     PADDLE_ENFORCE(ctx->HasInput("Ids"),
@@ -42,36 +45,15 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel {
     // we only support sum now
     PADDLE_ENFORCE_EQ(combiner, "sum");
 
-    int64_t last_dim = table_dims[1];
-    for (int i = 1; i != ids_dims.size(); ++i) {
-      last_dim *= ids_dims[i];
-    }
-
-    if (ctx->IsRuntime()) {
-      framework::Variable* ids_var =
-          boost::get<framework::Variable*>(ctx->GetInputVarPtrs("Ids")[0]);
-      const auto& ids_lod = ids_var->Get<LoDTensor>().lod();
+    int64_t last_dim = FusedEmbeddingSeqPoolLastDim(table_dims, ids_dims);
+    // in compile time, the lod level of ids must be 1
+    framework::VarDesc* ids_desc =
+        boost::get<framework::VarDesc*>(ctx->GetInputVarPtrs("Ids")[0]);
+    PADDLE_ENFORCE_EQ(ids_desc->GetLoDLevel(), 1);
 
-      // in run time, the LoD of ids must be 1
-      PADDLE_ENFORCE(ids_lod.size(), 1u,
-                     "The LoD level of Input(Ids) must be 1");
-      PADDLE_ENFORCE_GE(ids_lod[0].size(), 1u, "The LoD could NOT be empty");
-
-      int64_t batch_size = ids_lod[0].size() - 1;
-
-      // in run time, the shape from Ids -> output
-      // should be [seq_length, 1] -> [batch_size, embedding_size]
-      ctx->SetOutputDim("Out", framework::make_ddim({batch_size, last_dim}));
-    } else {
-      // in compile time, the lod level of ids must be 1
-      framework::VarDesc* ids_desc =
-          boost::get<framework::VarDesc*>(ctx->GetInputVarPtrs("Ids")[0]);
-      PADDLE_ENFORCE_EQ(ids_desc->GetLoDLevel(), 1);
-
-      // in compile time, the shape from Ids -> output
-      // should be [-1, 1] -> [-1, embedding_size]
-      ctx->SetOutputDim("Out", framework::make_ddim({-1, last_dim}));
-    }
+    // in compile time, the shape from Ids -> output
+    // should be [-1, 1] -> [-1, embedding_size]
+    ctx->SetOutputDim("Out", framework::make_ddim({-1, last_dim}));
   }
 
  protected:
diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
index 33a1b47d150f653b84a377a61b251491aa719bee..2b0c1f560f23eee7fbdf14444bf933535b704167 100644
--- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
+++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
@@ -61,6 +61,15 @@ struct EmbeddingVSumFunctor {
   }
 };
 
+// Returns the trailing output extent: table_dims[1] multiplied by every
+// extent of ids_dims except the first. Kept 64-bit to avoid truncation.
+inline int64_t FusedEmbeddingSeqPoolLastDim(
+    const framework::DDim &table_dims, const framework::DDim &ids_dims) {
+  int64_t last_dim = table_dims[1];
+  for (int i = 1; i != ids_dims.size(); ++i) last_dim *= ids_dims[i];
+  return last_dim;
+}
+
 template <typename T>
 class FusedEmbeddingSeqPoolKernel : public framework::OpKernel<T> {
  public:
@@ -70,6 +79,17 @@ class FusedEmbeddingSeqPoolKernel : public framework::OpKernel<T> {
     const LoDTensor *table_var = context.Input<LoDTensor>("W");
     const std::string &combiner_type = context.Attr<std::string>("combiner");
 
+    int64_t last_dim =
+        FusedEmbeddingSeqPoolLastDim(table_var->dims(), ids_t->dims());
+    const auto &ids_lod = ids_t->lod();
+    // At run time Ids must carry exactly one, non-empty, LoD level.
+    PADDLE_ENFORCE_EQ(ids_lod.size(), 1u,
+                      "The LoD level of Input(Ids) must be 1");
+    PADDLE_ENFORCE_GE(ids_lod[0].size(), 1u, "The LoD could NOT be empty");
+    int64_t batch_size = ids_lod[0].size() - 1;
+    // Shape mapping: Ids [seq_length, 1] -> Out [batch_size, embedding_size].
+    output_t->Resize({batch_size, last_dim});
+
     if (combiner_type == "sum") {
       EmbeddingVSumFunctor<T> functor;
       functor(context, table_var, ids_t, output_t);
diff --git a/paddle/fluid/operators/hash_op.cc b/paddle/fluid/operators/hash_op.cc
index b2c2c7954b79658e66f1524a81bcad0b7bf22c35..7a29f80ff1ce413519ea9cea6a35747bdced5885 100644
--- a/paddle/fluid/operators/hash_op.cc
+++ b/paddle/fluid/operators/hash_op.cc
@@ -14,7 +14,6 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/hash_op.h"
 #include <string>
-#include <vector>
 
 namespace paddle {
 namespace operators {
@@ -27,6 +26,9 @@ class HashOp : public framework::OperatorWithKernel {
       : OperatorWithKernel(type, inputs, outputs, attrs) {}
 
   void InferShape(framework::InferShapeContext *ctx) const override {
+    if (ctx->IsRuntime()) {
+      return;
+    }
     PADDLE_ENFORCE(ctx->HasInput("X"),
                    "Input(X) of HashOp should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
@@ -36,15 +38,8 @@ class HashOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(dims.size(), 2UL,
                       "The input of hash_op's dimensions must be 2");
     std::vector<int64_t> out_dims;
-    out_dims.reserve(dims.size() + 1);
-    // copy all dims except the last one
-    for (int i = 0u; i != dims.size() - 1; ++i) {
-      out_dims.emplace_back(dims[i]);
-    }
     int num_hash = ctx->Attrs().Get<int>("num_hash");
-    out_dims.emplace_back(num_hash);
-    // keep the last dim to 1
-    out_dims.emplace_back(1);
+    HashOutputSize(dims, out_dims, num_hash);
 
     ctx->SetOutputDim("Out", framework::make_ddim(out_dims));
     ctx->ShareLoD("X", /*->*/ "Out");
@@ -71,4 +66,4 @@ $$Out = scale * X$$
 namespace ops = paddle::operators;
 
 REGISTER_OP_WITHOUT_GRADIENT(hash, ops::HashOp, ops::HashOpMaker);
-REGISTER_OP_CPU_KERNEL(hash, ops::HashKerel<int>, ops::HashKerel<int64_t>);
+REGISTER_OP_CPU_KERNEL(hash, ops::HashKernel<int>, ops::HashKernel<int64_t>);
diff --git a/paddle/fluid/operators/hash_op.h b/paddle/fluid/operators/hash_op.h
index 9781bb0f453642cefb3eb59a05389c339a7de39d..9e7ad5235ff483a2fc0cfbb8bc35c620084bb896 100644
--- a/paddle/fluid/operators/hash_op.h
+++ b/paddle/fluid/operators/hash_op.h
@@ -17,21 +17,34 @@ limitations under the License. */
 extern "C" {
 #include <xxhash.h>
 }
+#include <vector>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 
 namespace paddle {
 namespace operators {
-// template <typename DeviceContext, typename T>
+
+// Appends in_dims minus its last extent, then num_hash and 1, to out_dims.
+inline void HashOutputSize(const framework::DDim& in_dims,
+                           std::vector<int64_t>& out_dims,  // NOLINT
+                           int num_hash) {
+  out_dims.reserve(in_dims.size() + 1);
+  // `<` rather than `!=`: a rank-0 in_dims must not walk the index off the end.
+  for (int i = 0; i < in_dims.size() - 1; ++i) {
+    out_dims.emplace_back(in_dims[i]);
+  }
+  out_dims.emplace_back(num_hash);
+  out_dims.emplace_back(1);  // the last output extent is always 1
+}
+
 template <typename T>
-class HashKerel : public framework::OpKernel<T> {
+class HashKernel : public framework::OpKernel<T> {
  public:
   virtual void Compute(const framework::ExecutionContext& context) const {
     auto* out_t = context.Output<framework::LoDTensor>("Out");
     auto* in_t = context.Input<framework::LoDTensor>("X");
     int mod_by = context.Attr<int>("mod_by");
     int num_hash = context.Attr<int>("num_hash");
-    auto* output = out_t->mutable_data<T>(context.GetPlace());
 
     auto in_dims = in_t->dims();
     auto in_lod = in_t->lod();
@@ -39,6 +52,11 @@ class HashKerel : public framework::OpKernel<T> {
         static_cast<uint64_t>(in_dims[0]), in_lod[0].back(),
         "The actual input data's size mismatched with LoD information.");
 
+    std::vector<int64_t> out_dims;
+    HashOutputSize(in_dims, out_dims, num_hash);
+    out_t->Resize(framework::make_ddim(out_dims));
+    auto* output = out_t->mutable_data<T>(context.GetPlace());
+
     auto seq_length = in_dims[0];
     auto last_dim = in_dims[in_dims.size() - 1];
     auto* input = in_t->data<T>();
@@ -49,6 +67,7 @@ class HashKerel : public framework::OpKernel<T> {
       }
       input += last_dim;
     }
+    out_t->set_lod(in_t->lod());
   }
 };
 
diff --git a/paddle/fluid/operators/interpolate_op.cc b/paddle/fluid/operators/interpolate_op.cc
index de91ba6270ac2ed22c8380878c0a0037fb1629c0..10d01af982d01800bdd2d5d59761cfb09e2a8139 100644
--- a/paddle/fluid/operators/interpolate_op.cc
+++ b/paddle/fluid/operators/interpolate_op.cc
@@ -84,13 +84,13 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault("bilinear");
     AddAttr<bool>(
         "align_corners",
-        "an optinal bool. Defaults to True. "
+        "an optional bool. Defaults to True. "
         "If True, the centers of 4 corner pixels of the input and output "
         "tensors are aligned, preserving the values at the corner pixels, "
-        "if Flase, are not aligned")
+        "If False, are not aligned")
         .SetDefault(true);
     AddAttr<int>("align_mode",
-                 "(int, default \'1\'), optional for bilinear interpolation"
+                 "(int, default \'1\'), optional for bilinear interpolation, "
                  "can be \'0\' for src_idx = scale*(dst_indx+0.5)-0.5 , "
                  "can be \'1\' for src_idx = scale*dst_index .")
         .SetDefault(1);
diff --git a/paddle/fluid/operators/is_empty_op.cc b/paddle/fluid/operators/is_empty_op.cc
index ba50bdf34baf2b9b0748b24c98c274aa18e22e36..092a6eae6f5b7edcc5656522377de10a08a01ea8 100644
--- a/paddle/fluid/operators/is_empty_op.cc
+++ b/paddle/fluid/operators/is_empty_op.cc
@@ -34,9 +34,8 @@ class IsEmptyOp : public framework::OperatorWithKernel {
 
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext &ctx) const override {
-    framework::OpKernelType kt = framework::OpKernelType(
-        ctx.Input<framework::LoDTensor>("X")->type(), platform::CPUPlace());
-    return kt;
+    auto *x = ctx.Input<framework::LoDTensor>("X");
+    return framework::OpKernelType(x->type(), x->place());
   }
 };
 
@@ -58,7 +57,6 @@ It will just return product(tensor.ddims()) > 0;
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-
 REGISTER_OPERATOR(is_empty, ops::IsEmptyOp, ops::IsEmptyOpMaker,
                   paddle::framework::EmptyGradOpMaker);
 REGISTER_OP_CPU_KERNEL(
diff --git a/paddle/fluid/operators/is_empty_op.cu.cc b/paddle/fluid/operators/is_empty_op.cu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..3c256503baf6ba3bc8f8dff866a2ce9c57ec5bf1
--- /dev/null
+++ b/paddle/fluid/operators/is_empty_op.cu.cc
@@ -0,0 +1,23 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/is_empty_op.h"
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(
+    is_empty, ops::IsEmptyOpKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::IsEmptyOpKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::IsEmptyOpKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::IsEmptyOpKernel<paddle::platform::CUDADeviceContext, int64_t>);
diff --git a/paddle/fluid/operators/is_empty_op.h b/paddle/fluid/operators/is_empty_op.h
index 3e3af22fa8d842b6a1e67418446f1a40949e046b..4f6419eb577709836275481cf617c07ea6c7f4c0 100644
--- a/paddle/fluid/operators/is_empty_op.h
+++ b/paddle/fluid/operators/is_empty_op.h
@@ -28,6 +28,9 @@ class IsEmptyOpKernel : public framework::OpKernel<T> {
     // get output
     auto* output_tensor = context.Output<framework::LoDTensor>("Out");
 
+    // Note: is_empty is always executed on CPU and the output data should
+    // always be allocated for CPUPlace. We register CUDA kernel for this op to
+    // avoid the unnecessary data transform.
     output_tensor->mutable_data<bool>(platform::CPUPlace())[0] =
         framework::product(input_tensor->dims()) == 0;
   }
diff --git a/paddle/fluid/operators/jit/benchmark.cc b/paddle/fluid/operators/jit/benchmark.cc
index 3348778ee782ef0cdd1df4c3c4b24060436d7d79..11dc615f5ff8ea78bbbf6eeb655ee88b3a52dc13 100644
--- a/paddle/fluid/operators/jit/benchmark.cc
+++ b/paddle/fluid/operators/jit/benchmark.cc
@@ -332,6 +332,45 @@ void BenchEmbSeqPoolKernel() {
   }
 }
 
+template <jit::KernelType KT, typename T, typename PlaceType>
+void BenchSgdKernel() {
+  const T lr = 0.1;
+  auto UnDuplicatedRandomVec = [](int n, const int64_t lower,
+                                  const int64_t upper) -> std::vector<int64_t> {
+    PADDLE_ENFORCE_LE(static_cast<size_t>(upper - lower), n - 1);
+    PADDLE_ENFORCE_GT(n, 0);
+    // Shuffle 0..n-1 so that every row index appears exactly once.
+    std::vector<int64_t> indices;
+    for (int64_t v = 0; v < n; ++v) {
+      indices.push_back(v);
+    }
+    std::random_shuffle(indices.begin(), indices.end());
+    return indices;
+  };
+  for (int param_h : {1, 1000}) {
+    for (int grad_w : {1, 2, 8, 16, 30, 256}) {
+      // Benchmark the in-place update only: param is both input and output.
+      Tensor param;
+      param.Resize({param_h, grad_w});
+      T* param_data = param.mutable_data<T>(PlaceType());
+      RandomVec<T>(param_h * grad_w, param_data, -2.f, 2.f);
+      const int max_rows = std::min(param_h, 10);
+      for (int rows_size = 1; rows_size <= max_rows; ++rows_size) {
+        Tensor grad;
+        grad.Resize({rows_size, grad_w});
+        auto rows = UnDuplicatedRandomVec(rows_size, 0, rows_size - 1);
+        T* grad_buf = grad.mutable_data<T>(PlaceType());
+        RandomVec<T>(rows_size * grad_w, grad_buf, -2.f, 2.f);
+        const T* grad_data = grad.data<T>();
+        const int64_t* rows_data = rows.data();
+        jit::sgd_attr_t attr(param_h, grad_w, rows_size, grad_w, rows_size);
+        BenchAllImpls<KT, jit::SgdTuples<T>, PlaceType>(
+            attr, &lr, param_data, grad_data, rows_data, param_data, &attr);
+      }
+    }
+  }
+}
+
 template <jit::KernelType KT, typename T, typename PlaceType>
 void BenchMatMulKernel() {
   for (int m : {1, 2, 3, 4}) {
@@ -477,6 +516,9 @@ BENCH_FP32_CPU(kEmbSeqPool) {
   BenchEmbSeqPoolKernel<jit::kEmbSeqPool, T, CPUPlace>();
 }
 
+// sgd function
+BENCH_FP32_CPU(kSgd) { BenchSgdKernel<jit::kSgd, T, CPUPlace>(); }
+
 // matmul
 BENCH_FP32_CPU(kMatMul) { BenchMatMulKernel<jit::kMatMul, T, CPUPlace>(); }
 
diff --git a/paddle/fluid/operators/jit/gen/CMakeLists.txt b/paddle/fluid/operators/jit/gen/CMakeLists.txt
index 294f73d9646c93132e464a032e93562094663a73..eb0c03568ddddf1c456fec6fcc81f3b40d051844 100644
--- a/paddle/fluid/operators/jit/gen/CMakeLists.txt
+++ b/paddle/fluid/operators/jit/gen/CMakeLists.txt
@@ -32,3 +32,4 @@ USE_JITKERNEL_GEN(kSeqPool)
 USE_JITKERNEL_GEN(kHMax)
 USE_JITKERNEL_GEN(kHSum)
 USE_JITKERNEL_GEN(kEmbSeqPool)
+USE_JITKERNEL_GEN(kSgd)
diff --git a/paddle/fluid/operators/jit/gen/jitcode.h b/paddle/fluid/operators/jit/gen/jitcode.h
index 689df8b1cbb7a928c9f9175d28a8231b56e2e82e..39847d1b65f771976c4dde5a3e34cc40e33851e6 100644
--- a/paddle/fluid/operators/jit/gen/jitcode.h
+++ b/paddle/fluid/operators/jit/gen/jitcode.h
@@ -31,7 +31,8 @@ namespace gen {
 // Application Binary Interface
 constexpr Xbyak::Operand::Code abi_param1(Xbyak::Operand::RDI),
     abi_param2(Xbyak::Operand::RSI), abi_param3(Xbyak::Operand::RDX),
-    abi_param4(Xbyak::Operand::RCX);
+    abi_param4(Xbyak::Operand::RCX), abi_param5(Xbyak::Operand::R8),
+    abi_param6(Xbyak::Operand::R9);
 
 constexpr Xbyak::Operand::Code g_abi_regs[] = {
     Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::R12,
diff --git a/paddle/fluid/operators/jit/gen/sgd.cc b/paddle/fluid/operators/jit/gen/sgd.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a745a27f9543a75f6915c9316aad62fa41305bb1
--- /dev/null
+++ b/paddle/fluid/operators/jit/gen/sgd.cc
@@ -0,0 +1,130 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#include "paddle/fluid/operators/jit/gen/sgd.h"
+#include <stddef.h>  // offsetof
+#include <vector>
+#include "paddle/fluid/operators/jit/registry.h"
+#include "paddle/fluid/platform/cpu_info.h"
+
+namespace paddle {
+namespace operators {
+namespace jit {
+namespace gen {
+
+// Emits, for every i: out[rows[i]] = param[rows[i]] - lr * grad[i].
+void SgdJitCode::genCode() {
+  preCode();
+  constexpr int block = YMM_FLOAT_BLOCK;
+  // A group of n <= 7 blocks keeps grad in ymm[0, n) and param in ymm[n, 2n).
+  constexpr int max_num_regs = 7;
+  const int num_block = w_ / block;
+  const int num_groups = num_block / max_num_regs;
+  const size_t block_size = sizeof(float) * block;
+  const size_t width_size = w_ * sizeof(float);
+  std::vector<int> groups(num_groups, max_num_regs);
+  int rest_num_regs = num_block % max_num_regs;
+  if (rest_num_regs > 0) {
+    groups.push_back(rest_num_regs);
+  }
+  vbroadcastss(ymm_lr, ptr[param_lr]);
+  // param_grad is rdx, which mul overwrites; copy it out before any mul.
+  mov(reg_ptr_grad_i, param_grad);
+  mov(reg_ptr_rows_i, param_rows);
+  // End address of rows: rows + selected_rows_size * sizeof(int64_t).
+  mov(reg_rows_size_in_byte,
+      qword[param_attr + offsetof(sgd_attr_t, selected_rows_size)]);
+  mov(rax, sizeof(int64_t));
+  mul(reg_rows_size_in_byte);
+  mov(reg_rows_size_in_byte, rax);
+  add(reg_rows_size_in_byte, reg_ptr_rows_i);
+  // The row loop tests its condition at the bottom, so skip it entirely
+  // when no row is selected instead of reading rows[0] out of bounds.
+  Label l_next_row, l_end;
+  cmp(reg_ptr_rows_i, reg_rows_size_in_byte);
+  jge(l_end, T_NEAR);
+  L(l_next_row);
+  {
+    // reg_row = rows[i] * width_size, the byte offset of the selected row.
+    mov(reg_row, qword[reg_ptr_rows_i]);
+    mov(rax, width_size);
+    mul(reg_row);
+    mov(reg_row, rax);
+    mov(reg_ptr_param_i, param_param);
+    mov(reg_ptr_out_i, param_out);
+    add(reg_ptr_param_i, reg_row);
+    add(reg_ptr_out_i, reg_row);
+    size_t w_offset = 0;
+    for (int num_regs : groups) {
+      // load grad
+      size_t inner_offset = w_offset;
+      for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
+        vmovups(ymm_t(reg_i), ptr[reg_ptr_grad_i + inner_offset]);
+        inner_offset += block_size;
+      }
+      // load param
+      inner_offset = w_offset;
+      for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
+        vmovups(ymm_t(reg_i + num_regs), ptr[reg_ptr_param_i + inner_offset]);
+        inner_offset += block_size;
+      }
+      // out = param - lr * grad
+      for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
+        vmulps(ymm_t(reg_i), ymm_t(reg_i), ymm_lr);
+        vsubps(ymm_t(reg_i + num_regs), ymm_t(reg_i + num_regs), ymm_t(reg_i));
+      }
+      // save out
+      inner_offset = w_offset;
+      for (int reg_i = 0; reg_i < num_regs; ++reg_i) {
+        vmovups(ptr[reg_ptr_out_i + inner_offset], ymm_t(reg_i + num_regs));
+        inner_offset += block_size;
+      }
+      w_offset += (block_size * num_regs);
+    }
+    // advance to the next grad row and the next row index
+    add(reg_ptr_grad_i, width_size);
+    add(reg_ptr_rows_i, sizeof(int64_t));
+    cmp(reg_ptr_rows_i, reg_rows_size_in_byte);
+    jl(l_next_row, T_NEAR);
+  }
+  L(l_end);
+  postCode();
+}
+
+class SgdCreator : public JitCodeCreator<sgd_attr_t> {
+ public:
+  bool UseMe(const sgd_attr_t& attr) const override {
+    return platform::MayIUse(platform::avx) &&
+           attr.grad_width % YMM_FLOAT_BLOCK == 0;  // whole YMM blocks only
+  }
+  size_t CodeSize(const sgd_attr_t& attr) const override {
+    return 96 + (attr.grad_width / YMM_FLOAT_BLOCK) * 32 * 8;  // buffer bytes
+  }
+  std::unique_ptr<GenBase> CreateJitCode(
+      const sgd_attr_t& attr) const override {
+    PADDLE_ENFORCE_EQ(attr.param_width, attr.grad_width);
+    PADDLE_ENFORCE_LE(attr.selected_rows_size, attr.grad_height);
+    PADDLE_ENFORCE_GE(attr.selected_rows_size, 0);  // zero rows is accepted
+    return make_unique<SgdJitCode>(attr, CodeSize(attr));
+  }
+};
+
+}  // namespace gen
+}  // namespace jit
+}  // namespace operators
+}  // namespace paddle
+
+namespace gen = paddle::operators::jit::gen;
+
+REGISTER_JITKERNEL_GEN(kSgd, gen::SgdCreator);
diff --git a/paddle/fluid/operators/jit/gen/sgd.h b/paddle/fluid/operators/jit/gen/sgd.h
new file mode 100644
index 0000000000000000000000000000000000000000..317edcd2bcb5fea1f14f32260fd16c9c706eaf00
--- /dev/null
+++ b/paddle/fluid/operators/jit/gen/sgd.h
@@ -0,0 +1,60 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#pragma once
+
+#include <string>
+#include "glog/logging.h"
+#include "paddle/fluid/operators/jit/gen/jitcode.h"
+#include "paddle/fluid/platform/enforce.h"
+
+namespace paddle {
+namespace operators {
+namespace jit {
+namespace gen {
+
+class SgdJitCode : public JitCode {  // code generator behind SgdCreator
+ public:
+  explicit SgdJitCode(const sgd_attr_t& attr, size_t code_size = 256 * 1024,
+                      void* code_ptr = nullptr)
+      : JitCode(code_size, code_ptr), w_(attr.grad_width) {
+    this->genCode();  // the code is emitted at construction time
+  }
+
+  DECLARE_JIT_CODE(SgdJitCode);
+  void genCode() override;
+
+ private:
+  int w_;  // attr.grad_width captured at construction
+  reg64_t param_lr{abi_param1};     // 1st argument: lr
+  reg64_t param_param{abi_param2};  // 2nd argument: param
+  reg64_t param_grad{abi_param3};   // 3rd argument: grad (rdx)
+  reg64_t param_rows{abi_param4};   // 4th argument: rows
+  reg64_t param_out{abi_param5};    // 5th argument: out
+  reg64_t param_attr{abi_param6};   // 6th argument: attr
+  // genCode() broadcasts the learning rate into this register.
+  ymm_t ymm_lr = ymm_t(15);
+  // General-purpose registers genCode() uses as loop state.
+  reg64_t reg_ptr_grad_i{r10};         // current grad row
+  reg64_t reg_ptr_rows_i{r11};         // current entry of rows
+  reg64_t reg_rows_size_in_byte{r12};  // end address of rows
+  reg64_t reg_row{r13};                // byte offset of the selected row
+  reg64_t reg_ptr_param_i{r14};        // selected param row
+  reg64_t reg_ptr_out_i{r15};          // selected out row
+};
+
+}  // namespace gen
+}  // namespace jit
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/jit/helper.cc b/paddle/fluid/operators/jit/helper.cc
index a76653613289892c4bb41596f998c5f4cc131fd7..1dc60442d5c5f6acf49b6319223b190f6c81e1a6 100644
--- a/paddle/fluid/operators/jit/helper.cc
+++ b/paddle/fluid/operators/jit/helper.cc
@@ -55,6 +55,7 @@ const char* to_string(KernelType kt) {
     ONE_CASE(kHSum);
     ONE_CASE(kSoftmax);
     ONE_CASE(kEmbSeqPool);
+    ONE_CASE(kSgd);
     default:
       PADDLE_THROW("Not support type: %d, or forget to add it.", kt);
       return "NOT JITKernel";
diff --git a/paddle/fluid/operators/jit/helper.h b/paddle/fluid/operators/jit/helper.h
index 07998588a5a560f9c2ad7cc765b66e76e87da6f6..d85c719c1c58c88ec244f1f6ad8343d66391241d 100644
--- a/paddle/fluid/operators/jit/helper.h
+++ b/paddle/fluid/operators/jit/helper.h
@@ -181,6 +181,14 @@ inline std::ostream& operator<<(std::ostream& os,
   return os;
 }
 
+inline std::ostream& operator<<(std::ostream& os, const sgd_attr_t& attr) {
+  os << "param_height[" << attr.param_height << "],";
+  os << "param_width[" << attr.param_width << "],";
+  os << "grad_height[" << attr.grad_height << "],";
+  os << "grad_width[" << attr.grad_width << "],";
+  return os << "selected_rows_size[" << attr.selected_rows_size << "]";
+}
+
 inline std::ostream& operator<<(std::ostream& os, const matmul_attr_t& attr) {
   os << "M[" << attr.m << "],N[" << attr.n << "],K[" << attr.k << "]";
   return os;
diff --git a/paddle/fluid/operators/jit/kernel_base.h b/paddle/fluid/operators/jit/kernel_base.h
index 20b6a32bef9860c52ab4423395a8e00f719b0210..895e2d4d6f3809a66443ed6d6bfc1ee02d6c529a 100644
--- a/paddle/fluid/operators/jit/kernel_base.h
+++ b/paddle/fluid/operators/jit/kernel_base.h
@@ -46,6 +46,7 @@ typedef enum {
   kVMul,
   kVRelu,
   kVScal,
+  kSgd,
   kVSigmoid,
   kVSquare,
   kVSub,
@@ -173,6 +174,28 @@ struct EmbSeqPoolTuples {
                             const emb_seq_pool_attr_t*);
 };
 
+typedef struct sgd_attr_s {  // shape attributes passed to the kSgd kernels
+  int64_t param_height, param_width;  // shape of the param matrix
+  int64_t grad_height, grad_width;    // shape of the grad matrix
+  int64_t selected_rows_size;         // number of row indices in rows
+  sgd_attr_s() = default;             // fields get no explicit initial value
+  explicit sgd_attr_s(int64_t param_h, int64_t param_w, int64_t grad_h,
+                      int64_t grad_w, int64_t selected_rows_sz)
+      : param_height(param_h),
+        param_width(param_w),
+        grad_height(grad_h),
+        grad_width(grad_w),
+        selected_rows_size(selected_rows_sz) {}
+} sgd_attr_t;
+
+template <typename T>
+struct SgdTuples {  // func_type arguments: lr, param, grad, rows, out, attr
+  typedef T data_type;           // element type of lr, param, grad and out
+  typedef sgd_attr_t attr_type;  // shape attributes
+  typedef void (*func_type)(const T*, const T*, const T*, const int64_t*, T*,
+                            const sgd_attr_t*);
+};
+
 typedef struct matmul_attr_s {
   int m, n, k;
   void* packed_weight{nullptr};
diff --git a/paddle/fluid/operators/jit/kernel_key.cc b/paddle/fluid/operators/jit/kernel_key.cc
index e659c6d254391f09ac8692e0b7602c65e1afd47d..740d0f850a072a5ad3238e52402141a83c0b7e33 100644
--- a/paddle/fluid/operators/jit/kernel_key.cc
+++ b/paddle/fluid/operators/jit/kernel_key.cc
@@ -13,6 +13,7 @@
  * limitations under the License. */
 
 #include "paddle/fluid/operators/jit/kernel_key.h"
+#include "paddle/fluid/platform/enforce.h"
 
 namespace paddle {
 namespace operators {
@@ -23,14 +24,30 @@ size_t JitCodeKey<int>(const int& d) {
   return d;
 }
 
+// TODO(TJ): refine and benchmark JitCodeKey generation
 constexpr int act_type_shift = 3;  // suppot 2^3 act types
+static inline int act_type_convert(KernelType type) {
+  // Maps an activation kernel type to a small dense code: the position in
+  // this table. JitCodeKey packs each code into act_type_shift bits, so
+  // the table must hold at most 2^act_type_shift entries.
+  static const KernelType kActTypes[] = {kVIdentity, kVExp, kVRelu, kVSigmoid,
+                                         kVTanh};
+  constexpr int kNumActTypes = sizeof(kActTypes) / sizeof(kActTypes[0]);
+  for (int code = 0; code < kNumActTypes; ++code) {
+    if (kActTypes[code] == type) {
+      return code;
+    }
+  }
+  PADDLE_THROW("Unsupported act type %d", type);
+  return 0;
+}
 
 template <>
 size_t JitCodeKey<lstm_attr_t>(const lstm_attr_t& attr) {
   size_t key = attr.d;
-  int gate_key = static_cast<int>(attr.act_gate) << 1;
-  int cand_key = static_cast<int>(attr.act_cand) << (1 + act_type_shift);
-  int cell_key = static_cast<int>(attr.act_cell) << (1 + act_type_shift * 2);
+  int gate_key = act_type_convert(attr.act_gate) << 1;
+  int cand_key = act_type_convert(attr.act_cand) << (1 + act_type_shift);
+  int cell_key = act_type_convert(attr.act_cell) << (1 + act_type_shift * 2);
   return (key << (1 + act_type_shift * 3)) + gate_key + cand_key + cell_key +
          attr.use_peephole;
 }
@@ -38,8 +55,8 @@ size_t JitCodeKey<lstm_attr_t>(const lstm_attr_t& attr) {
 template <>
 size_t JitCodeKey<gru_attr_t>(const gru_attr_t& attr) {
   size_t key = attr.d;
-  return (key << (act_type_shift * 2)) + static_cast<int>(attr.act_gate) +
-         (static_cast<int>(attr.act_cand) << act_type_shift);
+  return (key << (act_type_shift * 2)) + act_type_convert(attr.act_gate) +
+         (act_type_convert(attr.act_cand) << act_type_shift);
 }
 
 template <>
@@ -61,6 +78,11 @@ size_t JitCodeKey<emb_seq_pool_attr_t>(const emb_seq_pool_attr_t& attr) {
   return attr.table_width;
 }
 
+template <>
+size_t JitCodeKey<sgd_attr_t>(const sgd_attr_t& attr) {
+  return static_cast<size_t>(attr.grad_width);  // the key is the grad width
+}
+
 }  // namespace jit
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
index d209f31007255b3a90fdeeb4d609311b80bdc7b5..9a00ad56a6a909a677cb8f60bd80fe399e82952f 100644
--- a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
+++ b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
@@ -14,3 +14,4 @@ USE_JITKERNEL_MORE(kVTanh, mkl)
 USE_JITKERNEL_MORE(kSeqPool, mkl)
 USE_JITKERNEL_MORE(kSoftmax, mkl)
 USE_JITKERNEL_MORE(kEmbSeqPool, mkl)
+USE_JITKERNEL_MORE(kSgd, mkl)
diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.cc b/paddle/fluid/operators/jit/more/mkl/mkl.cc
index 29a451f832fa745f8e1f5a45fd934f09e1f41e76..780fda02c1ff3da2e0b945f9b2fece30484e4519 100644
--- a/paddle/fluid/operators/jit/more/mkl/mkl.cc
+++ b/paddle/fluid/operators/jit/more/mkl/mkl.cc
@@ -184,6 +184,16 @@ bool EmbSeqPoolKernel<double>::UseMe(const emb_seq_pool_attr_t& attr) const {
   return true;
 }
 
+template <>
+bool SgdKernel<float>::UseMe(const sgd_attr_t& attr) const {
+  return true;  // no restriction on attr for float
+}
+
+template <>
+bool SgdKernel<double>::UseMe(const sgd_attr_t& attr) const {
+  return true;  // no restriction on attr for double
+}
+
 template <>
 bool MatMulKernel<float>::UseMe(const matmul_attr_t& attr) const {
   return platform::MayIUse(platform::avx);
@@ -239,5 +249,6 @@ REGISTER_MKL_KERNEL(kVTanh, VTanh);
 REGISTER_MKL_KERNEL(kSeqPool, SeqPool);
 REGISTER_MKL_KERNEL(kEmbSeqPool, EmbSeqPool);
 REGISTER_MKL_KERNEL(kSoftmax, Softmax);
+REGISTER_MKL_KERNEL(kSgd, Sgd);
 
 #undef REGISTER_MKL_KERNEL
diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.h b/paddle/fluid/operators/jit/more/mkl/mkl.h
index 9a72ba83022de2beeb760772ee8489477befdd7e..a7bc2de4a3e8e7d8e2a6b00990bfa459b3029c2a 100644
--- a/paddle/fluid/operators/jit/more/mkl/mkl.h
+++ b/paddle/fluid/operators/jit/more/mkl/mkl.h
@@ -142,6 +142,32 @@ void Softmax(const T* x, T* y, int n, int bs) {
   }
 }
 
+// Sparse SGD in the mkl namespace: updates only the out rows listed in rows.
+template <typename T>
+void Sgd(const T* lr, const T* param, const T* grad, const int64_t* rows,
+         T* out, const sgd_attr_t* attr) {
+  PADDLE_ENFORCE_EQ(attr->param_width, attr->grad_width);
+  PADDLE_ENFORCE_LE(attr->selected_rows_size, attr->grad_height);
+  T neg_lr = -lr[0];
+  int width = attr->grad_width;
+  const bool inplace = (out == param);
+  for (int64_t i = 0; i < attr->selected_rows_size; ++i) {
+    auto h_idx = rows[i];
+    PADDLE_ENFORCE_LT(h_idx, attr->param_height);
+    PADDLE_ENFORCE_GE(h_idx, 0);
+    const T* grad_row = grad + i * width;
+    T* out_row = out + h_idx * width;
+    if (inplace) {
+      // out_row already holds the param row; presumably VAXPY adds onto it
+      VAXPY(neg_lr, grad_row, out_row, width);
+    } else {
+      // presumably: scale grad into out_row, then add the param row on top
+      VScal(&neg_lr, grad_row, out_row, width);
+      VAdd(param + h_idx * width, out_row, out_row, width);
+    }
+  }
+}
+
 #define DECLARE_MKL_KERNEL(name, tuples)                             \
   template <typename T>                                              \
   class name##Kernel : public KernelMore<tuples<T>> {                \
@@ -173,6 +199,8 @@ DECLARE_MKL_KERNEL(EmbSeqPool, EmbSeqPoolTuples);
 
 DECLARE_MKL_KERNEL(Softmax, SoftmaxTuples);
 
+DECLARE_MKL_KERNEL(Sgd, SgdTuples);
+
 #undef DECLARE_MKL_KERNEL
 
 }  // namespace mkl
diff --git a/paddle/fluid/operators/jit/refer/CMakeLists.txt b/paddle/fluid/operators/jit/refer/CMakeLists.txt
index 218d801c084be455538628d1c1028d8e52142894..cd19dd169d0bfdfe2cb8157ade29f48ad6428453 100644
--- a/paddle/fluid/operators/jit/refer/CMakeLists.txt
+++ b/paddle/fluid/operators/jit/refer/CMakeLists.txt
@@ -33,3 +33,4 @@ USE_JITKERNEL_REFER(kHSum)
 USE_JITKERNEL_REFER(kHMax)
 USE_JITKERNEL_REFER(kSoftmax)
 USE_JITKERNEL_REFER(kEmbSeqPool)
+USE_JITKERNEL_REFER(kSgd)
diff --git a/paddle/fluid/operators/jit/refer/refer.cc b/paddle/fluid/operators/jit/refer/refer.cc
index 7e7dd6960b66e4e2f77eca6e96604f2a86553120..0c434bd2b8cacdf4b8872da66bb8e763a6a45cee 100644
--- a/paddle/fluid/operators/jit/refer/refer.cc
+++ b/paddle/fluid/operators/jit/refer/refer.cc
@@ -59,4 +59,6 @@ REGISTER_REFER_KERNEL(kSoftmax, Softmax);
 
 REGISTER_REFER_KERNEL(kEmbSeqPool, EmbSeqPool);
 
+REGISTER_REFER_KERNEL(kSgd, Sgd);
+
 #undef REGISTER_REFER_KERNEL
diff --git a/paddle/fluid/operators/jit/refer/refer.h b/paddle/fluid/operators/jit/refer/refer.h
index fd1193aa41e50e3ede7f61588dc72389279bb95d..0f714edf85bbbf4838bfe09251bd1c2d5f3b3eb7 100644
--- a/paddle/fluid/operators/jit/refer/refer.h
+++ b/paddle/fluid/operators/jit/refer/refer.h
@@ -446,6 +446,36 @@ void EmbSeqPool(const T* table, const int64_t* idx, T* out,
   }
 }
 
+// SGD algorithm:
+// lr is a pointer to the learning rate scalar
+// param is an input matrix with (param_h, param_w)
+// grad is an input matrix with (grad_h, grad_w), here grad_w == param_w
+// selected_rows is a vector<int64_t> with size selected_rows_size( <= grad_h )
+// out is an output matrix with (param_h, param_w)
+//
+// support both regular and sparse grad
+// regular SGD: out[:] = param[:] - lr[0] * grad[:];
+// sparse SGD: out[rows[i]][:] = param[rows[i]][:] - lr[0] * grad[i][:]
+//
+// Note: when using sparse SGD with out != param,
+// the out rows which are not selected are not changed, so they may be empty
+template <typename T>
+void Sgd(const T* lr, const T* param, const T* grad, const int64_t* rows,
+         T* out, const sgd_attr_t* attr) {
+  PADDLE_ENFORCE_EQ(attr->param_width, attr->grad_width);
+  PADDLE_ENFORCE_LE(attr->selected_rows_size, attr->grad_height);
+  for (int64_t i = 0; i < attr->selected_rows_size; ++i) {
+    const int64_t h_idx = rows[i];
+    PADDLE_ENFORCE_LT(h_idx, attr->param_height);
+    PADDLE_ENFORCE_GE(h_idx, 0);
+    const int64_t row_off = h_idx * attr->grad_width;  // same in param and out
+    const T* grad_row = grad + i * attr->grad_width;
+    for (int64_t j = 0; j < attr->grad_width; ++j) {
+      out[row_off + j] = param[row_off + j] - lr[0] * grad_row[j];
+    }
+  }
+}
+
 #define DECLARE_REFER_KERNEL(name, tuples)             \
   template <typename T>                                \
   class name##Kernel : public ReferKernel<tuples<T>> { \
@@ -496,6 +526,8 @@ DECLARE_REFER_KERNEL(Softmax, SoftmaxTuples);
 
 DECLARE_REFER_KERNEL(EmbSeqPool, EmbSeqPoolTuples);
 
+DECLARE_REFER_KERNEL(Sgd, SgdTuples);
+
 #undef DECLARE_REFER_KERNEL
 
 }  // namespace refer
diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc
index 356eba6f86ad180c7d23bf7fa91eb5d455ff5f08..b618cd6a84be752a052f9d49a4a4c772b1d7eeae 100644
--- a/paddle/fluid/operators/jit/test.cc
+++ b/paddle/fluid/operators/jit/test.cc
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#include <algorithm>
 #include <random>
 #include <string>
 #include <vector>
@@ -36,14 +37,14 @@ void RandomVec(const int n, T* a, const T lower = static_cast<T>(-20.f),
 }
 
 template <typename T>
-void ExpectEQ(const T* target, const T* refer, int n) {
+void ExpectEQ(const T* target, const T* refer, size_t n) {
   if (std::is_floating_point<T>::value) {
-    for (int i = 0; i < n; ++i) {
-      EXPECT_NEAR(target[i], refer[i], FLAGS_acc);
+    for (size_t i = 0; i < n; ++i) {
+      EXPECT_NEAR(target[i], refer[i], FLAGS_acc) << " at index : " << i;
     }
   } else {
-    for (int i = 0; i < n; ++i) {
-      EXPECT_EQ(target[i], refer[i]);
+    for (size_t i = 0; i < n; ++i) {
+      EXPECT_EQ(target[i], refer[i]) << " at index : " << i;
     }
   }
 }
@@ -296,6 +297,45 @@ struct TestFuncWithRefer<jit::EmbSeqPoolTuples<T>, std::vector<T>,
   }
 };
 
+template <typename T>
+struct TestFuncWithRefer<jit::SgdTuples<T>, T, std::vector<T>, std::vector<T>,
+                         std::vector<int64_t>, std::vector<T>,
+                         typename jit::SgdTuples<T>::attr_type> {
+  void operator()(const typename jit::SgdTuples<T>::func_type tgt, const T lr,
+                  const std::vector<T>& param, const std::vector<T>& grad,
+                  const std::vector<int64_t>& rows, const std::vector<T>& oref,
+                  const typename jit::SgdTuples<T>::attr_type& attr) {
+    // The buffers handed in must agree with the shapes recorded in attr.
+    EXPECT_TRUE(tgt != nullptr);
+    EXPECT_EQ(param.size(),
+              static_cast<size_t>(attr.param_height * attr.param_width));
+    EXPECT_EQ(grad.size(),
+              static_cast<size_t>(attr.grad_height * attr.grad_width));
+    EXPECT_EQ(rows.size(), static_cast<size_t>(attr.selected_rows_size));
+    EXPECT_EQ(param.size(), oref.size());
+
+    std::vector<T> result(oref.size());
+    // Only the rows listed in `rows` are compared against the reference;
+    // the other rows of the output are not checked.
+    auto expect_selected_rows = [&]() {
+      for (const int64_t row : rows) {
+        const int64_t offset = row * attr.grad_width;
+        ExpectEQ<T>(result.data() + offset, oref.data() + offset,
+                    attr.grad_width);
+      }
+    };
+
+    // out-of-place: param and the output are different buffers
+    tgt(&lr, param.data(), grad.data(), rows.data(), result.data(), &attr);
+    expect_selected_rows();
+
+    // in-place: the output buffer starts as a copy of param
+    std::copy(param.begin(), param.end(), result.begin());
+    tgt(&lr, result.data(), grad.data(), rows.data(), result.data(), &attr);
+    expect_selected_rows();
+  }
+};
+
 template <typename T>
 struct TestFuncWithRefer<jit::MatMulTuples<T>, std::vector<T>, std::vector<T>,
                          std::vector<T>,
@@ -407,7 +447,7 @@ void TestAllImpls(const typename KernelTuples::attr_type& attr, Args... args) {
 }
 
 template <jit::KernelType KT, typename T, typename PlaceType>
-void TestXYZNKernel() {
+void TestKernelXYZNTuples() {
   VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
   for (int d : TestSizes()) {
     auto ref = jit::GetRefer<KT, jit::XYZNTuples<T>>();
@@ -440,7 +480,7 @@ void TestXYZNKernel() {
 }
 
 template <jit::KernelType KT, typename T, typename PlaceType>
-void TestAXYNKernel() {
+void TestKernelAXYNTuples() {
   VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
   for (int d : TestSizes()) {
     auto ref = jit::GetRefer<KT, jit::AXYNTuples<T>>();
@@ -466,7 +506,7 @@ void TestAXYNKernel() {
 }
 
 template <jit::KernelType KT, typename T, typename PlaceType>
-void TestXRNKernel() {
+void TestKernelXRNTuples() {
   VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
   auto last_acc = FLAGS_acc;
   FLAGS_acc = 1e-4;
@@ -484,7 +524,7 @@ void TestXRNKernel() {
 }
 
 template <jit::KernelType KT, typename T, typename PlaceType>
-void TestXYNKernel() {
+void TestKernelXYNTuples() {
   VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
   for (int d : TestSizes()) {
     auto ref = jit::GetRefer<KT, jit::XYNTuples<T>>();
@@ -509,10 +549,12 @@ void TestXYNKernel() {
 }
 
 template <jit::KernelType KT, typename T, typename PlaceType>
-void TestLSTMKernel() {
+void TestKernelLSTMTuples() {
   VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
   std::vector<std::string> all_acts = {"sigmoid", "tanh", "relu", "identity"};
-  for (int d : TestSizes()) {
+  auto test_sizes = TestSizes();
+  test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
+  for (int d : test_sizes) {
     for (bool use_peephole : {true, false}) {
       for (auto& act_gate : all_acts) {
         for (auto& act_cand : all_acts) {
@@ -559,10 +601,12 @@ void TestLSTMKernel() {
 }
 
 template <jit::KernelType KT, typename T, typename PlaceType>
-void TestGRUKernel() {
+void TestKernelGRUTuples() {
   VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
   std::vector<std::string> all_acts = {"sigmoid", "tanh", "relu", "identity"};
-  for (int d : TestSizes()) {
+  auto test_sizes = TestSizes();
+  test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
+  for (int d : test_sizes) {
     for (auto& act_gate : all_acts) {
       for (auto& act_cand : all_acts) {
         const jit::gru_attr_t attr(d, jit::to_kerneltype(act_gate),
@@ -593,14 +637,16 @@ void TestGRUKernel() {
 }
 
 template <jit::KernelType KT, typename T, typename PlaceType>
-void TestSeqPoolKernel() {
+void TestKernelSeqPoolTuples() {
   VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
   std::vector<jit::SeqPoolType> pool_types = {
       jit::SeqPoolType::kSum, jit::SeqPoolType::kAvg, jit::SeqPoolType::kSqrt};
+  auto test_sizes = TestSizes();
+  test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
   for (auto type : pool_types) {
-    for (int w : TestSizes()) {
+    for (int w : test_sizes) {
       jit::seq_pool_attr_t attr(w, type);
-      for (int h : TestSizes()) {
+      for (int h : test_sizes) {
         attr.h = h;
         auto ref = jit::GetRefer<KT, jit::SeqPoolTuples<T>>();
         EXPECT_TRUE(ref != nullptr);
@@ -618,11 +664,11 @@ void TestSeqPoolKernel() {
 }
 
 template <jit::KernelType KT, typename T, typename PlaceType>
-void TestMatMulKernel() {
+void TestKernelMatMulTuples() {
   VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
   auto last_acc = FLAGS_acc;
-  // TODO(intel): fix MKL acc issue
-  // https://github.com/PaddlePaddle/Paddle/issues/15447
+  // export MKL_CBWR=AVX would make MKL force to use AVX
+  // export KMP_DETERMINISTIC_REDUCTION=yes would make the result deterministic
   FLAGS_acc = 1e-3;
   for (int m : {1, 2, 3, 4}) {
     for (int n : {1, 2, 3, 4}) {
@@ -646,7 +692,7 @@ void TestMatMulKernel() {
 }
 
 template <jit::KernelType KT, typename T, typename PlaceType>
-void TestSoftmaxKernel() {
+void TestKernelSoftmaxTuples() {
   VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
   for (int bs : {1, 2, 10}) {
     for (int n : TestSizes()) {
@@ -671,12 +717,14 @@ void TestSoftmaxKernel() {
 }
 
 template <jit::KernelType KT, typename T, typename PlaceType>
-void TestEmbSeqPoolKernel() {
+void TestKernelEmbSeqPoolTuples() {
   VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
   int64_t tbl_h = 1e4;
   std::vector<jit::SeqPoolType> pool_types = {
       jit::SeqPoolType::kSum};  // only support sum yet
-  for (int tbl_w : TestSizes()) {
+  auto test_sizes = TestSizes();
+  test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
+  for (int tbl_w : test_sizes) {
     std::vector<T> table(tbl_h * tbl_w);
     RandomVec<T>(tbl_h * tbl_w, table.data(), -2.f, 2.f);
     const T* table_data = table.data();
@@ -705,7 +753,61 @@ void TestEmbSeqPoolKernel() {
 }
 
 template <jit::KernelType KT, typename T, typename PlaceType>
-void TestNCHW16CMulNCKernel() {
+void TestKernelSgdTuples() {
+  VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
+  const T lr = 0.1;
+  auto UnDuplicatedRandomVec = [](int n, const int64_t lower,
+                                  const int64_t upper) -> std::vector<int64_t> {
+    PADDLE_ENFORCE_LE(static_cast<size_t>(upper - lower), n - 1);
+    PADDLE_ENFORCE_GT(n, 0);
+    // Shuffle 0..n-1 so that every row index appears exactly once.
+    std::vector<int64_t> indices;
+    for (int64_t v = 0; v < n; ++v) {
+      indices.push_back(v);
+    }
+    std::random_shuffle(indices.begin(), indices.end());
+    return indices;
+  };
+  for (int param_h : {1, 10}) {
+    for (int grad_w : TestSizes()) {
+      const int param_numel = param_h * grad_w;
+      std::vector<T> param(param_numel);
+      std::vector<T> param_out(param_numel);
+      RandomVec<T>(param_numel, param.data(), -2.f, 2.f);
+      const T* param_data = param.data();
+      T* out_data = param_out.data();
+      for (int rows_size = 1; rows_size <= param_h; ++rows_size) {
+        std::vector<T> grad(rows_size * grad_w);
+        auto rows = UnDuplicatedRandomVec(rows_size, 0, rows_size - 1);
+        RandomVec<T>(rows_size * grad_w, grad.data(), -2.f, 2.f);
+        const int64_t* rows_data = rows.data();
+        const T* grad_data = grad.data();
+        auto ref = jit::GetRefer<KT, jit::SgdTuples<T>>();
+        EXPECT_TRUE(ref != nullptr);
+        jit::sgd_attr_t attr(param_h, grad_w, rows_size, grad_w, rows_size);
+        // out-of-place reference result
+        ref(&lr, param_data, grad_data, rows_data, out_data, &attr);
+
+        // in-place reference result, computed on a copy of param
+        std::vector<T> inp(param);
+        T* inp_data = inp.data();
+        ref(&lr, inp_data, grad_data, rows_data, inp_data, &attr);
+        // only the selected rows should be equal
+        for (const int64_t row : rows) {
+          const int64_t offset = row * grad_w;
+          ExpectEQ<T>(inp_data + offset, out_data + offset, grad_w);
+        }
+
+        TestAllImpls<KT, jit::SgdTuples<T>, PlaceType, T, std::vector<T>,
+                     std::vector<T>, std::vector<int64_t>, std::vector<T>>(
+            attr, lr, param, grad, rows, param_out, attr);
+      }
+    }
+  }
+}
+
+template <jit::KernelType KT, typename T, typename PlaceType>
+void TestKernelNCHW16CMulNCTuples() {
   VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
   const int n = 3, c = 16 * 4, h = 10, w = 10;
   auto ref = jit::GetRefer<KT, jit::NCHW16CMulNCTuples<T>>();
@@ -758,7 +860,7 @@ void TestNCHW16CMulNCKernel() {
 }
 
 template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType>
-void TestLayerNormKernel() {
+void TestKernelLayerNormTuples() {
   VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
   const T epsilon = 9.99999975e-06;
   for (int n : {1, 2, 10}) {
@@ -797,11 +899,13 @@ void TestLayerNormKernel() {
 }
 
 template <paddle::operators::jit::KernelType KT, typename T, typename PlaceType>
-void TestCRFDecodingKernel() {
+void TestKernelCRFDecodingTuples() {
   VLOG(10) << "===== Test JITKernel " << jit::to_string(KT);
   constexpr int state_trans_base_idx = 2;
+  auto test_sizes = TestSizes();
+  test_sizes.erase(std::remove(test_sizes.begin(), test_sizes.end(), 1000));
   for (int seq_len : {1, 11, 17, 50}) {
-    for (int tag_num : TestSizes()) {
+    for (int tag_num : test_sizes) {
       auto ref = jit::GetRefer<KT, jit::CRFDecodingTuples<T>>();
       EXPECT_TRUE(ref != nullptr);
       int x_sz = seq_len * tag_num;
@@ -822,143 +926,76 @@ void TestCRFDecodingKernel() {
   }
 }
 
-// XYZNTuple
-TEST(JITKernel, kVMul) {
-  TestXYZNKernel<jit::kVMul, float, CPUPlace>();
-  TestXYZNKernel<jit::kVMul, double, CPUPlace>();
-}
-
-TEST(JITKernel, kVAdd) {
-  TestXYZNKernel<jit::kVAdd, float, CPUPlace>();
-  TestXYZNKernel<jit::kVAdd, double, CPUPlace>();
-}
-
-TEST(JITKernel, kVAddRelu) {
-  TestXYZNKernel<jit::kVAddRelu, float, CPUPlace>();
-  TestXYZNKernel<jit::kVAddRelu, double, CPUPlace>();
-}
-
-TEST(JITKernel, kVSub) {
-  TestXYZNKernel<jit::kVSub, float, CPUPlace>();
-  TestXYZNKernel<jit::kVSub, double, CPUPlace>();
-}
-
-// AXYNTuples
-TEST(JITKernel, kVScal) {
-  TestAXYNKernel<jit::kVScal, float, CPUPlace>();
-  TestAXYNKernel<jit::kVScal, double, CPUPlace>();
-}
-
-TEST(JITKernel, kVAddBias) {
-  TestAXYNKernel<jit::kVAddBias, float, CPUPlace>();
-  TestAXYNKernel<jit::kVAddBias, double, CPUPlace>();
-}
-
-// XRNTuples
-TEST(JITKernel, kHMax) {
-  TestXRNKernel<jit::kHMax, float, CPUPlace>();
-  TestXRNKernel<jit::kHMax, double, CPUPlace>();
-}
-
-TEST(JITKernel, kHSum) {
-  TestXRNKernel<jit::kHSum, float, CPUPlace>();
-  TestXRNKernel<jit::kHSum, double, CPUPlace>();
-}
-
-// XYNTuples
-TEST(JITKernel, kVRelu) {
-  TestXYNKernel<jit::kVRelu, float, CPUPlace>();
-  TestXYNKernel<jit::kVRelu, double, CPUPlace>();
-}
-
-TEST(JITKernel, kVIdentity) {
-  TestXYNKernel<jit::kVIdentity, float, CPUPlace>();
-  TestXYNKernel<jit::kVIdentity, double, CPUPlace>();
-}
-
-TEST(JITKernel, kVSquare) {
-  TestXYNKernel<jit::kVSquare, float, CPUPlace>();
-  TestXYNKernel<jit::kVSquare, double, CPUPlace>();
-}
-
-TEST(JITKernel, kVExp) {
-  TestXYNKernel<jit::kVExp, float, CPUPlace>();
-  TestXYNKernel<jit::kVExp, double, CPUPlace>();
-}
-
-TEST(JITKernel, kVSigmoid) {
-  TestXYNKernel<jit::kVSigmoid, float, CPUPlace>();
-  TestXYNKernel<jit::kVSigmoid, double, CPUPlace>();
-}
+#define TEST_CPU_KERNEL(test_tuple, kernel_type)                  \
+  TEST(JITKernel, kernel_type) { /* runs float, then double */    \
+    TestKernel##test_tuple<jit::kernel_type, float, CPUPlace>();  \
+    TestKernel##test_tuple<jit::kernel_type, double, CPUPlace>(); \
+  }
 
-TEST(JITKernel, kVTanh) {
-  TestXYNKernel<jit::kVTanh, float, CPUPlace>();
-  TestXYNKernel<jit::kVTanh, double, CPUPlace>();
-}
+TEST_CPU_KERNEL(XYZNTuples, kVMul);
+TEST_CPU_KERNEL(XYZNTuples, kVAdd);
+TEST_CPU_KERNEL(XYZNTuples, kVAddRelu);
+TEST_CPU_KERNEL(XYZNTuples, kVSub);
 
-// LSTM
-TEST(JITKernel, kLSTMCtHt) {
-  TestLSTMKernel<jit::kLSTMCtHt, float, CPUPlace>();
-  TestLSTMKernel<jit::kLSTMCtHt, double, CPUPlace>();
-}
+TEST_CPU_KERNEL(AXYNTuples, kVScal);
+TEST_CPU_KERNEL(AXYNTuples, kVAddBias);
 
-TEST(JITKernel, kLSTMC1H1) {
-  TestLSTMKernel<jit::kLSTMC1H1, float, CPUPlace>();
-  TestLSTMKernel<jit::kLSTMC1H1, double, CPUPlace>();
-}
+TEST_CPU_KERNEL(XRNTuples, kHMax);
+TEST_CPU_KERNEL(XRNTuples, kHSum);
 
-// GRU
-TEST(JITKernel, kGRUH1) {
-  TestGRUKernel<jit::kGRUH1, float, CPUPlace>();
-  TestGRUKernel<jit::kGRUH1, double, CPUPlace>();
-}
+TEST_CPU_KERNEL(XYNTuples, kVRelu);
+TEST_CPU_KERNEL(XYNTuples, kVIdentity);
+TEST_CPU_KERNEL(XYNTuples, kVSquare);
+TEST_CPU_KERNEL(XYNTuples, kVExp);
+TEST_CPU_KERNEL(XYNTuples, kVSigmoid);
+TEST_CPU_KERNEL(XYNTuples, kVTanh);
 
-TEST(JITKernel, kGRUHtPart1) {
-  TestGRUKernel<jit::kGRUHtPart1, float, CPUPlace>();
-  TestGRUKernel<jit::kGRUHtPart1, double, CPUPlace>();
-}
+TEST_CPU_KERNEL(LSTMTuples, kLSTMCtHt);
+TEST_CPU_KERNEL(LSTMTuples, kLSTMC1H1);
 
-TEST(JITKernel, kGRUHtPart2) {
-  TestGRUKernel<jit::kGRUHtPart2, float, CPUPlace>();
-  TestGRUKernel<jit::kGRUHtPart2, double, CPUPlace>();
-}
+TEST_CPU_KERNEL(GRUTuples, kGRUH1);
+TEST_CPU_KERNEL(GRUTuples, kGRUHtPart1);
+TEST_CPU_KERNEL(GRUTuples, kGRUHtPart2);
 
-TEST(JITKernel, kSeqPool) {
-  TestSeqPoolKernel<jit::kSeqPool, float, CPUPlace>();
-  TestSeqPoolKernel<jit::kSeqPool, double, CPUPlace>();
-}
+TEST_CPU_KERNEL(NCHW16CMulNCTuples, kNCHW16CMulNC);
 
-TEST(JITKernel, kMatMul) {
-  TestMatMulKernel<jit::kMatMul, float, CPUPlace>();
-  TestMatMulKernel<jit::kMatMul, double, CPUPlace>();
-}
+TEST_CPU_KERNEL(SeqPoolTuples, kSeqPool);
+TEST_CPU_KERNEL(MatMulTuples, kMatMul);
+TEST_CPU_KERNEL(SoftmaxTuples, kSoftmax);
+TEST_CPU_KERNEL(EmbSeqPoolTuples, kEmbSeqPool);
+TEST_CPU_KERNEL(SgdTuples, kSgd);
+TEST_CPU_KERNEL(LayerNormTuples, kLayerNorm);
+TEST_CPU_KERNEL(CRFDecodingTuples, kCRFDecoding);
 
-TEST(JITKernel, kSoftmax) {
-  TestSoftmaxKernel<jit::kSoftmax, float, CPUPlace>();
-  TestSoftmaxKernel<jit::kSoftmax, double, CPUPlace>();
-}
+TEST(JITKernel_key, lstm) {
+  jit::lstm_attr_t attr1(8, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh);
+  jit::lstm_attr_t attr2(9, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh);
+  jit::lstm_attr_t attr3(9, jit::kVIdentity, jit::kVSigmoid, jit::kVTanh);
+  jit::lstm_attr_t attr4(9, jit::kVRelu, jit::kVSigmoid, jit::kVTanh);
 
-TEST(JITKernel, kEmbSeqPool) {
-  TestEmbSeqPoolKernel<jit::kEmbSeqPool, float, CPUPlace>();
-  TestEmbSeqPoolKernel<jit::kEmbSeqPool, double, CPUPlace>();
-}
+  auto key1 = jit::JitCodeKey<jit::lstm_attr_t>(attr1);
+  auto key2 = jit::JitCodeKey<jit::lstm_attr_t>(attr2);
+  auto key3 = jit::JitCodeKey<jit::lstm_attr_t>(attr3);
+  auto key4 = jit::JitCodeKey<jit::lstm_attr_t>(attr4);
 
-TEST(JITKernel, kNCHW16CMulNC) {
-  TestNCHW16CMulNCKernel<jit::kNCHW16CMulNC, float, CPUPlace>();
-  TestNCHW16CMulNCKernel<jit::kNCHW16CMulNC, double, CPUPlace>();
+  EXPECT_TRUE(key1 != key2);
+  EXPECT_TRUE(key2 == key3);
+  EXPECT_TRUE(key3 != key4);
 }
 
-TEST(JITKernel, kLayerNorm) {
-  TestLayerNormKernel<jit::kLayerNorm, float, paddle::platform::CPUPlace>();
-  TestLayerNormKernel<jit::kLayerNorm, double, paddle::platform::CPUPlace>();
-}
+TEST(JITKernel_key, gru) {
+  jit::gru_attr_t attr1(8, jit::kVSigmoid, jit::kVTanh);
+  jit::gru_attr_t attr2(9, jit::kVSigmoid, jit::kVTanh);
+  jit::gru_attr_t attr3(9, jit::kVSigmoid, jit::kVTanh);
+  jit::gru_attr_t attr4(9, jit::kVSigmoid, jit::kVIdentity);
 
-TEST(JITKernel, kCRFDecoding) {
-  TestCRFDecodingKernel<jit::kCRFDecoding, float, paddle::platform::CPUPlace>();
-  TestCRFDecodingKernel<jit::kCRFDecoding, double,
-                        paddle::platform::CPUPlace>();
-}
+  auto key1 = jit::JitCodeKey<jit::gru_attr_t>(attr1);
+  auto key2 = jit::JitCodeKey<jit::gru_attr_t>(attr2);
+  auto key3 = jit::JitCodeKey<jit::gru_attr_t>(attr3);
+  auto key4 = jit::JitCodeKey<jit::gru_attr_t>(attr4);
 
-TEST(JITKernel, pool) {
-  // TODO(TJ): add some test
+  EXPECT_TRUE(key1 != key2);
+  EXPECT_TRUE(key2 == key3);
+  EXPECT_TRUE(key3 != key4);
 }
+// TODO(TJ): add more tests for key and pool
diff --git a/paddle/fluid/operators/math/beam_search.cu b/paddle/fluid/operators/math/beam_search.cu
index 61d021ef627f1ccd90b992c2078a7f3ca879422d..d66778a6fe05c0460c805581ee6ffd6d5e9d746e 100644
--- a/paddle/fluid/operators/math/beam_search.cu
+++ b/paddle/fluid/operators/math/beam_search.cu
@@ -119,6 +119,18 @@ __device__ __forceinline__ int SelectTopBeam(
       __syncthreads();
     }
 
+    if ((num_used_threads & 0x1) != 0) {
+      // If num_used_threads is odd, merge the local top_beam of the last
+      // thread (num_used_threads - 1) into thread 0 before halving.
+      if (tid_of_seq == 0) {
+        int index_in_sh = (num_used_threads - 1 + tid) * beam_size;
+        for (int i = 0; i < beam_size; i++) {
+          Insert(top_beam_local, top_beam[index_in_sh], beam_size);
+          index_in_sh++;
+        }
+      }
+    }
+
     num_used_threads = num_used_threads >> 1;
     if (tid_of_seq < num_used_threads) {
       int index_in_sh = (num_used_threads + tid) * beam_size;
diff --git a/paddle/fluid/operators/math/blas.h b/paddle/fluid/operators/math/blas.h
index f67f57827bc03e134bf87edd5bf033adb5098916..ce8109f64d62b0d412419107881952f1b4ffc75e 100644
--- a/paddle/fluid/operators/math/blas.h
+++ b/paddle/fluid/operators/math/blas.h
@@ -184,6 +184,9 @@ class Blas {
   template <typename T>
   void VINV(int n, const T* a, T* y) const;
 
+  template <typename T>
+  void VMERF(int n, const T* a, T* y, int64_t mode) const;
+
  private:
   const DeviceContext& context_;
 };
@@ -290,6 +293,11 @@ class BlasT : private Blas<DeviceContext> {
     Base()->template VINV<T>(args...);
   }
 
+  template <typename... ARGS>
+  void VMERF(ARGS... args) const {
+    Base()->template VMERF<T>(args...);
+  }
+
  private:
   const Blas<DeviceContext>* Base() const {
     return static_cast<const Blas<DeviceContext>*>(this);
diff --git a/paddle/fluid/operators/math/blas_impl.h b/paddle/fluid/operators/math/blas_impl.h
index 972366bc093f4b7f0a090cf31213f75ccd89fd82..ba995dabecbfab8c4952bb7efeaa381f8078821a 100644
--- a/paddle/fluid/operators/math/blas_impl.h
+++ b/paddle/fluid/operators/math/blas_impl.h
@@ -123,6 +123,11 @@ struct CBlas<float> {
   static void VINV(ARGS... args) {
     platform::dynload::vsInv(args...);
   }
+
+  template <typename... ARGS>
+  static void VMERF(ARGS... args) {
+    platform::dynload::vmsErf(args...);
+  }
 };
 
 template <>
@@ -223,6 +228,11 @@ struct CBlas<double> {
   static void VINV(ARGS... args) {
     platform::dynload::vdInv(args...);
   }
+
+  template <typename... ARGS>
+  static void VMERF(ARGS... args) {
+    platform::dynload::vmdErf(args...);
+  }
 };
 
 #else
@@ -625,6 +635,19 @@ void Blas<DeviceContext>::VINV(int n, const T *a, T *y) const {
 #endif
 }
 
+template <>
+template <typename T>
+void Blas<platform::CPUDeviceContext>::VMERF(int n, const T *a, T *y,
+                                             int64_t mode) const {
+#ifdef PADDLE_WITH_MKLML
+  CBlas<T>::VMERF(n, a, y, mode);  // MKLML path: mode is forwarded as-is
+#else
+  for (int i = 0; i < n; ++i) {  // fallback: element-wise std::erf
+    y[i] = std::erf(a[i]);  // NOTE: `mode` is unused on this path
+  }
+#endif
+}
+
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
index 5b7505f3c4acdef94fead04efd00b47825274117..43559940d925e6fff29f0c5c66ec1a3dc717aaf4 100644
--- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -96,8 +96,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
 
   std::vector<int> src_tz = framework::vectorize2int(x->dims());
 
-  auto src_format =
-      src_tz.size() == 2 ? mkldnn::memory::format::nc : x->format();
+  auto src_format = x->format();
 
   const std::string key = gethash(src_tz, algorithm);
   const std::string key_src_data =
@@ -127,10 +126,8 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
 
   if (p_fwd == nullptr) {
     // create mkldnn memory for input X
-    auto src_md = platform::MKLDNNMemDesc(
-        src_tz, platform::MKLDNNGetDataType<T>(), src_format);
     auto src_memory = std::shared_ptr<memory>(
-        new memory({src_md, mkldnn_engine}, to_void_cast(x_data)));
+        new memory(x->get_mkldnn_prim_desc(), to_void_cast(x_data)));
     // save src_memory to be referred in backward path
     dev_ctx.SetBlob(key_src_mem, src_memory);
 
@@ -177,8 +174,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
   pipeline.push_back(*p_fwd);
   stream(stream::kind::eager).submit(pipeline).wait();
 
-  y->set_layout(DataLayout::kMKLDNN);
-  y->set_format(GetMKLDNNFormat(*dst_memory));
+  y->set_mkldnn_prim_desc(dst_memory->get_primitive_desc());
 }
 
 template <typename T>
@@ -196,9 +192,6 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
 
   std::vector<int> diff_dst_tz = framework::vectorize2int(diff_y->dims());
 
-  auto diff_y_format =
-      diff_dst_tz.size() == 2 ? mkldnn::memory::format::nc : diff_y->format();
-
   const std::string key = gethash(diff_dst_tz, algorithm);
   const std::string key_src_data =
       key + ctx.op().Input("Out") + "@eltwise_fwd_src_data";
@@ -210,8 +203,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
       key + std::to_string(*p_src_layout) + "@eltwise_fwd_src_mem";
   const std::string key_fwd_pd =
       key + std::to_string(*p_src_layout) + "@eltwise_fwd_pd";
-  const std::string key_with_layouts =
-      key + std::to_string(*p_src_layout) + "-" + std::to_string(diff_y_format);
+  const std::string key_with_layouts = key + std::to_string(*p_src_layout) +
+                                       "-" + std::to_string(diff_y->format());
   const std::string key_diff_src_mem =
       key_with_layouts + "@eltwise_diff_src_mem";
   const std::string key_diff_dst_mem =
@@ -234,10 +227,8 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
 
   if (p_grad == nullptr) {
     // create mkldnn memory for input diff_y
-    auto diff_dst_md = platform::MKLDNNMemDesc(
-        diff_dst_tz, platform::MKLDNNGetDataType<T>(), diff_y_format);
     auto diff_dst_memory = std::shared_ptr<memory>(
-        new memory({diff_dst_md, mkldnn_engine}, to_void_cast(diff_y_data)));
+        new memory(diff_y->get_mkldnn_prim_desc(), to_void_cast(diff_y_data)));
     dev_ctx.SetBlob(key_diff_dst_mem, diff_dst_memory);
 
     // retrieve eltwise primitive desc from device context
@@ -281,8 +272,7 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
   pipeline.push_back(*p_grad);
   stream(stream::kind::eager).submit(pipeline).wait();
 
-  diff_x->set_layout(DataLayout::kMKLDNN);
-  diff_x->set_format(GetMKLDNNFormat(*diff_src_memory));
+  diff_x->set_mkldnn_prim_desc(diff_src_memory->get_primitive_desc());
 }
 
 template <typename T, mkldnn::algorithm algorithm>
diff --git a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
index bddca232e6c8a2a7fde998877006e37ee6d3d0dc..04e45d4853907bb7d6b5ce362892a2183fd4b60e 100644
--- a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
@@ -206,17 +206,14 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     if (fuse_with_relu) flags |= mkldnn::fuse_bn_relu;
 
     // create mkldnn memory from input x tensor
-    mkldnn::memory::format input_format =
-        platform::MKLDNNFormatForSize(src_tz.size(), x->format());
 
     // keys for backward pass
     const std::string key = BatchNormMKLDNNHandler::GetHash(
-        src_tz, epsilon, flags, global_stats, input_format,
+        src_tz, epsilon, flags, global_stats, x->format(),
         ctx.op().Output("SavedMean"));
     const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd";
 
-    auto user_src_md = platform::MKLDNNMemDesc(
-        {src_tz}, platform::MKLDNNGetDataType<T>(), input_format);
+    auto user_src_md = x->get_mkldnn_prim_desc().desc();
 
     // create primitive descriptor for batch norm forward
     using bn_fwd_types = bn_type_traits<mkldnn::batch_normalization_forward>;
@@ -230,8 +227,8 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     BatchNormMKLDNNHandler handler(batch_norm_fwd_pd, dev_ctx, mkldnn_engine,
                                    key);
 
-    auto src_memory =
-        handler.AcquireSrcMemory(user_src_md, to_void_cast(x_data));
+    auto src_memory = handler.AcquireSrcMemory(x->get_mkldnn_prim_desc(),
+                                               to_void_cast(x_data));
 
     // crate mkldnn memory for weights(scale/shift)
     auto scaleshift_memory =
@@ -265,8 +262,7 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
           variance_memory, false);
     }
 
-    y->set_layout(DataLayout::kMKLDNN);
-    y->set_format(platform::GetMKLDNNFormat(*dst_memory));
+    y->set_mkldnn_prim_desc(dst_memory->get_primitive_desc());
 
     std::vector<mkldnn::primitive> pipeline;
     pipeline.push_back(*batch_norm_p);
@@ -336,9 +332,6 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
 
     using bn_bwd_types = bn_type_traits<mkldnn::batch_normalization_backward>;
 
-    mkldnn::memory::format dst_format =
-        platform::MKLDNNFormatForSize(src_tz.size(), diff_y->format());
-
     mkldnn::memory::format input_format =
         platform::MKLDNNFormatForSize(src_tz.size(), x->format());
 
@@ -346,14 +339,14 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
 
     // keys from forward pass
     const std::string key = BatchNormMKLDNNHandler::GetHash(
-        src_tz, epsilon, flags, false, input_format,
+        src_tz, epsilon, flags, false, x->format(),
         ctx.op().Input("SavedMean"));
     const std::string key_batch_norm_fwd_pd = key + "@bn_fwd_pd";
 
     // keys for primitives reuse
     const std::string key_with_hash =
         key + BatchNormMKLDNNHandler::GetHash(src_tz, epsilon, flags, false,
-                                              input_format);
+                                              x->format());
     const std::string key_batch_norm_bwd_p =
         key_with_hash + "@batch_norm_bwd_p";
     const std::string key_batch_norm_src_mem_p =
@@ -373,9 +366,8 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
 
     primitive reorder_diff_dst;
     bool is_diff_dst_reordered = false;
-    auto user_diff_dst_memory = memory(
-        {{{diff_dst_tz}, memory::data_type::f32, dst_format}, mkldnn_engine},
-        to_void_cast(diff_y_data));
+    auto user_diff_dst_memory =
+        memory(diff_y->get_mkldnn_prim_desc(), to_void_cast(diff_y_data));
 
     // MKLDNN requires a single piece of memory for scale and shift/bias data
     const size_t scaleshift_size = 2 * ic;
@@ -459,10 +451,7 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
       dev_ctx.SetBlob(key_batch_norm_diff_dst_mem_p, diff_dst_memory);
 
       // set layout/format of output tensors
-      diff_x->set_layout(DataLayout::kMKLDNN);
-      diff_x->set_format((memory::format)diff_src_memory->get_primitive_desc()
-                             .desc()
-                             .data.format);
+      diff_x->set_mkldnn_prim_desc(diff_src_memory->get_primitive_desc());
     } else {
       // primitives already exist
       UpdateMemoryData(dev_ctx, key_batch_norm_src_mem_p, to_void_cast(x_data));
@@ -487,10 +476,7 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
       }
 
       // set layout/format of output tensors
-      diff_x->set_layout(DataLayout::kMKLDNN);
-      diff_x->set_format((memory::format)diff_src_memory->get_primitive_desc()
-                             .desc()
-                             .data.format);
+      diff_x->set_mkldnn_prim_desc(diff_src_memory->get_primitive_desc());
     }
 
     // execute optional reorder and batch_norm backward primitive
diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
index 7ad674056f0d753d79408a11eff1aca47a84998a..54c6a71111a2cc2f9e5004922ae5d3541a9d0a70 100644
--- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
@@ -47,11 +47,6 @@ static memory::primitive_desc CreateMemPrimDesc(const Tensor& input,
   return mem_prim_desc;
 }
 
-static mkldnn::memory::format GetDstMemFormat(
-    const concat::primitive_desc& concat_pd) {
-  return (memory::format)concat_pd.dst_primitive_desc().desc().data.format;
-}
-
 static platform::CPUPlace GetCpuPlace(
     const paddle::framework::ExecutionContext& ctx) {
   auto place = ctx.GetPlace();
@@ -139,8 +134,7 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto concat = prim_creator.CreateConcatPrimitive(concat_pd, output, place);
     stream(stream::kind::eager).submit({concat}).wait();
 
-    output->set_layout(DataLayout::kMKLDNN);
-    output->set_format(GetDstMemFormat(concat_pd));
+    output->set_mkldnn_prim_desc(concat_pd.dst_primitive_desc());
   }
 };
 }  // namespace operators
diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
index 7ac64e6ba134c034acc58c7310cd51da0f03d16d..14ca3e8073b9512732876e512a30968b15884495 100644
--- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
@@ -282,8 +282,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     pipeline.push_back(*conv_p);
     stream(stream::kind::eager).submit(pipeline).wait();
 
-    auto dst_mpd = dst_memory_p->get_primitive_desc();
-    output->set_mkldnn_prim_desc(dst_mpd);
+    output->set_mkldnn_prim_desc(dst_memory_p->get_primitive_desc());
   }
   void ComputeINT8(const paddle::framework::ExecutionContext& ctx) const {
     const bool is_test = ctx.Attr<bool>("is_test");
@@ -972,8 +971,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
 
       pipeline.push_back(*conv_bwd_data_p);
 
-      input_grad->set_layout(DataLayout::kMKLDNN);
-      input_grad->set_format(GetMKLDNNFormat(*diff_src_memory_p));
+      input_grad->set_mkldnn_prim_desc(diff_src_memory_p->get_primitive_desc());
     }
     stream(stream::kind::eager).submit(pipeline).wait();
   }
@@ -991,12 +989,12 @@ REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN,
 
 REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN,
                                     ::paddle::platform::CPUPlace, U8,
-                                    ops::kConvMKLDNNFP32,
+                                    ops::kConvMKLDNNINT8,
                                     ops::ConvMKLDNNOpKernel<uint8_t, float>);
 
 REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN,
                                     ::paddle::platform::CPUPlace, S8,
-                                    ops::kConvMKLDNNFP32,
+                                    ops::kConvMKLDNNINT8,
                                     ops::ConvMKLDNNOpKernel<int8_t, float>);
 
 REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d_grad, MKLDNN,
diff --git a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
index 317d4cebe26b81ff03c212e6328233d5152ed1b4..79a0c5c7683d677daeb4feea10deab86407f944c 100644
--- a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
@@ -221,8 +221,7 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     pipeline.push_back(*conv_p);
     mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
 
-    output->set_layout(DataLayout::kMKLDNN);
-    output->set_format(platform::GetMKLDNNFormat(*dst_memory_p));
+    output->set_mkldnn_prim_desc(dst_memory_p->get_primitive_desc());
   }
 
  private:
diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
index 262b7408a7f5f65c4d97120914c16f38ce5fdbe7..accc9a9d71ffccf2812d57a7516eaf7e0f83275c 100644
--- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/operators/dequantize_op.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/fluid/platform/mkldnn_reuse.h"
 
 namespace paddle {
 namespace operators {
@@ -30,6 +31,18 @@ using framework::DataLayout;
 using mkldnn::stream;
 using platform::GetMKLDNNFormat;
 
+std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
+                      const mkldnn::memory::data_type& src_dt,
+                      const std::vector<int>& src_tz, const float scale_data) {
+  std::string key;  // key = src dtype + src dims + scale + op "Output" entry
+  key.reserve(platform::MKLDNNHandler::MaxKeyLength);
+  platform::MKLDNNHandler::AppendKey(&key, std::to_string(src_dt));
+  platform::MKLDNNHandler::AppendKeyDims(&key, src_tz);
+  platform::MKLDNNHandler::AppendKey(&key, std::to_string(scale_data));
+  platform::MKLDNNHandler::AppendKey(&key, ctx.op().Output("Output"));
+  return key;
+}
+
 template <typename T>
 class DeQuantOpKernel : public framework::OpKernel<T> {
  public:
@@ -51,31 +64,55 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
     mkldnn::memory::data_type src_dt =
         paddle::framework::ToMKLDNNDataType(input->type());
     mkldnn::memory::format src_fmt = input->format();
+    std::string key = CreateKey(ctx, src_dt, src_tz, reorder_scale[0]);
+    const std::string key_prim = key + "@reorder_p";
+    const std::string key_src_mem = key + "@src_mem";
+    const std::string key_dst_mem = key + "@dst_mem";
+
+    std::shared_ptr<mkldnn::memory> src_memory;
+    std::shared_ptr<mkldnn::memory> dst_memory;
+    std::shared_ptr<reorder> reorder_p;
+    reorder_p = std::static_pointer_cast<reorder>(dev_ctx.GetBlob(key_prim));
+
+    if (reorder_p == nullptr) {
+      mkldnn::primitive_attr attri;
+      int mask = 0;
+      attri.set_output_scales(mask, reorder_scale);
+
+      auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
+      auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
+      src_memory =
+          std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
+      std::shared_ptr<primitive::at> src_memory_p =
+          std::shared_ptr<primitive::at>(new primitive::at(*src_memory));
+
+      auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::f32,
+                                            memory::format::nchw);
+      auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine);
+      dst_memory = std::make_shared<mkldnn::memory>(
+          dst_pd, to_void_cast<float>(output_data));
+
+      auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
+          new reorder::primitive_desc(src_pd, dst_pd, attri));
+      reorder_p = std::shared_ptr<reorder>(
+          new reorder(*reorder_pd, *src_memory_p, *dst_memory));
+      dev_ctx.SetBlob(key_prim, reorder_p);
+      dev_ctx.SetBlob(key_src_mem, src_memory);
+      dev_ctx.SetBlob(key_dst_mem, dst_memory);
+    } else {
+      src_memory = std::static_pointer_cast<mkldnn::memory>(
+          dev_ctx.GetBlob(key_src_mem));
+      src_memory->set_data_handle(to_void_cast<T>(input_data));
+
+      dst_memory = std::static_pointer_cast<mkldnn::memory>(
+          dev_ctx.GetBlob(key_dst_mem));
+      dst_memory->set_data_handle(output->mutable_data<float>(ctx.GetPlace()));
+    }
 
-    mkldnn::primitive_attr attri;
-    int mask = 0;
-    attri.set_output_scales(mask, reorder_scale);
-
-    auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
-    auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
-    auto src_memory =
-        std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
-    std::shared_ptr<primitive::at> src_memory_p =
-        std::shared_ptr<primitive::at>(new primitive::at(*src_memory));
-
-    auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::f32,
-                                          memory::format::nchw);
-    auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine);
-    auto dst_memory = mkldnn::memory(dst_pd, to_void_cast<float>(output_data));
-
-    auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
-        new reorder::primitive_desc(src_pd, dst_pd, attri));
-    auto reorder_p = std::shared_ptr<reorder>(
-        new reorder(*reorder_pd, *src_memory_p, dst_memory));
     pipeline.push_back(*reorder_p);
     stream(stream::kind::eager).submit(pipeline).wait();
 
-    output->set_format(GetMKLDNNFormat(dst_memory));
+    output->set_format(GetMKLDNNFormat(*dst_memory));
   }
 };
 
diff --git a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc
index 097ba01d401dbc7969e30f576cac2567c874ed99..4ff27ab12280b56abdf72056fe69ec713f2f2f46 100644
--- a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc
@@ -81,10 +81,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     auto e_mid = framework::EigenTensor<T, 4>::From(*mid);
     e_mid = e_mid.constant(k);
 
-    auto dims = paddle::framework::vectorize2int(x->dims());
-
-    auto src_md = paddle::platform::MKLDNNMemDesc(
-        dims, mkldnn::memory::data_type::f32, x->format());
+    auto src_md = x->get_mkldnn_prim_desc().desc();
 
     auto forward_desc = mkldnn::lrn_forward::desc{mkldnn::prop_kind::forward,
                                                   mkldnn::lrn_across_channels,
@@ -94,7 +91,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
                                                   beta,
                                                   k};
 
-    auto src_memory_pd = mkldnn::memory::primitive_desc{src_md, mkldnn_engine};
+    auto src_memory_pd = x->get_mkldnn_prim_desc();
 
     if (!is_test) {
       const std::string key = ctx.op().Output("Out");
@@ -111,16 +108,15 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       src_memory->set_data_handle(
           static_cast<void*>(const_cast<T*>(input_data)));
 
-      auto dst_memory = mkldnn::memory(forward_pd->dst_primitive_desc(),
-                                       static_cast<void*>(output_data));
+      auto dst_memory_pd = forward_pd->dst_primitive_desc();
+      auto dst_memory =
+          mkldnn::memory(dst_memory_pd, static_cast<void*>(output_data));
       auto workspace_memory = insert_to_context<mkldnn::memory>(
           key_workspace_memory, dev_ctx,
           forward_pd->workspace_primitive_desc());
 
       run_primitive(*forward_pd, *src_memory, *workspace_memory, dst_memory);
-
-      out->set_layout(framework::DataLayout::kMKLDNN);
-      out->set_format(platform::GetMKLDNNFormat(dst_memory));
+      out->set_mkldnn_prim_desc(dst_memory_pd);
     } else {
       auto forward_pd =
           mkldnn::lrn_forward::primitive_desc{forward_desc, mkldnn_engine};
@@ -128,13 +124,12 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
           src_memory_pd, static_cast<void*>(const_cast<T*>(input_data))};
       auto workspace_memory =
           mkldnn::memory{forward_pd.workspace_primitive_desc()};
+      auto dst_memory_pd = forward_pd.dst_primitive_desc();
       auto dst_memory = mkldnn::memory(forward_pd.dst_primitive_desc(),
                                        static_cast<void*>(output_data));
 
       run_primitive(forward_pd, src_memory, workspace_memory, dst_memory);
-
-      out->set_layout(framework::DataLayout::kMKLDNN);
-      out->set_format(platform::GetMKLDNNFormat(dst_memory));
+      out->set_mkldnn_prim_desc(dst_memory_pd);
     }
   }
 };
diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
index 38a65b50bd22354bea54819e8e71015202e96e9f..5d8e81921157cbdf35f7016741ab45c362b7261f 100644
--- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
@@ -15,6 +15,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_layout_transform.h"
 #include "paddle/fluid/operators/pool_op.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
+#include "paddle/fluid/platform/mkldnn_reuse.h"
 
 namespace paddle {
 namespace operators {
@@ -29,23 +30,23 @@ using mkldnn::stream;
 using platform::to_void_cast;
 
 // Generate keys for storing/retriving primitives for this operator
-// TODO(jczaja): Make hashing function more optimial
-static std::string gethash(const memory::dims& input_dims,
-                           const std::string& pooling_type,
-                           const std::vector<int>& ksize,
-                           const std::vector<int>& strides,
-                           const std::vector<int>& paddings,
-                           const memory::data_type& dt,
-                           const std::string& suffix) {
-  auto dims2str = [](const memory::dims& operand_dims) {
-    std::string dstr = "";
-    for (size_t i = 0; i < operand_dims.size(); ++i) {
-      dstr += std::to_string(operand_dims[i]) + "-";
-    }
-    return dstr;
-  };
-  return dims2str(input_dims) + dims2str(ksize) + dims2str(strides) +
-         dims2str(paddings) + std::to_string(dt) + pooling_type + suffix;
+std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
+                      const memory::dims& input_dims,
+                      const std::string& pooling_type,
+                      const std::vector<int>& ksize,
+                      const std::vector<int>& strides,
+                      const std::vector<int>& paddings,
+                      const memory::data_type& dt, const std::string& suffix) {
+  std::string key;  // NOTE(review): ctx is accepted but never read below
+  key.reserve(platform::MKLDNNHandler::MaxKeyLength);  // pre-size the buffer
+  platform::MKLDNNHandler::AppendKeyDims(&key, input_dims);
+  platform::MKLDNNHandler::AppendKey(&key, pooling_type);
+  platform::MKLDNNHandler::AppendKeyVec(&key, ksize);
+  platform::MKLDNNHandler::AppendKeyVec(&key, strides);
+  platform::MKLDNNHandler::AppendKeyVec(&key, paddings);
+  platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt));
+  platform::MKLDNNHandler::AppendKey(&key, suffix);
+  return key;  // call sites then append "@pool_*" tags to this base key
 }
 
 static inline int ComputeCeiledOutput(int input_size, int kernel_size,
@@ -114,8 +115,8 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 
     mkldnn::memory::data_type dt =
         paddle::framework::ToMKLDNNDataType(input->type());
-    const std::string key = gethash(src_tz, pooling_type, ksize, strides,
-                                    paddings, dt, ctx.op().Output("Out"));
+    const std::string key = CreateKey(ctx, src_tz, pooling_type, ksize, strides,
+                                      paddings, dt, ctx.op().Output("Out"));
     const std::string key_pool_p = key + "@pool_p";
     const std::string key_pool_pd = key + "@pool_pd";
     const std::string key_pool_src_mem_p = key + "@pool_src_mem_p";
@@ -294,8 +295,8 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
     // Get an unique name from "argument" name of "Out" variable
     // This name will be used as key when referring info from device context
     const std::string key =
-        gethash(diff_src_tz, pooling_type, ksize, strides, paddings,
-                memory::data_type::f32, ctx.op().Input("Out"));
+        CreateKey(ctx, diff_src_tz, pooling_type, ksize, strides, paddings,
+                  memory::data_type::f32, ctx.op().Input("Out"));
     const std::string key_pool_bwd_p = key + "@pool_bwd_p";
     const std::string key_pool_diff_src_mem_p = key + "@pool_diff_src_mem_p";
     const std::string key_pool_diff_dst_mem_p = key + "@pool_diff_dst_mem_p";
diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
index 0638e42873376bcec6e4de61494da46d1f0073d1..04cd60be964a3967a45e73122324c4b3fdf0b3d0 100644
--- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
@@ -30,6 +30,18 @@ using framework::DataLayout;
 using mkldnn::stream;
 using platform::GetMKLDNNFormat;
 
+std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
+                      const std::vector<int>& src_tz, const float scale_data,
+                      const bool is_negative) {
+  std::string key;  // NOTE: to_string(float) keeps only 6 decimals of scale
+  key.reserve(platform::MKLDNNHandler::MaxKeyLength);  // pre-size the buffer
+  platform::MKLDNNHandler::AppendKeyDims(&key, src_tz);
+  platform::MKLDNNHandler::AppendKey(&key, std::to_string(scale_data));
+  platform::MKLDNNHandler::AppendKey(&key, std::to_string(is_negative));
+  platform::MKLDNNHandler::AppendKey(&key, ctx.op().Output("Output"));
+  return key;  // base key; call site appends "@reorder_p", "@src_mem", ...
+}
+
 template <typename T>
 class QuantOpKernel : public framework::OpKernel<T> {
  public:
@@ -47,32 +59,61 @@ class QuantOpKernel : public framework::OpKernel<T> {
 
     const T* input_data = input->data<T>();
 
-    mkldnn::primitive_attr attri;
-    int mask = 0;
-    attri.set_output_scales(mask, {scale_data});
-
-    auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32,
-                                          input->format());
-    auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
-    auto src_memory =
-        std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
-    std::shared_ptr<primitive::at> src_memory_p =
-        std::shared_ptr<primitive::at>(new primitive::at(*src_memory));
-
     bool is_negative = ctx.Attr<bool>("is_negative_input");
-    std::shared_ptr<mkldnn::memory::primitive_desc> dst_pd;
+    std::string key = CreateKey(ctx, src_tz, scale_data, is_negative);
+    const std::string key_prim = key + "@reorder_p";
+    const std::string key_src_mem = key + "@src_mem";
+    const std::string key_dst_mem = key + "@dst_mem";
+
+    std::shared_ptr<mkldnn::memory> src_memory;
     std::shared_ptr<mkldnn::memory> dst_memory;
-    if (is_negative) {
-      platform::ConvMKLDNNHandler::SetDstMemory<int8_t>(
-          ctx, output, dst_tz, engine, dst_pd, dst_memory);
+    std::shared_ptr<reorder> reorder_p;
+    reorder_p = std::static_pointer_cast<reorder>(dev_ctx.GetBlob(key_prim));
+
+    if (reorder_p == nullptr) {
+      mkldnn::primitive_attr attri;
+      int mask = 0;
+      attri.set_output_scales(mask, {scale_data});
+
+      auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32,
+                                            input->format());
+      auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
+      src_memory =
+          std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
+      std::shared_ptr<primitive::at> src_memory_p =
+          std::shared_ptr<primitive::at>(new primitive::at(*src_memory));
+
+      std::shared_ptr<mkldnn::memory::primitive_desc> dst_pd;
+      if (is_negative) {
+        platform::ConvMKLDNNHandler::SetDstMemory<int8_t>(
+            ctx, output, dst_tz, engine, dst_pd, dst_memory);
+      } else {
+        platform::ConvMKLDNNHandler::SetDstMemory<uint8_t>(
+            ctx, output, dst_tz, engine, dst_pd, dst_memory);
+      }
+      auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
+          new reorder::primitive_desc(src_pd, *dst_pd, attri));
+      reorder_p = std::shared_ptr<reorder>(
+          new reorder(*reorder_pd, *src_memory_p, *dst_memory));
+
+      dev_ctx.SetBlob(key_prim, reorder_p);
+      dev_ctx.SetBlob(key_src_mem, src_memory);
+      dev_ctx.SetBlob(key_dst_mem, dst_memory);
     } else {
-      platform::ConvMKLDNNHandler::SetDstMemory<uint8_t>(
-          ctx, output, dst_tz, engine, dst_pd, dst_memory);
+      src_memory = std::static_pointer_cast<mkldnn::memory>(
+          dev_ctx.GetBlob(key_src_mem));
+      src_memory->set_data_handle(to_void_cast<T>(input_data));
+
+      dst_memory = std::static_pointer_cast<mkldnn::memory>(
+          dev_ctx.GetBlob(key_dst_mem));
+      auto place = ctx.GetPlace();
+      if (is_negative) {
+        dst_memory->set_data_handle(output->mutable_data<int8_t>(place));
+      } else {
+        dst_memory->set_data_handle(output->mutable_data<uint8_t>(place));
+      }
     }
-    auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
-        new reorder::primitive_desc(src_pd, *dst_pd, attri));
-    auto reorder_p = std::shared_ptr<reorder>(
-        new reorder(*reorder_pd, *src_memory_p, *dst_memory));
+
     pipeline.push_back(*reorder_p);
     stream(stream::kind::eager).submit(pipeline).wait();
     output->set_layout(DataLayout::kMKLDNN);
diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
index dc1176f0848b93dd6872f676c3a71dab4f3455fd..0ce552219458859e147ba207c94270bf84a1fe75 100644
--- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
@@ -158,6 +158,14 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
     auto softmax_p =
         handler.AcquireSoftmax(softmax_dst_memory_p, softmax_src_memory_p);
 
+    // We cannot use softmax_dst_memory_p to get prim desc as
+    // it contains flattened dims (2D) while output tensor can
+    // have 2,3,4+ dims
+    auto output_mem_pd = paddle::platform::create_prim_desc_from_dims(
+        paddle::framework::vectorize2int(output->dims()),
+        mkldnn::memory::format::blocked);
+    output->set_mkldnn_prim_desc(output_mem_pd);
+
     std::vector<primitive> pipeline{
         *(static_cast<softmax_forward::primitive*>(softmax_p.get()))};
     stream(stream::kind::eager).submit(pipeline).wait();
diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
index fe4131df2c77ed28cd36f23002d000dac3e8a129..aef5b7d4311adfedb3db157f17506c3a2c76fbf6 100644
--- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
@@ -79,15 +79,6 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 
       memory::format input_format = input0.format();
 
-      if (src_tz.size() == 1 && (input_format == memory::format::nchw ||
-                                 input_format == memory::format::nhwc)) {
-        input_format = memory::format::x;
-      }
-      if (src_tz.size() == 2 && (input_format == memory::format::nchw ||
-                                 input_format == memory::format::nhwc)) {
-        input_format = memory::format::nc;
-      }
-
       for (int i = 0; i < N; i++) {
         PADDLE_ENFORCE(in_vars[i]->IsType<LoDTensor>(),
                        "all inputs must be all LoDTensors");
@@ -115,12 +106,12 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
           memory::desc(dst_tz, memory::data_type::f32, memory::format::any);
 
       auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd);
-
+      auto dst_mem_pd = sum_pd.dst_primitive_desc();
       std::shared_ptr<memory> dst_mem;
       if (in_place) {
-        dst_mem.reset(new memory(sum_pd.dst_primitive_desc()));
+        dst_mem.reset(new memory(dst_mem_pd));
       } else {
-        dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data));
+        dst_mem.reset(new memory(dst_mem_pd, output_data));
       }
       std::vector<mkldnn::primitive::at> inputs;
       for (size_t i = 0; i < srcs_mem.size(); ++i) {
@@ -145,107 +136,11 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       if (in_place) pipeline.push_back(reorder_prim);
       stream(stream::kind::eager).submit(pipeline).wait();
 
-      output->set_layout(DataLayout::kMKLDNN);
-      output->set_format(output_format);
-    } else if (out_var->IsType<framework::SelectedRows>()) {
-      // TODO(@mozga-intel) Add MKLDNN SelectedRows support
-      std::unique_ptr<framework::SelectedRows> in0;
-      if (in_place) {
-        // If is in_place, we store the input[0] to in0
-        auto& in_sel0 = in_vars[0]->Get<SelectedRows>();
-        auto& rows = in_sel0.rows();
-        in0.reset(new framework::SelectedRows(rows, in_sel0.height()));
-        in0->mutable_value()->ShareDataWith(in_sel0.value());
-      }
-
-      auto get_selected_row = [&](size_t i) -> const SelectedRows& {
-        if (i == 0 && in0) {
-          return *in0;
-        } else {
-          return in_vars[i]->Get<SelectedRows>();
-        }
-      };
-      auto* out = ctx.Output<SelectedRows>("Out");
-      out->mutable_rows()->clear();
-      auto* out_value = out->mutable_value();
-
-      // Runtime InferShape
-      size_t first_dim = 0;
-      for (int i = 0; i < N; i++) {
-        auto& sel_row = get_selected_row(i);
-        first_dim += sel_row.rows().size();
-      }
-
-      std::vector<int64_t> in_dim;
-      for (int i = 0; i < N; i++) {
-        auto& sel_row = get_selected_row(i);
-        if (sel_row.rows().size() > 0) {
-          in_dim = framework::vectorize(sel_row.value().dims());
-          break;
-        }
-      }
-
-      if (in_dim.empty()) {
-        VLOG(3) << "WARNING: all the inputs are empty";
-        in_dim = framework::vectorize(get_selected_row(N - 1).value().dims());
-      } else {
-        in_dim[0] = static_cast<int64_t>(first_dim);
-      }
-
-      in_dim[0] = static_cast<int64_t>(first_dim);
-
-      out_value->Resize(framework::make_ddim(in_dim));
-
-      out_value->mutable_data<T>(ctx.GetPlace());
-
-      // if all the input sparse vars are empty, no need to
-      // merge these vars.
-      if (first_dim == 0UL) {
-        return;
-      }
-
-      math::SelectedRowsAddTo<CPUDeviceContext, T> functor;
-      int64_t offset = 0;
-      for (int i = 0; i < N; i++) {
-        auto& sel_row = get_selected_row(i);
-        if (sel_row.rows().size() == 0) {
-          continue;
-        }
-        PADDLE_ENFORCE_EQ(out->height(), sel_row.height());
-        functor(ctx.template device_context<CPUDeviceContext>(), sel_row,
-                offset, out);
-        offset += sel_row.value().numel();
-      }
-    } else if (out_var->IsType<framework::LoDTensorArray>()) {
-      // TODO(@mozga-intel) Add MKLDNN LoDTensorArray support
-      auto& out_array = *out_var->GetMutable<framework::LoDTensorArray>();
-      for (size_t i = in_place ? 1 : 0; i < in_vars.size(); ++i) {
-        PADDLE_ENFORCE(in_vars[i]->IsType<framework::LoDTensorArray>(),
-                       "Only support all inputs are TensorArray");
-        auto& in_array = in_vars[i]->Get<framework::LoDTensorArray>();
-
-        for (size_t i = 0; i < in_array.size(); ++i) {
-          if (in_array[i].numel() != 0) {
-            if (i >= out_array.size()) {
-              out_array.resize(i + 1);
-            }
-            if (out_array[i].numel() == 0) {
-              framework::TensorCopy(in_array[i], in_array[i].place(),
-                                    ctx.device_context(), &out_array[i]);
-              out_array[i].set_lod(in_array[i].lod());
-            } else {
-              PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod());
-              auto in = EigenVector<T>::Flatten(in_array[i]);
-              auto result = EigenVector<T>::Flatten(out_array[i]);
-              result.device(*ctx.template device_context<MKLDNNDeviceContext>()
-                                 .eigen_device()) = result + in;
-            }
-          }
-        }
-      }
-    } else {
-      PADDLE_THROW("Unexpected branch, output variable type is %s",
-                   framework::ToTypeName(out_var->Type()));
+      output->set_mkldnn_prim_desc(dst_mem_pd);
+    } else {  // Fallback to naive version
+      // TODO(@mozga-intel) Add MKLDNN SelectedRows & LoDTensorArray support
+      SumKernel<CPUDeviceContext, T> reference_kernel;
+      reference_kernel.Compute(ctx);
     }
   }
 };
diff --git a/paddle/fluid/operators/ngraph/ngraph_bridge.cc b/paddle/fluid/operators/ngraph/ngraph_bridge.cc
index 996376c53f07b5c26eccad382e734f187f75f5a1..dafc31b546e3ca6d8dc8d5634dd51cff9fe5bfb7 100644
--- a/paddle/fluid/operators/ngraph/ngraph_bridge.cc
+++ b/paddle/fluid/operators/ngraph/ngraph_bridge.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include <algorithm>
 #include <functional>
+#include <memory>
 #include <vector>
 
 #include "ngraph/ngraph.hpp"
diff --git a/paddle/fluid/operators/ngraph/ngraph_bridge.h b/paddle/fluid/operators/ngraph/ngraph_bridge.h
index 952d5b0b4362aa1c1112782885ab5d30698f5cff..b609c284959238689eaf35c87d1bc4e4330b5c1f 100644
--- a/paddle/fluid/operators/ngraph/ngraph_bridge.h
+++ b/paddle/fluid/operators/ngraph/ngraph_bridge.h
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include <algorithm>
 #include <map>
+#include <memory>
 #include <string>
 #include <unordered_map>
 
diff --git a/paddle/fluid/operators/ngraph/ngraph_engine.cc b/paddle/fluid/operators/ngraph/ngraph_engine.cc
index 660a3298cbe4bf5d83851a916bb3ea8d260214a3..41037d9039bb53038af80eafa269ee9246dc9980 100644
--- a/paddle/fluid/operators/ngraph/ngraph_engine.cc
+++ b/paddle/fluid/operators/ngraph/ngraph_engine.cc
@@ -16,7 +16,10 @@ limitations under the License. */
 
 #include <algorithm>
 #include <map>
+#include <memory>
 #include <string>
+#include <unordered_set>
+#include <utility>
 #include <vector>
 
 #include "paddle/fluid/framework/block_desc.h"
@@ -483,7 +486,8 @@ void NgraphEngine::Run(const framework::Scope& scope,
     }
   }
 
-  backend_->call(backend_->compile(ngraph_function_), t_out, t_in);
+  auto handle = backend_->compile(ngraph_function_);
+  handle->call_with_validate(t_out, t_in);
 }  // NgraphEngine::Run
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/ngraph/ops/accuracy_op.h b/paddle/fluid/operators/ngraph/ops/accuracy_op.h
index d90ec97298b0f6fb8480e97ca57cb427784261e4..0da57517a733985ce1208732f13b08cd7bb8ca30 100644
--- a/paddle/fluid/operators/ngraph/ops/accuracy_op.h
+++ b/paddle/fluid/operators/ngraph/ops/accuracy_op.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
+#include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 #include "ngraph/ngraph.hpp"
 #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
diff --git a/paddle/fluid/operators/ngraph/ops/activation_op.h b/paddle/fluid/operators/ngraph/ops/activation_op.h
index d1b0b80d227a5042219a17e35255617726aa8042..d04dbf648616d9957e2dfb0c416b624540747fe2 100644
--- a/paddle/fluid/operators/ngraph/ops/activation_op.h
+++ b/paddle/fluid/operators/ngraph/ops/activation_op.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
+#include <memory>
 #include <string>
+#include <unordered_map>
 
 #include "ngraph/ngraph.hpp"
 #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
diff --git a/paddle/fluid/operators/ngraph/ops/adam_op.h b/paddle/fluid/operators/ngraph/ops/adam_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..beba5d3d237d4dea578651f440b65a15251d5ad2
--- /dev/null
+++ b/paddle/fluid/operators/ngraph/ops/adam_op.h
@@ -0,0 +1,79 @@
+/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "ngraph/ngraph.hpp"
+#include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h"
+#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
+#include "paddle/fluid/platform/ngraph_helper.h"
+
+namespace paddle {
+namespace operators {
+namespace ngraphs {
+
+// Adam update as nGraph nodes (reading ElementwiseScalar<Op>(a, b) as a Op b):
+//   Moment1Out m1' = b1*m1 + (1-b1)*g;  Moment2Out m2' = b2*m2 + (1-b2)*g*g;
+//   ParamOut = p - lr * sqrt(1-b2pow)/(1-b1pow) * m1'/(eps + sqrt(m2')).
+void BuildAdamNode(
+    const std::shared_ptr<framework::OperatorBase>& op,
+    std::shared_ptr<
+        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
+        ngb_node_map) {
+  auto op_attrs = framework::AttrReader(op->Attrs());
+  auto beta1pow = platform::GetInputNode(op, "Beta1Pow", ngb_node_map);
+  auto beta2pow = platform::GetInputNode(op, "Beta2Pow", ngb_node_map);
+  auto grad = platform::GetInputNode(op, "Grad", ngb_node_map);
+  auto learning_rate = platform::GetInputNode(op, "LearningRate", ngb_node_map);
+  auto moment1 = platform::GetInputNode(op, "Moment1", ngb_node_map);
+  auto moment2 = platform::GetInputNode(op, "Moment2", ngb_node_map);
+  auto param = platform::GetInputNode(op, "Param", ngb_node_map);
+
+  auto epsilon = op_attrs.Get<float>("epsilon");
+  auto beta2 = op_attrs.Get<float>("beta2");
+  auto beta1 = op_attrs.Get<float>("beta1");
+
+  auto moment1out = std::make_shared<ngraph::op::Add>(
+      ElementwiseScalar<ngraph::op::Multiply>(beta1, moment1),
+      ElementwiseScalar<ngraph::op::Multiply>(1. - beta1, grad));
+
+  auto grad_square = std::make_shared<ngraph::op::Multiply>(grad, grad);
+  auto moment2out = std::make_shared<ngraph::op::Add>(
+      ElementwiseScalar<ngraph::op::Multiply>(beta2, moment2),
+      ElementwiseScalar<ngraph::op::Multiply>(1. - beta2, grad_square));
+  auto node_sqrt = std::make_shared<ngraph::op::Sqrt>(
+      ElementwiseScalar<ngraph::op::Subtract>(1., beta2pow));
+  auto lr = std::make_shared<ngraph::op::Divide>(
+      node_sqrt, ElementwiseScalar<ngraph::op::Subtract>(1., beta1pow));
+  auto updated_lr = std::make_shared<ngraph::op::Multiply>(learning_rate, lr);
+
+  auto moment2_sqrt = std::make_shared<ngraph::op::Sqrt>(moment2out);
+  auto param_grad = std::make_shared<ngraph::op::Divide>(
+      moment1out, ElementwiseScalar<ngraph::op::Add>(epsilon, moment2_sqrt));
+  auto delta = ElementwiseScalar<ngraph::op::Multiply>(updated_lr, param_grad);
+  auto param_out = std::make_shared<ngraph::op::Subtract>(param, delta);
+
+  platform::SetOutputNode(op, "Moment1Out", moment1out, ngb_node_map);
+  platform::SetOutputNode(op, "Moment2Out", moment2out, ngb_node_map);
+  platform::SetOutputNode(op, "ParamOut", param_out, ngb_node_map);
+}
+}  // namespace ngraphs
+}  // namespace operators
+}  // namespace paddle
+
+REGISTER_NG_OP(adam, BuildAdamNode);
diff --git a/paddle/fluid/operators/ngraph/ops/batch_norm_op.h b/paddle/fluid/operators/ngraph/ops/batch_norm_op.h
index 2d638bb53f084ee75014d64302ec3d86b3bcf26f..01fe78cdb24652429f713d09ea2abb8c73bbddf5 100644
--- a/paddle/fluid/operators/ngraph/ops/batch_norm_op.h
+++ b/paddle/fluid/operators/ngraph/ops/batch_norm_op.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
+#include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 
 #include "ngraph/ngraph.hpp"
diff --git a/paddle/fluid/operators/ngraph/ops/binary_unary_op.h b/paddle/fluid/operators/ngraph/ops/binary_unary_op.h
index 375f188286c123b1d652f8780989404760c8e1a4..2d11775849a778262dcd3e36ff35d8851fb350f1 100644
--- a/paddle/fluid/operators/ngraph/ops/binary_unary_op.h
+++ b/paddle/fluid/operators/ngraph/ops/binary_unary_op.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
+#include <memory>
 #include <string>
+#include <unordered_map>
 #include "ngraph/ngraph.hpp"
 #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
 #include "paddle/fluid/platform/ngraph_helper.h"
diff --git a/paddle/fluid/operators/ngraph/ops/concat_op.h b/paddle/fluid/operators/ngraph/ops/concat_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..27d796851501b9158e1ce7f6415b4d5373e88e2d
--- /dev/null
+++ b/paddle/fluid/operators/ngraph/ops/concat_op.h
@@ -0,0 +1,50 @@
+/*Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "ngraph/ngraph.hpp"
+#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
+#include "paddle/fluid/platform/ngraph_helper.h"
+
+namespace paddle {
+namespace operators {
+namespace ngraphs {
+
+// Gathers the nGraph node of every input variable of `op`, in Inputs() order,
+// and publishes their ngraph::op::Concat along attribute "axis" as "Out".
+void BuildConcatNode(
+    const std::shared_ptr<framework::OperatorBase>& op,
+    std::shared_ptr<
+        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
+        ngb_node_map) {
+  std::vector<std::shared_ptr<ngraph::Node>> inputs;
+  for (const auto& slot : op->Inputs()) {
+    for (const auto& name : slot.second) {
+      inputs.push_back(ngb_node_map->at(name));
+    }
+  }
+  const size_t axis = framework::AttrReader(op->Attrs()).Get<int>("axis");
+  auto concat = std::make_shared<ngraph::op::Concat>(inputs, axis);
+  platform::SetOutputNode(op, "Out", concat, ngb_node_map);
+}
+}  // namespace ngraphs
+}  // namespace operators
+}  // namespace paddle
+
+REGISTER_NG_OP(concat, BuildConcatNode);
diff --git a/paddle/fluid/operators/ngraph/ops/conv2d_op.h b/paddle/fluid/operators/ngraph/ops/conv2d_op.h
index d664825c53ebf17435a0ec532969978abe6d30ca..be766ebeb4796be102c917296238b8ab14710131 100644
--- a/paddle/fluid/operators/ngraph/ops/conv2d_op.h
+++ b/paddle/fluid/operators/ngraph/ops/conv2d_op.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
+#include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 #include "ngraph/ngraph.hpp"
 #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
diff --git a/paddle/fluid/operators/ngraph/ops/cross_entropy_op.h b/paddle/fluid/operators/ngraph/ops/cross_entropy_op.h
index 3ab158f3e13a33bdb7e423919c7592831fa9831a..be36b9d21ef6ebe5c11d783462e7dc564afe2aba 100644
--- a/paddle/fluid/operators/ngraph/ops/cross_entropy_op.h
+++ b/paddle/fluid/operators/ngraph/ops/cross_entropy_op.h
@@ -15,7 +15,9 @@ limitations under the License. */
 #pragma once
 
 #include <functional>
+#include <memory>
 #include <string>
+#include <unordered_map>
 
 #include "ngraph/ngraph.hpp"
 #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
diff --git a/paddle/fluid/operators/ngraph/ops/elementwise_add_op.h b/paddle/fluid/operators/ngraph/ops/elementwise_add_op.h
index fb796c336a9b45966a0ff703286faa8b61752483..d7485a706a193a52113cb993a3604c444b4303c0 100644
--- a/paddle/fluid/operators/ngraph/ops/elementwise_add_op.h
+++ b/paddle/fluid/operators/ngraph/ops/elementwise_add_op.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
+#include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 
 #include "ngraph/ngraph.hpp"
diff --git a/paddle/fluid/operators/ngraph/ops/fill_constant_op.h b/paddle/fluid/operators/ngraph/ops/fill_constant_op.h
index bc958f2ba27cf929408d56d41bf22976caf7d6ae..42c2df5259242b7ae28613ab12c237834febc574 100644
--- a/paddle/fluid/operators/ngraph/ops/fill_constant_op.h
+++ b/paddle/fluid/operators/ngraph/ops/fill_constant_op.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
+#include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 #include "ngraph/ngraph.hpp"
 #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
diff --git a/paddle/fluid/operators/ngraph/ops/mean_op.h b/paddle/fluid/operators/ngraph/ops/mean_op.h
index f839d9978d71c2967a7f2c2f22622dc615907831..86e697d260eb0f26428258b5faea958a7319948c 100644
--- a/paddle/fluid/operators/ngraph/ops/mean_op.h
+++ b/paddle/fluid/operators/ngraph/ops/mean_op.h
@@ -15,7 +15,9 @@ limitations under the License. */
 #pragma once
 
 #include <functional>
+#include <memory>
 #include <string>
+#include <unordered_map>
 
 #include "ngraph/ngraph.hpp"
 #include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h"
diff --git a/paddle/fluid/operators/ngraph/ops/momentum_op.h b/paddle/fluid/operators/ngraph/ops/momentum_op.h
index b8291a08a28b585a7ceb67642ba28c3314195790..84bddacba89d2921bca4915af7f64dcfbfdd42db 100644
--- a/paddle/fluid/operators/ngraph/ops/momentum_op.h
+++ b/paddle/fluid/operators/ngraph/ops/momentum_op.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
+#include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 #include "ngraph/ngraph.hpp"
 #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
diff --git a/paddle/fluid/operators/ngraph/ops/mul_op.h b/paddle/fluid/operators/ngraph/ops/mul_op.h
index 98c70a1a99aa899ed8fdd3c4674668cefd14c4ae..d13665864b8950436298b7cf685c803593007803 100644
--- a/paddle/fluid/operators/ngraph/ops/mul_op.h
+++ b/paddle/fluid/operators/ngraph/ops/mul_op.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
+#include <memory>
 #include <string>
+#include <unordered_map>
 #include "ngraph/ngraph.hpp"
 #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
 #include "paddle/fluid/platform/ngraph_helper.h"
diff --git a/paddle/fluid/operators/ngraph/ops/pool2d_op.h b/paddle/fluid/operators/ngraph/ops/pool2d_op.h
index a6371372ef10c093c41153cb0dc73f4f9e95687f..c7b9c9316171a448d16ed68339f5754d25f3cabd 100644
--- a/paddle/fluid/operators/ngraph/ops/pool2d_op.h
+++ b/paddle/fluid/operators/ngraph/ops/pool2d_op.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
+#include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 
 #include "ngraph/ngraph.hpp"
diff --git a/paddle/fluid/operators/ngraph/ops/scale_op.h b/paddle/fluid/operators/ngraph/ops/scale_op.h
index a334192419f572c429f5842cd9e418d8945eb0ef..1461b85b16ece79548f3ca95be811fb31136c610 100644
--- a/paddle/fluid/operators/ngraph/ops/scale_op.h
+++ b/paddle/fluid/operators/ngraph/ops/scale_op.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
+#include <memory>
 #include <string>
+#include <unordered_map>
 #include "ngraph/ngraph.hpp"
 #include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h"
 #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
diff --git a/paddle/fluid/operators/ngraph/ops/softmax_op.h b/paddle/fluid/operators/ngraph/ops/softmax_op.h
index 1df6418de06d000892d2802596df61320fcdc759..7d5720c460c4194ce06670a715b8d7ff4435bb2a 100644
--- a/paddle/fluid/operators/ngraph/ops/softmax_op.h
+++ b/paddle/fluid/operators/ngraph/ops/softmax_op.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
+#include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 #include "ngraph/ngraph.hpp"
 #include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h"
diff --git a/paddle/fluid/operators/ngraph/ops/sum_op.h b/paddle/fluid/operators/ngraph/ops/sum_op.h
index 97f4ce64aa58bfa8cb70c36f9a12b7b8135da637..ab8cdb8f4d847c0acb60b39d07dc83f085b60bbd 100644
--- a/paddle/fluid/operators/ngraph/ops/sum_op.h
+++ b/paddle/fluid/operators/ngraph/ops/sum_op.h
@@ -18,6 +18,7 @@ limitations under the License. */
 #include <vector>
 
 #include "ngraph/ngraph.hpp"
+#include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
 #include "paddle/fluid/platform/ngraph_helper.h"
 
 namespace paddle {
@@ -53,3 +54,5 @@ void BuildSumNode(
 }  // namespace ngraphs
 }  // namespace operators
 }  // namespace paddle
+
+REGISTER_NG_OP(sum, BuildSumNode);
diff --git a/paddle/fluid/operators/ngraph/ops/top_k_op.h b/paddle/fluid/operators/ngraph/ops/top_k_op.h
index 6d10faa7c2efb9cbd87fa8ef1c6ecb4fa350d8f6..cdc26f6afd58700c3a1f57fa955d60bc8925d2d1 100644
--- a/paddle/fluid/operators/ngraph/ops/top_k_op.h
+++ b/paddle/fluid/operators/ngraph/ops/top_k_op.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
+#include <memory>
 #include <string>
+#include <unordered_map>
 #include "ngraph/ngraph.hpp"
 #include "paddle/fluid/operators/ngraph/ops/op_bridge.h"
 #include "paddle/fluid/platform/ngraph_helper.h"
diff --git a/paddle/fluid/operators/optimizers/sgd_op.h b/paddle/fluid/operators/optimizers/sgd_op.h
index 98bae5e1d329005f9463fd7bb0751c44952dea88..c9c9f530fe846c1713ad176e05a377996d04470b 100644
--- a/paddle/fluid/operators/optimizers/sgd_op.h
+++ b/paddle/fluid/operators/optimizers/sgd_op.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/selected_rows.h"
+#include "paddle/fluid/operators/jit/kernels.h"
 
 namespace paddle {
 namespace operators {
@@ -32,53 +33,57 @@ class SGDOpKernel : public framework::OpKernel<T> {
     if (param_var->IsType<framework::LoDTensor>()) {
       const auto *param = ctx.Input<framework::Tensor>("Param");
       auto *param_out = ctx.Output<framework::Tensor>("ParamOut");
-
       // Actually, all tensors are LoDTensor except SelectedRows.
       if (grad_var->IsType<framework::LoDTensor>()) {
-        param_out->mutable_data<T>(ctx.GetPlace());
         const auto *grad = ctx.Input<framework::Tensor>("Grad");
-
-        auto p = framework::EigenVector<T>::Flatten(*param);
-        auto g = framework::EigenVector<T>::Flatten(*grad);
-        auto o = framework::EigenVector<T>::Flatten(*param_out);
-        auto *lr = learning_rate->data<T>();
-
-        o = p - lr[0] * g;
+        auto sz = param_out->numel();
+        PADDLE_ENFORCE_EQ(param->numel(), sz);
+        PADDLE_ENFORCE_EQ(grad->numel(), sz);
+
+        jit::sgd_attr_t attr(1, sz, 1, sz, 1);
+        const T *lr = learning_rate->data<T>();
+        const T *param_data = param->data<T>();
+        const T *grad_data = grad->data<T>();
+        int64_t rows_idx = 0;
+        T *out_data = param_out->mutable_data<T>(ctx.GetPlace());
+
+        auto sgd =
+            jit::Get<jit::kSgd, jit::SgdTuples<T>, platform::CPUPlace>(attr);
+        sgd(lr, param_data, grad_data, &rows_idx, out_data, &attr);
       } else if (grad_var->IsType<framework::SelectedRows>()) {
         // TODO(qijun): In Sparse SGD operator, in-place update is enforced.
         // This manual optimization brings difficulty to track data dependency.
         // It's better to find a more elegant solution.
         PADDLE_ENFORCE_EQ(param, param_out);
         const auto *grad = ctx.Input<framework::SelectedRows>("Grad");
+        auto &grad_rows = grad->rows();
 
         // for distributed training, a sparse var may be empty,
         // just skip updating.
-        if (grad->rows().size() == 0) {
+        if (grad_rows.size() == 0) {
           return;
         }
 
-        auto grad_height = grad->height();
         auto out_dims = param_out->dims();
-        PADDLE_ENFORCE_EQ(grad_height, out_dims[0]);
-
+        PADDLE_ENFORCE_EQ(grad->height(), out_dims[0]);
         auto &grad_value = grad->value();
-        auto &grad_rows = grad->rows();
-
-        size_t grad_row_numel = grad_value.numel() / grad_rows.size();
-        PADDLE_ENFORCE_EQ(static_cast<int64_t>(grad_row_numel),
-                          param_out->numel() / grad_height);
-
-        auto *grad_data = grad_value.data<T>();
-        auto *out_data = param_out->data<T>();
-        auto *lr = learning_rate->data<T>();
-        for (size_t i = 0; i < grad_rows.size(); i++) {
-          PADDLE_ENFORCE(grad_rows[i] < grad_height,
-                         "Input rows index should less than height");
-          for (size_t j = 0; j < grad_row_numel; j++) {
-            out_data[grad_rows[i] * grad_row_numel + j] -=
-                lr[0] * grad_data[i * grad_row_numel + j];
-          }
-        }
+        const T *param_data = param->data<T>();
+        const T *grad_data = grad_value.data<T>();
+        const T *lr = learning_rate->data<T>();
+        const int64_t *rows_data = grad_rows.data();
+        T *out_data = param_out->mutable_data<T>(ctx.GetPlace());
+
+        jit::sgd_attr_t attr;
+        attr.param_height = out_dims[0];
+        attr.param_width = param_out->numel() / attr.param_height;
+        attr.grad_height = grad_rows.size();  // note: it is not grad->height()
+        attr.grad_width = grad_value.numel() / attr.grad_height;
+        attr.selected_rows_size = grad_rows.size();
+        PADDLE_ENFORCE_EQ(attr.grad_width, attr.param_width);
+
+        auto sgd =
+            jit::Get<jit::kSgd, jit::SgdTuples<T>, platform::CPUPlace>(attr);
+        sgd(lr, param_data, grad_data, rows_data, out_data, &attr);
       } else {
         PADDLE_THROW("Unsupported Variable Type of Grad");
       }
diff --git a/paddle/fluid/operators/reader/buffered_reader.cc b/paddle/fluid/operators/reader/buffered_reader.cc
index db80fda695dc7666ebe8d930427dd781449e26dd..719e0ee8a1709c89a94dcce7088622157eafc6d9 100644
--- a/paddle/fluid/operators/reader/buffered_reader.cc
+++ b/paddle/fluid/operators/reader/buffered_reader.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "paddle/fluid/operators/reader/buffered_reader.h"
+#include <memory>
 #include <vector>
 #include "paddle/fluid/framework/data_type.h"
 
diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
index 0932211cadf30d0c464d43ca652a5c52df15747e..d3dcd1f96a986d2450c8af780a12183f7dfc66d5 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
@@ -22,6 +22,9 @@ class SequenceEnumerateOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
+    if (ctx->IsRuntime()) {
+      return;
+    }
     PADDLE_ENFORCE(
         ctx->HasInput("X"),
         "Input(X) of SequecceEnumerate operator should not be null.");
@@ -33,9 +36,9 @@ class SequenceEnumerateOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(
         x_dims.size(), 2,
         "Input(X) of SequenceEnumerate operator's rank should be 2.");
-    PADDLE_ENFORCE_EQ(
-        x_dims[1], 1,
-        "Input(X) of SequenceEnumerate operator's 2nd dimension should be 1.");
+    PADDLE_ENFORCE_EQ(x_dims[1], 1,
+                      "Input(X) of SequenceEnumerate operator's 2nd "
+                      "dimension should be 1.");
 
     const auto win_size = ctx->Attrs().Get<int>("win_size");
     ctx->SetOutputDim("Out", {x_dims[0], win_size});
diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu
index 28821e7129c1601f1214b0b56696fbf526a2123f..d5deb7582c7c00f3102ea568a716b715611212ce 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu
+++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu
@@ -65,6 +65,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel<T> {
     auto lod0 = in_lod[0];
     auto in_len = in->numel();
     auto in_data = in->data<T>();
+    out->Resize({in_dims[0], win_size});
     auto out_data = out->mutable_data<T>(context.GetPlace());
     // Copy LoD to GPU
     const size_t* dev_in_lod_ptr = lod0.CUDAData(context.GetPlace());
@@ -72,6 +73,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel<T> {
     CalcOutPut<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1,
                  PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
         in_data, dev_in_lod_ptr, lod0.size(), win_size, pad_value, out_data);
+    out->set_lod(in->lod());
   }
 };
 
diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h
index dc18d9b2071303377505155476b87ed029eaf986..18da69993b2ad5879dd4678ec0d4b06d7e30cb0a 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h
@@ -39,6 +39,7 @@ class SequenceEnumerateKernel : public framework::OpKernel<T> {
     // Generate enumerate sequence set
     auto lod0 = in_lod[0];
     auto in_data = in->data<T>();
+    out->Resize({in_dims[0], win_size});
     auto out_data = out->mutable_data<T>(context.GetPlace());
     for (size_t i = 0; i < lod0.size() - 1; ++i) {
       for (size_t idx = lod0[i]; idx < lod0[i + 1]; ++idx) {
@@ -49,6 +50,7 @@ class SequenceEnumerateKernel : public framework::OpKernel<T> {
         }
       }
     }
+    out->set_lod(in->lod());
   }
 };
 
diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu
index afc08c7b3f6596efd3b6e0b74c17aa3c9268c47d..888d1a12e6751eeb91f0af04b50cf6d5bea74162 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu
+++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include <algorithm>
+#include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/operators/sequence_ops/sequence_expand_op.h"
 #include "paddle/fluid/platform/cuda_primitives.h"
 
@@ -88,6 +89,49 @@ void GetOutputOffset(const framework::Vector<size_t>& x_lod,
   }
 }
 
+// Expands x into out using one memory::Copy per repeated source row.
+// do_copy == false: copies nothing and returns the number of row copies that
+// a do_copy == true call would issue. do_copy == true: issues the copies on
+// context.stream() and returns 0.
+template <typename T>
+static int ExpandByMemoryCopy(const platform::CUDADeviceContext& context,
+                              const LoDTensor& x, LoDTensor* out,
+                              const framework::Vector<size_t>& x_lod,
+                              const framework::Vector<size_t>& ref_lod,
+                              bool do_copy) {
+  auto out_data = out->data<T>();
+  auto x_data = x.data<T>();
+  // Hold the place by value: if GetPlace() returns a temporary, a reference
+  // obtained through boost::get would dangle after this statement.
+  auto gpu_place = boost::get<platform::CUDAPlace>(context.GetPlace());
+  int x_item_length = x.numel() / x.dims()[0];
+  int out_offset = 0;
+  int num_copys = 0;
+  for (size_t i = 1; i < ref_lod.size(); ++i) {
+    int repeat_num = ref_lod[i] - ref_lod[i - 1];
+    int x_start = x_lod[i - 1];
+    int x_seq_len = static_cast<int>(x_lod[i]) - x_start;
+    if (repeat_num > 0 && do_copy) {
+      // With a one-level LoD on out, out_offset indexes into that LoD.
+      int out_start = out_offset;
+      if (out->lod().size() == 1) out_start = out->lod()[0][out_offset];
+      for (int j = 0; j < repeat_num; j++) {
+        for (int k = 0; k < x_seq_len; k++) {
+          memory::Copy(
+              gpu_place,
+              out_data + (out_start + j * x_seq_len + k) * x_item_length,
+              gpu_place, x_data + (x_start + k) * x_item_length,
+              sizeof(T) * x_item_length, context.stream());
+        }
+      }
+    } else if (repeat_num > 0) {
+      num_copys += repeat_num * x_seq_len;
+    }
+    out_offset += repeat_num;
+  }
+  return num_copys;
+}
+
 template <typename T>
 struct SequenceExpandFunctor<platform::CUDADeviceContext, T> {
   void operator()(
@@ -95,22 +139,40 @@ struct SequenceExpandFunctor<platform::CUDADeviceContext, T> {
       const framework::Vector<size_t>& x_lod,   /*expand source lod*/
       const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
       LoDTensor* out) {
-    int x_item_length = x.numel() / x.dims()[0];
-    framework::Vector<size_t> out_offset(x_lod.size());
-    GetOutputOffset(x_lod, ref_lod, &out_offset);
-
-    int thread_x = std::min(32, std::max(static_cast<int>(ref_lod.size()), 16));
-    int thread_y = 16;
-    int thread_z = 1024 / thread_x / thread_y;
-    int block_x = static_cast<int>(ref_lod.size());
-    dim3 block_size(thread_x, thread_y, thread_z);
-    dim3 grid_size(block_x, 1);
+    int num_copys =
+        ExpandByMemoryCopy<T>(context, x, out, x_lod, ref_lod, false);
+    // Sometimes direct copies are faster; this may need deeper analysis.
+    if (num_copys < 5) {
+      ExpandByMemoryCopy<T>(context, x, out, x_lod, ref_lod, true);
+    } else {
+      int x_item_length = x.numel() / x.dims()[0];
+      size_t x_lod_size = x_lod.size();
+      framework::Vector<size_t> out_offset(x_lod_size * 2 + ref_lod.size());
+      GetOutputOffset(x_lod, ref_lod, &out_offset);
+
+      for (size_t i = 0; i < x_lod_size; ++i) {
+        out_offset[x_lod_size + i] = x_lod[i];
+      }
+      for (size_t i = 0; i < ref_lod.size(); ++i) {
+        out_offset[2 * x_lod_size + i] = ref_lod[i];
+      }
 
-    sequence_expand_kernel<<<grid_size, block_size, 0, context.stream()>>>(
-        x.data<T>(), x_lod.CUDAData(context.GetPlace()),
-        ref_lod.CUDAData(context.GetPlace()),
-        out_offset.CUDAData(context.GetPlace()), x_lod.size(), x_item_length,
-        out->mutable_data<T>(context.GetPlace()));
+      const size_t* out_offset_data = out_offset.CUDAData(context.GetPlace());
+      const size_t* x_lod_data = out_offset_data + x_lod_size;
+      const size_t* ref_lod_data = out_offset_data + 2 * x_lod_size;
+
+      int thread_x =
+          std::min(32, std::max(static_cast<int>(ref_lod.size()), 16));
+      int thread_y = 16;
+      int thread_z = 1024 / thread_x / thread_y;
+      int block_x = static_cast<int>(ref_lod.size());
+      dim3 block_size(thread_x, thread_y, thread_z);
+      dim3 grid_size(block_x, 1);
+
+      sequence_expand_kernel<<<grid_size, block_size, 0, context.stream()>>>(
+          x.data<T>(), x_lod_data, ref_lod_data, out_offset_data, x_lod_size,
+          x_item_length, out->mutable_data<T>(context.GetPlace()));
+    }
   }
 };
 
diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.cc b/paddle/fluid/operators/softmax_with_cross_entropy_op.cc
index 0397c7791e1768393ff642743d2f7085b25fb551..7754d2bfebdbc81e25432641b2eb4315386f75ff 100644
--- a/paddle/fluid/operators/softmax_with_cross_entropy_op.cc
+++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.cc
@@ -46,10 +46,10 @@ class SoftmaxWithCrossEntropyOpMaker
         .SetDefault(false);
     AddAttr<bool>(
         "numeric_stable_mode",
-        "(bool, default: false), A flag to indicate whether to use more "
+        "(bool, default: true), A flag to indicate whether to use more "
         "numerically stable algorithm. This flag is only valid when "
         "soft_label is false and GPU is used.")
-        .SetDefault(false);
+        .SetDefault(true);
     AddAttr<int>(
         "ignore_index",
         "(int, default -100), Specifies a target value that is ignored and"
diff --git a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc
index c8ee13875c5ae772de3c09f97fded8f70c5698e6..640644a94690d9682a5e6b1aa788a9ebdc5d2a54 100644
--- a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc
+++ b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc
@@ -117,11 +117,11 @@ class TeacherStudentSigmoidLossOpMaker
               "[N x 1]. The teacher student sigmoid loss.");
     AddAttr<float>(
         "soft_max_up_bound",
-        "fp32, if input > soft_max_up_bound, will be bound, default 15.0")
+        "fp32, if input > soft_max_up_bound, input will be bound, default 15.0")
         .SetDefault(15.0);
-    AddAttr<float>(
-        "soft_max_lower_bound",
-        "fp32, if input < soft_max_lower_bound, will be bound, default -15.0")
+    AddAttr<float>("soft_max_lower_bound",
+                   "fp32, if input < soft_max_lower_bound, input will be "
+                   "bound, default -15.0")
         .SetDefault(-15.0);
     AddComment(R"DOC(
 TeacherStudentSigmoidLoss Operator.
@@ -134,7 +134,7 @@ we add another label(z') to original.
         label = {-2, -1, [0, 2]}
         when z' is not exist, clk = 0 : label = -2;
         when z' is not exist, clk = 1 : label = -1;
-        when z' is exist    , clk = 0 : label = 0 + z';
+        when z' is exist , clk = 0 : label = 0 + z';
         when z' is exist    , clk = 1 : label = 1 + z';
 
 )DOC");
diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt
index 1838506c8931b2e1ff82adf6f277925dc9d53374..9220d35707b286d76ab4824e3f1080453f60bfe6 100644
--- a/paddle/fluid/platform/CMakeLists.txt
+++ b/paddle/fluid/platform/CMakeLists.txt
@@ -82,6 +82,7 @@ nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_
 cc_test(init_test SRCS init_test.cc DEPS device_context)
 
 nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
+nv_test(cudnn_desc_test SRCS cudnn_desc_test.cc DEPS dynload_cuda)
 nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context)
 
 cc_library(timer SRCS timer.cc)
diff --git a/paddle/fluid/platform/cudnn_desc.h b/paddle/fluid/platform/cudnn_desc.h
new file mode 100644
index 0000000000000000000000000000000000000000..1062b403f289610a6dec28dead9177d387f0d4e0
--- /dev/null
+++ b/paddle/fluid/platform/cudnn_desc.h
@@ -0,0 +1,124 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <functional>
+#include <iostream>
+#include <iterator>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <vector>
+#include "paddle/fluid/platform/cudnn_helper.h"
+
+namespace paddle {
+namespace platform {
+using framework::Tensor;
+
+// Converts t with framework::ToDataType, then maps the result to cuDNN.
+template <typename T>
+cudnnDataType_t ToCudnnDataType(const T& t) {
+  return ToCudnnDataType(framework::ToDataType(t));
+}
+
+// Full specialization for proto VarType values: FP16 -> HALF, FP64 -> DOUBLE,
+// FP32 and every other type -> FLOAT (the same fallback as before).
+// Marked inline because, unlike the primary template, an explicit
+// specialization defined in a header is an ordinary function for the ODR
+// and would otherwise be defined once per including translation unit.
+template <>
+inline cudnnDataType_t ToCudnnDataType(
+    const framework::proto::VarType::Type& t) {
+  switch (t) {
+    case framework::proto::VarType::FP16:
+      return CUDNN_DATA_HALF;
+    case framework::proto::VarType::FP64:
+      return CUDNN_DATA_DOUBLE;
+    case framework::proto::VarType::FP32:
+    default:
+      return CUDNN_DATA_FLOAT;
+  }
+}
+
+// Creates a cuDNN activation descriptor on construction; unique_ptr frees it.
+class ActivationDescriptor {
+ public:
+  using T = cudnnActivationStruct;
+  struct Deleter {
+    void operator()(T* handle) {
+      if (handle == nullptr) return;
+      PADDLE_ENFORCE(dynload::cudnnDestroyActivationDescriptor(handle));
+    }
+  };
+  ActivationDescriptor() {
+    T* created;
+    PADDLE_ENFORCE(dynload::cudnnCreateActivationDescriptor(&created));
+    desc_.reset(created);
+  }
+  // Forwards mode and coef (as double) to cuDNN with CUDNN_NOT_PROPAGATE_NAN.
+  template <typename CoefT>
+  void set(cudnnActivationMode_t mode, const CoefT& coef) {
+    const double coef_as_double = static_cast<double>(coef);
+    CUDNN_ENFORCE(dynload::cudnnSetActivationDescriptor(
+        desc_.get(), mode, CUDNN_NOT_PROPAGATE_NAN, coef_as_double));
+  }
+  T* desc() { return desc_.get(); }
+  T* desc() const { return desc_.get(); }
+
+ private:
+  std::unique_ptr<T, Deleter> desc_;
+};
+
+// Owns a cuDNN tensor descriptor; set() fills it from a framework::Tensor.
+class TensorDescriptor {
+ public:
+  using T = cudnnTensorStruct;
+  struct Deleter {
+    void operator()(T* t) {
+      if (t != nullptr) {
+        PADDLE_ENFORCE(dynload::cudnnDestroyTensorDescriptor(t));
+      }
+    }
+  };
+  TensorDescriptor() {
+    T* raw_ptr;
+    PADDLE_ENFORCE(dynload::cudnnCreateTensorDescriptor(&raw_ptr));
+    desc_.reset(raw_ptr);
+  }
+  T* desc() { return desc_.get(); }
+  T* desc() const { return desc_.get(); }
+  // With groups > 1 only dims[1] is divided by groups; strides use full dims.
+  void set(const Tensor& tensor, const int groups = 1) {
+    auto dims = framework::vectorize2int(tensor.dims());
+    // Reject ranks that would index out of bounds below.
+    PADDLE_ENFORCE(dims.size() >= (groups > 1 ? 2UL : 1UL),
+                   "Tensor rank is too small for a cuDNN tensor descriptor.");
+    std::vector<int> strides(dims.size(), 1);
+    for (int i = dims.size() - 2; i >= 0; i--) {
+      strides[i] = dims[i + 1] * strides[i + 1];
+    }
+    std::vector<int> dims_with_group(dims.begin(), dims.end());
+    if (groups > 1) dims_with_group[1] /= groups;
+    PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor(
+        desc_.get(), ToCudnnDataType(tensor.type()), dims_with_group.size(),
+        dims_with_group.data(), strides.data()));
+  }
+ private:
+  std::unique_ptr<T, Deleter> desc_;
+};
+
+}  // namespace platform
+}  // namespace paddle
diff --git a/paddle/fluid/platform/cudnn_desc_test.cc b/paddle/fluid/platform/cudnn_desc_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a60102a54899b25c89d8c131220dde21f77bba70
--- /dev/null
+++ b/paddle/fluid/platform/cudnn_desc_test.cc
@@ -0,0 +1,41 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/platform/cudnn_desc.h"
+#include <gtest/gtest.h>
+
+namespace paddle {
+namespace platform {
+
+// No assertions: only constructs and destroys descriptors on the stack, via
+// new/delete, and via std::unique_ptr.
+TEST(TensorDescriptor, Empty) {
+  ActivationDescriptor a;
+  TensorDescriptor t, t1;
+  delete new TensorDescriptor();
+  std::unique_ptr<TensorDescriptor> tt(new TensorDescriptor());
+}
+
+// Describes a 2x3x4 float CPU tensor and checks the descriptor is non-null.
+TEST(TensorDescriptor, Normal) {
+  framework::Tensor tensor;
+  tensor.Resize({2, 3, 4});
+  tensor.mutable_data<float>(platform::CPUPlace());
+  TensorDescriptor descriptor;
+  descriptor.set(tensor);
+  EXPECT_TRUE(descriptor.desc() != nullptr);
+}
+
+}  // namespace platform
+}  // namespace paddle
diff --git a/paddle/fluid/platform/dynload/cudnn.h b/paddle/fluid/platform/dynload/cudnn.h
index 2f4f8101e4b957634d68fb0d64649ff8afba7c54..3008c166938d7db190e8f716ca925fda5ccebc25 100644
--- a/paddle/fluid/platform/dynload/cudnn.h
+++ b/paddle/fluid/platform/dynload/cudnn.h
@@ -99,6 +99,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
   __macro(cudnnDestroy);                                  \
   __macro(cudnnSetStream);                                \
   __macro(cudnnActivationForward);                        \
+  __macro(cudnnActivationBackward);                       \
   __macro(cudnnConvolutionForward);                       \
   __macro(cudnnConvolutionBackwardBias);                  \
   __macro(cudnnGetConvolutionForwardWorkspaceSize);       \
diff --git a/paddle/fluid/platform/dynload/mklml.h b/paddle/fluid/platform/dynload/mklml.h
index a260cda49138580b209e647af459e9392d9f18f1..a5b846f500f3677188b170dda76c65047d628064 100644
--- a/paddle/fluid/platform/dynload/mklml.h
+++ b/paddle/fluid/platform/dynload/mklml.h
@@ -86,6 +86,8 @@ extern void* mklml_dso_handle;
   __macro(vdPowx);                  \
   __macro(vsInv);                   \
   __macro(vdInv);                   \
+  __macro(vmsErf);                  \
+  __macro(vmdErf);                  \
   __macro(MKL_Set_Num_Threads)
 
 MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
diff --git a/paddle/fluid/platform/event.h b/paddle/fluid/platform/event.h
index a4db23758b1c477114cd03dcd0e9f51296c575c6..2dcf966754cbed2670acb9c3548c23355be5503c 100644
--- a/paddle/fluid/platform/event.h
+++ b/paddle/fluid/platform/event.h
@@ -14,6 +14,9 @@ limitations under the License. */
 
 #pragma once
 #include <string>
+#ifdef PADDLE_WITH_CUDA
+#include <cuda_runtime.h>
+#endif
 
 namespace paddle {
 namespace platform {
diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h
index 4a674ca526f455314613d43847faa7e01f4d7802..4fa6774f028bef901f6e11f2d3dafe52a10a548e 100644
--- a/paddle/fluid/platform/mkldnn_reuse.h
+++ b/paddle/fluid/platform/mkldnn_reuse.h
@@ -271,7 +271,6 @@ class MKLDNNHandler {
     AppendKey(key, suffix);
   }
 
- protected:
   static void AppendKeyDims(std::string* key,
                             const mkldnn::memory::dims& dims) {
     for (unsigned int i = 0; i < dims.size(); i++) {
@@ -289,6 +288,7 @@ class MKLDNNHandler {
     key->append(s);
   }
 
+ protected:
   static std::string dims2str(const mkldnn::memory::dims& operand_dims) {
     std::string dstr = "";
     for (size_t i = 0; i < operand_dims.size(); ++i) {
@@ -302,6 +302,9 @@ class MKLDNNHandler {
   mkldnn::engine engine_;
   std::string key_;
   bool is_reusing_;
+
+ public:
+  static constexpr int MaxKeyLength = 256;
 };
 
 class TransposeMKLDNNHandler : public MKLDNNHandler {
diff --git a/paddle/fluid/pybind/imperative.h b/paddle/fluid/pybind/imperative.h
index f947b743f99d5d4994b1a87f89fd6815357d8125..8c48b2a7153c566930a074bd0bab1f054c13c2d5 100644
--- a/paddle/fluid/pybind/imperative.h
+++ b/paddle/fluid/pybind/imperative.h
@@ -33,7 +33,7 @@ class Layer : public imperative::Layer {
   }
 };
 
-class PyOpBase : public imperative::OpBase {
+class PYBIND11_HIDDEN PyOpBase : public imperative::OpBase {
  public:
   using imperative::OpBase::OpBase;  // Inherit constructors
 };
diff --git a/paddle/fluid/pybind/ir.cc b/paddle/fluid/pybind/ir.cc
index 069750e2406bcbf327591641bf624f36969acc25..68f74a8531fff0c49c8a62d12f5cde7af77faf8a 100644
--- a/paddle/fluid/pybind/ir.cc
+++ b/paddle/fluid/pybind/ir.cc
@@ -14,6 +14,7 @@
 
 #include "paddle/fluid/pybind/ir.h"
 #include <algorithm>
+#include <memory>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
@@ -116,7 +117,7 @@ void BindNode(py::module *m) {
       .def("is_var", &Node::IsVar)
       .def("is_ctrl_var", &Node::IsCtrlVar)
       .def("clear_inputs", [](Node &self) { self.inputs.clear(); })
-      .def("inputs_remove",
+      .def("remove_input",
            [](Node &self, int node_id) {
              auto pos = std::find_if(
                  self.inputs.begin(), self.inputs.end(),
@@ -125,7 +126,7 @@ void BindNode(py::module *m) {
                self.inputs.erase(pos);
              }
            })
-      .def("inputs_remove",
+      .def("remove_input",
            [](Node &self, Node &node) {
              auto pos =
                  std::find(self.inputs.begin(), self.inputs.end(), &node);
@@ -133,10 +134,10 @@ void BindNode(py::module *m) {
                self.inputs.erase(pos);
              }
            })
-      .def("inputs_append",
+      .def("append_input",
            [](Node &self, Node &node) { self.inputs.push_back(&node); })
       .def("clear_outputs", [](Node &self) { self.outputs.clear(); })
-      .def("outputs_remove",
+      .def("remove_output",
            [](Node &self, int node_id) {
              auto pos = std::find_if(
                  self.outputs.begin(), self.outputs.end(),
@@ -145,7 +146,7 @@ void BindNode(py::module *m) {
                self.outputs.erase(pos);
              }
            })
-      .def("outputs_remove",
+      .def("remove_output",
            [](Node &self, Node &node) {
              auto pos =
                  std::find(self.outputs.begin(), self.outputs.end(), &node);
@@ -153,7 +154,7 @@ void BindNode(py::module *m) {
                self.outputs.erase(pos);
              }
            })
-      .def("outputs_append",
+      .def("append_output",
            [](Node &self, Node &node) { self.outputs.push_back(&node); })
       .def_readwrite("inputs", &Node::inputs)
       .def_readwrite("outputs", &Node::outputs);
diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc
index 48fe445b7d01287c37bcf7d4811f687785ca78d5..e729be4a95a58510f1e0162af4216feaa400d971 100644
--- a/paddle/fluid/pybind/protobuf.cc
+++ b/paddle/fluid/pybind/protobuf.cc
@@ -189,8 +189,6 @@ void BindBlockDesc(pybind11::module *m) {
              return self.HasVar(name);
            },
            pybind11::return_value_policy::reference)
-      .def("_clear_block", [](pd::BlockDesc &self) { return self.Clear(); },
-           pybind11::return_value_policy::reference)
       .def("_rename_var",
            [](pd::BlockDesc &self, const pybind11::bytes &byte_name,
               const pybind11::bytes &byte_name_new) {
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index af049127aa3960cb283345e314d680a85056fbd5..6d1fc0be232ec8e43c77e64b71666fdd5af92d86 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -86,6 +86,14 @@ bool IsCompiledWithCUDA() {
 #endif
 }
 
+// Reports whether this translation unit was compiled with the
+// PADDLE_WITH_MKLDNN macro defined.
+#ifdef PADDLE_WITH_MKLDNN
+bool IsCompiledWithMKLDNN() { return true; }
+#else
+bool IsCompiledWithMKLDNN() { return false; }
+#endif
+
 bool IsCompiledWithBrpc() {
 #ifndef PADDLE_WITH_DISTRIBUTE
   return false;
@@ -169,6 +177,23 @@ PYBIND11_MODULE(core, m) {
            py::return_value_policy::take_ownership)
       .def("value", [](const imperative::VarBase &self) { return self.var_; },
            py::return_value_policy::reference)
+      .def_property("name",
+                    [](const imperative::VarBase &self) { return self.name_; },
+                    [](imperative::VarBase &self, const std::string &name) {
+                      self.name_ = name;
+                    })
+      .def_property("block",
+                    [](const imperative::VarBase &self) { return self.block_; },
+                    [](imperative::VarBase &self, framework::BlockDesc *block) {
+                      self.block_ = block;
+                    },
+                    py::return_value_policy::reference)
+      .def_property(
+          "persistable",
+          [](const imperative::VarBase &self) { return self.persistable_; },
+          [](imperative::VarBase &self, const bool persistable) {
+            self.persistable_ = persistable;
+          })
       .def_property(
           "desc",
           [](const imperative::VarBase &self) { return self.var_desc_; },
@@ -185,6 +210,10 @@ PYBIND11_MODULE(core, m) {
 
   py::class_<imperative::OpBase, PyOpBase>(m, "OpBase", R"DOC()DOC")
       .def(py::init<>())
+      .def("register_backward_hooks",
+           [](imperative::OpBase &self, const py::object &callable) {
+             self.RegisterBackwardHooks(callable);
+           })
       .def_property(
           "desc", [](const imperative::OpBase &self) { return self.op_desc_; },
           [](imperative::OpBase &self, framework::OpDesc *op_desc) {
@@ -193,6 +222,16 @@ PYBIND11_MODULE(core, m) {
             }
           },
           py::return_value_policy::reference)
+      .def_property("_trace_id",
+                    [](const imperative::OpBase &self) {
+                      pybind11::gil_scoped_release release;
+                      return self.trace_id_;
+                    },
+                    [](imperative::OpBase &self, int trace_id) {
+                      pybind11::gil_scoped_release release;
+                      self.trace_id_ = trace_id;
+                    },
+                    py::return_value_policy::reference)
       .def_property(
           "forward_id",
           [](const imperative::OpBase &self) { return self.forward_id_; },
@@ -405,11 +444,11 @@ PYBIND11_MODULE(core, m) {
            Set LoD of the LoDTensor according to recursive sequence length.
 
            For example, if recursive_sequence_lengths=[[2, 3]], meaning that
-           there are two sequences with length 2 and 3 respectively, the 
-           corresponding lod would be [[0, 2, 2+3]], i.e, [[0, 2, 5]].  
+           there are two sequences with length 2 and 3 respectively, the
+           corresponding lod would be [[0, 2, 2+3]], i.e, [[0, 2, 5]].
 
            Args:
-                recursive_sequence_lengths (List[List[int]]): sequence lengths. 
+                recursive_sequence_lengths (List[List[int]]): sequence lengths.
            )DOC")
       .def("lod",
            [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
@@ -440,7 +479,7 @@ PYBIND11_MODULE(core, m) {
            Return the sequence length of the LoDTensor corresponding to LoD.
 
            Returns:
-               out (List[List[int]): the sequence lengths. 
+               out (List[List[int]]): the sequence lengths.
            )DOC")
       .def("has_valid_recursive_sequence_lengths",
            [](LoDTensor &self) -> bool {
@@ -592,29 +631,29 @@ All parameter, weight, gradient are variables in Paddle.
            },
            py::arg("name"),
            R"DOC(
-           Find or create variable named :code:`name` in the current scope. 
+           Find or create variable named :code:`name` in the current scope.
 
-           If the variable named :code:`name` does not exist in the 
+           If the variable named :code:`name` does not exist in the
            current scope, the variable would be created. Otherwise,
-           return the existing variable. 
+           return the existing variable.
 
            Args:
-               name (str): the variable name.  
-          
+               name (str): the variable name.
+
            Returns:
-               out (core.Variable): the found or created variable. 
+               out (core.Variable): the found or created variable.
            )DOC",
            py::return_value_policy::reference)
       .def("find_var", &Scope::FindVar, py::arg("name"),
            R"DOC(
-           Find variable named :code:`name` in the current scope or 
+           Find variable named :code:`name` in the current scope or
            its parent scope. Return None if not found.
-        
+
            Args:
                name (str): the variable name.
-            
+
            Returns:
-               out (core.Variable|None): the found variable or None.   
+               out (core.Variable|None): the found variable or None.
            )DOC",
            py::return_value_policy::reference)
       .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); },
@@ -638,7 +677,7 @@ All parameter, weight, gradient are variables in Paddle.
         },
         R"DOC(
         Create a new scope.
-        
+
         Returns:
             out (core._Scope): the created scope.
         )DOC",
@@ -849,6 +888,7 @@ All parameter, weight, gradient are variables in Paddle.
         [](bool init_p2p) { framework::InitDevices(init_p2p); });
 
   m.def("is_compiled_with_cuda", IsCompiledWithCUDA);
+  m.def("is_compiled_with_mkldnn", IsCompiledWithMKLDNN);
   m.def("is_compiled_with_brpc", IsCompiledWithBrpc);
   m.def("is_compiled_with_dist", IsCompiledWithDIST);
 #ifdef PADDLE_WITH_CUDA
diff --git a/paddle/scripts/cpplint.py b/paddle/scripts/cpplint.py
deleted file mode 100644
index dff4339ea33b72e22104a56183e3302067dc583d..0000000000000000000000000000000000000000
--- a/paddle/scripts/cpplint.py
+++ /dev/null
@@ -1,6425 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright (c) 2009 Google Inc. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-#    * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-#    * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following disclaimer
-# in the documentation and/or other materials provided with the
-# distribution.
-#    * Neither the name of Google Inc. nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-"""Does google-lint on c++ files.
-
-The goal of this script is to identify places in the code that *may*
-be in non-compliance with google style.  It does not attempt to fix
-up these problems -- the point is to educate.  It does also not
-attempt to find all problems, or to ensure that everything it does
-find is legitimately a problem.
-
-In particular, we can get very confused by /* and // inside strings!
-We do a small hack, which is to ignore //'s with "'s after them on the
-same line, but it is far from perfect (in either direction).
-
-EDIT(yuyang18): Add #pragma once as include guard.
-EDIT(yuyang18): Add NOLINTNEXTLINES_ to suppress multiline lint.
-"""
-
-import codecs
-import copy
-import getopt
-import math  # for log
-import os
-import re
-import sre_compile
-import string
-import sys
-import unicodedata
-
-_USAGE = """
-Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
-                   [--counting=total|toplevel|detailed] [--root=subdir]
-                   [--linelength=digits]
-                   [--write-success=success_status_file]
-        <file> [file] ...
-
-  The style guidelines this tries to follow are those in
-    http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
-
-  Every problem is given a confidence score from 1-5, with 5 meaning we are
-  certain of the problem, and 1 meaning it could be a legitimate construct.
-  This will miss some errors, and is not a substitute for a code review.
-
-  To suppress false-positive errors of a certain category, add a
-  'NOLINT(category)' comment to the line.  NOLINT or NOLINT(*)
-  suppresses errors of all categories on that line.
-
-  The files passed in will be linted; at least one file must be provided.
-  Default linted extensions are .cc, .cpp, .cu, .cuh and .h.  Change the
-  extensions with the --extensions flag.
-
-  Flags:
-
-    output=vs7
-      By default, the output is formatted to ease emacs parsing.  Visual Studio
-      compatible output (vs7) may also be used.  Other formats are unsupported.
-
-    verbose=#
-      Specify a number 0-5 to restrict errors to certain verbosity levels.
-
-    filter=-x,+y,...
-      Specify a comma-separated list of category-filters to apply: only
-      error messages whose category names pass the filters will be printed.
-      (Category names are printed with the message and look like
-      "[whitespace/indent]".)  Filters are evaluated left to right.
-      "-FOO" and "FOO" means "do not print categories that start with FOO".
-      "+FOO" means "do print categories that start with FOO".
-
-      Examples: --filter=-whitespace,+whitespace/braces
-                --filter=whitespace,runtime/printf,+runtime/printf_format
-                --filter=-,+build/include_what_you_use
-
-      To see a list of all the categories used in cpplint, pass no arg:
-         --filter=
-
-    counting=total|toplevel|detailed
-      The total number of errors found is always printed. If
-      'toplevel' is provided, then the count of errors in each of
-      the top-level categories like 'build' and 'whitespace' will
-      also be printed. If 'detailed' is provided, then a count
-      is provided for each category like 'build/class'.
-
-    root=subdir
-      The root directory used for deriving header guard CPP variable.
-      By default, the header guard CPP variable is calculated as the relative
-      path to the directory that contains .git, .hg, or .svn.  When this flag
-      is specified, the relative path is calculated from the specified
-      directory. If the specified directory does not exist, this flag is
-      ignored.
-
-      Examples:
-        Assuming that src/.git exists, the header guard CPP variables for
-        src/chrome/browser/ui/browser.h are:
-
-        No flag => CHROME_BROWSER_UI_BROWSER_H_
-        --root=chrome => BROWSER_UI_BROWSER_H_
-        --root=chrome/browser => UI_BROWSER_H_
-
-    linelength=digits
-      This is the allowed line length for the project. The default value is
-      80 characters.
-
-      Examples:
-        --linelength=120
-
-    extensions=extension,extension,...
-      The allowed file extensions that cpplint will check
-
-      Examples:
-        --extensions=hpp,cpp
-
-    cpplint.py supports per-directory configurations specified in CPPLINT.cfg
-    files. CPPLINT.cfg file can contain a number of key=value pairs.
-    Currently the following options are supported:
-
-      set noparent
-      filter=+filter1,-filter2,...
-      exclude_files=regex
-      linelength=80
-
-    "set noparent" option prevents cpplint from traversing directory tree
-    upwards looking for more .cfg files in parent directories. This option
-    is usually placed in the top-level project directory.
-
-    The "filter" option is similar in function to --filter flag. It specifies
-    message filters in addition to the |_DEFAULT_FILTERS| and those specified
-    through --filter command-line flag.
-
-    "exclude_files" allows to specify a regular expression to be matched against
-    a file name. If the expression matches, the file is skipped and not run
-    through liner.
-
-    "linelength" allows to specify the allowed line length for the project.
-
-    CPPLINT.cfg has an effect on files in the same directory and all
-    sub-directories, unless overridden by a nested configuration file.
-
-      Example file:
-        filter=-build/include_order,+build/include_alpha
-        exclude_files=.*\.cc
-
-    The above example disables build/include_order warning and enables
-    build/include_alpha as well as excludes all .cc from being
-    processed by linter, in the current directory (where the .cfg
-    file is located) and all sub-directories.
-"""
-
-# We categorize each error message we print.  Here are the categories.
-# We want an explicit list so we can list them all in cpplint --filter=.
-# If you add a new error message with a new category, add it to the list
-# here!  cpplint_unittest.py should tell you if you forget to do this.
-_ERROR_CATEGORIES = [
-    'build/class',
-    'build/c++11',
-    'build/deprecated',
-    'build/endif_comment',
-    'build/explicit_make_pair',
-    'build/forward_decl',
-    'build/header_guard',
-    'build/include',
-    'build/include_alpha',
-    'build/include_order',
-    'build/include_what_you_use',
-    'build/namespaces',
-    'build/printf_format',
-    'build/storage_class',
-    'legal/copyright',
-    'readability/alt_tokens',
-    'readability/braces',
-    'readability/casting',
-    'readability/check',
-    'readability/constructors',
-    'readability/fn_size',
-    'readability/function',
-    'readability/inheritance',
-    'readability/multiline_comment',
-    'readability/multiline_string',
-    'readability/namespace',
-    'readability/nolint',
-    'readability/nul',
-    'readability/strings',
-    'readability/todo',
-    'readability/utf8',
-    'runtime/arrays',
-    'runtime/casting',
-    'runtime/explicit',
-    'runtime/int',
-    'runtime/init',
-    'runtime/invalid_increment',
-    'runtime/member_string_references',
-    'runtime/memset',
-    'runtime/indentation_namespace',
-    'runtime/operator',
-    'runtime/printf',
-    'runtime/printf_format',
-    'runtime/references',
-    'runtime/string',
-    'runtime/threadsafe_fn',
-    'runtime/vlog',
-    'whitespace/blank_line',
-    'whitespace/braces',
-    'whitespace/comma',
-    'whitespace/comments',
-    'whitespace/empty_conditional_body',
-    'whitespace/empty_loop_body',
-    'whitespace/end_of_line',
-    'whitespace/ending_newline',
-    'whitespace/forcolon',
-    'whitespace/indent',
-    'whitespace/line_length',
-    'whitespace/newline',
-    'whitespace/operators',
-    'whitespace/parens',
-    'whitespace/semicolon',
-    'whitespace/tab',
-    'whitespace/todo',
-]
-
-# These error categories are no longer enforced by cpplint, but for backwards-
-# compatibility they may still appear in NOLINT comments.
-_LEGACY_ERROR_CATEGORIES = ['readability/streams', ]
-
-# The default state of the category filter. This is overridden by the --filter=
-# flag. By default all errors are on, so only add here categories that should be
-# off by default (i.e., categories that must be enabled by the --filter= flags).
-# All entries here should start with a '-' or '+', as in the --filter= flag.
-_DEFAULT_FILTERS = ['-build/include_alpha']
-
-# We used to check for high-bit characters, but after much discussion we
-# decided those were OK, as long as they were in UTF-8 and didn't represent
-# hard-coded international strings, which belong in a separate i18n file.
-
-# C++ headers
-_CPP_HEADERS = frozenset([
-    # Legacy
-    'algobase.h',
-    'algo.h',
-    'alloc.h',
-    'builtinbuf.h',
-    'bvector.h',
-    'complex.h',
-    'defalloc.h',
-    'deque.h',
-    'editbuf.h',
-    'fstream.h',
-    'function.h',
-    'hash_map',
-    'hash_map.h',
-    'hash_set',
-    'hash_set.h',
-    'hashtable.h',
-    'heap.h',
-    'indstream.h',
-    'iomanip.h',
-    'iostream.h',
-    'istream.h',
-    'iterator.h',
-    'list.h',
-    'map.h',
-    'multimap.h',
-    'multiset.h',
-    'ostream.h',
-    'pair.h',
-    'parsestream.h',
-    'pfstream.h',
-    'procbuf.h',
-    'pthread_alloc',
-    'pthread_alloc.h',
-    'rope',
-    'rope.h',
-    'ropeimpl.h',
-    'set.h',
-    'slist',
-    'slist.h',
-    'stack.h',
-    'stdiostream.h',
-    'stl_alloc.h',
-    'stl_relops.h',
-    'streambuf.h',
-    'stream.h',
-    'strfile.h',
-    'strstream.h',
-    'tempbuf.h',
-    'tree.h',
-    'type_traits.h',
-    'vector.h',
-    # 17.6.1.2 C++ library headers
-    'algorithm',
-    'array',
-    'atomic',
-    'bitset',
-    'chrono',
-    'codecvt',
-    'complex',
-    'condition_variable',
-    'deque',
-    'exception',
-    'forward_list',
-    'fstream',
-    'functional',
-    'future',
-    'initializer_list',
-    'iomanip',
-    'ios',
-    'iosfwd',
-    'iostream',
-    'istream',
-    'iterator',
-    'limits',
-    'list',
-    'locale',
-    'map',
-    'memory',
-    'mutex',
-    'new',
-    'numeric',
-    'ostream',
-    'queue',
-    'random',
-    'ratio',
-    'regex',
-    'set',
-    'sstream',
-    'stack',
-    'stdexcept',
-    'streambuf',
-    'string',
-    'strstream',
-    'system_error',
-    'thread',
-    'tuple',
-    'typeindex',
-    'typeinfo',
-    'type_traits',
-    'unordered_map',
-    'unordered_set',
-    'utility',
-    'valarray',
-    'vector',
-    # 17.6.1.2 C++ headers for C library facilities
-    'cassert',
-    'ccomplex',
-    'cctype',
-    'cerrno',
-    'cfenv',
-    'cfloat',
-    'cinttypes',
-    'ciso646',
-    'climits',
-    'clocale',
-    'cmath',
-    'csetjmp',
-    'csignal',
-    'cstdalign',
-    'cstdarg',
-    'cstdbool',
-    'cstddef',
-    'cstdint',
-    'cstdio',
-    'cstdlib',
-    'cstring',
-    'ctgmath',
-    'ctime',
-    'cuchar',
-    'cwchar',
-    'cwctype',
-])
-
-# These headers are excluded from [build/include] and [build/include_order]
-# checks:
-# - Anything not following google file name conventions (containing an
-#   uppercase character, such as Python.h or nsStringAPI.h, for example).
-# - Lua headers.
-_THIRD_PARTY_HEADERS_PATTERN = re.compile(
-    r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$')
-
-# Assertion macros.  These are defined in base/logging.h and
-# testing/base/gunit.h.  Note that the _M versions need to come first
-# for substring matching to work.
-_CHECK_MACROS = [
-    'DCHECK',
-    'CHECK',
-    'EXPECT_TRUE_M',
-    'EXPECT_TRUE',
-    'ASSERT_TRUE_M',
-    'ASSERT_TRUE',
-    'EXPECT_FALSE_M',
-    'EXPECT_FALSE',
-    'ASSERT_FALSE_M',
-    'ASSERT_FALSE',
-]
-
-# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
-_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
-
-for op, replacement in [('==', 'EQ'), ('!=', 'NE'), ('>=', 'GE'), ('>', 'GT'),
-                        ('<=', 'LE'), ('<', 'LT')]:
-    _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
-    _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
-    _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
-    _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
-    _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
-    _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
-
-for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), ('>=', 'LT'),
-                            ('>', 'LE'), ('<=', 'GT'), ('<', 'GE')]:
-    _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
-    _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
-    _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
-    _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
-
-# Alternative tokens and their replacements.  For full list, see section 2.5
-# Alternative tokens [lex.digraph] in the C++ standard.
-#
-# Digraphs (such as '%:') are not included here since it's a mess to
-# match those on a word boundary.
-_ALT_TOKEN_REPLACEMENT = {
-    'and': '&&',
-    'bitor': '|',
-    'or': '||',
-    'xor': '^',
-    'compl': '~',
-    'bitand': '&',
-    'and_eq': '&=',
-    'or_eq': '|=',
-    'xor_eq': '^=',
-    'not': '!',
-    'not_eq': '!='
-}
-
-# Compile regular expression that matches all the above keywords.  The "[ =()]"
-# bit is meant to avoid matching these keywords outside of boolean expressions.
-#
-# False positives include C-style multi-line comments and multi-line strings
-# but those have always been troublesome for cpplint.
-_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(r'[ =()](' + ('|'.join(
-    _ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
-
-# These constants define types of headers for use with
-# _IncludeState.CheckNextIncludeOrder().
-_C_SYS_HEADER = 1
-_CPP_SYS_HEADER = 2
-_LIKELY_MY_HEADER = 3
-_POSSIBLE_MY_HEADER = 4
-_OTHER_HEADER = 5
-
-# These constants define the current inline assembly state
-_NO_ASM = 0  # Outside of inline assembly block
-_INSIDE_ASM = 1  # Inside inline assembly block
-_END_ASM = 2  # Last line of inline assembly block
-_BLOCK_ASM = 3  # The whole block is an inline assembly block
-
-# Match start of assembly blocks
-_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
-                        r'(?:\s+(volatile|__volatile__))?'
-                        r'\s*[{(]')
-
-_regexp_compile_cache = {}
-
-# {str, set(int)}: a map from error categories to sets of linenumbers
-# on which those errors are expected and should be suppressed.
-_error_suppressions = {}
-
-# The root directory used for deriving header guard CPP variable.
-# This is set by --root flag.
-_root = None
-
-# The allowed line length of files.
-# This is set by --linelength flag.
-_line_length = 80
-
-# The allowed extensions for file names
-# This is set by --extensions flag.
-_valid_extensions = set(['cc', 'h', 'cpp', 'cu', 'cuh'])
-
-_write_success = None
-
-
-def ParseNolintSuppressions(filename, raw_line, linenum, error):
-    """Updates the global list of error-suppressions.
-
-  Parses any NOLINT comments on the current line, updating the global
-  error_suppressions store.  Reports an error if the NOLINT comment
-  was malformed.
-
-  Args:
-    filename: str, the name of the input file.
-    raw_line: str, the line of input text, with comments.
-    linenum: int, the number of the current line.
-    error: function, an error handler.
-  """
-    matched = Search(r'\bNOLINT(NEXTLINE(S_\d+)?)?\b(\([^)]+\))?', raw_line)
-    if matched:
-        if matched.group(1):
-            lines = matched.group(2)
-            if lines:
-                lines = int(lines[2:])
-                suppressed_line = [linenum + i for i in xrange(lines)]
-            else:
-                suppressed_line = linenum + 1
-        else:
-            suppressed_line = linenum
-        category = matched.group(3)
-        if category in (None, '(*)'):  # => "suppress all"
-            if isinstance(suppressed_line, int):
-                _error_suppressions.setdefault(None, set()).add(suppressed_line)
-            else:
-                for _line in suppressed_line:
-                    _error_suppressions.setdefault(None, set()).add(_line)
-        else:
-            if category.startswith('(') and category.endswith(')'):
-                category = category[1:-1]
-                if category in _ERROR_CATEGORIES:
-                    if isinstance(suppressed_line, int):
-                        _error_suppressions.setdefault(
-                            category, set()).add(suppressed_line)
-                    else:
-                        for _line in suppressed_line:
-                            _error_suppressions.setdefault(category,
-                                                           set()).add(_line)
-                elif category not in _LEGACY_ERROR_CATEGORIES:
-                    error(filename, linenum, 'readability/nolint', 5,
-                          'Unknown NOLINT error category: %s' % category)
-
-
-def ResetNolintSuppressions():
-    """Resets the set of NOLINT suppressions to empty."""
-    _error_suppressions.clear()
-
-
-def IsErrorSuppressedByNolint(category, linenum):
-    """Returns true if the specified error category is suppressed on this line.
-
-  Consults the global error_suppressions map populated by
-  ParseNolintSuppressions/ResetNolintSuppressions.
-
-  Args:
-    category: str, the category of the error.
-    linenum: int, the current line number.
-  Returns:
-    bool, True iff the error should be suppressed due to a NOLINT comment.
-  """
-    return (linenum in _error_suppressions.get(category, set()) or
-            linenum in _error_suppressions.get(None, set()))
-
-
-def Match(pattern, s):
-    """Matches the string with the pattern, caching the compiled regexp."""
-    # The regexp compilation caching is inlined in both Match and Search for
-    # performance reasons; factoring it out into a separate function turns out
-    # to be noticeably expensive.
-    if pattern not in _regexp_compile_cache:
-        _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
-    return _regexp_compile_cache[pattern].match(s)
-
-
-def ReplaceAll(pattern, rep, s):
-    """Replaces instances of pattern in a string with a replacement.
-
-  The compiled regex is kept in a cache shared by Match and Search.
-
-  Args:
-    pattern: regex pattern
-    rep: replacement text
-    s: search string
-
-  Returns:
-    string with replacements made (or original string if no replacements)
-  """
-    if pattern not in _regexp_compile_cache:
-        _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
-    return _regexp_compile_cache[pattern].sub(rep, s)
-
-
-def Search(pattern, s):
-    """Searches the string for the pattern, caching the compiled regexp."""
-    if pattern not in _regexp_compile_cache:
-        _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
-    return _regexp_compile_cache[pattern].search(s)
-
-
-class _IncludeState(object):
-    """Tracks line numbers for includes, and the order in which includes appear.
-
-  include_list contains list of lists of (header, line number) pairs.
-  It's a lists of lists rather than just one flat list to make it
-  easier to update across preprocessor boundaries.
-
-  Call CheckNextIncludeOrder() once for each header in the file, passing
-  in the type constants defined above. Calls in an illegal order will
-  raise an _IncludeError with an appropriate error message.
-
-  """
-    # self._section will move monotonically through this set. If it ever
-    # needs to move backwards, CheckNextIncludeOrder will raise an error.
-    _INITIAL_SECTION = 0
-    _MY_H_SECTION = 1
-    _C_SECTION = 2
-    _CPP_SECTION = 3
-    _OTHER_H_SECTION = 4
-
-    _TYPE_NAMES = {
-        _C_SYS_HEADER: 'C system header',
-        _CPP_SYS_HEADER: 'C++ system header',
-        _LIKELY_MY_HEADER: 'header this file implements',
-        _POSSIBLE_MY_HEADER: 'header this file may implement',
-        _OTHER_HEADER: 'other header',
-    }
-    _SECTION_NAMES = {
-        _INITIAL_SECTION: "... nothing. (This can't be an error.)",
-        _MY_H_SECTION: 'a header this file implements',
-        _C_SECTION: 'C system header',
-        _CPP_SECTION: 'C++ system header',
-        _OTHER_H_SECTION: 'other header',
-    }
-
-    def __init__(self):
-        self.include_list = [[]]
-        self.ResetSection('')
-
-    def FindHeader(self, header):
-        """Check if a header has already been included.
-
-    Args:
-      header: header to check.
-    Returns:
-      Line number of previous occurrence, or -1 if the header has not
-      been seen before.
-    """
-        for section_list in self.include_list:
-            for f in section_list:
-                if f[0] == header:
-                    return f[1]
-        return -1
-
-    def ResetSection(self, directive):
-        """Reset section checking for preprocessor directive.
-
-    Args:
-      directive: preprocessor directive (e.g. "if", "else").
-    """
-        # The name of the current section.
-        self._section = self._INITIAL_SECTION
-        # The path of last found header.
-        self._last_header = ''
-
-        # Update list of includes.  Note that we never pop from the
-        # include list.
-        if directive in ('if', 'ifdef', 'ifndef'):
-            self.include_list.append([])
-        elif directive in ('else', 'elif'):
-            self.include_list[-1] = []
-
-    def SetLastHeader(self, header_path):
-        self._last_header = header_path
-
-    def CanonicalizeAlphabeticalOrder(self, header_path):
-        """Returns a path canonicalized for alphabetical comparison.
-
-    - replaces "-" with "_" so they both cmp the same.
-    - removes '-inl' since we don't require them to be after the main header.
-    - lowercase everything, just in case.
-
-    Args:
-      header_path: Path to be canonicalized.
-
-    Returns:
-      Canonicalized path.
-    """
-        return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
-
-    def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
-        """Check if a header is in alphabetical order with the previous header.
-
-    Args:
-      clean_lines: A CleansedLines instance containing the file.
-      linenum: The number of the line to check.
-      header_path: Canonicalized header to be checked.
-
-    Returns:
-      Returns true if the header is in alphabetical order.
-    """
-        # If previous section is different from current section, _last_header will
-        # be reset to empty string, so it's always less than current header.
-        #
-        # If previous line was a blank line, assume that the headers are
-        # intentionally sorted the way they are.
-        if (self._last_header > header_path and
-                Match(r'^\s*#\s*include\b', clean_lines.elided[linenum - 1])):
-            return False
-        return True
-
-    def CheckNextIncludeOrder(self, header_type):
-        """Returns a non-empty error message if the next header is out of order.
-
-    This function also updates the internal state to be ready to check
-    the next include.
-
-    Args:
-      header_type: One of the _XXX_HEADER constants defined above.
-
-    Returns:
-      The empty string if the header is in the right order, or an
-      error message describing what's wrong.
-
-    """
-        error_message = ('Found %s after %s' % (
-            self._TYPE_NAMES[header_type], self._SECTION_NAMES[self._section]))
-
-        last_section = self._section
-
-        if header_type == _C_SYS_HEADER:
-            if self._section <= self._C_SECTION:
-                self._section = self._C_SECTION
-            else:
-                self._last_header = ''
-                return error_message
-        elif header_type == _CPP_SYS_HEADER:
-            if self._section <= self._CPP_SECTION:
-                self._section = self._CPP_SECTION
-            else:
-                self._last_header = ''
-                return error_message
-        elif header_type == _LIKELY_MY_HEADER:
-            if self._section <= self._MY_H_SECTION:
-                self._section = self._MY_H_SECTION
-            else:
-                self._section = self._OTHER_H_SECTION
-        elif header_type == _POSSIBLE_MY_HEADER:
-            if self._section <= self._MY_H_SECTION:
-                self._section = self._MY_H_SECTION
-            else:
-                # This will always be the fallback because we're not sure
-                # enough that the header is associated with this file.
-                self._section = self._OTHER_H_SECTION
-        else:
-            assert header_type == _OTHER_HEADER
-            self._section = self._OTHER_H_SECTION
-
-        if last_section != self._section:
-            self._last_header = ''
-
-        return ''
-
-
-class _CppLintState(object):
-    """Maintains module-wide state.."""
-
-    def __init__(self):
-        self.verbose_level = 1  # global setting.
-        self.error_count = 0  # global count of reported errors
-        # filters to apply when emitting error messages
-        self.filters = _DEFAULT_FILTERS[:]
-        # backup of filter list. Used to restore the state after each file.
-        self._filters_backup = self.filters[:]
-        self.counting = 'total'  # In what way are we counting errors?
-        self.errors_by_category = {}  # string to int dict storing error counts
-
-        # output format:
-        # "emacs" - format that emacs can parse (default)
-        # "vs7" - format that Microsoft Visual Studio 7 can parse
-        self.output_format = 'emacs'
-
-    def SetOutputFormat(self, output_format):
-        """Sets the output format for errors."""
-        self.output_format = output_format
-
-    def SetVerboseLevel(self, level):
-        """Sets the module's verbosity, and returns the previous setting."""
-        last_verbose_level = self.verbose_level
-        self.verbose_level = level
-        return last_verbose_level
-
-    def SetCountingStyle(self, counting_style):
-        """Sets the module's counting options."""
-        self.counting = counting_style
-
-    def SetFilters(self, filters):
-        """Sets the error-message filters.
-
-    These filters are applied when deciding whether to emit a given
-    error message.
-
-    Args:
-      filters: A string of comma-separated filters (eg "+whitespace/indent").
-               Each filter should start with + or -; else we die.
-
-    Raises:
-      ValueError: The comma-separated filters did not all start with '+' or '-'.
-                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
-    """
-        # Default filters always have less priority than the flag ones.
-        self.filters = _DEFAULT_FILTERS[:]
-        self.AddFilters(filters)
-
-    def AddFilters(self, filters):
-        """ Adds more filters to the existing list of error-message filters. """
-        for filt in filters.split(','):
-            clean_filt = filt.strip()
-            if clean_filt:
-                self.filters.append(clean_filt)
-        for filt in self.filters:
-            if not (filt.startswith('+') or filt.startswith('-')):
-                raise ValueError(
-                    'Every filter in --filters must start with + or -'
-                    ' (%s does not)' % filt)
-
-    def BackupFilters(self):
-        """ Saves the current filter list to backup storage."""
-        self._filters_backup = self.filters[:]
-
-    def RestoreFilters(self):
-        """ Restores filters previously backed up."""
-        self.filters = self._filters_backup[:]
-
-    def ResetErrorCounts(self):
-        """Sets the module's error statistic back to zero."""
-        self.error_count = 0
-        self.errors_by_category = {}
-
-    def IncrementErrorCount(self, category):
-        """Bumps the module's error statistic."""
-        self.error_count += 1
-        if self.counting in ('toplevel', 'detailed'):
-            if self.counting != 'detailed':
-                category = category.split('/')[0]
-            if category not in self.errors_by_category:
-                self.errors_by_category[category] = 0
-            self.errors_by_category[category] += 1
-
-    def PrintErrorCounts(self):
-        """Print a summary of errors by category, and the total."""
-        for category, count in self.errors_by_category.iteritems():
-            sys.stdout.write('Category \'%s\' errors found: %d\n' %
-                             (category, count))
-        sys.stdout.write('Total errors found: %d\n' % self.error_count)
-
-
-_cpplint_state = _CppLintState()
-
-
-def _OutputFormat():
-    """Gets the module's output format."""
-    return _cpplint_state.output_format
-
-
-def _SetOutputFormat(output_format):
-    """Sets the module's output format."""
-    _cpplint_state.SetOutputFormat(output_format)
-
-
-def _VerboseLevel():
-    """Returns the module's verbosity setting."""
-    return _cpplint_state.verbose_level
-
-
-def _SetVerboseLevel(level):
-    """Sets the module's verbosity, and returns the previous setting."""
-    return _cpplint_state.SetVerboseLevel(level)
-
-
-def _SetCountingStyle(level):
-    """Sets the module's counting options."""
-    _cpplint_state.SetCountingStyle(level)
-
-
-def _Filters():
-    """Returns the module's list of output filters, as a list."""
-    return _cpplint_state.filters
-
-
-def _SetFilters(filters):
-    """Sets the module's error-message filters.
-
-  These filters are applied when deciding whether to emit a given
-  error message.
-
-  Args:
-    filters: A string of comma-separated filters (eg "whitespace/indent").
-             Each filter should start with + or -; else we die.
-  """
-    _cpplint_state.SetFilters(filters)
-
-
-def _AddFilters(filters):
-    """Adds more filter overrides.
-
-  Unlike _SetFilters, this function does not reset the current list of filters
-  available.
-
-  Args:
-    filters: A string of comma-separated filters (eg "whitespace/indent").
-             Each filter should start with + or -; else we die.
-  """
-    _cpplint_state.AddFilters(filters)
-
-
-def _BackupFilters():
-    """ Saves the current filter list to backup storage."""
-    _cpplint_state.BackupFilters()
-
-
-def _RestoreFilters():
-    """ Restores filters previously backed up."""
-    _cpplint_state.RestoreFilters()
-
-
-class _FunctionState(object):
-    """Tracks current function name and the number of lines in its body."""
-
-    _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
-    _TEST_TRIGGER = 400  # about 50% more than _NORMAL_TRIGGER.
-
-    def __init__(self):
-        self.in_a_function = False
-        self.lines_in_function = 0
-        self.current_function = ''
-
-    def Begin(self, function_name):
-        """Start analyzing function body.
-
-    Args:
-      function_name: The name of the function being tracked.
-    """
-        self.in_a_function = True
-        self.lines_in_function = 0
-        self.current_function = function_name
-
-    def Count(self):
-        """Count line in current function body."""
-        if self.in_a_function:
-            self.lines_in_function += 1
-
-    def Check(self, error, filename, linenum):
-        """Report if too many lines in function body.
-
-    Args:
-      error: The function to call with any errors found.
-      filename: The name of the current file.
-      linenum: The number of the line to check.
-    """
-        if Match(r'T(EST|est)', self.current_function):
-            base_trigger = self._TEST_TRIGGER
-        else:
-            base_trigger = self._NORMAL_TRIGGER
-        trigger = base_trigger * 2**_VerboseLevel()
-
-        if self.lines_in_function > trigger:
-            error_level = int(
-                math.log(self.lines_in_function / base_trigger, 2))
-            # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
-            if error_level > 5:
-                error_level = 5
-            error(filename, linenum, 'readability/fn_size', error_level,
-                  'Small and focused functions are preferred:'
-                  ' %s has %d non-comment lines'
-                  ' (error triggered by exceeding %d lines).' % (
-                      self.current_function, self.lines_in_function, trigger))
-
-    def End(self):
-        """Stop analyzing function body."""
-        self.in_a_function = False
-
-
-class _IncludeError(Exception):
-    """Indicates a problem with the include order in a file."""
-    pass
-
-
-class FileInfo(object):
-    """Provides utility functions for filenames.
-
-  FileInfo provides easy access to the components of a file's path
-  relative to the project root.
-  """
-
-    def __init__(self, filename):
-        self._filename = filename
-
-    def FullName(self):
-        """Make Windows paths like Unix."""
-        return os.path.abspath(self._filename).replace('\\', '/')
-
-    def RepositoryName(self):
-        """FullName after removing the local path to the repository.
-
-    If we have a real absolute path name here we can try to do something smart:
-    detecting the root of the checkout and truncating /path/to/checkout from
-    the name so that we get header guards that don't include things like
-    "C:\Documents and Settings\..." or "/home/username/..." in them and thus
-    people on different computers who have checked the source out to different
-    locations won't see bogus errors.
-    """
-        fullname = self.FullName()
-
-        if os.path.exists(fullname):
-            project_dir = os.path.dirname(fullname)
-
-            if os.path.exists(os.path.join(project_dir, ".svn")):
-                # If there's a .svn file in the current directory, we recursively look
-                # up the directory tree for the top of the SVN checkout
-                root_dir = project_dir
-                one_up_dir = os.path.dirname(root_dir)
-                while os.path.exists(os.path.join(one_up_dir, ".svn")):
-                    root_dir = os.path.dirname(root_dir)
-                    one_up_dir = os.path.dirname(one_up_dir)
-
-                prefix = os.path.commonprefix([root_dir, project_dir])
-                return fullname[len(prefix) + 1:]
-
-            # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
-            # searching up from the current path.
-            root_dir = os.path.dirname(fullname)
-            while (root_dir != os.path.dirname(root_dir) and
-                   not os.path.exists(os.path.join(root_dir, ".git")) and
-                   not os.path.exists(os.path.join(root_dir, ".hg")) and
-                   not os.path.exists(os.path.join(root_dir, ".svn"))):
-                root_dir = os.path.dirname(root_dir)
-
-            if (os.path.exists(os.path.join(root_dir, ".git")) or
-                    os.path.exists(os.path.join(root_dir, ".hg")) or
-                    os.path.exists(os.path.join(root_dir, ".svn"))):
-                prefix = os.path.commonprefix([root_dir, project_dir])
-                return fullname[len(prefix) + 1:]
-
-        # Don't know what to do; header guard warnings may be wrong...
-        return fullname
-
-    def Split(self):
-        """Splits the file into the directory, basename, and extension.
-
-    For 'chrome/browser/browser.cc', Split() would
-    return ('chrome/browser', 'browser', '.cc')
-
-    Returns:
-      A tuple of (directory, basename, extension).
-    """
-
-        googlename = self.RepositoryName()
-        project, rest = os.path.split(googlename)
-        return (project, ) + os.path.splitext(rest)
-
-    def BaseName(self):
-        """File base name - text after the final slash, before the final period."""
-        return self.Split()[1]
-
-    def Extension(self):
-        """File extension - text following the final period."""
-        return self.Split()[2]
-
-    def NoExtension(self):
-        """File has no source file extension."""
-        return '/'.join(self.Split()[0:2])
-
-    def IsSource(self):
-        """File has a source file extension."""
-        return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
-
-
-def _ShouldPrintError(category, confidence, linenum):
-    """If confidence >= verbose, category passes filter and is not suppressed."""
-
-    # There are three ways we might decide not to print an error message:
-    # a "NOLINT(category)" comment appears in the source,
-    # the verbosity level isn't high enough, or the filters filter it out.
-    if IsErrorSuppressedByNolint(category, linenum):
-        return False
-
-    if confidence < _cpplint_state.verbose_level:
-        return False
-
-    is_filtered = False
-    for one_filter in _Filters():
-        if one_filter.startswith('-'):
-            if category.startswith(one_filter[1:]):
-                is_filtered = True
-        elif one_filter.startswith('+'):
-            if category.startswith(one_filter[1:]):
-                is_filtered = False
-        else:
-            assert False  # should have been checked for in SetFilter.
-    if is_filtered:
-        return False
-
-    return True
-
-
-def Error(filename, linenum, category, confidence, message):
-    """Logs the fact we've found a lint error.
-
-  We log where the error was found, and also our confidence in the error,
-  that is, how certain we are this is a legitimate style regression, and
-  not a misidentification or a use that's sometimes justified.
-
-  False positives can be suppressed by the use of
-  "cpplint(category)"  comments on the offending line.  These are
-  parsed into _error_suppressions.
-
-  Args:
-    filename: The name of the file containing the error.
-    linenum: The number of the line containing the error.
-    category: A string used to describe the "category" this bug
-      falls under: "whitespace", say, or "runtime".  Categories
-      may have a hierarchy separated by slashes: "whitespace/indent".
-    confidence: A number from 1-5 representing a confidence score for
-      the error, with 5 meaning that we are certain of the problem,
-      and 1 meaning that it could be a legitimate construct.
-    message: The error message.
-  """
-    if _ShouldPrintError(category, confidence, linenum):
-        _cpplint_state.IncrementErrorCount(category)
-        if _cpplint_state.output_format == 'vs7':
-            sys.stderr.write('%s(%s):  %s  [%s] [%d]\n' %
-                             (filename, linenum, message, category, confidence))
-        elif _cpplint_state.output_format == 'eclipse':
-            sys.stderr.write('%s:%s: warning: %s  [%s] [%d]\n' %
-                             (filename, linenum, message, category, confidence))
-        else:
-            sys.stderr.write('%s:%s:  %s  [%s] [%d]\n' %
-                             (filename, linenum, message, category, confidence))
-
-
-# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
-_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
-    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
-# Match a single C style comment on the same line.
-_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/'
-# Matches multi-line C style comments.
-# This RE is a little bit more complicated than one might expect, because we
-# have to take care of space removals tools so we can handle comments inside
-# statements better.
-# The current rule is: We only clear spaces from both sides when we're at the
-# end of the line. Otherwise, we try to remove spaces from the right side,
-# if this doesn't work we try on left side but only if there's a non-character
-# on the right.
-_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
-    r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' + _RE_PATTERN_C_COMMENTS +
-    r'\s+|' + r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' +
-    _RE_PATTERN_C_COMMENTS + r')')
-
-
-def IsCppString(line):
-    """Does line terminate so, that the next symbol is in string constant.
-
-  This function does not consider single-line nor multi-line comments.
-
-  Args:
-    line: is a partial line of code starting from the 0..n.
-
-  Returns:
-    True, if next character appended to 'line' is inside a
-    string constant.
-  """
-
-    line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
-    return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
-
-
-def CleanseRawStrings(raw_lines):
-    """Removes C++11 raw strings from lines.
-
-    Before:
-      static const char kData[] = R"(
-          multi-line string
-          )";
-
-    After:
-      static const char kData[] = ""
-          (replaced by blank line)
-          "";
-
-  Args:
-    raw_lines: list of raw lines.
-
-  Returns:
-    list of lines with C++11 raw strings replaced by empty strings.
-  """
-
-    delimiter = None
-    lines_without_raw_strings = []
-    for line in raw_lines:
-        if delimiter:
-            # Inside a raw string, look for the end
-            end = line.find(delimiter)
-            if end >= 0:
-                # Found the end of the string, match leading space for this
-                # line and resume copying the original lines, and also insert
-                # a "" on the last line.
-                leading_space = Match(r'^(\s*)\S', line)
-                line = leading_space.group(1) + '""' + line[end + len(
-                    delimiter):]
-                delimiter = None
-            else:
-                # Haven't found the end yet, append a blank line.
-                line = '""'
-
-        # Look for beginning of a raw string, and replace them with
-        # empty strings.  This is done in a loop to handle multiple raw
-        # strings on the same line.
-        while delimiter is None:
-            # Look for beginning of a raw string.
-            # See 2.14.15 [lex.string] for syntax.
-            matched = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$',
-                            line)
-            if matched:
-                delimiter = ')' + matched.group(2) + '"'
-
-                end = matched.group(3).find(delimiter)
-                if end >= 0:
-                    # Raw string ended on same line
-                    line = (matched.group(1) + '""' +
-                            matched.group(3)[end + len(delimiter):])
-                    delimiter = None
-                else:
-                    # Start of a multi-line raw string
-                    line = matched.group(1) + '""'
-            else:
-                break
-
-        lines_without_raw_strings.append(line)
-
-    # TODO(unknown): if delimiter is not None here, we might want to
-    # emit a warning for unterminated string.
-    return lines_without_raw_strings
-
-
-def FindNextMultiLineCommentStart(lines, lineix):
-    """Find the beginning marker for a multiline comment."""
-    while lineix < len(lines):
-        if lines[lineix].strip().startswith('/*'):
-            # Only return this marker if the comment goes beyond this line
-            if lines[lineix].strip().find('*/', 2) < 0:
-                return lineix
-        lineix += 1
-    return len(lines)
-
-
-def FindNextMultiLineCommentEnd(lines, lineix):
-    """We are inside a comment, find the end marker."""
-    while lineix < len(lines):
-        if lines[lineix].strip().endswith('*/'):
-            return lineix
-        lineix += 1
-    return len(lines)
-
-
-def RemoveMultiLineCommentsFromRange(lines, begin, end):
-    """Clears a range of lines for multi-line comments."""
-    # Having // dummy comments makes the lines non-empty, so we will not get
-    # unnecessary blank line warnings later in the code.
-    for i in range(begin, end):
-        lines[i] = '/**/'
-
-
-def RemoveMultiLineComments(filename, lines, error):
-    """Removes multiline (c-style) comments from lines."""
-    lineix = 0
-    while lineix < len(lines):
-        lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
-        if lineix_begin >= len(lines):
-            return
-        lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
-        if lineix_end >= len(lines):
-            error(filename, lineix_begin + 1, 'readability/multiline_comment',
-                  5, 'Could not find end of multi-line comment')
-            return
-        RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
-        lineix = lineix_end + 1
-
-
-def CleanseComments(line):
-    """Removes //-comments and single-line C-style /* */ comments.
-
-  Args:
-    line: A line of C++ source.
-
-  Returns:
-    The line with single-line comments removed.
-  """
-    commentpos = line.find('//')
-    if commentpos != -1 and not IsCppString(line[:commentpos]):
-        line = line[:commentpos].rstrip()
-    # get rid of /* ... */
-    return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
-
-
-class CleansedLines(object):
-    """Holds 4 copies of all lines with different preprocessing applied to them.
-
-  1) elided member contains lines without strings and comments.
-  2) lines member contains lines without comments.
-  3) raw_lines member contains all the lines without processing.
-  4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw
-     strings removed.
-  All these members are of <type 'list'>, and of the same length.
-  """
-
-    def __init__(self, lines):
-        self.elided = []
-        self.lines = []
-        self.raw_lines = lines
-        self.num_lines = len(lines)
-        self.lines_without_raw_strings = CleanseRawStrings(lines)
-        for linenum in range(len(self.lines_without_raw_strings)):
-            self.lines.append(
-                CleanseComments(self.lines_without_raw_strings[linenum]))
-            elided = self._CollapseStrings(self.lines_without_raw_strings[
-                linenum])
-            self.elided.append(CleanseComments(elided))
-
-    def NumLines(self):
-        """Returns the number of lines represented."""
-        return self.num_lines
-
-    @staticmethod
-    def _CollapseStrings(elided):
-        """Collapses strings and chars on a line to simple "" or '' blocks.
-
-    We nix strings first so we're not fooled by text like '"http://"'
-
-    Args:
-      elided: The line being processed.
-
-    Returns:
-      The line with collapsed strings.
-    """
-        if _RE_PATTERN_INCLUDE.match(elided):
-            return elided
-
-        # Remove escaped characters first to make quote/single quote collapsing
-        # basic.  Things that look like escaped characters shouldn't occur
-        # outside of strings and chars.
-        elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
-
-        # Replace quoted strings and digit separators.  Both single quotes
-        # and double quotes are processed in the same loop, otherwise
-        # nested quotes wouldn't work.
-        collapsed = ''
-        while True:
-            # Find the first quote character
-            match = Match(r'^([^\'"]*)([\'"])(.*)$', elided)
-            if not match:
-                collapsed += elided
-                break
-            head, quote, tail = match.groups()
-
-            if quote == '"':
-                # Collapse double quoted strings
-                second_quote = tail.find('"')
-                if second_quote >= 0:
-                    collapsed += head + '""'
-                    elided = tail[second_quote + 1:]
-                else:
-                    # Unmatched double quote, don't bother processing the rest
-                    # of the line since this is probably a multiline string.
-                    collapsed += elided
-                    break
-            else:
-                # Found single quote, check nearby text to eliminate digit separators.
-                #
-                # There is no special handling for floating point here, because
-                # the integer/fractional/exponent parts would all be parsed
-                # correctly as long as there are digits on both sides of the
-                # separator.  So we are fine as long as we don't see something
-                # like "0.'3" (gcc 4.9.0 will not allow this literal).
-                if Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head):
-                    match_literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$',
-                                          "'" + tail)
-                    collapsed += head + match_literal.group(1).replace("'", '')
-                    elided = match_literal.group(2)
-                else:
-                    second_quote = tail.find('\'')
-                    if second_quote >= 0:
-                        collapsed += head + "''"
-                        elided = tail[second_quote + 1:]
-                    else:
-                        # Unmatched single quote
-                        collapsed += elided
-                        break
-
-        return collapsed
-
-
-def FindEndOfExpressionInLine(line, startpos, stack):
-    """Find the position just after the end of current parenthesized expression.
-
-  Args:
-    line: a CleansedLines line.
-    startpos: start searching at this position.
-    stack: nesting stack at startpos.
-
-  Returns:
-    On finding matching end: (index just after matching end, None)
-    On finding an unclosed expression: (-1, None)
-    Otherwise: (-1, new stack at end of this line)
-  """
-    for i in xrange(startpos, len(line)):
-        char = line[i]
-        if char in '([{':
-            # Found start of parenthesized expression, push to expression stack
-            stack.append(char)
-        elif char == '<':
-            # Found potential start of template argument list
-            if i > 0 and line[i - 1] == '<':
-                # Left shift operator
-                if stack and stack[-1] == '<':
-                    stack.pop()
-                    if not stack:
-                        return (-1, None)
-            elif i > 0 and Search(r'\boperator\s*$', line[0:i]):
-                # operator<, don't add to stack
-                continue
-            else:
-                # Tentative start of template argument list
-                stack.append('<')
-        elif char in ')]}':
-            # Found end of parenthesized expression.
-            #
-            # If we are currently expecting a matching '>', the pending '<'
-            # must have been an operator.  Remove them from expression stack.
-            while stack and stack[-1] == '<':
-                stack.pop()
-            if not stack:
-                return (-1, None)
-            if ((stack[-1] == '(' and char == ')') or
-                (stack[-1] == '[' and char == ']') or
-                (stack[-1] == '{' and char == '}')):
-                stack.pop()
-                if not stack:
-                    return (i + 1, None)
-            else:
-                # Mismatched parentheses
-                return (-1, None)
-        elif char == '>':
-            # Found potential end of template argument list.
-
-            # Ignore "->" and operator functions
-            if (i > 0 and (line[i - 1] == '-' or Search(r'\boperator\s*$',
-                                                        line[0:i - 1]))):
-                continue
-
-            # Pop the stack if there is a matching '<'.  Otherwise, ignore
-            # this '>' since it must be an operator.
-            if stack:
-                if stack[-1] == '<':
-                    stack.pop()
-                    if not stack:
-                        return (i + 1, None)
-        elif char == ';':
-            # Found something that look like end of statements.  If we are currently
-            # expecting a '>', the matching '<' must have been an operator, since
-            # template argument list should not contain statements.
-            while stack and stack[-1] == '<':
-                stack.pop()
-            if not stack:
-                return (-1, None)
-
-    # Did not find end of expression or unbalanced parentheses on this line
-    return (-1, stack)
-
-
-def CloseExpression(clean_lines, linenum, pos):
-    """If input points to ( or { or [ or <, finds the position that closes it.
-
-  If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
-  linenum/pos that correspond to the closing of the expression.
-
-  TODO(unknown): cpplint spends a fair bit of time matching parentheses.
-  Ideally we would want to index all opening and closing parentheses once
-  and have CloseExpression be just a simple lookup, but due to preprocessor
-  tricks, this is not so easy.
-
-  Args:
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    pos: A position on the line.
-
-  Returns:
-    A tuple (line, linenum, pos) pointer *past* the closing brace, or
-    (line, len(lines), -1) if we never find a close.  Note we ignore
-    strings and comments when matching; and the line we return is the
-    'cleansed' line at linenum.
-  """
-
-    line = clean_lines.elided[linenum]
-    if (line[pos] not in '({[<') or Match(r'<[<=]', line[pos:]):
-        return (line, clean_lines.NumLines(), -1)
-
-    # Check first line
-    (end_pos, stack) = FindEndOfExpressionInLine(line, pos, [])
-    if end_pos > -1:
-        return (line, linenum, end_pos)
-
-    # Continue scanning forward
-    while stack and linenum < clean_lines.NumLines() - 1:
-        linenum += 1
-        line = clean_lines.elided[linenum]
-        (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack)
-        if end_pos > -1:
-            return (line, linenum, end_pos)
-
-    # Did not find end of expression before end of file, give up
-    return (line, clean_lines.NumLines(), -1)
-
-
-def FindStartOfExpressionInLine(line, endpos, stack):
-    """Find position at the matching start of current expression.
-
-  This is almost the reverse of FindEndOfExpressionInLine, but note
-  that the input position and returned position differs by 1.
-
-  Args:
-    line: a CleansedLines line.
-    endpos: start searching at this position.
-    stack: nesting stack at endpos.
-
-  Returns:
-    On finding matching start: (index at matching start, None)
-    On finding an unclosed expression: (-1, None)
-    Otherwise: (-1, new stack at beginning of this line)
-  """
-    i = endpos
-    while i >= 0:
-        char = line[i]
-        if char in ')]}':
-            # Found end of expression, push to expression stack
-            stack.append(char)
-        elif char == '>':
-            # Found potential end of template argument list.
-            #
-            # Ignore it if it's a "->" or ">=" or "operator>"
-            if (i > 0 and
-                (line[i - 1] == '-' or Match(r'\s>=\s', line[i - 1:]) or
-                 Search(r'\boperator\s*$', line[0:i]))):
-                i -= 1
-            else:
-                stack.append('>')
-        elif char == '<':
-            # Found potential start of template argument list
-            if i > 0 and line[i - 1] == '<':
-                # Left shift operator
-                i -= 1
-            else:
-                # If there is a matching '>', we can pop the expression stack.
-                # Otherwise, ignore this '<' since it must be an operator.
-                if stack and stack[-1] == '>':
-                    stack.pop()
-                    if not stack:
-                        return (i, None)
-        elif char in '([{':
-            # Found start of expression.
-            #
-            # If there are any unmatched '>' on the stack, they must be
-            # operators.  Remove those.
-            while stack and stack[-1] == '>':
-                stack.pop()
-            if not stack:
-                return (-1, None)
-            if ((char == '(' and stack[-1] == ')') or
-                (char == '[' and stack[-1] == ']') or
-                (char == '{' and stack[-1] == '}')):
-                stack.pop()
-                if not stack:
-                    return (i, None)
-            else:
-                # Mismatched parentheses
-                return (-1, None)
-        elif char == ';':
-            # Found something that look like end of statements.  If we are currently
-            # expecting a '<', the matching '>' must have been an operator, since
-            # template argument list should not contain statements.
-            while stack and stack[-1] == '>':
-                stack.pop()
-            if not stack:
-                return (-1, None)
-
-        i -= 1
-
-    return (-1, stack)
-
-
-def ReverseCloseExpression(clean_lines, linenum, pos):
-    """If input points to ) or } or ] or >, finds the position that opens it.
-
-  If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
-  linenum/pos that correspond to the opening of the expression.
-
-  Args:
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    pos: A position on the line.
-
-  Returns:
-    A tuple (line, linenum, pos) pointer *at* the opening brace, or
-    (line, 0, -1) if we never find the matching opening brace.  Note
-    we ignore strings and comments when matching; and the line we
-    return is the 'cleansed' line at linenum.
-  """
-    line = clean_lines.elided[linenum]
-    if line[pos] not in ')}]>':
-        return (line, 0, -1)
-
-    # Check last line
-    (start_pos, stack) = FindStartOfExpressionInLine(line, pos, [])
-    if start_pos > -1:
-        return (line, linenum, start_pos)
-
-    # Continue scanning backward
-    while stack and linenum > 0:
-        linenum -= 1
-        line = clean_lines.elided[linenum]
-        (start_pos, stack) = FindStartOfExpressionInLine(line,
-                                                         len(line) - 1, stack)
-        if start_pos > -1:
-            return (line, linenum, start_pos)
-
-    # Did not find start of expression before beginning of file, give up
-    return (line, 0, -1)
-
-
-def CheckForCopyright(filename, lines, error):
-    """Logs an error if no Copyright message appears at the top of the file."""
-
-    # We'll say it should occur by line 10. Don't forget there's a
-    # dummy line at the front.
-    for line in xrange(1, min(len(lines), 11)):
-        if re.search(r'Copyright', lines[line], re.I): break
-    else:  # means no copyright line was found
-        error(filename, 0, 'legal/copyright', 5, 'No copyright message found.  '
-              'You should have a line: "Copyright [year] <Copyright Owner>"')
-
-
-def GetIndentLevel(line):
-    """Return the number of leading spaces in line.
-
-  Args:
-    line: A string to check.
-
-  Returns:
-    An integer count of leading spaces, possibly zero.
-  """
-    indent = Match(r'^( *)\S', line)
-    if indent:
-        return len(indent.group(1))
-    else:
-        return 0
-
-
-def GetHeaderGuardCPPVariable(filename):
-    """Returns the CPP variable that should be used as a header guard.
-
-  Args:
-    filename: The name of a C++ header file.
-
-  Returns:
-    The CPP variable that should be used as a header guard in the
-    named file.
-
-  """
-    filename = os.path.basename(filename)
-    return re.sub(r'[^a-zA-Z0-9]', '_', filename).upper() + '_'
-
-
-def CheckForHeaderGuard(filename, clean_lines, error):
-    """Checks that the file contains a header guard.
-
-  Logs an error if no #ifndef header guard is present.  For other
-  headers, checks that the full pathname is used.
-
-  Args:
-    filename: The name of the C++ header file.
-    clean_lines: A CleansedLines instance containing the file.
-    error: The function to call with any errors found.
-  """
-
-    # Don't check for header guards if there are error suppression
-    # comments somewhere in this file.
-    #
-    # Because this is silencing a warning for a nonexistent line, we
-    # only support the very specific NOLINT(build/header_guard) syntax,
-    # and not the general NOLINT or NOLINT(*) syntax.
-    raw_lines = clean_lines.lines_without_raw_strings
-    for i in raw_lines:
-        if Search(r'//\s*NOLINT\(build/header_guard\)', i):
-            return
-
-    cppvar = GetHeaderGuardCPPVariable(filename)
-
-    ifndef = ''
-    ifndef_linenum = 0
-    define = ''
-    endif = ''
-    endif_linenum = 0
-    pragma_linenum = -1
-    for linenum, line in enumerate(raw_lines):
-        linesplit = line.split()
-        if len(linesplit) >= 2:
-            if linesplit[0] == '#pragma' and linesplit[1] == 'once':
-                pragma_linenum = linenum
-            # find the first occurrence of #ifndef and #define, save arg
-            if not ifndef and linesplit[0] == '#ifndef':
-                # set ifndef to the header guard presented on the #ifndef line.
-                ifndef = linesplit[1]
-                ifndef_linenum = linenum
-            if not define and linesplit[0] == '#define':
-                define = linesplit[1]
-        # find the last occurrence of #endif, save entire line
-        if line.startswith('#endif'):
-            endif = line
-            endif_linenum = linenum
-    if pragma_linenum != -1:
-        return  # short path for pragma once
-    if not ifndef or not define or ifndef != define:
-        error(filename, 0, 'build/header_guard', 5,
-              'No #ifndef header guard found, suggested CPP variable is: %s' %
-              cppvar)
-        return
-
-    # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
-    # for backward compatibility.
-    if ifndef != cppvar:
-        error_level = 0
-        if ifndef != cppvar + '_':
-            error_level = 5
-
-        ParseNolintSuppressions(filename, raw_lines[ifndef_linenum],
-                                ifndef_linenum, error)
-        error(filename, ifndef_linenum, 'build/header_guard', error_level,
-              '#ifndef header guard has wrong style, please use: %s' % cppvar)
-
-    # Check for "//" comments on endif line.
-    ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum,
-                            error)
-    match = Match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif)
-    if match:
-        if match.group(1) == '_':
-            # Issue low severity warning for deprecated double trailing underscore
-            error(filename, endif_linenum, 'build/header_guard', 0,
-                  '#endif line should be "#endif  // %s"' % cppvar)
-        return
-
-    # Didn't find the corresponding "//" comment.  If this file does not
-    # contain any "//" comments at all, it could be that the compiler
-    # only wants "/**/" comments, look for those instead.
-    no_single_line_comments = True
-    for i in xrange(1, len(raw_lines) - 1):
-        line = raw_lines[i]
-        if Match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//',
-                 line):
-            no_single_line_comments = False
-            break
-
-    if no_single_line_comments:
-        match = Match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif)
-        if match:
-            if match.group(1) == '_':
-                # Low severity warning for double trailing underscore
-                error(filename, endif_linenum, 'build/header_guard', 0,
-                      '#endif line should be "#endif  /* %s */"' % cppvar)
-            return
-
-    # Didn't find anything
-    error(filename, endif_linenum, 'build/header_guard', 5,
-          '#endif line should be "#endif  // %s"' % cppvar)
-
-
-def CheckHeaderFileIncluded(filename, include_state, error):
-    """Logs an error if a .cc file does not include its header."""
-
-    # Do not check test files
-    if filename.endswith('_test.cc') or filename.endswith('_unittest.cc'):
-        return
-
-    fileinfo = FileInfo(filename)
-    headerfile = filename[0:len(filename) - 2] + 'h'
-    if not os.path.exists(headerfile):
-        return
-    headername = FileInfo(headerfile).RepositoryName()
-    first_include = 0
-    for section_list in include_state.include_list:
-        for f in section_list:
-            if headername in f[0] or f[0] in headername:
-                return
-            if not first_include:
-                first_include = f[1]
-
-    error(filename, first_include, 'build/include', 5,
-          '%s should include its header file %s' % (fileinfo.RepositoryName(),
-                                                    headername))
-
-
-def CheckForBadCharacters(filename, lines, error):
-    """Logs an error for each line containing bad characters.
-
-  Two kinds of bad characters:
-
-  1. Unicode replacement characters: These indicate that either the file
-  contained invalid UTF-8 (likely) or Unicode replacement characters (which
-  it shouldn't).  Note that it's possible for this to throw off line
-  numbering if the invalid UTF-8 occurred adjacent to a newline.
-
-  2. NUL bytes.  These are problematic for some tools.
-
-  Args:
-    filename: The name of the current file.
-    lines: An array of strings, each representing a line of the file.
-    error: The function to call with any errors found.
-  """
-    for linenum, line in enumerate(lines):
-        if u'\ufffd' in line:
-            error(
-                filename, linenum, 'readability/utf8', 5,
-                'Line contains invalid UTF-8 (or Unicode replacement character).'
-            )
-        if '\0' in line:
-            error(filename, linenum, 'readability/nul', 5,
-                  'Line contains NUL byte.')
-
-
-def CheckForNewlineAtEOF(filename, lines, error):
-    """Logs an error if there is no newline char at the end of the file.
-
-  Args:
-    filename: The name of the current file.
-    lines: An array of strings, each representing a line of the file.
-    error: The function to call with any errors found.
-  """
-
-    # The array lines() was created by adding two newlines to the
-    # original file (go figure), then splitting on \n.
-    # To verify that the file ends in \n, we just have to make sure the
-    # last-but-two element of lines() exists and is empty.
-    if len(lines) < 3 or lines[-2]:
-        error(filename,
-              len(lines) - 2, 'whitespace/ending_newline', 5,
-              'Could not find a newline character at the end of the file.')
-
-
-def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
-    """Logs an error if we see /* ... */ or "..." that extend past one line.
-
-  /* ... */ comments are legit inside macros, for one line.
-  Otherwise, we prefer // comments, so it's ok to warn about the
-  other.  Likewise, it's ok for strings to extend across multiple
-  lines, as long as a line continuation character (backslash)
-  terminates each line. Although not currently prohibited by the C++
-  style guide, it's ugly and unnecessary. We don't do well with either
-  in this lint program, so we warn about both.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]
-
-    # Remove all \\ (escaped backslashes) from the line. They are OK, and the
-    # second (escaped) slash may trigger later \" detection erroneously.
-    line = line.replace('\\\\', '')
-
-    if line.count('/*') > line.count('*/'):
-        error(filename, linenum, 'readability/multiline_comment', 5,
-              'Complex multi-line /*...*/-style comment found. '
-              'Lint may give bogus warnings.  '
-              'Consider replacing these with //-style comments, '
-              'with #if 0...#endif, '
-              'or with more clearly structured multi-line comments.')
-
-    if (line.count('"') - line.count('\\"')) % 2:
-        error(filename, linenum, 'readability/multiline_string', 5,
-              'Multi-line string ("...") found.  This lint script doesn\'t '
-              'do well with such strings, and may give bogus warnings.  '
-              'Use C++11 raw strings or concatenation instead.')
-
-
-# (non-threadsafe name, thread-safe alternative, validation pattern)
-#
-# The validation pattern is used to eliminate false positives such as:
-#  _rand();               // false positive due to substring match.
-#  ->rand();              // some member function rand().
-#  ACMRandom rand(seed);  // some variable named rand.
-#  ISAACRandom rand();    // another variable named rand.
-#
-# Basically we require the return value of these functions to be used
-# in some expression context on the same line by matching on some
-# operator before the function name.  This eliminates constructors and
-# member function calls.
-_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)'
-_THREADING_LIST = (
-    ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'),
-    ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'),
-    ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'),
-    ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'),
-    ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'),
-    ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'),
-    ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'),
-    ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'),
-    ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'),
-    ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'),
-    ('strtok(', 'strtok_r(', _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'),
-    ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'), )
-
-
-def CheckPosixThreading(filename, clean_lines, linenum, error):
-    """Checks for calls to thread-unsafe functions.
-
-  Much code has been originally written without consideration of
-  multi-threading. Also, engineers are relying on their old experience;
-  they have learned posix before threading extensions were added. These
-  tests guide the engineers to use thread-safe functions (when using
-  posix directly).
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]
-    for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST:
-        # Additional pattern matching check to confirm that this is the
-        # function we are looking for
-        if Search(pattern, line):
-            error(filename, linenum, 'runtime/threadsafe_fn', 2,
-                  'Consider using ' + multithread_safe_func + '...) instead of '
-                  + single_thread_func + '...) for improved thread safety.')
-
-
-def CheckVlogArguments(filename, clean_lines, linenum, error):
-    """Checks that VLOG() is only used for defining a logging level.
-
-  For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and
-  VLOG(FATAL) are not.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]
-    if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line):
-        error(filename, linenum, 'runtime/vlog', 5,
-              'VLOG() should be used with numeric verbosity level.  '
-              'Use LOG() if you want symbolic severity levels.')
-
-
-# Matches invalid increment: *count++, which moves pointer instead of
-# incrementing a value.
-_RE_PATTERN_INVALID_INCREMENT = re.compile(r'^\s*\*\w+(\+\+|--);')
-
-
-def CheckInvalidIncrement(filename, clean_lines, linenum, error):
-    """Checks for invalid increment *count++.
-
-  For example following function:
-  void increment_counter(int* count) {
-    *count++;
-  }
-  is invalid, because it effectively does count++, moving pointer, and should
-  be replaced with ++*count, (*count)++ or *count += 1.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]
-    if _RE_PATTERN_INVALID_INCREMENT.match(line):
-        error(
-            filename, linenum, 'runtime/invalid_increment', 5,
-            'Changing pointer instead of value (or unused value of operator*).')
-
-
-def IsMacroDefinition(clean_lines, linenum):
-    if Search(r'^#define', clean_lines[linenum]):
-        return True
-
-    if linenum > 0 and Search(r'\\$', clean_lines[linenum - 1]):
-        return True
-
-    return False
-
-
-def IsForwardClassDeclaration(clean_lines, linenum):
-    return Match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum])
-
-
-class _BlockInfo(object):
-    """Stores information about a generic block of code."""
-
-    def __init__(self, seen_open_brace):
-        self.seen_open_brace = seen_open_brace
-        self.open_parentheses = 0
-        self.inline_asm = _NO_ASM
-        self.check_namespace_indentation = False
-
-    def CheckBegin(self, filename, clean_lines, linenum, error):
-        """Run checks that applies to text up to the opening brace.
-
-    This is mostly for checking the text after the class identifier
-    and the "{", usually where the base class is specified.  For other
-    blocks, there isn't much to check, so we always pass.
-
-    Args:
-      filename: The name of the current file.
-      clean_lines: A CleansedLines instance containing the file.
-      linenum: The number of the line to check.
-      error: The function to call with any errors found.
-    """
-        pass
-
-    def CheckEnd(self, filename, clean_lines, linenum, error):
-        """Run checks that applies to text after the closing brace.
-
-    This is mostly used for checking end of namespace comments.
-
-    Args:
-      filename: The name of the current file.
-      clean_lines: A CleansedLines instance containing the file.
-      linenum: The number of the line to check.
-      error: The function to call with any errors found.
-    """
-        pass
-
-    def IsBlockInfo(self):
-        """Returns true if this block is a _BlockInfo.
-
-    This is convenient for verifying that an object is an instance of
-    a _BlockInfo, but not an instance of any of the derived classes.
-
-    Returns:
-      True for this class, False for derived classes.
-    """
-        return self.__class__ == _BlockInfo
-
-
-class _ExternCInfo(_BlockInfo):
-    """Stores information about an 'extern "C"' block."""
-
-    def __init__(self):
-        _BlockInfo.__init__(self, True)
-
-
-class _ClassInfo(_BlockInfo):
-    """Stores information about a class."""
-
-    def __init__(self, name, class_or_struct, clean_lines, linenum):
-        _BlockInfo.__init__(self, False)
-        self.name = name
-        self.starting_linenum = linenum
-        self.is_derived = False
-        self.check_namespace_indentation = True
-        if class_or_struct == 'struct':
-            self.access = 'public'
-            self.is_struct = True
-        else:
-            self.access = 'private'
-            self.is_struct = False
-
-        # Remember initial indentation level for this class.  Using raw_lines here
-        # instead of elided to account for leading comments.
-        self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum])
-
-        # Try to find the end of the class.  This will be confused by things like:
-        #   class A {
-        #   } *x = { ...
-        #
-        # But it's still good enough for CheckSectionSpacing.
-        self.last_line = 0
-        depth = 0
-        for i in range(linenum, clean_lines.NumLines()):
-            line = clean_lines.elided[i]
-            depth += line.count('{') - line.count('}')
-            if not depth:
-                self.last_line = i
-                break
-
-    def CheckBegin(self, filename, clean_lines, linenum, error):
-        # Look for a bare ':'
-        if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
-            self.is_derived = True
-
-    def CheckEnd(self, filename, clean_lines, linenum, error):
-        # If there is a DISALLOW macro, it should appear near the end of
-        # the class.
-        seen_last_thing_in_class = False
-        for i in xrange(linenum - 1, self.starting_linenum, -1):
-            match = Search(
-                r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\('
-                + self.name + r'\)', clean_lines.elided[i])
-            if match:
-                if seen_last_thing_in_class:
-                    error(filename, i, 'readability/constructors', 3,
-                          match.group(1) +
-                          ' should be the last thing in the class')
-                break
-
-            if not Match(r'^\s*$', clean_lines.elided[i]):
-                seen_last_thing_in_class = True
-
-        # Check that closing brace is aligned with beginning of the class.
-        # Only do this if the closing brace is indented by only whitespaces.
-        # This means we will not check single-line class definitions.
-        indent = Match(r'^( *)\}', clean_lines.elided[linenum])
-        if indent and len(indent.group(1)) != self.class_indent:
-            if self.is_struct:
-                parent = 'struct ' + self.name
-            else:
-                parent = 'class ' + self.name
-            error(filename, linenum, 'whitespace/indent', 3,
-                  'Closing brace should be aligned with beginning of %s' %
-                  parent)
-
-
-class _NamespaceInfo(_BlockInfo):
-    """Stores information about a namespace."""
-
-    def __init__(self, name, linenum):
-        _BlockInfo.__init__(self, False)
-        self.name = name or ''
-        self.starting_linenum = linenum
-        self.check_namespace_indentation = True
-
-    def CheckEnd(self, filename, clean_lines, linenum, error):
-        """Check end of namespace comments."""
-        line = clean_lines.raw_lines[linenum]
-
-        # Check how many lines is enclosed in this namespace.  Don't issue
-        # warning for missing namespace comments if there aren't enough
-        # lines.  However, do apply checks if there is already an end of
-        # namespace comment and it's incorrect.
-        #
-        # TODO(unknown): We always want to check end of namespace comments
-        # if a namespace is large, but sometimes we also want to apply the
-        # check if a short namespace contained nontrivial things (something
-        # other than forward declarations).  There is currently no logic on
-        # deciding what these nontrivial things are, so this check is
-        # triggered by namespace size only, which works most of the time.
-        if (linenum - self.starting_linenum < 10 and
-                not Match(r'};*\s*(//|/\*).*\bnamespace\b', line)):
-            return
-
-        # Look for matching comment at end of namespace.
-        #
-        # Note that we accept C style "/* */" comments for terminating
-        # namespaces, so that code that terminate namespaces inside
-        # preprocessor macros can be cpplint clean.
-        #
-        # We also accept stuff like "// end of namespace <name>." with the
-        # period at the end.
-        #
-        # Besides these, we don't accept anything else, otherwise we might
-        # get false negatives when existing comment is a substring of the
-        # expected namespace.
-        if self.name:
-            # Named namespace
-            if not Match((r'};*\s*(//|/\*).*\bnamespace\s+' +
-                          re.escape(self.name) + r'[\*/\.\\\s]*$'), line):
-                error(filename, linenum, 'readability/namespace', 5,
-                      'Namespace should be terminated with "// namespace %s"' %
-                      self.name)
-        else:
-            # Anonymous namespace
-            if not Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
-                # If "// namespace anonymous" or "// anonymous namespace (more text)",
-                # mention "// anonymous namespace" as an acceptable form
-                if Match(r'}.*\b(namespace anonymous|anonymous namespace)\b',
-                         line):
-                    error(
-                        filename, linenum, 'readability/namespace', 5,
-                        'Anonymous namespace should be terminated with "// namespace"'
-                        ' or "// anonymous namespace"')
-                else:
-                    error(
-                        filename, linenum, 'readability/namespace', 5,
-                        'Anonymous namespace should be terminated with "// namespace"'
-                    )
-
-
-class _PreprocessorInfo(object):
-    """Stores checkpoints of nesting stacks when #if/#else is seen."""
-
-    def __init__(self, stack_before_if):
-        # The entire nesting stack before #if
-        self.stack_before_if = stack_before_if
-
-        # The entire nesting stack up to #else
-        self.stack_before_else = []
-
-        # Whether we have already seen #else or #elif
-        self.seen_else = False
-
-
-class NestingState(object):
-    """Holds states related to parsing braces."""
-
-    def __init__(self):
-        # Stack for tracking all braces.  An object is pushed whenever we
-        # see a "{", and popped when we see a "}".  Only 3 types of
-        # objects are possible:
-        # - _ClassInfo: a class or struct.
-        # - _NamespaceInfo: a namespace.
-        # - _BlockInfo: some other type of block.
-        self.stack = []
-
-        # Top of the previous stack before each Update().
-        #
-        # Because the nesting_stack is updated at the end of each line, we
-        # had to do some convoluted checks to find out what is the current
-        # scope at the beginning of the line.  This check is simplified by
-        # saving the previous top of nesting stack.
-        #
-        # We could save the full stack, but we only need the top.  Copying
-        # the full nesting stack would slow down cpplint by ~10%.
-        self.previous_stack_top = []
-
-        # Stack of _PreprocessorInfo objects.
-        self.pp_stack = []
-
-    def SeenOpenBrace(self):
-        """Check if we have seen the opening brace for the innermost block.
-
-    Returns:
-      True if we have seen the opening brace, False if the innermost
-      block is still expecting an opening brace.
-    """
-        return (not self.stack) or self.stack[-1].seen_open_brace
-
-    def InNamespaceBody(self):
-        """Check if we are currently one level inside a namespace body.
-
-    Returns:
-      True if top of the stack is a namespace block, False otherwise.
-    """
-        return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
-
-    def InExternC(self):
-        """Check if we are currently one level inside an 'extern "C"' block.
-
-    Returns:
-      True if top of the stack is an extern block, False otherwise.
-    """
-        return self.stack and isinstance(self.stack[-1], _ExternCInfo)
-
-    def InClassDeclaration(self):
-        """Check if we are currently one level inside a class or struct declaration.
-
-    Returns:
-      True if top of the stack is a class/struct, False otherwise.
-    """
-        return self.stack and isinstance(self.stack[-1], _ClassInfo)
-
-    def InAsmBlock(self):
-        """Check if we are currently one level inside an inline ASM block.
-
-    Returns:
-      True if the top of the stack is a block containing inline ASM.
-    """
-        return self.stack and self.stack[-1].inline_asm != _NO_ASM
-
-    def InTemplateArgumentList(self, clean_lines, linenum, pos):
-        """Check if current position is inside template argument list.
-
-    Args:
-      clean_lines: A CleansedLines instance containing the file.
-      linenum: The number of the line to check.
-      pos: position just after the suspected template argument.
-    Returns:
-      True if (linenum, pos) is inside template arguments.
-    """
-        while linenum < clean_lines.NumLines():
-            # Find the earliest character that might indicate a template argument
-            line = clean_lines.elided[linenum]
-            match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:])
-            if not match:
-                linenum += 1
-                pos = 0
-                continue
-            token = match.group(1)
-            pos += len(match.group(0))
-
-            # These things do not look like template argument list:
-            #   class Suspect {
-            #   class Suspect x; }
-            if token in ('{', '}', ';'): return False
-
-            # These things look like template argument list:
-            #   template <class Suspect>
-            #   template <class Suspect = default_value>
-            #   template <class Suspect[]>
-            #   template <class Suspect...>
-            if token in ('>', '=', '[', ']', '.'): return True
-
-            # Check if token is an unmatched '<'.
-            # If not, move on to the next character.
-            if token != '<':
-                pos += 1
-                if pos >= len(line):
-                    linenum += 1
-                    pos = 0
-                continue
-
-            # We can't be sure if we just find a single '<', and need to
-            # find the matching '>'.
-            (_, end_line, end_pos) = CloseExpression(clean_lines, linenum,
-                                                     pos - 1)
-            if end_pos < 0:
-                # Not sure if template argument list or syntax error in file
-                return False
-            linenum = end_line
-            pos = end_pos
-        return False
-
-    def UpdatePreprocessor(self, line):
-        """Update preprocessor stack.
-
-    We need to handle preprocessors due to classes like this:
-      #ifdef SWIG
-      struct ResultDetailsPageElementExtensionPoint {
-      #else
-      struct ResultDetailsPageElementExtensionPoint : public Extension {
-      #endif
-
-    We make the following assumptions (good enough for most files):
-    - Preprocessor condition evaluates to true from #if up to first
-      #else/#elif/#endif.
-
-    - Preprocessor condition evaluates to false from #else/#elif up
-      to #endif.  We still perform lint checks on these lines, but
-      these do not affect nesting stack.
-
-    Args:
-      line: current line to check.
-    """
-        if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
-            # Beginning of #if block, save the nesting stack here.  The saved
-            # stack will allow us to restore the parsing state in the #else case.
-            self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
-        elif Match(r'^\s*#\s*(else|elif)\b', line):
-            # Beginning of #else block
-            if self.pp_stack:
-                if not self.pp_stack[-1].seen_else:
-                    # This is the first #else or #elif block.  Remember the
-                    # whole nesting stack up to this point.  This is what we
-                    # keep after the #endif.
-                    self.pp_stack[-1].seen_else = True
-                    self.pp_stack[-1].stack_before_else = copy.deepcopy(
-                        self.stack)
-
-                # Restore the stack to how it was before the #if
-                self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
-            else:
-                # TODO(unknown): unexpected #else, issue warning?
-                pass
-        elif Match(r'^\s*#\s*endif\b', line):
-            # End of #if or #else blocks.
-            if self.pp_stack:
-                # If we saw an #else, we will need to restore the nesting
-                # stack to its former state before the #else, otherwise we
-                # will just continue from where we left off.
-                if self.pp_stack[-1].seen_else:
-                    # Here we can just use a shallow copy since we are the last
-                    # reference to it.
-                    self.stack = self.pp_stack[-1].stack_before_else
-                # Drop the corresponding #if
-                self.pp_stack.pop()
-            else:
-                # TODO(unknown): unexpected #endif, issue warning?
-                pass
-
-    # TODO(unknown): Update() is too long, but we will refactor later.
-    def Update(self, filename, clean_lines, linenum, error):
-        """Update nesting state with current line.
-
-    Args:
-      filename: The name of the current file.
-      clean_lines: A CleansedLines instance containing the file.
-      linenum: The number of the line to check.
-      error: The function to call with any errors found.
-    """
-        line = clean_lines.elided[linenum]
-
-        # Remember top of the previous nesting stack.
-        #
-        # The stack is always pushed/popped and not modified in place, so
-        # we can just do a shallow copy instead of copy.deepcopy.  Using
-        # deepcopy would slow down cpplint by ~28%.
-        if self.stack:
-            self.previous_stack_top = self.stack[-1]
-        else:
-            self.previous_stack_top = None
-
-        # Update pp_stack
-        self.UpdatePreprocessor(line)
-
-        # Count parentheses.  This is to avoid adding struct arguments to
-        # the nesting stack.
-        if self.stack:
-            inner_block = self.stack[-1]
-            depth_change = line.count('(') - line.count(')')
-            inner_block.open_parentheses += depth_change
-
-            # Also check if we are starting or ending an inline assembly block.
-            if inner_block.inline_asm in (_NO_ASM, _END_ASM):
-                if (depth_change != 0 and inner_block.open_parentheses == 1 and
-                        _MATCH_ASM.match(line)):
-                    # Enter assembly block
-                    inner_block.inline_asm = _INSIDE_ASM
-                else:
-                    # Not entering assembly block.  If previous line was _END_ASM,
-                    # we will now shift to _NO_ASM state.
-                    inner_block.inline_asm = _NO_ASM
-            elif (inner_block.inline_asm == _INSIDE_ASM and
-                  inner_block.open_parentheses == 0):
-                # Exit assembly block
-                inner_block.inline_asm = _END_ASM
-
-        # Consume namespace declaration at the beginning of the line.  Do
-        # this in a loop so that we catch same line declarations like this:
-        #   namespace proto2 { namespace bridge { class MessageSet; } }
-        while True:
-            # Match start of namespace.  The "\b\s*" below catches namespace
-            # declarations even if it weren't followed by a whitespace, this
-            # is so that we don't confuse our namespace checker.  The
-            # missing spaces will be flagged by CheckSpacing.
-            namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$',
-                                         line)
-            if not namespace_decl_match:
-                break
-
-            new_namespace = _NamespaceInfo(
-                namespace_decl_match.group(1), linenum)
-            self.stack.append(new_namespace)
-
-            line = namespace_decl_match.group(2)
-            if line.find('{') != -1:
-                new_namespace.seen_open_brace = True
-                line = line[line.find('{') + 1:]
-
-        # Look for a class declaration in whatever is left of the line
-        # after parsing namespaces.  The regexp accounts for decorated classes
-        # such as in:
-        #   class LOCKABLE API Object {
-        #   };
-        class_decl_match = Match(
-            r'^(\s*(?:template\s*<[\w\s<>,:]*>\s*)?'
-            r'(class|struct)\s+(?:[A-Z_]+\s+)*(\w+(?:::\w+)*))'
-            r'(.*)$', line)
-        if (class_decl_match and
-            (not self.stack or self.stack[-1].open_parentheses == 0)):
-            # We do not want to accept classes that are actually template arguments:
-            #   template <class Ignore1,
-            #             class Ignore2 = Default<Args>,
-            #             template <Args> class Ignore3>
-            #   void Function() {};
-            #
-            # To avoid template argument cases, we scan forward and look for
-            # an unmatched '>'.  If we see one, assume we are inside a
-            # template argument list.
-            end_declaration = len(class_decl_match.group(1))
-            if not self.InTemplateArgumentList(clean_lines, linenum,
-                                               end_declaration):
-                self.stack.append(
-                    _ClassInfo(
-                        class_decl_match.group(3),
-                        class_decl_match.group(2), clean_lines, linenum))
-                line = class_decl_match.group(4)
-
-        # If we have not yet seen the opening brace for the innermost block,
-        # run checks here.
-        if not self.SeenOpenBrace():
-            self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
-
-        # Update access control if we are inside a class/struct
-        if self.stack and isinstance(self.stack[-1], _ClassInfo):
-            classinfo = self.stack[-1]
-            access_match = Match(
-                r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
-                r':(?:[^:]|$)', line)
-            if access_match:
-                classinfo.access = access_match.group(2)
-
-                # Check that access keywords are indented +1 space.  Skip this
-                # check if the keywords are not preceded by whitespaces.
-                indent = access_match.group(1)
-                if (len(indent) != classinfo.class_indent + 1 and
-                        Match(r'^\s*$', indent)):
-                    if classinfo.is_struct:
-                        parent = 'struct ' + classinfo.name
-                    else:
-                        parent = 'class ' + classinfo.name
-                    slots = ''
-                    if access_match.group(3):
-                        slots = access_match.group(3)
-                    error(filename, linenum, 'whitespace/indent', 3,
-                          '%s%s: should be indented +1 space inside %s' % (
-                              access_match.group(2), slots, parent))
-
-        # Consume braces or semicolons from what's left of the line
-        while True:
-            # Match first brace, semicolon, or closed parenthesis.
-            matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
-            if not matched:
-                break
-
-            token = matched.group(1)
-            if token == '{':
-                # If namespace or class hasn't seen a opening brace yet, mark
-                # namespace/class head as complete.  Push a new block onto the
-                # stack otherwise.
-                if not self.SeenOpenBrace():
-                    self.stack[-1].seen_open_brace = True
-                elif Match(r'^extern\s*"[^"]*"\s*\{', line):
-                    self.stack.append(_ExternCInfo())
-                else:
-                    self.stack.append(_BlockInfo(True))
-                    if _MATCH_ASM.match(line):
-                        self.stack[-1].inline_asm = _BLOCK_ASM
-
-            elif token == ';' or token == ')':
-                # If we haven't seen an opening brace yet, but we already saw
-                # a semicolon, this is probably a forward declaration.  Pop
-                # the stack for these.
-                #
-                # Similarly, if we haven't seen an opening brace yet, but we
-                # already saw a closing parenthesis, then these are probably
-                # function arguments with extra "class" or "struct" keywords.
-                # Also pop these stack for these.
-                if not self.SeenOpenBrace():
-                    self.stack.pop()
-            else:  # token == '}'
-                # Perform end of block checks and pop the stack.
-                if self.stack:
-                    self.stack[-1].CheckEnd(filename, clean_lines, linenum,
-                                            error)
-                    self.stack.pop()
-            line = matched.group(2)
-
-    def InnermostClass(self):
-        """Get class info on the top of the stack.
-
-    Returns:
-      A _ClassInfo object if we are inside a class, or None otherwise.
-    """
-        for i in range(len(self.stack), 0, -1):
-            classinfo = self.stack[i - 1]
-            if isinstance(classinfo, _ClassInfo):
-                return classinfo
-        return None
-
-    def CheckCompletedBlocks(self, filename, error):
-        """Checks that all classes and namespaces have been completely parsed.
-
-    Call this when all lines in a file have been processed.
-    Args:
-      filename: The name of the current file.
-      error: The function to call with any errors found.
-    """
-        # Note: This test can result in false positives if #ifdef constructs
-        # get in the way of brace matching. See the testBuildClass test in
-        # cpplint_unittest.py for an example of this.
-        for obj in self.stack:
-            if isinstance(obj, _ClassInfo):
-                error(filename, obj.starting_linenum, 'build/class', 5,
-                      'Failed to find complete declaration of class %s' %
-                      obj.name)
-            elif isinstance(obj, _NamespaceInfo):
-                error(filename, obj.starting_linenum, 'build/namespaces', 5,
-                      'Failed to find complete declaration of namespace %s' %
-                      obj.name)
-
-
-def CheckForNonStandardConstructs(filename, clean_lines, linenum, nesting_state,
-                                  error):
-    r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
-
-  Complain about several constructs which gcc-2 accepts, but which are
-  not standard C++.  Warning about these in lint is one way to ease the
-  transition to new compilers.
-  - put storage class first (e.g. "static const" instead of "const static").
-  - "%lld" instead of %qd" in printf-type functions.
-  - "%1$d" is non-standard in printf-type functions.
-  - "\%" is an undefined character escape sequence.
-  - text after #endif is not allowed.
-  - invalid inner-style forward declaration.
-  - >? and <? operators, and their >?= and <?= cousins.
-
-  Additionally, check for constructor/destructor style violations and reference
-  members, as it is very convenient to do so while checking for
-  gcc-2 compliance.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    nesting_state: A NestingState instance which maintains information about
-                   the current stack of nested blocks being parsed.
-    error: A callable to which errors are reported, which takes 4 arguments:
-           filename, line number, error level, and message
-  """
-
-    # Remove comments from the line, but leave in strings for now.
-    line = clean_lines.lines[linenum]
-
-    if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
-        error(filename, linenum, 'runtime/printf_format', 3,
-              '%q in format strings is deprecated.  Use %ll instead.')
-
-    if Search(r'printf\s*\(.*".*%\d+\$', line):
-        error(filename, linenum, 'runtime/printf_format', 2,
-              '%N$ formats are unconventional.  Try rewriting to avoid them.')
-
-    # Remove escaped backslashes before looking for undefined escapes.
-    line = line.replace('\\\\', '')
-
-    if Search(r'("|\').*\\(%|\[|\(|{)', line):
-        error(filename, linenum, 'build/printf_format', 3,
-              '%, [, (, and { are undefined character escapes.  Unescape them.')
-
-    # For the rest, work with both comments and strings removed.
-    line = clean_lines.elided[linenum]
-
-    if Search(r'\b(const|volatile|void|char|short|int|long'
-              r'|float|double|signed|unsigned'
-              r'|schar|u?int8|u?int16|u?int32|u?int64)'
-              r'\s+(register|static|extern|typedef)\b', line):
-        error(filename, linenum, 'build/storage_class', 5,
-              'Storage class (static, extern, typedef, etc) should be first.')
-
-    if Match(r'\s*#\s*endif\s*[^/\s]+', line):
-        error(filename, linenum, 'build/endif_comment', 5,
-              'Uncommented text after #endif is non-standard.  Use a comment.')
-
-    if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
-        error(
-            filename, linenum, 'build/forward_decl', 5,
-            'Inner-style forward declarations are invalid.  Remove this line.')
-
-    if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
-              line):
-        error(
-            filename, linenum, 'build/deprecated', 3,
-            '>? and <? (max and min) operators are non-standard and deprecated.')
-
-    if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
-        # TODO(unknown): Could it be expanded safely to arbitrary references,
-        # without triggering too many false positives? The first
-        # attempt triggered 5 warnings for mostly benign code in the regtest, hence
-        # the restriction.
-        # Here's the original regexp, for the reference:
-        # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
-        # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
-        error(filename, linenum, 'runtime/member_string_references', 2,
-              'const string& members are dangerous. It is much better to use '
-              'alternatives, such as pointers or simple constants.')
-
-    # Everything else in this function operates on class declarations.
-    # Return early if the top of the nesting stack is not a class, or if
-    # the class head is not completed yet.
-    classinfo = nesting_state.InnermostClass()
-    if not classinfo or not classinfo.seen_open_brace:
-        return
-
-    # The class may have been declared with namespace or classname qualifiers.
-    # The constructor and destructor will not have those qualifiers.
-    base_classname = classinfo.name.split('::')[-1]
-
-    # Look for single-argument constructors that aren't marked explicit.
-    # Technically a valid construct, but against style. Also look for
-    # non-single-argument constructors which are also technically valid, but
-    # strongly suggest something is wrong.
-    explicit_constructor_match = Match(
-        r'\s+(?:inline\s+)?(explicit\s+)?(?:inline\s+)?%s\s*'
-        r'\(((?:[^()]|\([^()]*\))*)\)' % re.escape(base_classname), line)
-
-    if explicit_constructor_match:
-        is_marked_explicit = explicit_constructor_match.group(1)
-
-        if not explicit_constructor_match.group(2):
-            constructor_args = []
-        else:
-            constructor_args = explicit_constructor_match.group(2).split(',')
-
-        # collapse arguments so that commas in template parameter lists and function
-        # argument parameter lists don't split arguments in two
-        i = 0
-        while i < len(constructor_args):
-            constructor_arg = constructor_args[i]
-            while (constructor_arg.count('<') > constructor_arg.count('>') or
-                   constructor_arg.count('(') > constructor_arg.count(')')):
-                constructor_arg += ',' + constructor_args[i + 1]
-                del constructor_args[i + 1]
-            constructor_args[i] = constructor_arg
-            i += 1
-
-        defaulted_args = [arg for arg in constructor_args if '=' in arg]
-        noarg_constructor = (
-            not constructor_args or  # empty arg list
-            # 'void' arg specifier
-            (len(constructor_args) == 1 and
-             constructor_args[0].strip() == 'void'))
-        onearg_constructor = (
-            (
-                len(constructor_args) == 1 and  # exactly one arg
-                not noarg_constructor) or
-            # all but at most one arg defaulted
-            (len(constructor_args) >= 1 and not noarg_constructor and
-             len(defaulted_args) >= len(constructor_args) - 1))
-        initializer_list_constructor = bool(
-            onearg_constructor and
-            Search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0]))
-        copy_constructor = bool(
-            onearg_constructor and
-            Match(r'(const\s+)?%s(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&' %
-                  re.escape(base_classname), constructor_args[0].strip()))
-
-        if (not is_marked_explicit and onearg_constructor and
-                not initializer_list_constructor and not copy_constructor):
-            if defaulted_args:
-                error(filename, linenum, 'runtime/explicit', 5,
-                      'Constructors callable with one argument '
-                      'should be marked explicit.')
-            else:
-                error(
-                    filename, linenum, 'runtime/explicit', 5,
-                    'Single-parameter constructors should be marked explicit.')
-        elif is_marked_explicit and not onearg_constructor:
-            if noarg_constructor:
-                error(
-                    filename, linenum, 'runtime/explicit', 5,
-                    'Zero-parameter constructors should not be marked explicit.')
-            else:
-                error(filename, linenum, 'runtime/explicit', 0,
-                      'Constructors that require multiple arguments '
-                      'should not be marked explicit.')
-
-
-def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error):
-    """Checks for the correctness of various spacing around function calls.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]
-
-    # Since function calls often occur inside if/for/while/switch
-    # expressions - which have their own, more liberal conventions - we
-    # first see if we should be looking inside such an expression for a
-    # function call, to which we can apply more strict standards.
-    fncall = line  # if there's no control flow construct, look at whole line
-    for pattern in (r'\bif\s*\((.*)\)\s*{', r'\bfor\s*\((.*)\)\s*{',
-                    r'\bwhile\s*\((.*)\)\s*[{;]', r'\bswitch\s*\((.*)\)\s*{'):
-        match = Search(pattern, line)
-        if match:
-            fncall = match.group(1)  # look inside the parens for function calls
-            break
-
-    # Except in if/for/while/switch, there should never be space
-    # immediately inside parens (eg "f( 3, 4 )").  We make an exception
-    # for nested parens ( (a+b) + c ).  Likewise, there should never be
-    # a space before a ( when it's a function argument.  I assume it's a
-    # function argument when the char before the whitespace is legal in
-    # a function name (alnum + _) and we're not starting a macro. Also ignore
-    # pointers and references to arrays and functions coz they're too tricky:
-    # we use a very simple way to recognize these:
-    # " (something)(maybe-something)" or
-    # " (something)(maybe-something," or
-    # " (something)[something]"
-    # Note that we assume the contents of [] to be short enough that
-    # they'll never need to wrap.
-    if (  # Ignore control structures.
-            not Search(
-                r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b',
-                fncall) and
-            # Ignore pointers/references to functions.
-            not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and
-            # Ignore pointers/references to arrays.
-            not Search(r' \([^)]+\)\[[^\]]+\]', fncall)):
-        if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):  # a ( used for a fn call
-            error(filename, linenum, 'whitespace/parens', 4,
-                  'Extra space after ( in function call')
-        elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
-            error(filename, linenum, 'whitespace/parens', 2,
-                  'Extra space after (')
-        if (Search(r'\w\s+\(', fncall) and
-                not Search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and
-                not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall) and
-                not Search(r'\bcase\s+\(', fncall)):
-            # TODO(unknown): Space after an operator function seem to be a common
-            # error, silence those for now by restricting them to highest verbosity.
-            if Search(r'\boperator_*\b', line):
-                error(filename, linenum, 'whitespace/parens', 0,
-                      'Extra space before ( in function call')
-            else:
-                error(filename, linenum, 'whitespace/parens', 4,
-                      'Extra space before ( in function call')
-        # If the ) is followed only by a newline or a { + newline, assume it's
-        # part of a control statement (if/while/etc), and don't complain
-        if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
-            # If the closing parenthesis is preceded by only whitespaces,
-            # try to give a more descriptive error message.
-            if Search(r'^\s+\)', fncall):
-                error(filename, linenum, 'whitespace/parens', 2,
-                      'Closing ) should be moved to the previous line')
-            else:
-                error(filename, linenum, 'whitespace/parens', 2,
-                      'Extra space before )')
-
-
-def IsBlankLine(line):
-    """Returns true if the given line is blank.
-
-  We consider a line to be blank if the line is empty or consists of
-  only white spaces.
-
-  Args:
-    line: A line of a string.
-
-  Returns:
-    True, if the given line is blank.
-  """
-    return not line or line.isspace()
-
-
-def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line,
-                                 error):
-    is_namespace_indent_item = (
-        len(nesting_state.stack) > 1 and
-        nesting_state.stack[-1].check_namespace_indentation and
-        isinstance(nesting_state.previous_stack_top, _NamespaceInfo) and
-        nesting_state.previous_stack_top == nesting_state.stack[-2])
-
-    if ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item,
-                                       clean_lines.elided, line):
-        CheckItemIndentationInNamespace(filename, clean_lines.elided, line,
-                                        error)
-
-
-def CheckForFunctionLengths(filename, clean_lines, linenum, function_state,
-                            error):
-    """Reports for long function bodies.
-
-  For an overview why this is done, see:
-  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
-
-  Uses a simplistic algorithm assuming other style guidelines
-  (especially spacing) are followed.
-  Only checks unindented functions, so class members are unchecked.
-  Trivial bodies are unchecked, so constructors with huge initializer lists
-  may be missed.
-  Blank/comment lines are not counted so as to avoid encouraging the removal
-  of vertical space and comments just to get through a lint check.
-  NOLINT *on the last line of a function* disables this check.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    function_state: Current function name and lines in body so far.
-    error: The function to call with any errors found.
-  """
-    lines = clean_lines.lines
-    line = lines[linenum]
-    joined_line = ''
-
-    starting_func = False
-    regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
-    match_result = Match(regexp, line)
-    if match_result:
-        # If the name is all caps and underscores, figure it's a macro and
-        # ignore it, unless it's TEST or TEST_F.
-        function_name = match_result.group(1).split()[-1]
-        if function_name == 'TEST' or function_name == 'TEST_F' or (
-                not Match(r'[A-Z_]+$', function_name)):
-            starting_func = True
-
-    if starting_func:
-        body_found = False
-        for start_linenum in xrange(linenum, clean_lines.NumLines()):
-            start_line = lines[start_linenum]
-            joined_line += ' ' + start_line.lstrip()
-            if Search(r'(;|})',
-                      start_line):  # Declarations and trivial functions
-                body_found = True
-                break  # ... ignore
-            elif Search(r'{', start_line):
-                body_found = True
-                function = Search(r'((\w|:)*)\(', line).group(1)
-                if Match(r'TEST', function):  # Handle TEST... macros
-                    parameter_regexp = Search(r'(\(.*\))', joined_line)
-                    if parameter_regexp:  # Ignore bad syntax
-                        function += parameter_regexp.group(1)
-                else:
-                    function += '()'
-                function_state.Begin(function)
-                break
-        if not body_found:
-            # No body for the function (or evidence of a non-function) was found.
-            error(filename, linenum, 'readability/fn_size', 5,
-                  'Lint failed to find start of function body.')
-    elif Match(r'^\}\s*$', line):  # function end
-        function_state.Check(error, filename, linenum)
-        function_state.End()
-    elif not Match(r'^\s*$', line):
-        function_state.Count()  # Count non-blank/non-comment lines.
-
-
-_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
-
-
-def CheckComment(line, filename, linenum, next_line_start, error):
-    """Checks for common mistakes in comments.
-
-  Args:
-    line: The line in question.
-    filename: The name of the current file.
-    linenum: The number of the line to check.
-    next_line_start: The first non-whitespace column of the next line.
-    error: The function to call with any errors found.
-  """
-    commentpos = line.find('//')
-    if commentpos != -1:
-        # Check if the // may be in quotes.  If so, ignore it
-        # Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
-        if (line.count('"', 0, commentpos) - line.count('\\"', 0, commentpos)
-            ) % 2 == 0:  # not in quotes
-            # Allow one space for new scopes, two spaces otherwise:
-            if (not (Match(r'^.*{ *//', line) and next_line_start == commentpos)
-                    and ((commentpos >= 1 and
-                          line[commentpos - 1] not in string.whitespace) or
-                         (commentpos >= 2 and
-                          line[commentpos - 2] not in string.whitespace))):
-                error(filename, linenum, 'whitespace/comments', 2,
-                      'At least two spaces is best between code and comments')
-
-            # Checks for common mistakes in TODO comments.
-            comment = line[commentpos:]
-            match = _RE_PATTERN_TODO.match(comment)
-            if match:
-                # One whitespace is correct; zero whitespace is handled elsewhere.
-                leading_whitespace = match.group(1)
-                if len(leading_whitespace) > 1:
-                    error(filename, linenum, 'whitespace/todo', 2,
-                          'Too many spaces before TODO')
-
-                username = match.group(2)
-                if not username:
-                    error(filename, linenum, 'readability/todo', 2,
-                          'Missing username in TODO; it should look like '
-                          '"// TODO(my_username): Stuff."')
-
-                middle_whitespace = match.group(3)
-                # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison
-                if middle_whitespace != ' ' and middle_whitespace != '':
-                    error(filename, linenum, 'whitespace/todo', 2,
-                          'TODO(my_username) should be followed by a space')
-
-            # If the comment contains an alphanumeric character, there
-            # should be a space somewhere between it and the // unless
-            # it's a /// or //! Doxygen comment.
-            if (Match(r'//[^ ]*\w', comment) and
-                    not Match(r'(///|//\!)(\s+|$)', comment)):
-                error(filename, linenum, 'whitespace/comments', 4,
-                      'Should have a space between // and comment')
-
-
-def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
-    """Checks for improper use of DISALLOW* macros.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    nesting_state: A NestingState instance which maintains information about
-                   the current stack of nested blocks being parsed.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]  # get rid of comments and strings
-
-    matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
-                     r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line)
-    if not matched:
-        return
-    if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo):
-        if nesting_state.stack[-1].access != 'private':
-            error(filename, linenum, 'readability/constructors', 3,
-                  '%s must be in the private: section' % matched.group(1))
-
-    else:
-        # Found DISALLOW* macro outside a class declaration, or perhaps it
-        # was used inside a function when it should have been part of the
-        # class declaration.  We could issue a warning here, but it
-        # probably resulted in a compiler error already.
-        pass
-
-
-def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
-    """Checks for the correctness of various spacing issues in the code.
-
-  Things we check for: spaces around operators, spaces after
-  if/for/while/switch, no spaces around parens in function calls, two
-  spaces between code and comment, don't start a block with a blank
-  line, don't end a function with a blank line, don't add a blank line
-  after public/protected/private, don't have too many blank lines in a row.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    nesting_state: A NestingState instance which maintains information about
-                   the current stack of nested blocks being parsed.
-    error: The function to call with any errors found.
-  """
-
-    # Don't use "elided" lines here, otherwise we can't check commented lines.
-    # Don't want to use "raw" either, because we don't want to check inside C++11
-    # raw strings,
-    raw = clean_lines.lines_without_raw_strings
-    line = raw[linenum]
-
-    # Before nixing comments, check if the line is blank for no good
-    # reason.  This includes the first line after a block is opened, and
-    # blank lines at the end of a function (ie, right before a line like '}'
-    #
-    # Skip all the blank line checks if we are immediately inside a
-    # namespace body.  In other words, don't issue blank line warnings
-    # for this block:
-    #   namespace {
-    #
-    #   }
-    #
-    # A warning about missing end of namespace comments will be issued instead.
-    #
-    # Also skip blank line checks for 'extern "C"' blocks, which are formatted
-    # like namespaces.
-    if (IsBlankLine(line) and not nesting_state.InNamespaceBody() and
-            not nesting_state.InExternC()):
-        elided = clean_lines.elided
-        prev_line = elided[linenum - 1]
-        prevbrace = prev_line.rfind('{')
-        # TODO(unknown): Don't complain if line before blank line, and line after,
-        #                both start with alnums and are indented the same amount.
-        #                This ignores whitespace at the start of a namespace block
-        #                because those are not usually indented.
-        if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
-            # OK, we have a blank line at the start of a code block.  Before we
-            # complain, we check if it is an exception to the rule: The previous
-            # non-empty line has the parameters of a function header that are indented
-            # 4 spaces (because they did not fit in a 80 column line when placed on
-            # the same line as the function name).  We also check for the case where
-            # the previous line is indented 6 spaces, which may happen when the
-            # initializers of a constructor do not fit into a 80 column line.
-            exception = False
-            if Match(r' {6}\w', prev_line):  # Initializer list?
-                # We are looking for the opening column of initializer list, which
-                # should be indented 4 spaces to cause 6 space indentation afterwards.
-                search_position = linenum - 2
-                while (search_position >= 0 and
-                       Match(r' {6}\w', elided[search_position])):
-                    search_position -= 1
-                exception = (search_position >= 0 and
-                             elided[search_position][:5] == '    :')
-            else:
-                # Search for the function arguments or an initializer list.  We use a
-                # simple heuristic here: If the line is indented 4 spaces; and we have a
-                # closing paren, without the opening paren, followed by an opening brace
-                # or colon (for initializer lists) we assume that it is the last line of
-                # a function header.  If we have a colon indented 4 spaces, it is an
-                # initializer list.
-                exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
-                                   prev_line) or Match(r' {4}:', prev_line))
-
-            if not exception:
-                error(filename, linenum, 'whitespace/blank_line', 2,
-                      'Redundant blank line at the start of a code block '
-                      'should be deleted.')
-        # Ignore blank lines at the end of a block in a long if-else
-        # chain, like this:
-        #   if (condition1) {
-        #     // Something followed by a blank line
-        #
-        #   } else if (condition2) {
-        #     // Something else
-        #   }
-        if linenum + 1 < clean_lines.NumLines():
-            next_line = raw[linenum + 1]
-            if (next_line and Match(r'\s*}', next_line) and
-                    next_line.find('} else ') == -1):
-                error(filename, linenum, 'whitespace/blank_line', 3,
-                      'Redundant blank line at the end of a code block '
-                      'should be deleted.')
-
-        matched = Match(r'\s*(public|protected|private):', prev_line)
-        if matched:
-            error(filename, linenum, 'whitespace/blank_line', 3,
-                  'Do not leave a blank line after "%s:"' % matched.group(1))
-
-    # Next, check comments
-    next_line_start = 0
-    if linenum + 1 < clean_lines.NumLines():
-        next_line = raw[linenum + 1]
-        next_line_start = len(next_line) - len(next_line.lstrip())
-    CheckComment(line, filename, linenum, next_line_start, error)
-
-    # get rid of comments and strings
-    line = clean_lines.elided[linenum]
-
-    # You shouldn't have spaces before your brackets, except maybe after
-    # 'delete []' or 'return []() {};'
-    if Search(r'\w\s+\[', line) and not Search(r'(?:delete|return)\s+\[', line):
-        error(filename, linenum, 'whitespace/braces', 5, 'Extra space before [')
-
-    # In range-based for, we wanted spaces before and after the colon, but
-    # not around "::" tokens that might appear.
-    if (Search(r'for *\(.*[^:]:[^: ]', line) or
-            Search(r'for *\(.*[^: ]:[^:]', line)):
-        error(filename, linenum, 'whitespace/forcolon', 2,
-              'Missing space around colon in range-based for loop')
-
-
-def CheckOperatorSpacing(filename, clean_lines, linenum, error):
-    """Checks for horizontal spacing around operators.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]
-
-    # Don't try to do spacing checks for operator methods.  Do this by
-    # replacing the troublesome characters with something else,
-    # preserving column position for all other characters.
-    #
-    # The replacement is done repeatedly to avoid false positives from
-    # operators that call operators.
-    while True:
-        match = Match(r'^(.*\boperator\b)(\S+)(\s*\(.*)$', line)
-        if match:
-            line = match.group(1) + ('_' * len(match.group(2))) + match.group(3)
-        else:
-            break
-
-    # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
-    # Otherwise not.  Note we only check for non-spaces on *both* sides;
-    # sometimes people put non-spaces on one side when aligning ='s among
-    # many lines (not that this is behavior that I approve of...)
-    if ((Search(r'[\w.]=', line) or
-         Search(r'=[\w.]', line)) and not Search(r'\b(if|while|for) ', line)
-            # Operators taken from [lex.operators] in C++11 standard.
-            and
-            not Search(r'(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)', line) and
-            not Search(r'operator=', line)):
-        error(filename, linenum, 'whitespace/operators', 4,
-              'Missing spaces around =')
-
-    # It's ok not to have spaces around binary operators like + - * /, but if
-    # there's too little whitespace, we get concerned.  It's hard to tell,
-    # though, so we punt on this one for now.  TODO.
-
-    # You should always have whitespace around binary operators.
-    #
-    # Check <= and >= first to avoid false positives with < and >, then
-    # check non-include lines for spacing around < and >.
-    #
-    # If the operator is followed by a comma, assume it's be used in a
-    # macro context and don't do any checks.  This avoids false
-    # positives.
-    #
-    # Note that && is not included here.  Those are checked separately
-    # in CheckRValueReference
-    match = Search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line)
-    if match:
-        error(filename, linenum, 'whitespace/operators', 3,
-              'Missing spaces around %s' % match.group(1))
-    elif not Match(r'#.*include', line):
-        # Look for < that is not surrounded by spaces.  This is only
-        # triggered if both sides are missing spaces, even though
-        # technically should should flag if at least one side is missing a
-        # space.  This is done to avoid some false positives with shifts.
-        match = Match(r'^(.*[^\s<])<[^\s=<,]', line)
-        if match:
-            (_, _, end_pos) = CloseExpression(clean_lines, linenum,
-                                              len(match.group(1)))
-            if end_pos <= -1:
-                error(filename, linenum, 'whitespace/operators', 3,
-                      'Missing spaces around <')
-
-        # Look for > that is not surrounded by spaces.  Similar to the
-        # above, we only trigger if both sides are missing spaces to avoid
-        # false positives with shifts.
-        match = Match(r'^(.*[^-\s>])>[^\s=>,]', line)
-        if match:
-            (_, _, start_pos) = ReverseCloseExpression(clean_lines, linenum,
-                                                       len(match.group(1)))
-            if start_pos <= -1:
-                error(filename, linenum, 'whitespace/operators', 3,
-                      'Missing spaces around >')
-
-    # We allow no-spaces around << when used like this: 10<<20, but
-    # not otherwise (particularly, not when used as streams)
-    #
-    # We also allow operators following an opening parenthesis, since
-    # those tend to be macros that deal with operators.
-    match = Search(r'(operator|[^\s(<])(?:L|UL|ULL|l|ul|ull)?<<([^\s,=<])',
-                   line)
-    if (match and
-            not (match.group(1).isdigit() and match.group(2).isdigit()) and
-            not (match.group(1) == 'operator' and match.group(2) == ';')):
-        error(filename, linenum, 'whitespace/operators', 3,
-              'Missing spaces around <<')
-
-    # We allow no-spaces around >> for almost anything.  This is because
-    # C++11 allows ">>" to close nested templates, which accounts for
-    # most cases when ">>" is not followed by a space.
-    #
-    # We still warn on ">>" followed by alpha character, because that is
-    # likely due to ">>" being used for right shifts, e.g.:
-    #   value >> alpha
-    #
-    # When ">>" is used to close templates, the alphanumeric letter that
-    # follows would be part of an identifier, and there should still be
-    # a space separating the template type and the identifier.
-    #   type<type<type>> alpha
-    match = Search(r'>>[a-zA-Z_]', line)
-    if match:
-        error(filename, linenum, 'whitespace/operators', 3,
-              'Missing spaces around >>')
-
-    # There shouldn't be space around unary operators
-    match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
-    if match:
-        error(filename, linenum, 'whitespace/operators', 4,
-              'Extra space for operator %s' % match.group(1))
-
-
-def CheckParenthesisSpacing(filename, clean_lines, linenum, error):
-    """Checks for horizontal spacing around parentheses.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]
-
-    # No spaces after an if, while, switch, or for
-    match = Search(r' (if\(|for\(|while\(|switch\()', line)
-    if match:
-        error(filename, linenum, 'whitespace/parens', 5,
-              'Missing space before ( in %s' % match.group(1))
-
-    # For if/for/while/switch, the left and right parens should be
-    # consistent about how many spaces are inside the parens, and
-    # there should either be zero or one spaces inside the parens.
-    # We don't want: "if ( foo)" or "if ( foo   )".
-    # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
-    match = Search(r'\b(if|for|while|switch)\s*'
-                   r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$', line)
-    if match:
-        if len(match.group(2)) != len(match.group(4)):
-            if not (match.group(3) == ';' and
-                    len(match.group(2)) == 1 + len(match.group(4)) or
-                    not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
-                error(filename, linenum, 'whitespace/parens', 5,
-                      'Mismatching spaces inside () in %s' % match.group(1))
-        if len(match.group(2)) not in [0, 1]:
-            error(filename, linenum, 'whitespace/parens', 5,
-                  'Should have zero or one spaces inside ( and ) in %s' %
-                  match.group(1))
-
-
-def CheckCommaSpacing(filename, clean_lines, linenum, error):
-    """Checks for horizontal spacing near commas and semicolons.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    raw = clean_lines.lines_without_raw_strings
-    line = clean_lines.elided[linenum]
-
-    # You should always have a space after a comma (either as fn arg or operator)
-    #
-    # This does not apply when the non-space character following the
-    # comma is another comma, since the only time when that happens is
-    # for empty macro arguments.
-    #
-    # We run this check in two passes: first pass on elided lines to
-    # verify that lines contain missing whitespaces, second pass on raw
-    # lines to confirm that those missing whitespaces are not due to
-    # elided comments.
-    if (Search(r',[^,\s]', ReplaceAll(r'\boperator\s*,\s*\(', 'F(', line)) and
-            Search(r',[^,\s]', raw[linenum])):
-        error(filename, linenum, 'whitespace/comma', 3, 'Missing space after ,')
-
-    # You should always have a space after a semicolon
-    # except for few corner cases
-    # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more
-    # space after ;
-    if Search(r';[^\s};\\)/]', line):
-        error(filename, linenum, 'whitespace/semicolon', 3,
-              'Missing space after ;')
-
-
-def CheckBracesSpacing(filename, clean_lines, linenum, error):
-    """Checks for horizontal spacing near commas.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]
-
-    # Except after an opening paren, or after another opening brace (in case of
-    # an initializer list, for instance), you should have spaces before your
-    # braces. And since you should never have braces at the beginning of a line,
-    # this is an easy test.
-    match = Match(r'^(.*[^ ({>]){', line)
-    if match:
-        # Try a bit harder to check for brace initialization.  This
-        # happens in one of the following forms:
-        #   Constructor() : initializer_list_{} { ... }
-        #   Constructor{}.MemberFunction()
-        #   Type variable{};
-        #   FunctionCall(type{}, ...);
-        #   LastArgument(..., type{});
-        #   LOG(INFO) << type{} << " ...";
-        #   map_of_type[{...}] = ...;
-        #   ternary = expr ? new type{} : nullptr;
-        #   OuterTemplate<InnerTemplateConstructor<Type>{}>
-        #
-        # We check for the character following the closing brace, and
-        # silence the warning if it's one of those listed above, i.e.
-        # "{.;,)<>]:".
-        #
-        # To account for nested initializer list, we allow any number of
-        # closing braces up to "{;,)<".  We can't simply silence the
-        # warning on first sight of closing brace, because that would
-        # cause false negatives for things that are not initializer lists.
-        #   Silence this:         But not this:
-        #     Outer{                if (...) {
-        #       Inner{...}            if (...){  // Missing space before {
-        #     };                    }
-        #
-        # There is a false negative with this approach if people inserted
-        # spurious semicolons, e.g. "if (cond){};", but we will catch the
-        # spurious semicolon with a separate check.
-        (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum,
-                                                        len(match.group(1)))
-        trailing_text = ''
-        if endpos > -1:
-            trailing_text = endline[endpos:]
-        for offset in xrange(endlinenum + 1,
-                             min(endlinenum + 3, clean_lines.NumLines() - 1)):
-            trailing_text += clean_lines.elided[offset]
-        if not Match(r'^[\s}]*[{.;,)<>\]:]', trailing_text):
-            error(filename, linenum, 'whitespace/braces', 5,
-                  'Missing space before {')
-
-    # Make sure '} else {' has spaces.
-    if Search(r'}else', line):
-        error(filename, linenum, 'whitespace/braces', 5,
-              'Missing space before else')
-
-    # You shouldn't have a space before a semicolon at the end of the line.
-    # There's a special case for "for" since the style guide allows space before
-    # the semicolon there.
-    if Search(r':\s*;\s*$', line):
-        error(filename, linenum, 'whitespace/semicolon', 5,
-              'Semicolon defining empty statement. Use {} instead.')
-    elif Search(r'^\s*;\s*$', line):
-        error(
-            filename, linenum, 'whitespace/semicolon', 5,
-            'Line contains only semicolon. If this should be an empty statement, '
-            'use {} instead.')
-    elif (Search(r'\s+;\s*$', line) and not Search(r'\bfor\b', line)):
-        error(filename, linenum, 'whitespace/semicolon', 5,
-              'Extra space before last semicolon. If this should be an empty '
-              'statement, use {} instead.')
-
-
-def IsDecltype(clean_lines, linenum, column):
-    """Check if the token ending on (linenum, column) is decltype().
-
-  Args:
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: the number of the line to check.
-    column: end column of the token to check.
-  Returns:
-    True if this token is decltype() expression, False otherwise.
-  """
-    (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column)
-    if start_col < 0:
-        return False
-    if Search(r'\bdecltype\s*$', text[0:start_col]):
-        return True
-    return False
-
-
-def IsTemplateParameterList(clean_lines, linenum, column):
-    """Check if the token ending on (linenum, column) is the end of template<>.
-
-  Args:
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: the number of the line to check.
-    column: end column of the token to check.
-  Returns:
-    True if this token is end of a template parameter list, False otherwise.
-  """
-    (_, startline, startpos) = ReverseCloseExpression(clean_lines, linenum,
-                                                      column)
-    if (startpos > -1 and Search(r'\btemplate\s*$',
-                                 clean_lines.elided[startline][0:startpos])):
-        return True
-    return False
-
-
-def IsRValueType(typenames, clean_lines, nesting_state, linenum, column):
-    """Check if the token ending on (linenum, column) is a type.
-
-  Assumes that text to the right of the column is "&&" or a function
-  name.
-
-  Args:
-    typenames: set of type names from template-argument-list.
-    clean_lines: A CleansedLines instance containing the file.
-    nesting_state: A NestingState instance which maintains information about
-                   the current stack of nested blocks being parsed.
-    linenum: the number of the line to check.
-    column: end column of the token to check.
-  Returns:
-    True if this token is a type, False if we are not sure.
-  """
-    prefix = clean_lines.elided[linenum][0:column]
-
-    # Get one word to the left.  If we failed to do so, this is most
-    # likely not a type, since it's unlikely that the type name and "&&"
-    # would be split across multiple lines.
-    match = Match(r'^(.*)(\b\w+|[>*)&])\s*$', prefix)
-    if not match:
-        return False
-
-    # Check text following the token.  If it's "&&>" or "&&," or "&&...", it's
-    # most likely a rvalue reference used inside a template.
-    suffix = clean_lines.elided[linenum][column:]
-    if Match(r'&&\s*(?:[>,]|\.\.\.)', suffix):
-        return True
-
-    # Check for known types and end of templates:
-    #   int&& variable
-    #   vector<int>&& variable
-    #
-    # Because this function is called recursively, we also need to
-    # recognize pointer and reference types:
-    #   int* Function()
-    #   int& Function()
-    if (match.group(2) in typenames or match.group(2) in [
-            'char', 'char16_t', 'char32_t', 'wchar_t', 'bool', 'short', 'int',
-            'long', 'signed', 'unsigned', 'float', 'double', 'void', 'auto',
-            '>', '*', '&'
-    ]):
-        return True
-
-    # If we see a close parenthesis, look for decltype on the other side.
-    # decltype would unambiguously identify a type, anything else is
-    # probably a parenthesized expression and not a type.
-    if match.group(2) == ')':
-        return IsDecltype(clean_lines, linenum,
-                          len(match.group(1)) + len(match.group(2)) - 1)
-
-    # Check for casts and cv-qualifiers.
-    #   match.group(1)  remainder
-    #   --------------  ---------
-    #   const_cast<     type&&
-    #   const           type&&
-    #   type            const&&
-    if Search(r'\b(?:const_cast\s*<|static_cast\s*<|dynamic_cast\s*<|'
-              r'reinterpret_cast\s*<|\w+\s)\s*$', match.group(1)):
-        return True
-
-    # Look for a preceding symbol that might help differentiate the context.
-    # These are the cases that would be ambiguous:
-    #   match.group(1)  remainder
-    #   --------------  ---------
-    #   Call         (   expression &&
-    #   Declaration  (   type&&
-    #   sizeof       (   type&&
-    #   if           (   expression &&
-    #   while        (   expression &&
-    #   for          (   type&&
-    #   for(         ;   expression &&
-    #   statement    ;   type&&
-    #   block        {   type&&
-    #   constructor  {   expression &&
-    start = linenum
-    line = match.group(1)
-    match_symbol = None
-    while start >= 0:
-        # We want to skip over identifiers and commas to get to a symbol.
-        # Commas are skipped so that we can find the opening parenthesis
-        # for function parameter lists.
-        match_symbol = Match(r'^(.*)([^\w\s,])[\w\s,]*$', line)
-        if match_symbol:
-            break
-        start -= 1
-        line = clean_lines.elided[start]
-
-    if not match_symbol:
-        # Probably the first statement in the file is an rvalue reference
-        return True
-
-    if match_symbol.group(2) == '}':
-        # Found closing brace, probably an indicate of this:
-        #   block{} type&&
-        return True
-
-    if match_symbol.group(2) == ';':
-        # Found semicolon, probably one of these:
-        #   for(; expression &&
-        #   statement; type&&
-
-        # Look for the previous 'for(' in the previous lines.
-        before_text = match_symbol.group(1)
-        for i in xrange(start - 1, max(start - 6, 0), -1):
-            before_text = clean_lines.elided[i] + before_text
-        if Search(r'for\s*\([^{};]*$', before_text):
-            # This is the condition inside a for-loop
-            return False
-
-        # Did not find a for-init-statement before this semicolon, so this
-        # is probably a new statement and not a condition.
-        return True
-
-    if match_symbol.group(2) == '{':
-        # Found opening brace, probably one of these:
-        #   block{ type&& = ... ; }
-        #   constructor{ expression && expression }
-
-        # Look for a closing brace or a semicolon.  If we see a semicolon
-        # first, this is probably a rvalue reference.
-        line = clean_lines.elided[start][0:len(match_symbol.group(1)) + 1]
-        end = start
-        depth = 1
-        while True:
-            for ch in line:
-                if ch == ';':
-                    return True
-                elif ch == '{':
-                    depth += 1
-                elif ch == '}':
-                    depth -= 1
-                    if depth == 0:
-                        return False
-            end += 1
-            if end >= clean_lines.NumLines():
-                break
-            line = clean_lines.elided[end]
-        # Incomplete program?
-        return False
-
-    if match_symbol.group(2) == '(':
-        # Opening parenthesis.  Need to check what's to the left of the
-        # parenthesis.  Look back one extra line for additional context.
-        before_text = match_symbol.group(1)
-        if linenum > 1:
-            before_text = clean_lines.elided[linenum - 1] + before_text
-        before_text = match_symbol.group(1)
-
-        # Patterns that are likely to be types:
-        #   [](type&&
-        #   for (type&&
-        #   sizeof(type&&
-        #   operator=(type&&
-        #
-        if Search(r'(?:\]|\bfor|\bsizeof|\boperator\s*\S+\s*)\s*$',
-                  before_text):
-            return True
-
-        # Patterns that are likely to be expressions:
-        #   if (expression &&
-        #   while (expression &&
-        #   : initializer(expression &&
-        #   , initializer(expression &&
-        #   ( FunctionCall(expression &&
-        #   + FunctionCall(expression &&
-        #   + (expression &&
-        #
-        # The last '+' represents operators such as '+' and '-'.
-        if Search(r'(?:\bif|\bwhile|[-+=%^(<!?:,&*]\s*)$', before_text):
-            return False
-
-        # Something else.  Check that tokens to the left look like
-        #   return_type function_name
-        match_func = Match(r'^(.*\S.*)\s+\w(?:\w|::)*(?:<[^<>]*>)?\s*$',
-                           match_symbol.group(1))
-        if match_func:
-            # Check for constructors, which don't have return types.
-            if Search(r'\b(?:explicit|inline)$', match_func.group(1)):
-                return True
-            implicit_constructor = Match(r'\s*(\w+)\((?:const\s+)?(\w+)',
-                                         prefix)
-            if (implicit_constructor and implicit_constructor.group(1) ==
-                    implicit_constructor.group(2)):
-                return True
-            return IsRValueType(typenames, clean_lines, nesting_state, linenum,
-                                len(match_func.group(1)))
-
-        # Nothing before the function name.  If this is inside a block scope,
-        # this is probably a function call.
-        return not (nesting_state.previous_stack_top and
-                    nesting_state.previous_stack_top.IsBlockInfo())
-
-    if match_symbol.group(2) == '>':
-        # Possibly a closing bracket, check that what's on the other side
-        # looks like the start of a template.
-        return IsTemplateParameterList(clean_lines, start,
-                                       len(match_symbol.group(1)))
-
-    # Some other symbol, usually something like "a=b&&c".  This is most
-    # likely not a type.
-    return False
-
-
-def IsDeletedOrDefault(clean_lines, linenum):
-    """Check if current constructor or operator is deleted or default.
-
-  Args:
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-  Returns:
-    True if this is a deleted or default constructor.
-  """
-    open_paren = clean_lines.elided[linenum].find('(')
-    if open_paren < 0:
-        return False
-    (close_line, _, close_paren) = CloseExpression(clean_lines, linenum,
-                                                   open_paren)
-    if close_paren < 0:
-        return False
-    return Match(r'\s*=\s*(?:delete|default)\b', close_line[close_paren:])
-
-
-def IsRValueAllowed(clean_lines, linenum, typenames):
-    """Check if RValue reference is allowed on a particular line.
-
-  Args:
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    typenames: set of type names from template-argument-list.
-  Returns:
-    True if line is within the region where RValue references are allowed.
-  """
-    # Allow region marked by PUSH/POP macros
-    for i in xrange(linenum, 0, -1):
-        line = clean_lines.elided[i]
-        if Match(r'GOOGLE_ALLOW_RVALUE_REFERENCES_(?:PUSH|POP)', line):
-            if not line.endswith('PUSH'):
-                return False
-            for j in xrange(linenum, clean_lines.NumLines(), 1):
-                line = clean_lines.elided[j]
-                if Match(r'GOOGLE_ALLOW_RVALUE_REFERENCES_(?:PUSH|POP)', line):
-                    return line.endswith('POP')
-
-    # Allow operator=
-    line = clean_lines.elided[linenum]
-    if Search(r'\boperator\s*=\s*\(', line):
-        return IsDeletedOrDefault(clean_lines, linenum)
-
-    # Allow constructors
-    match = Match(r'\s*(?:[\w<>]+::)*([\w<>]+)\s*::\s*([\w<>]+)\s*\(', line)
-    if match and match.group(1) == match.group(2):
-        return IsDeletedOrDefault(clean_lines, linenum)
-    if Search(r'\b(?:explicit|inline)\s+[\w<>]+\s*\(', line):
-        return IsDeletedOrDefault(clean_lines, linenum)
-
-    if Match(r'\s*[\w<>]+\s*\(', line):
-        previous_line = 'ReturnType'
-        if linenum > 0:
-            previous_line = clean_lines.elided[linenum - 1]
-        if Match(r'^\s*$', previous_line) or Search(r'[{}:;]\s*$',
-                                                    previous_line):
-            return IsDeletedOrDefault(clean_lines, linenum)
-
-    # Reject types not mentioned in template-argument-list
-    while line:
-        match = Match(r'^.*?(\w+)\s*&&(.*)$', line)
-        if not match:
-            break
-        if match.group(1) not in typenames:
-            return False
-        line = match.group(2)
-
-    # All RValue types that were in template-argument-list should have
-    # been removed by now.  Those were allowed, assuming that they will
-    # be forwarded.
-    #
-    # If there are no remaining RValue types left (i.e. types that were
-    # not found in template-argument-list), flag those as not allowed.
-    return line.find('&&') < 0
-
-
-def GetTemplateArgs(clean_lines, linenum):
-    """Find list of template arguments associated with this function declaration.
-
-  Args:
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: Line number containing the start of the function declaration,
-             usually one line after the end of the template-argument-list.
-  Returns:
-    Set of type names, or empty set if this does not appear to have
-    any template parameters.
-  """
-    # Find start of function
-    func_line = linenum
-    while func_line > 0:
-        line = clean_lines.elided[func_line]
-        if Match(r'^\s*$', line):
-            return set()
-        if line.find('(') >= 0:
-            break
-        func_line -= 1
-    if func_line == 0:
-        return set()
-
-    # Collapse template-argument-list into a single string
-    argument_list = ''
-    match = Match(r'^(\s*template\s*)<', clean_lines.elided[func_line])
-    if match:
-        # template-argument-list on the same line as function name
-        start_col = len(match.group(1))
-        _, end_line, end_col = CloseExpression(clean_lines, func_line,
-                                               start_col)
-        if end_col > -1 and end_line == func_line:
-            start_col += 1  # Skip the opening bracket
-            argument_list = clean_lines.elided[func_line][start_col:end_col]
-
-    elif func_line > 1:
-        # template-argument-list one line before function name
-        match = Match(r'^(.*)>\s*$', clean_lines.elided[func_line - 1])
-        if match:
-            end_col = len(match.group(1))
-            _, start_line, start_col = ReverseCloseExpression(
-                clean_lines, func_line - 1, end_col)
-            if start_col > -1:
-                start_col += 1  # Skip the opening bracket
-                while start_line < func_line - 1:
-                    argument_list += clean_lines.elided[start_line][start_col:]
-                    start_col = 0
-                    start_line += 1
-                argument_list += clean_lines.elided[func_line - 1][start_col:
-                                                                   end_col]
-
-    if not argument_list:
-        return set()
-
-    # Extract type names
-    typenames = set()
-    while True:
-        match = Match(r'^[,\s]*(?:typename|class)(?:\.\.\.)?\s+(\w+)(.*)$',
-                      argument_list)
-        if not match:
-            break
-        typenames.add(match.group(1))
-        argument_list = match.group(2)
-    return typenames
-
-
-def CheckRValueReference(filename, clean_lines, linenum, nesting_state, error):
-    """Check for rvalue references.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    nesting_state: A NestingState instance which maintains information about
-                   the current stack of nested blocks being parsed.
-    error: The function to call with any errors found.
-  """
-    # Find lines missing spaces around &&.
-    # TODO(unknown): currently we don't check for rvalue references
-    # with spaces surrounding the && to avoid false positives with
-    # boolean expressions.
-    line = clean_lines.elided[linenum]
-    match = Match(r'^(.*\S)&&', line)
-    if not match:
-        match = Match(r'(.*)&&\S', line)
-    if (not match) or '(&&)' in line or Search(r'\boperator\s*$',
-                                               match.group(1)):
-        return
-
-    # Either poorly formed && or an rvalue reference, check the context
-    # to get a more accurate error message.  Mostly we want to determine
-    # if what's to the left of "&&" is a type or not.
-    typenames = GetTemplateArgs(clean_lines, linenum)
-    and_pos = len(match.group(1))
-    if IsRValueType(typenames, clean_lines, nesting_state, linenum, and_pos):
-        if not IsRValueAllowed(clean_lines, linenum, typenames):
-            error(filename, linenum, 'build/c++11', 3,
-                  'RValue references are an unapproved C++ feature.')
-    else:
-        error(filename, linenum, 'whitespace/operators', 3,
-              'Missing spaces around &&')
-
-
-def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
-    """Checks for additional blank line issues related to sections.
-
-  Currently the only thing checked here is blank line before protected/private.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    class_info: A _ClassInfo objects.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    # Skip checks if the class is small, where small means 25 lines or less.
-    # 25 lines seems like a good cutoff since that's the usual height of
-    # terminals, and any class that can't fit in one screen can't really
-    # be considered "small".
-    #
-    # Also skip checks if we are on the first line.  This accounts for
-    # classes that look like
-    #   class Foo { public: ... };
-    #
-    # If we didn't find the end of the class, last_line would be zero,
-    # and the check will be skipped by the first condition.
-    if (class_info.last_line - class_info.starting_linenum <= 24 or
-            linenum <= class_info.starting_linenum):
-        return
-
-    matched = Match(r'\s*(public|protected|private):',
-                    clean_lines.lines[linenum])
-    if matched:
-        # Issue warning if the line before public/protected/private was
-        # not a blank line, but don't do this if the previous line contains
-        # "class" or "struct".  This can happen two ways:
-        #  - We are at the beginning of the class.
-        #  - We are forward-declaring an inner class that is semantically
-        #    private, but needed to be public for implementation reasons.
-        # Also ignores cases where the previous line ends with a backslash as can be
-        # common when defining classes in C macros.
-        prev_line = clean_lines.lines[linenum - 1]
-        if (not IsBlankLine(prev_line) and
-                not Search(r'\b(class|struct)\b', prev_line) and
-                not Search(r'\\$', prev_line)):
-            # Try a bit harder to find the beginning of the class.  This is to
-            # account for multi-line base-specifier lists, e.g.:
-            #   class Derived
-            #       : public Base {
-            end_class_head = class_info.starting_linenum
-            for i in range(class_info.starting_linenum, linenum):
-                if Search(r'\{\s*$', clean_lines.lines[i]):
-                    end_class_head = i
-                    break
-            if end_class_head < linenum - 1:
-                error(filename, linenum, 'whitespace/blank_line', 3,
-                      '"%s:" should be preceded by a blank line' %
-                      matched.group(1))
-
-
-def GetPreviousNonBlankLine(clean_lines, linenum):
-    """Return the most recent non-blank line and its line number.
-
-  Args:
-    clean_lines: A CleansedLines instance containing the file contents.
-    linenum: The number of the line to check.
-
-  Returns:
-    A tuple with two elements.  The first element is the contents of the last
-    non-blank line before the current line, or the empty string if this is the
-    first non-blank line.  The second is the line number of that line, or -1
-    if this is the first non-blank line.
-  """
-
-    prevlinenum = linenum - 1
-    while prevlinenum >= 0:
-        prevline = clean_lines.elided[prevlinenum]
-        if not IsBlankLine(prevline):  # if not a blank line...
-            return (prevline, prevlinenum)
-        prevlinenum -= 1
-    return ('', -1)
-
-
-def CheckBraces(filename, clean_lines, linenum, error):
-    """Looks for misplaced braces (e.g. at the end of line).
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-
-    line = clean_lines.elided[linenum]  # get rid of comments and strings
-
-    if Match(r'\s*{\s*$', line):
-        # We allow an open brace to start a line in the case where someone is using
-        # braces in a block to explicitly create a new scope, which is commonly used
-        # to control the lifetime of stack-allocated variables.  Braces are also
-        # used for brace initializers inside function calls.  We don't detect this
-        # perfectly: we just don't complain if the last non-whitespace character on
-        # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
-        # previous line starts a preprocessor block.
-        prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
-        if (not Search(r'[,;:}{(]\s*$', prevline) and
-                not Match(r'\s*#', prevline)):
-            error(filename, linenum, 'whitespace/braces', 4,
-                  '{ should almost always be at the end of the previous line')
-
-    # An else clause should be on the same line as the preceding closing brace.
-    if Match(r'\s*else\b\s*(?:if\b|\{|$)', line):
-        prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
-        if Match(r'\s*}\s*$', prevline):
-            error(filename, linenum, 'whitespace/newline', 4,
-                  'An else should appear on the same line as the preceding }')
-
-    # If braces come on one side of an else, they should be on both.
-    # However, we have to worry about "else if" that spans multiple lines!
-    if Search(r'else if\s*\(', line):  # could be multi-line if
-        brace_on_left = bool(Search(r'}\s*else if\s*\(', line))
-        # find the ( after the if
-        pos = line.find('else if')
-        pos = line.find('(', pos)
-        if pos > 0:
-            (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
-            brace_on_right = endline[endpos:].find('{') != -1
-            if brace_on_left != brace_on_right:  # must be brace after if
-                error(
-                    filename, linenum, 'readability/braces', 5,
-                    'If an else has a brace on one side, it should have it on both'
-                )
-    elif Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
-        error(filename, linenum, 'readability/braces', 5,
-              'If an else has a brace on one side, it should have it on both')
-
-    # Likewise, an else should never have the else clause on the same line
-    if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
-        error(filename, linenum, 'whitespace/newline', 4,
-              'Else clause should never be on same line as else (use 2 lines)')
-
-    # In the same way, a do/while should never be on one line
-    if Match(r'\s*do [^\s{]', line):
-        error(filename, linenum, 'whitespace/newline', 4,
-              'do/while clauses should not be on a single line')
-
-    # Check single-line if/else bodies. The style guide says 'curly braces are not
-    # required for single-line statements'. We additionally allow multi-line,
-    # single statements, but we reject anything with more than one semicolon in
-    # it. This means that the first semicolon after the if should be at the end of
-    # its line, and the line after that should have an indent level equal to or
-    # lower than the if. We also check for ambiguous if/else nesting without
-    # braces.
-    if_else_match = Search(r'\b(if\s*\(|else\b)', line)
-    if if_else_match and not Match(r'\s*#', line):
-        if_indent = GetIndentLevel(line)
-        endline, endlinenum, endpos = line, linenum, if_else_match.end()
-        if_match = Search(r'\bif\s*\(', line)
-        if if_match:
-            # This could be a multiline if condition, so find the end first.
-            pos = if_match.end() - 1
-            (endline, endlinenum, endpos) = CloseExpression(clean_lines,
-                                                            linenum, pos)
-        # Check for an opening brace, either directly after the if or on the next
-        # line. If found, this isn't a single-statement conditional.
-        if (not Match(r'\s*{', endline[endpos:]) and
-                not (Match(r'\s*$', endline[endpos:]) and endlinenum <
-                     (len(clean_lines.elided) - 1) and
-                     Match(r'\s*{', clean_lines.elided[endlinenum + 1]))):
-            while (endlinenum < len(clean_lines.elided) and
-                   ';' not in clean_lines.elided[endlinenum][endpos:]):
-                endlinenum += 1
-                endpos = 0
-            if endlinenum < len(clean_lines.elided):
-                endline = clean_lines.elided[endlinenum]
-                # We allow a mix of whitespace and closing braces (e.g. for one-liner
-                # methods) and a single \ after the semicolon (for macros)
-                endpos = endline.find(';')
-                if not Match(r';[\s}]*(\\?)$', endline[endpos:]):
-                    # Semicolon isn't the last character, there's something trailing.
-                    # Output a warning if the semicolon is not contained inside
-                    # a lambda expression.
-                    if not Match(
-                            r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$',
-                            endline):
-                        error(
-                            filename, linenum, 'readability/braces', 4,
-                            'If/else bodies with multiple statements require braces'
-                        )
-                elif endlinenum < len(clean_lines.elided) - 1:
-                    # Make sure the next line is dedented
-                    next_line = clean_lines.elided[endlinenum + 1]
-                    next_indent = GetIndentLevel(next_line)
-                    # With ambiguous nested if statements, this will error out on the
-                    # if that *doesn't* match the else, regardless of whether it's the
-                    # inner one or outer one.
-                    if (if_match and Match(r'\s*else\b', next_line) and
-                            next_indent != if_indent):
-                        error(
-                            filename, linenum, 'readability/braces', 4,
-                            'Else clause should be indented at the same level as if. '
-                            'Ambiguous nested if/else chains require braces.')
-                    elif next_indent > if_indent:
-                        error(
-                            filename, linenum, 'readability/braces', 4,
-                            'If/else bodies with multiple statements require braces'
-                        )
-
-
-def CheckTrailingSemicolon(filename, clean_lines, linenum, error):
-    """Looks for redundant trailing semicolon.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-
-    line = clean_lines.elided[linenum]
-
-    # Block bodies should not be followed by a semicolon.  Due to C++11
-    # brace initialization, there are more places where semicolons are
-    # required than not, so we use a whitelist approach to check these
-    # rather than a blacklist.  These are the places where "};" should
-    # be replaced by just "}":
-    # 1. Some flavor of block following closing parenthesis:
-    #    for (;;) {};
-    #    while (...) {};
-    #    switch (...) {};
-    #    Function(...) {};
-    #    if (...) {};
-    #    if (...) else if (...) {};
-    #
-    # 2. else block:
-    #    if (...) else {};
-    #
-    # 3. const member function:
-    #    Function(...) const {};
-    #
-    # 4. Block following some statement:
-    #    x = 42;
-    #    {};
-    #
-    # 5. Block at the beginning of a function:
-    #    Function(...) {
-    #      {};
-    #    }
-    #
-    #    Note that naively checking for the preceding "{" will also match
-    #    braces inside multi-dimensional arrays, but this is fine since
-    #    that expression will not contain semicolons.
-    #
-    # 6. Block following another block:
-    #    while (true) {}
-    #    {};
-    #
-    # 7. End of namespaces:
-    #    namespace {};
-    #
-    #    These semicolons seems far more common than other kinds of
-    #    redundant semicolons, possibly due to people converting classes
-    #    to namespaces.  For now we do not warn for this case.
-    #
-    # Try matching case 1 first.
-    match = Match(r'^(.*\)\s*)\{', line)
-    if match:
-        # Matched closing parenthesis (case 1).  Check the token before the
-        # matching opening parenthesis, and don't warn if it looks like a
-        # macro.  This avoids these false positives:
-        #  - macro that defines a base class
-        #  - multi-line macro that defines a base class
-        #  - macro that defines the whole class-head
-        #
-        # But we still issue warnings for macros that we know are safe to
-        # warn, specifically:
-        #  - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
-        #  - TYPED_TEST
-        #  - INTERFACE_DEF
-        #  - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
-        #
-        # We implement a whitelist of safe macros instead of a blacklist of
-        # unsafe macros, even though the latter appears less frequently in
-        # google code and would have been easier to implement.  This is because
-        # the downside for getting the whitelist wrong means some extra
-        # semicolons, while the downside for getting the blacklist wrong
-        # would result in compile errors.
-        #
-        # In addition to macros, we also don't want to warn on
-        #  - Compound literals
-        #  - Lambdas
-        #  - alignas specifier with anonymous structs:
-        closing_brace_pos = match.group(1).rfind(')')
-        opening_parenthesis = ReverseCloseExpression(clean_lines, linenum,
-                                                     closing_brace_pos)
-        if opening_parenthesis[2] > -1:
-            line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
-            macro = Search(r'\b([A-Z_]+)\s*$', line_prefix)
-            func = Match(r'^(.*\])\s*$', line_prefix)
-            if ((macro and macro.group(1) not in
-                 ('TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
-                  'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
-                  'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
-                (func and not Search(r'\boperator\s*\[\s*\]', func.group(1))) or
-                    Search(r'\b(?:struct|union)\s+alignas\s*$', line_prefix) or
-                    Search(r'\s+=\s*$', line_prefix)):
-                match = None
-        if (match and opening_parenthesis[1] > 1 and Search(
-                r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])):
-            # Multi-line lambda-expression
-            match = None
-
-    else:
-        # Try matching cases 2-3.
-        match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line)
-        if not match:
-            # Try matching cases 4-6.  These are always matched on separate lines.
-            #
-            # Note that we can't simply concatenate the previous line to the
-            # current line and do a single match, otherwise we may output
-            # duplicate warnings for the blank line case:
-            #   if (cond) {
-            #     // blank line
-            #   }
-            prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
-            if prevline and Search(r'[;{}]\s*$', prevline):
-                match = Match(r'^(\s*)\{', line)
-
-    # Check matching closing brace
-    if match:
-        (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum,
-                                                        len(match.group(1)))
-        if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
-            # Current {} pair is eligible for semicolon check, and we have found
-            # the redundant semicolon, output warning here.
-            #
-            # Note: because we are scanning forward for opening braces, and
-            # outputting warnings for the matching closing brace, if there are
-            # nested blocks with trailing semicolons, we will get the error
-            # messages in reversed order.
-            error(filename, endlinenum, 'readability/braces', 4,
-                  "You don't need a ; after a }")
-
-
-def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
-    """Look for empty loop/conditional body with only a single semicolon.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-
-    # Search for loop keywords at the beginning of the line.  Because only
-    # whitespaces are allowed before the keywords, this will also ignore most
-    # do-while-loops, since those lines should start with closing brace.
-    #
-    # We also check "if" blocks here, since an empty conditional block
-    # is likely an error.
-    line = clean_lines.elided[linenum]
-    matched = Match(r'\s*(for|while|if)\s*\(', line)
-    if matched:
-        # Find the end of the conditional expression
-        (end_line, end_linenum, end_pos) = CloseExpression(clean_lines, linenum,
-                                                           line.find('('))
-
-        # Output warning if what follows the condition expression is a semicolon.
-        # No warning for all other cases, including whitespace or newline, since we
-        # have a separate check for semicolons preceded by whitespace.
-        if end_pos >= 0 and Match(r';', end_line[end_pos:]):
-            if matched.group(1) == 'if':
-                error(filename, end_linenum,
-                      'whitespace/empty_conditional_body', 5,
-                      'Empty conditional bodies should use {}')
-            else:
-                error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
-                      'Empty loop bodies should use {} or continue')
-
-
-def FindCheckMacro(line):
-    """Find a replaceable CHECK-like macro.
-
-  Args:
-    line: line to search on.
-  Returns:
-    (macro name, start position), or (None, -1) if no replaceable
-    macro is found.
-  """
-    for macro in _CHECK_MACROS:
-        i = line.find(macro)
-        if i >= 0:
-            # Find opening parenthesis.  Do a regular expression match here
-            # to make sure that we are matching the expected CHECK macro, as
-            # opposed to some other macro that happens to contain the CHECK
-            # substring.
-            matched = Match(r'^(.*\b' + macro + r'\s*)\(', line)
-            if not matched:
-                continue
-            return (macro, len(matched.group(1)))
-    return (None, -1)
-
-
-def CheckCheck(filename, clean_lines, linenum, error):
-    """Checks the use of CHECK and EXPECT macros.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-
-    # Decide the set of replacement macros that should be suggested
-    lines = clean_lines.elided
-    (check_macro, start_pos) = FindCheckMacro(lines[linenum])
-    if not check_macro:
-        return
-
-    # Find end of the boolean expression by matching parentheses
-    (last_line, end_line, end_pos) = CloseExpression(clean_lines, linenum,
-                                                     start_pos)
-    if end_pos < 0:
-        return
-
-    # If the check macro is followed by something other than a
-    # semicolon, assume users will log their own custom error messages
-    # and don't suggest any replacements.
-    if not Match(r'\s*;', last_line[end_pos:]):
-        return
-
-    if linenum == end_line:
-        expression = lines[linenum][start_pos + 1:end_pos - 1]
-    else:
-        expression = lines[linenum][start_pos + 1:]
-        for i in xrange(linenum + 1, end_line):
-            expression += lines[i]
-        expression += last_line[0:end_pos - 1]
-
-    # Parse expression so that we can take parentheses into account.
-    # This avoids false positives for inputs like "CHECK((a < 4) == b)",
-    # which is not replaceable by CHECK_LE.
-    lhs = ''
-    rhs = ''
-    operator = None
-    while expression:
-        matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||'
-                        r'==|!=|>=|>|<=|<|\()(.*)$', expression)
-        if matched:
-            token = matched.group(1)
-            if token == '(':
-                # Parenthesized operand
-                expression = matched.group(2)
-                (end, _) = FindEndOfExpressionInLine(expression, 0, ['('])
-                if end < 0:
-                    return  # Unmatched parenthesis
-                lhs += '(' + expression[0:end]
-                expression = expression[end:]
-            elif token in ('&&', '||'):
-                # Logical and/or operators.  This means the expression
-                # contains more than one term, for example:
-                #   CHECK(42 < a && a < b);
-                #
-                # These are not replaceable with CHECK_LE, so bail out early.
-                return
-            elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
-                # Non-relational operator
-                lhs += token
-                expression = matched.group(2)
-            else:
-                # Relational operator
-                operator = token
-                rhs = matched.group(2)
-                break
-        else:
-            # Unparenthesized operand.  Instead of appending to lhs one character
-            # at a time, we do another regular expression match to consume several
-            # characters at once if possible.  Trivial benchmark shows that this
-            # is more efficient when the operands are longer than a single
-            # character, which is generally the case.
-            matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression)
-            if not matched:
-                matched = Match(r'^(\s*\S)(.*)$', expression)
-                if not matched:
-                    break
-            lhs += matched.group(1)
-            expression = matched.group(2)
-
-    # Only apply checks if we got all parts of the boolean expression
-    if not (lhs and operator and rhs):
-        return
-
-    # Check that rhs do not contain logical operators.  We already know
-    # that lhs is fine since the loop above parses out && and ||.
-    if rhs.find('&&') > -1 or rhs.find('||') > -1:
-        return
-
-    # At least one of the operands must be a constant literal.  This is
-    # to avoid suggesting replacements for unprintable things like
-    # CHECK(variable != iterator)
-    #
-    # The following pattern matches decimal, hex integers, strings, and
-    # characters (in that order).
-    lhs = lhs.strip()
-    rhs = rhs.strip()
-    match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
-    if Match(match_constant, lhs) or Match(match_constant, rhs):
-        # Note: since we know both lhs and rhs, we can provide a more
-        # descriptive error message like:
-        #   Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
-        # Instead of:
-        #   Consider using CHECK_EQ instead of CHECK(a == b)
-        #
-        # We are still keeping the less descriptive message because if lhs
-        # or rhs gets long, the error message might become unreadable.
-        error(filename, linenum, 'readability/check', 2,
-              'Consider using %s instead of %s(a %s b)' %
-              (_CHECK_REPLACEMENT[check_macro][operator], check_macro,
-               operator))
-
-
-def CheckAltTokens(filename, clean_lines, linenum, error):
-    """Check alternative keywords being used in boolean expressions.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]
-
-    # Avoid preprocessor lines
-    if Match(r'^\s*#', line):
-        return
-
-    # Last ditch effort to avoid multi-line comments.  This will not help
-    # if the comment started before the current line or ended after the
-    # current line, but it catches most of the false positives.  At least,
-    # it provides a way to workaround this warning for people who use
-    # multi-line comments in preprocessor macros.
-    #
-    # TODO(unknown): remove this once cpplint has better support for
-    # multi-line comments.
-    if line.find('/*') >= 0 or line.find('*/') >= 0:
-        return
-
-    for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
-        error(filename, linenum, 'readability/alt_tokens', 2,
-              'Use operator %s instead of %s' % (
-                  _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1)))
-
-
-def GetLineWidth(line):
-    """Determines the width of the line in column positions.
-
-  Args:
-    line: A string, which may be a Unicode string.
-
-  Returns:
-    The width of the line in column positions, accounting for Unicode
-    combining characters and wide characters.
-  """
-    if isinstance(line, unicode):
-        width = 0
-        for uc in unicodedata.normalize('NFC', line):
-            if unicodedata.east_asian_width(uc) in ('W', 'F'):
-                width += 2
-            elif not unicodedata.combining(uc):
-                width += 1
-        return width
-    else:
-        return len(line)
-
-
-def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
-               error):
-    """Checks rules from the 'C++ style rules' section of cppguide.html.
-
-  Most of these rules are hard to test (naming, comment style), but we
-  do what we can.  In particular we check for 2-space indents, line lengths,
-  tab usage, spaces inside code, etc.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    file_extension: The extension (without the dot) of the filename.
-    nesting_state: A NestingState instance which maintains information about
-                   the current stack of nested blocks being parsed.
-    error: The function to call with any errors found.
-  """
-
-    # Don't use "elided" lines here, otherwise we can't check commented lines.
-    # Don't want to use "raw" either, because we don't want to check inside C++11
-    # raw strings,
-    raw_lines = clean_lines.lines_without_raw_strings
-    line = raw_lines[linenum]
-
-    if line.find('\t') != -1:
-        error(filename, linenum, 'whitespace/tab', 1,
-              'Tab found; better to use spaces')
-
-    # One or three blank spaces at the beginning of the line is weird; it's
-    # hard to reconcile that with 2-space indents.
-    # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
-    # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
-    # if(RLENGTH > 20) complain = 0;
-    # if(match($0, " +(error|private|public|protected):")) complain = 0;
-    # if(match(prev, "&& *$")) complain = 0;
-    # if(match(prev, "\\|\\| *$")) complain = 0;
-    # if(match(prev, "[\",=><] *$")) complain = 0;
-    # if(match($0, " <<")) complain = 0;
-    # if(match(prev, " +for \\(")) complain = 0;
-    # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
-    scope_or_label_pattern = r'\s*\w+\s*:\s*\\?$'
-    classinfo = nesting_state.InnermostClass()
-    initial_spaces = 0
-    cleansed_line = clean_lines.elided[linenum]
-    while initial_spaces < len(line) and line[initial_spaces] == ' ':
-        initial_spaces += 1
-    if line and line[-1].isspace():
-        error(filename, linenum, 'whitespace/end_of_line', 4,
-              'Line ends in whitespace.  Consider deleting these extra spaces.')
-    # There are certain situations we allow one space, notably for
-    # section labels, and also lines containing multi-line raw strings.
-    elif ((initial_spaces == 1 or initial_spaces == 3) and
-          not Match(scope_or_label_pattern, cleansed_line) and
-          not (clean_lines.raw_lines[linenum] != line and
-               Match(r'^\s*""', line))):
-        error(filename, linenum, 'whitespace/indent', 3,
-              'Weird number of spaces at line-start.  '
-              'Are you using a 2-space indent?')
-
-    # Check if the line is a header guard.
-    is_header_guard = False
-    if file_extension == 'h':
-        cppvar = GetHeaderGuardCPPVariable(filename)
-        if (line.startswith('#ifndef %s' % cppvar) or
-                line.startswith('#define %s' % cppvar) or
-                line.startswith('#endif  // %s' % cppvar)):
-            is_header_guard = True
-    # #include lines and header guards can be long, since there's no clean way to
-    # split them.
-    #
-    # URLs can be long too.  It's possible to split these, but it makes them
-    # harder to cut&paste.
-    #
-    # The "$Id:...$" comment may also get very long without it being the
-    # developers fault.
-    if (not line.startswith('#include') and not is_header_guard and
-            not Match(r'^\s*//.*http(s?)://\S*$', line) and
-            not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
-        line_width = GetLineWidth(line)
-        extended_length = int((_line_length * 1.25))
-        if line_width > extended_length:
-            error(filename, linenum, 'whitespace/line_length', 4,
-                  'Lines should very rarely be longer than %i characters' %
-                  extended_length)
-        elif line_width > _line_length:
-            error(filename, linenum, 'whitespace/line_length', 2,
-                  'Lines should be <= %i characters long' % _line_length)
-
-    if (cleansed_line.count(';') > 1 and
-            # for loops are allowed two ;'s (and may run over two lines).
-            cleansed_line.find('for') == -1 and
-        (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
-         GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
-            # It's ok to have many commands in a switch case that fits in 1 line
-            not ((cleansed_line.find('case ') != -1 or
-                  cleansed_line.find('default:') != -1) and
-                 cleansed_line.find('break;') != -1)):
-        error(filename, linenum, 'whitespace/newline', 0,
-              'More than one command on the same line')
-
-    # Some more style checks
-    CheckBraces(filename, clean_lines, linenum, error)
-    CheckTrailingSemicolon(filename, clean_lines, linenum, error)
-    CheckEmptyBlockBody(filename, clean_lines, linenum, error)
-    CheckAccess(filename, clean_lines, linenum, nesting_state, error)
-    CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
-    CheckOperatorSpacing(filename, clean_lines, linenum, error)
-    CheckParenthesisSpacing(filename, clean_lines, linenum, error)
-    CheckCommaSpacing(filename, clean_lines, linenum, error)
-    CheckBracesSpacing(filename, clean_lines, linenum, error)
-    CheckSpacingForFunctionCall(filename, clean_lines, linenum, error)
-    CheckRValueReference(filename, clean_lines, linenum, nesting_state, error)
-    CheckCheck(filename, clean_lines, linenum, error)
-    CheckAltTokens(filename, clean_lines, linenum, error)
-    classinfo = nesting_state.InnermostClass()
-    if classinfo:
-        CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
-
-
-_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
-# Matches the first component of a filename delimited by -s and _s. That is:
-#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
-#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
-#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
-#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
-_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
-
-
-def _DropCommonSuffixes(filename):
-    """Drops common suffixes like _test.cc or -inl.h from filename.
-
-  For example:
-    >>> _DropCommonSuffixes('foo/foo-inl.h')
-    'foo/foo'
-    >>> _DropCommonSuffixes('foo/bar/foo.cc')
-    'foo/bar/foo'
-    >>> _DropCommonSuffixes('foo/foo_internal.h')
-    'foo/foo'
-    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
-    'foo/foo_unusualinternal'
-
-  Args:
-    filename: The input filename.
-
-  Returns:
-    The filename with the common suffix removed.
-  """
-    for suffix in ('test.cc', 'regtest.cc', 'unittest.cc', 'inl.h', 'impl.h',
-                   'internal.h'):
-        if (filename.endswith(suffix) and len(filename) > len(suffix) and
-                filename[-len(suffix) - 1] in ('-', '_')):
-            return filename[:-len(suffix) - 1]
-    return os.path.splitext(filename)[0]
-
-
-def _IsTestFilename(filename):
-    """Determines if the given filename has a suffix that identifies it as a test.
-
-  Args:
-    filename: The input filename.
-
-  Returns:
-    True if 'filename' looks like a test, False otherwise.
-  """
-    if (filename.endswith('_test.cc') or filename.endswith('_unittest.cc') or
-            filename.endswith('_regtest.cc')):
-        return True
-    else:
-        return False
-
-
-def _ClassifyInclude(fileinfo, include, is_system):
-    """Figures out what kind of header 'include' is.
-
-  Args:
-    fileinfo: The current file cpplint is running over. A FileInfo instance.
-    include: The path to a #included file.
-    is_system: True if the #include used <> rather than "".
-
-  Returns:
-    One of the _XXX_HEADER constants.
-
-  For example:
-    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
-    _C_SYS_HEADER
-    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
-    _CPP_SYS_HEADER
-    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
-    _LIKELY_MY_HEADER
-    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
-    ...                  'bar/foo_other_ext.h', False)
-    _POSSIBLE_MY_HEADER
-    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
-    _OTHER_HEADER
-  """
-    # This is a list of all standard c++ header files, except
-    # those already checked for above.
-    is_cpp_h = include in _CPP_HEADERS
-
-    if is_system:
-        if is_cpp_h:
-            return _CPP_SYS_HEADER
-        else:
-            return _C_SYS_HEADER
-
-    # If the target file and the include we're checking share a
-    # basename when we drop common extensions, and the include
-    # lives in . , then it's likely to be owned by the target file.
-    target_dir, target_base = (
-        os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
-    include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
-    if target_base == include_base and (
-            include_dir == target_dir or
-            include_dir == os.path.normpath(target_dir + '/../public')):
-        return _LIKELY_MY_HEADER
-
-    # If the target and include share some initial basename
-    # component, it's possible the target is implementing the
-    # include, so it's allowed to be first, but we'll never
-    # complain if it's not there.
-    target_first_component = _RE_FIRST_COMPONENT.match(target_base)
-    include_first_component = _RE_FIRST_COMPONENT.match(include_base)
-    if (target_first_component and include_first_component and
-            target_first_component.group(0) ==
-            include_first_component.group(0)):
-        return _POSSIBLE_MY_HEADER
-
-    return _OTHER_HEADER
-
-
-def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
-    """Check rules that are applicable to #include lines.
-
-  Strings on #include lines are NOT removed from elided line, to make
-  certain tasks easier. However, to prevent false positives, checks
-  applicable to #include lines in CheckLanguage must be put here.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    include_state: An _IncludeState instance in which the headers are inserted.
-    error: The function to call with any errors found.
-  """
-    fileinfo = FileInfo(filename)
-    line = clean_lines.lines[linenum]
-
-    # "include" should use the new style "foo/bar.h" instead of just "bar.h"
-    # Only do this check if the included header follows google naming
-    # conventions.  If not, assume that it's a 3rd party API that
-    # requires special include conventions.
-    #
-    # We also make an exception for Lua headers, which follow google
-    # naming convention but not the include convention.
-    match = Match(r'#include\s*"([^/]+\.h)"', line)
-    if match and not _THIRD_PARTY_HEADERS_PATTERN.match(match.group(1)):
-        error(filename, linenum, 'build/include', 4,
-              'Include the directory when naming .h files')
-
-    # we shouldn't include a file more than once. actually, there are a
-    # handful of instances where doing so is okay, but in general it's
-    # not.
-    match = _RE_PATTERN_INCLUDE.search(line)
-    if match:
-        include = match.group(2)
-        is_system = (match.group(1) == '<')
-        duplicate_line = include_state.FindHeader(include)
-        if duplicate_line >= 0:
-            error(filename, linenum, 'build/include', 4,
-                  '"%s" already included at %s:%s' %
-                  (include, filename, duplicate_line))
-        elif (include.endswith('.cc') and
-              os.path.dirname(fileinfo.RepositoryName()) !=
-              os.path.dirname(include)):
-            error(filename, linenum, 'build/include', 4,
-                  'Do not include .cc files from other packages')
-        elif not _THIRD_PARTY_HEADERS_PATTERN.match(include):
-            include_state.include_list[-1].append((include, linenum))
-
-            # We want to ensure that headers appear in the right order:
-            # 1) for foo.cc, foo.h  (preferred location)
-            # 2) c system files
-            # 3) cpp system files
-            # 4) for foo.cc, foo.h  (deprecated location)
-            # 5) other google headers
-            #
-            # We classify each include statement as one of those 5 types
-            # using a number of techniques. The include_state object keeps
-            # track of the highest type seen, and complains if we see a
-            # lower type after that.
-            error_message = include_state.CheckNextIncludeOrder(
-                _ClassifyInclude(fileinfo, include, is_system))
-            if error_message:
-                error(filename, linenum, 'build/include_order', 4,
-                      '%s. Should be: %s.h, c system, c++ system, other.' %
-                      (error_message, fileinfo.BaseName()))
-            canonical_include = include_state.CanonicalizeAlphabeticalOrder(
-                include)
-            if not include_state.IsInAlphabeticalOrder(clean_lines, linenum,
-                                                       canonical_include):
-                error(filename, linenum, 'build/include_alpha', 4,
-                      'Include "%s" not in alphabetical order' % include)
-            include_state.SetLastHeader(canonical_include)
-
-
-def _GetTextInside(text, start_pattern):
-    r"""Retrieves all the text between matching open and close parentheses.
-
-  Given a string of lines and a regular expression string, retrieve all the text
-  following the expression and between opening punctuation symbols like
-  (, [, or {, and the matching close-punctuation symbol. This properly nested
-  occurrences of the punctuations, so for the text like
-    printf(a(), b(c()));
-  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
-  start_pattern must match string having an open punctuation symbol at the end.
-
-  Args:
-    text: The lines to extract text. Its comments and strings must be elided.
-           It can be single line and can span multiple lines.
-    start_pattern: The regexp string indicating where to start extracting
-                   the text.
-  Returns:
-    The extracted text.
-    None if either the opening string or ending punctuation could not be found.
-  """
-    # TODO(unknown): Audit cpplint.py to see what places could be profitably
-    # rewritten to use _GetTextInside (and use inferior regexp matching today).
-
-    # Give opening punctuations to get the matching close-punctuations.
-    matching_punctuation = {'(': ')', '{': '}', '[': ']'}
-    closing_punctuation = set(matching_punctuation.itervalues())
-
-    # Find the position to start extracting text.
-    match = re.search(start_pattern, text, re.M)
-    if not match:  # start_pattern not found in text.
-        return None
-    start_position = match.end(0)
-
-    assert start_position > 0, (
-        'start_pattern must ends with an opening punctuation.')
-    assert text[start_position - 1] in matching_punctuation, (
-        'start_pattern must ends with an opening punctuation.')
-    # Stack of closing punctuations we expect to have in text after position.
-    punctuation_stack = [matching_punctuation[text[start_position - 1]]]
-    position = start_position
-    while punctuation_stack and position < len(text):
-        if text[position] == punctuation_stack[-1]:
-            punctuation_stack.pop()
-        elif text[position] in closing_punctuation:
-            # A closing punctuation without matching opening punctuations.
-            return None
-        elif text[position] in matching_punctuation:
-            punctuation_stack.append(matching_punctuation[text[position]])
-        position += 1
-    if punctuation_stack:
-        # Opening punctuations left without matching close-punctuations.
-        return None
-    # punctuations match.
-    return text[start_position:position - 1]
-
-
-# Patterns for matching call-by-reference parameters.
-#
-# Supports nested templates up to 2 levels deep using this messy pattern:
-#   < (?: < (?: < [^<>]*
-#               >
-#           |   [^<>] )*
-#         >
-#     |   [^<>] )*
-#   >
-_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [[:alpha:]][[:alnum:]]*
-_RE_PATTERN_TYPE = (
-    r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
-    r'(?:\w|'
-    r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|'
-    r'::)+')
-# A call-by-reference parameter ends with '& identifier'.
-_RE_PATTERN_REF_PARAM = re.compile(
-    r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*'
-    r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
-# A call-by-const-reference parameter either ends with 'const& identifier'
-# or looks like 'const type& identifier' when 'type' is atomic.
-_RE_PATTERN_CONST_REF_PARAM = (
-    r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT + r'|const\s+' +
-    _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')')
-
-
-def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
-                  nesting_state, error):
-    """Checks rules from the 'C++ language rules' section of cppguide.html.
-
-  Some of these rules are hard to test (function overloading, using
-  uint32 inappropriately), but we do the best we can.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    file_extension: The extension (without the dot) of the filename.
-    include_state: An _IncludeState instance in which the headers are inserted.
-    nesting_state: A NestingState instance which maintains information about
-                   the current stack of nested blocks being parsed.
-    error: The function to call with any errors found.
-  """
-    # If the line is empty or consists of entirely a comment, no need to
-    # check it.
-    line = clean_lines.elided[linenum]
-    if not line:
-        return
-
-    match = _RE_PATTERN_INCLUDE.search(line)
-    if match:
-        CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
-        return
-
-    # Reset include state across preprocessor directives.  This is meant
-    # to silence warnings for conditional includes.
-    match = Match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line)
-    if match:
-        include_state.ResetSection(match.group(1))
-
-    # Make Windows paths like Unix.
-    fullname = os.path.abspath(filename).replace('\\', '/')
-
-    # Perform other checks now that we are sure that this is not an include line
-    CheckCasts(filename, clean_lines, linenum, error)
-    CheckGlobalStatic(filename, clean_lines, linenum, error)
-    CheckPrintf(filename, clean_lines, linenum, error)
-
-    if file_extension == 'h':
-        # TODO(unknown): check that 1-arg constructors are explicit.
-        #                How to tell it's a constructor?
-        #                (handled in CheckForNonStandardConstructs for now)
-        # TODO(unknown): check that classes declare or disable copy/assign
-        #                (level 1 error)
-        pass
-
-    # Check if people are using the verboten C basic types.  The only exception
-    # we regularly allow is "unsigned short port" for port.
-    if Search(r'\bshort port\b', line):
-        if not Search(r'\bunsigned short port\b', line):
-            error(filename, linenum, 'runtime/int', 4,
-                  'Use "unsigned short" for ports, not "short"')
-    else:
-        match = Search(r'\b(short|long(?! +double)|long long)\b', line)
-        if match:
-            error(filename, linenum, 'runtime/int', 4,
-                  'Use int16/int64/etc, rather than the C type %s' %
-                  match.group(1))
-
-    # Check if some verboten operator overloading is going on
-    # TODO(unknown): catch out-of-line unary operator&:
-    #   class X {};
-    #   int operator&(const X& x) { return 42; }  // unary operator&
-    # The trick is it's hard to tell apart from binary operator&:
-    #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
-    if Search(r'\boperator\s*&\s*\(\s*\)', line):
-        error(filename, linenum, 'runtime/operator', 4,
-              'Unary operator& is dangerous.  Do not use it.')
-
-    # Check for suspicious usage of "if" like
-    # } if (a == b) {
-    if Search(r'\}\s*if\s*\(', line):
-        error(filename, linenum, 'readability/braces', 4,
-              'Did you mean "else if"? If not, start a new line for "if".')
-
-    # Check for potential format string bugs like printf(foo).
-    # We constrain the pattern not to pick things like DocidForPrintf(foo).
-    # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
-    # TODO(unknown): Catch the following case. Need to change the calling
-    # convention of the whole function to process multiple line to handle it.
-    #   printf(
-    #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
-    printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
-    if printf_args:
-        match = Match(r'([\w.\->()]+)$', printf_args)
-        if match and match.group(1) != '__VA_ARGS__':
-            function_name = re.search(r'\b((?:string)?printf)\s*\(', line,
-                                      re.I).group(1)
-            error(filename, linenum, 'runtime/printf', 4,
-                  'Potential format string bug. Do %s("%%s", %s) instead.' %
-                  (function_name, match.group(1)))
-
-    # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
-    match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
-    if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
-        error(filename, linenum, 'runtime/memset', 4,
-              'Did you mean "memset(%s, 0, %s)"?' %
-              (match.group(1), match.group(2)))
-
-    if Search(r'\busing namespace\b', line):
-        error(filename, linenum, 'build/namespaces', 5,
-              'Do not use namespace using-directives.  '
-              'Use using-declarations instead.')
-
-    # Detect variable-length arrays.
-    match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
-    if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
-            match.group(3).find(']') == -1):
-        # Split the size using space and arithmetic operators as delimiters.
-        # If any of the resulting tokens are not compile time constants then
-        # report the error.
-        tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3))
-        is_const = True
-        skip_next = False
-        for tok in tokens:
-            if skip_next:
-                skip_next = False
-                continue
-
-            if Search(r'sizeof\(.+\)', tok): continue
-            if Search(r'arraysize\(\w+\)', tok): continue
-
-            tok = tok.lstrip('(')
-            tok = tok.rstrip(')')
-            if not tok: continue
-            if Match(r'\d+', tok): continue
-            if Match(r'0[xX][0-9a-fA-F]+', tok): continue
-            if Match(r'k[A-Z0-9]\w*', tok): continue
-            if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
-            if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
-            # A catch all for tricky sizeof cases, including 'sizeof expression',
-            # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
-            # requires skipping the next token because we split on ' ' and '*'.
-            if tok.startswith('sizeof'):
-                skip_next = True
-                continue
-            is_const = False
-            break
-        if not is_const:
-            error(
-                filename, linenum, 'runtime/arrays', 1,
-                'Do not use variable-length arrays.  Use an appropriately named '
-                "('k' followed by CamelCase) compile-time constant for the size."
-            )
-
-    # Check for use of unnamed namespaces in header files.  Registration
-    # macros are typically OK, so we allow use of "namespace {" on lines
-    # that end with backslashes.
-    if (file_extension == 'h' and Search(r'\bnamespace\s*{', line) and
-            line[-1] != '\\'):
-        error(
-            filename, linenum, 'build/namespaces', 4,
-            'Do not use unnamed namespaces in header files.  See '
-            'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
-            ' for more information.')
-
-
-def CheckGlobalStatic(filename, clean_lines, linenum, error):
-    """Check for unsafe global or static objects.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]
-
-    # Match two lines at a time to support multiline declarations
-    if linenum + 1 < clean_lines.NumLines() and not Search(r'[;({]', line):
-        line += clean_lines.elided[linenum + 1].strip()
-
-    # Check for people declaring static/global STL strings at the top level.
-    # This is dangerous because the C++ language does not guarantee that
-    # globals with constructors are initialized before the first access.
-    match = Match(r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
-                  line)
-
-    # Remove false positives:
-    # - String pointers (as opposed to values).
-    #    string *pointer
-    #    const string *pointer
-    #    string const *pointer
-    #    string *const pointer
-    #
-    # - Functions and template specializations.
-    #    string Function<Type>(...
-    #    string Class<Type>::Method(...
-    #
-    # - Operators.  These are matched separately because operator names
-    #   cross non-word boundaries, and trying to match both operators
-    #   and functions at the same time would decrease accuracy of
-    #   matching identifiers.
-    #    string Class::operator*()
-    if (match and
-            not Search(r'\bstring\b(\s+const)?\s*\*\s*(const\s+)?\w', line) and
-            not Search(r'\boperator\W', line) and not Match(
-                r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', match.group(3))):
-        error(
-            filename, linenum, 'runtime/string', 4,
-            'For a static/global string constant, use a C style string instead: '
-            '"%schar %s[]".' % (match.group(1), match.group(2)))
-
-    if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
-        error(filename, linenum, 'runtime/init', 4,
-              'You seem to be initializing a member variable with itself.')
-
-
-def CheckPrintf(filename, clean_lines, linenum, error):
-    """Check for printf related issues.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]
-
-    # When snprintf is used, the second argument shouldn't be a literal.
-    match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
-    if match and match.group(2) != '0':
-        # If 2nd arg is zero, snprintf is used to calculate size.
-        error(filename, linenum, 'runtime/printf', 3,
-              'If you can, use sizeof(%s) instead of %s as the 2nd arg '
-              'to snprintf.' % (match.group(1), match.group(2)))
-
-    # Check if some verboten C functions are being used.
-    if Search(r'\bsprintf\s*\(', line):
-        error(filename, linenum, 'runtime/printf', 5,
-              'Never use sprintf. Use snprintf instead.')
-    match = Search(r'\b(strcpy|strcat)\s*\(', line)
-    if match:
-        error(filename, linenum, 'runtime/printf', 4,
-              'Almost always, snprintf is better than %s' % match.group(1))
-
-
-def IsDerivedFunction(clean_lines, linenum):
-    """Check if current line contains an inherited function.
-
-  Args:
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-  Returns:
-    True if current line contains a function with "override"
-    virt-specifier.
-  """
-    # Scan back a few lines for start of current function
-    for i in xrange(linenum, max(-1, linenum - 10), -1):
-        match = Match(r'^([^()]*\w+)\(', clean_lines.elided[i])
-        if match:
-            # Look for "override" after the matching closing parenthesis
-            line, _, closing_paren = CloseExpression(clean_lines, i,
-                                                     len(match.group(1)))
-            return (closing_paren >= 0 and
-                    Search(r'\boverride\b', line[closing_paren:]))
-    return False
-
-
-def IsOutOfLineMethodDefinition(clean_lines, linenum):
-    """Check if current line contains an out-of-line method definition.
-
-  Args:
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-  Returns:
-    True if current line contains an out-of-line method definition.
-  """
-    # Scan back a few lines for start of current function
-    for i in xrange(linenum, max(-1, linenum - 10), -1):
-        if Match(r'^([^()]*\w+)\(', clean_lines.elided[i]):
-            return Match(r'^[^()]*\w+::\w+\(',
-                         clean_lines.elided[i]) is not None
-    return False
-
-
-def IsInitializerList(clean_lines, linenum):
-    """Check if current line is inside constructor initializer list.
-
-  Args:
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-  Returns:
-    True if current line appears to be inside constructor initializer
-    list, False otherwise.
-  """
-    for i in xrange(linenum, 1, -1):
-        line = clean_lines.elided[i]
-        if i == linenum:
-            remove_function_body = Match(r'^(.*)\{\s*$', line)
-            if remove_function_body:
-                line = remove_function_body.group(1)
-
-        if Search(r'\s:\s*\w+[({]', line):
-            # A lone colon tend to indicate the start of a constructor
-            # initializer list.  It could also be a ternary operator, which
-            # also tend to appear in constructor initializer lists as
-            # opposed to parameter lists.
-            return True
-        if Search(r'\}\s*,\s*$', line):
-            # A closing brace followed by a comma is probably the end of a
-            # brace-initialized member in constructor initializer list.
-            return True
-        if Search(r'[{};]\s*$', line):
-            # Found one of the following:
-            # - A closing brace or semicolon, probably the end of the previous
-            #   function.
-            # - An opening brace, probably the start of current class or namespace.
-            #
-            # Current line is probably not inside an initializer list since
-            # we saw one of those things without seeing the starting colon.
-            return False
-
-    # Got to the beginning of the file without seeing the start of
-    # constructor initializer list.
-    return False
-
-
-def CheckForNonConstReference(filename, clean_lines, linenum, nesting_state,
-                              error):
-    """Check for non-const references.
-
-  Separate from CheckLanguage since it scans backwards from current
-  line, instead of scanning forward.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    nesting_state: A NestingState instance which maintains information about
-                   the current stack of nested blocks being parsed.
-    error: The function to call with any errors found.
-  """
-    # Do nothing if there is no '&' on current line.
-    line = clean_lines.elided[linenum]
-    if '&' not in line:
-        return
-
-    # If a function is inherited, current function doesn't have much of
-    # a choice, so any non-const references should not be blamed on
-    # derived function.
-    if IsDerivedFunction(clean_lines, linenum):
-        return
-
-    # Don't warn on out-of-line method definitions, as we would warn on the
-    # in-line declaration, if it isn't marked with 'override'.
-    if IsOutOfLineMethodDefinition(clean_lines, linenum):
-        return
-
-    # Long type names may be broken across multiple lines, usually in one
-    # of these forms:
-    #   LongType
-    #       ::LongTypeContinued &identifier
-    #   LongType::
-    #       LongTypeContinued &identifier
-    #   LongType<
-    #       ...>::LongTypeContinued &identifier
-    #
-    # If we detected a type split across two lines, join the previous
-    # line to current line so that we can match const references
-    # accordingly.
-    #
-    # Note that this only scans back one line, since scanning back
-    # arbitrary number of lines would be expensive.  If you have a type
-    # that spans more than 2 lines, please use a typedef.
-    if linenum > 1:
-        previous = None
-        if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
-            # previous_line\n + ::current_line
-            previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
-                              clean_lines.elided[linenum - 1])
-        elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
-            # previous_line::\n + current_line
-            previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
-                              clean_lines.elided[linenum - 1])
-        if previous:
-            line = previous.group(1) + line.lstrip()
-        else:
-            # Check for templated parameter that is split across multiple lines
-            endpos = line.rfind('>')
-            if endpos > -1:
-                (_, startline, startpos) = ReverseCloseExpression(
-                    clean_lines, linenum, endpos)
-                if startpos > -1 and startline < linenum:
-                    # Found the matching < on an earlier line, collect all
-                    # pieces up to current line.
-                    line = ''
-                    for i in xrange(startline, linenum + 1):
-                        line += clean_lines.elided[i].strip()
-
-    # Check for non-const references in function parameters.  A single '&' may
-    # found in the following places:
-    #   inside expression: binary & for bitwise AND
-    #   inside expression: unary & for taking the address of something
-    #   inside declarators: reference parameter
-    # We will exclude the first two cases by checking that we are not inside a
-    # function body, including one that was just introduced by a trailing '{'.
-    # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
-    if (nesting_state.previous_stack_top and
-            not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or
-                 isinstance(nesting_state.previous_stack_top, _NamespaceInfo))):
-        # Not at toplevel, not within a class, and not within a namespace
-        return
-
-    # Avoid initializer lists.  We only need to scan back from the
-    # current line for something that starts with ':'.
-    #
-    # We don't need to check the current line, since the '&' would
-    # appear inside the second set of parentheses on the current line as
-    # opposed to the first set.
-    if linenum > 0:
-        for i in xrange(linenum - 1, max(0, linenum - 10), -1):
-            previous_line = clean_lines.elided[i]
-            if not Search(r'[),]\s*$', previous_line):
-                break
-            if Match(r'^\s*:\s+\S', previous_line):
-                return
-
-    # Avoid preprocessors
-    if Search(r'\\\s*$', line):
-        return
-
-    # Avoid constructor initializer lists
-    if IsInitializerList(clean_lines, linenum):
-        return
-
-    # We allow non-const references in a few standard places, like functions
-    # called "swap()" or iostream operators like "<<" or ">>".  Do not check
-    # those function parameters.
-    #
-    # We also accept & in static_assert, which looks like a function but
-    # it's actually a declaration expression.
-    whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|'
-                             r'operator\s*[<>][<>]|'
-                             r'static_assert|COMPILE_ASSERT'
-                             r')\s*\(')
-    if Search(whitelisted_functions, line):
-        return
-    elif not Search(r'\S+\([^)]*$', line):
-        # Don't see a whitelisted function on this line.  Actually we
-        # didn't see any function name on this line, so this is likely a
-        # multi-line parameter list.  Try a bit harder to catch this case.
-        for i in xrange(2):
-            if (linenum > i and Search(whitelisted_functions,
-                                       clean_lines.elided[linenum - i - 1])):
-                return
-
-    decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
-    for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
-        if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
-            error(filename, linenum, 'runtime/references', 2,
-                  'Is this a non-const reference? '
-                  'If so, make const or use a pointer: ' + ReplaceAll(
-                      ' *<', '<', parameter))
-
-
-def CheckCasts(filename, clean_lines, linenum, error):
-    """Various cast related checks.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]
-
-    # Check to see if they're using an conversion function cast.
-    # I just try to capture the most common basic types, though there are more.
-    # Parameterless conversion functions, such as bool(), are allowed as they are
-    # probably a member operator declaration or default constructor.
-    match = Search(r'(\bnew\s+|\S<\s*(?:const\s+)?)?\b'
-                   r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
-                   r'(\([^)].*)', line)
-    expecting_function = ExpectingFunctionArgs(clean_lines, linenum)
-    if match and not expecting_function:
-        matched_type = match.group(2)
-
-        # matched_new_or_template is used to silence two false positives:
-        # - New operators
-        # - Template arguments with function types
-        #
-        # For template arguments, we match on types immediately following
-        # an opening bracket without any spaces.  This is a fast way to
-        # silence the common case where the function type is the first
-        # template argument.  False negative with less-than comparison is
-        # avoided because those operators are usually followed by a space.
-        #
-        #   function<double(double)>   // bracket + no space = false positive
-        #   value < double(42)         // bracket + space = true positive
-        matched_new_or_template = match.group(1)
-
-        # Avoid arrays by looking for brackets that come after the closing
-        # parenthesis.
-        if Match(r'\([^()]+\)\s*\[', match.group(3)):
-            return
-
-        # Other things to ignore:
-        # - Function pointers
-        # - Casts to pointer types
-        # - Placement new
-        # - Alias declarations
-        matched_funcptr = match.group(3)
-        if (matched_new_or_template is None and not (matched_funcptr and (Match(
-                r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
-                matched_funcptr) or matched_funcptr.startswith('(*)'))) and
-                not Match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and
-                not Search(r'new\(\S+\)\s*' + matched_type, line)):
-            error(filename, linenum, 'readability/casting', 4,
-                  'Using deprecated casting style.  '
-                  'Use static_cast<%s>(...) instead' % matched_type)
-
-    if not expecting_function:
-        CheckCStyleCast(filename, clean_lines, linenum, 'static_cast',
-                        r'\((int|float|double|bool|char|u?int(16|32|64))\)',
-                        error)
-
-    # This doesn't catch all cases. Consider (const char * const)"hello".
-    #
-    # (char *) "foo" should always be a const_cast (reinterpret_cast won't
-    # compile).
-    if CheckCStyleCast(filename, clean_lines, linenum, 'const_cast',
-                       r'\((char\s?\*+\s?)\)\s*"', error):
-        pass
-    else:
-        # Check pointer casts for other than string constants
-        CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast',
-                        r'\((\w+\s?\*+\s?)\)', error)
-
-    # In addition, we look for people taking the address of a cast.  This
-    # is dangerous -- casts can assign to temporaries, so the pointer doesn't
-    # point where you think.
-    #
-    # Some non-identifier character is required before the '&' for the
-    # expression to be recognized as a cast.  These are casts:
-    #   expression = &static_cast<int*>(temporary());
-    #   function(&(int*)(temporary()));
-    #
-    # This is not a cast:
-    #   reference_type&(int* function_param);
-    match = Search(r'(?:[^\w]&\(([^)*][^)]*)\)[\w(])|'
-                   r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', line)
-    if match:
-        # Try a better error message when the & is bound to something
-        # dereferenced by the casted pointer, as opposed to the casted
-        # pointer itself.
-        parenthesis_error = False
-        match = Match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<',
-                      line)
-        if match:
-            _, y1, x1 = CloseExpression(clean_lines, linenum,
-                                        len(match.group(1)))
-            if x1 >= 0 and clean_lines.elided[y1][x1] == '(':
-                _, y2, x2 = CloseExpression(clean_lines, y1, x1)
-                if x2 >= 0:
-                    extended_line = clean_lines.elided[y2][x2:]
-                    if y2 < clean_lines.NumLines() - 1:
-                        extended_line += clean_lines.elided[y2 + 1]
-                    if Match(r'\s*(?:->|\[)', extended_line):
-                        parenthesis_error = True
-
-        if parenthesis_error:
-            error(filename, linenum, 'readability/casting', 4,
-                  ('Are you taking an address of something dereferenced '
-                   'from a cast?  Wrapping the dereferenced expression in '
-                   'parentheses will make the binding more obvious'))
-        else:
-            error(filename, linenum, 'runtime/casting', 4,
-                  ('Are you taking an address of a cast?  '
-                   'This is dangerous: could be a temp var.  '
-                   'Take the address before doing the cast, rather than after'))
-
-
-def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error):
-    """Checks for a C-style cast by looking for the pattern.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    cast_type: The string for the C++ cast to recommend.  This is either
-      reinterpret_cast, static_cast, or const_cast, depending.
-    pattern: The regular expression used to find C-style casts.
-    error: The function to call with any errors found.
-
-  Returns:
-    True if an error was emitted.
-    False otherwise.
-  """
-    line = clean_lines.elided[linenum]
-    match = Search(pattern, line)
-    if not match:
-        return False
-
-    # Exclude lines with keywords that tend to look like casts
-    context = line[0:match.start(1) - 1]
-    if Match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context):
-        return False
-
-    # Try expanding current context to see if we one level of
-    # parentheses inside a macro.
-    if linenum > 0:
-        for i in xrange(linenum - 1, max(0, linenum - 5), -1):
-            context = clean_lines.elided[i] + context
-    if Match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context):
-        return False
-
-    # operator++(int) and operator--(int)
-    if context.endswith(' operator++') or context.endswith(' operator--'):
-        return False
-
-    # A single unnamed argument for a function tends to look like old
-    # style cast.  If we see those, don't issue warnings for deprecated
-    # casts, instead issue warnings for unnamed arguments where
-    # appropriate.
-    #
-    # These are things that we want warnings for, since the style guide
-    # explicitly require all parameters to be named:
-    #   Function(int);
-    #   Function(int) {
-    #   ConstMember(int) const;
-    #   ConstMember(int) const {
-    #   ExceptionMember(int) throw (...);
-    #   ExceptionMember(int) throw (...) {
-    #   PureVirtual(int) = 0;
-    #   [](int) -> bool {
-    #
-    # These are functions of some sort, where the compiler would be fine
-    # if they had named parameters, but people often omit those
-    # identifiers to reduce clutter:
-    #   (FunctionPointer)(int);
-    #   (FunctionPointer)(int) = value;
-    #   Function((function_pointer_arg)(int))
-    #   Function((function_pointer_arg)(int), int param)
-    #   <TemplateArgument(int)>;
-    #   <(FunctionPointerTemplateArgument)(int)>;
-    remainder = line[match.end(0):]
-    if Match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)',
-             remainder):
-        # Looks like an unnamed parameter.
-
-        # Don't warn on any kind of template arguments.
-        if Match(r'^\s*>', remainder):
-            return False
-
-        # Don't warn on assignments to function pointers, but keep warnings for
-        # unnamed parameters to pure virtual functions.  Note that this pattern
-        # will also pass on assignments of "0" to function pointers, but the
-        # preferred values for those would be "nullptr" or "NULL".
-        matched_zero = Match(r'^\s=\s*(\S+)\s*;', remainder)
-        if matched_zero and matched_zero.group(1) != '0':
-            return False
-
-        # Don't warn on function pointer declarations.  For this we need
-        # to check what came before the "(type)" string.
-        if Match(r'.*\)\s*$', line[0:match.start(0)]):
-            return False
-
-        # Don't warn if the parameter is named with block comments, e.g.:
-        #  Function(int /*unused_param*/);
-        raw_line = clean_lines.raw_lines[linenum]
-        if '/*' in raw_line:
-            return False
-
-        # Passed all filters, issue warning here.
-        error(filename, linenum, 'readability/function', 3,
-              'All parameters should be named in a function')
-        return True
-
-    # At this point, all that should be left is actual casts.
-    error(filename, linenum, 'readability/casting', 4,
-          'Using C-style cast.  Use %s<%s>(...) instead' %
-          (cast_type, match.group(1)))
-
-    return True
-
-
-def ExpectingFunctionArgs(clean_lines, linenum):
-    """Checks whether where function type arguments are expected.
-
-  Args:
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-
-  Returns:
-    True if the line at 'linenum' is inside something that expects arguments
-    of function types.
-  """
-    line = clean_lines.elided[linenum]
-    return (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
-            (linenum >= 2 and
-             (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
-                    clean_lines.elided[linenum - 1]) or
-              Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
-                    clean_lines.elided[linenum - 2]) or
-              Search(r'\bstd::m?function\s*\<\s*$',
-                     clean_lines.elided[linenum - 1]))))
-
-
-_HEADERS_CONTAINING_TEMPLATES = (
-    ('<deque>', ('deque', )),
-    ('<functional>', (
-        'unary_function',
-        'binary_function',
-        'plus',
-        'minus',
-        'multiplies',
-        'divides',
-        'modulus',
-        'negate',
-        'equal_to',
-        'not_equal_to',
-        'greater',
-        'less',
-        'greater_equal',
-        'less_equal',
-        'logical_and',
-        'logical_or',
-        'logical_not',
-        'unary_negate',
-        'not1',
-        'binary_negate',
-        'not2',
-        'bind1st',
-        'bind2nd',
-        'pointer_to_unary_function',
-        'pointer_to_binary_function',
-        'ptr_fun',
-        'mem_fun_t',
-        'mem_fun',
-        'mem_fun1_t',
-        'mem_fun1_ref_t',
-        'mem_fun_ref_t',
-        'const_mem_fun_t',
-        'const_mem_fun1_t',
-        'const_mem_fun_ref_t',
-        'const_mem_fun1_ref_t',
-        'mem_fun_ref', )),
-    ('<limits>', ('numeric_limits', )),
-    ('<list>', ('list', )),
-    ('<map>', (
-        'map',
-        'multimap', )),
-    ('<memory>', ('allocator', )),
-    ('<queue>', (
-        'queue',
-        'priority_queue', )),
-    ('<set>', (
-        'set',
-        'multiset', )),
-    ('<stack>', ('stack', )),
-    ('<string>', (
-        'char_traits',
-        'basic_string', )),
-    ('<tuple>', ('tuple', )),
-    ('<utility>', ('pair', )),
-    ('<vector>', ('vector', )),
-
-    # gcc extensions.
-    # Note: std::hash is their hash, ::hash is our hash
-    ('<hash_map>', (
-        'hash_map',
-        'hash_multimap', )),
-    ('<hash_set>', (
-        'hash_set',
-        'hash_multiset', )),
-    ('<slist>', ('slist', )), )
-
-_RE_PATTERN_STRING = re.compile(r'\bstring\b')
-
-_re_pattern_algorithm_header = []
-for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
-                  'transform'):
-    # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
-    # type::max().
-    _re_pattern_algorithm_header.append(
-        (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'), _template,
-         '<algorithm>'))
-
-_re_pattern_templates = []
-for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
-    for _template in _templates:
-        _re_pattern_templates.append(
-            (re.compile(r'(\<|\b)' + _template + r'\s*\<'), _template + '<>',
-             _header))
-
-
-def FilesBelongToSameModule(filename_cc, filename_h):
-    """Check if these two filenames belong to the same module.
-
-  The concept of a 'module' here is a as follows:
-  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
-  same 'module' if they are in the same directory.
-  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
-  to belong to the same module here.
-
-  If the filename_cc contains a longer path than the filename_h, for example,
-  '/absolute/path/to/base/sysinfo.cc', and this file would include
-  'base/sysinfo.h', this function also produces the prefix needed to open the
-  header. This is used by the caller of this function to more robustly open the
-  header file. We don't have access to the real include paths in this context,
-  so we need this guesswork here.
-
-  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
-  according to this implementation. Because of this, this function gives
-  some false positives. This should be sufficiently rare in practice.
-
-  Args:
-    filename_cc: is the path for the .cc file
-    filename_h: is the path for the header path
-
-  Returns:
-    Tuple with a bool and a string:
-    bool: True if filename_cc and filename_h belong to the same module.
-    string: the additional prefix needed to open the header file.
-  """
-
-    if not filename_cc.endswith('.cc'):
-        return (False, '')
-    filename_cc = filename_cc[:-len('.cc')]
-    if filename_cc.endswith('_unittest'):
-        filename_cc = filename_cc[:-len('_unittest')]
-    elif filename_cc.endswith('_test'):
-        filename_cc = filename_cc[:-len('_test')]
-    filename_cc = filename_cc.replace('/public/', '/')
-    filename_cc = filename_cc.replace('/internal/', '/')
-
-    if not filename_h.endswith('.h'):
-        return (False, '')
-    filename_h = filename_h[:-len('.h')]
-    if filename_h.endswith('-inl'):
-        filename_h = filename_h[:-len('-inl')]
-    filename_h = filename_h.replace('/public/', '/')
-    filename_h = filename_h.replace('/internal/', '/')
-
-    files_belong_to_same_module = filename_cc.endswith(filename_h)
-    common_path = ''
-    if files_belong_to_same_module:
-        common_path = filename_cc[:-len(filename_h)]
-    return files_belong_to_same_module, common_path
-
-
-def UpdateIncludeState(filename, include_dict, io=codecs):
-    """Fill up the include_dict with new includes found from the file.
-
-  Args:
-    filename: the name of the header to read.
-    include_dict: a dictionary in which the headers are inserted.
-    io: The io factory to use to read the file. Provided for testability.
-
-  Returns:
-    True if a header was successfully added. False otherwise.
-  """
-    headerfile = None
-    try:
-        headerfile = io.open(filename, 'r', 'utf8', 'replace')
-    except IOError:
-        return False
-    linenum = 0
-    for line in headerfile:
-        linenum += 1
-        clean_line = CleanseComments(line)
-        match = _RE_PATTERN_INCLUDE.search(clean_line)
-        if match:
-            include = match.group(2)
-            include_dict.setdefault(include, linenum)
-    return True
-
-
-def CheckForIncludeWhatYouUse(filename,
-                              clean_lines,
-                              include_state,
-                              error,
-                              io=codecs):
-    """Reports for missing stl includes.
-
-  This function will output warnings to make sure you are including the headers
-  necessary for the stl containers and functions that you use. We only give one
-  reason to include a header. For example, if you use both equal_to<> and
-  less<> in a .h file, only one (the latter in the file) of these will be
-  reported as a reason to include the <functional>.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    include_state: An _IncludeState instance.
-    error: The function to call with any errors found.
-    io: The IO factory to use to read the header file. Provided for unittest
-        injection.
-  """
-    required = {}  # A map of header name to linenumber and the template entity.
-    # Example of required: { '<functional>': (1219, 'less<>') }
-
-    for linenum in xrange(clean_lines.NumLines()):
-        line = clean_lines.elided[linenum]
-        if not line or line[0] == '#':
-            continue
-
-        # String is special -- it is a non-templatized type in STL.
-        matched = _RE_PATTERN_STRING.search(line)
-        if matched:
-            # Don't warn about strings in non-STL namespaces:
-            # (We check only the first match per line; good enough.)
-            prefix = line[:matched.start()]
-            if prefix.endswith('std::') or not prefix.endswith('::'):
-                required['<string>'] = (linenum, 'string')
-
-        for pattern, template, header in _re_pattern_algorithm_header:
-            if pattern.search(line):
-                required[header] = (linenum, template)
-
-        # The following function is just a speed up, no semantics are changed.
-        if not '<' in line:  # Reduces the cpu time usage by skipping lines.
-            continue
-
-        for pattern, template, header in _re_pattern_templates:
-            if pattern.search(line):
-                required[header] = (linenum, template)
-
-    # The policy is that if you #include something in foo.h you don't need to
-    # include it again in foo.cc. Here, we will look at possible includes.
-    # Let's flatten the include_state include_list and copy it into a dictionary.
-    include_dict = dict(
-        [item for sublist in include_state.include_list for item in sublist])
-
-    # Did we find the header for this file (if any) and successfully load it?
-    header_found = False
-
-    # Use the absolute path so that matching works properly.
-    abs_filename = FileInfo(filename).FullName()
-
-    # For Emacs's flymake.
-    # If cpplint is invoked from Emacs's flymake, a temporary file is generated
-    # by flymake and that file name might end with '_flymake.cc'. In that case,
-    # restore original file name here so that the corresponding header file can be
-    # found.
-    # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
-    # instead of 'foo_flymake.h'
-    abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
-
-    # include_dict is modified during iteration, so we iterate over a copy of
-    # the keys.
-    header_keys = include_dict.keys()
-    for header in header_keys:
-        (same_module, common_path) = FilesBelongToSameModule(abs_filename,
-                                                             header)
-        fullpath = common_path + header
-        if same_module and UpdateIncludeState(fullpath, include_dict, io):
-            header_found = True
-
-    # If we can't find the header file for a .cc, assume it's because we don't
-    # know where to look. In that case we'll give up as we're not sure they
-    # didn't include it in the .h file.
-    # TODO(unknown): Do a better job of finding .h files so we are confident that
-    # not having the .h file means there isn't one.
-    if filename.endswith('.cc') and not header_found:
-        return
-
-    # All the lines have been processed, report the errors found.
-    for required_header_unstripped in required:
-        template = required[required_header_unstripped][1]
-        if required_header_unstripped.strip('<>"') not in include_dict:
-            error(filename, required[required_header_unstripped][0],
-                  'build/include_what_you_use', 4, 'Add #include ' +
-                  required_header_unstripped + ' for ' + template)
-
-
-_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')
-
-
-def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
-    """Check that make_pair's template arguments are deduced.
-
-  G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are
-  specified explicitly, and such use isn't intended in any case.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]
-    match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line)
-    if match:
-        error(
-            filename,
-            linenum,
-            'build/explicit_make_pair',
-            4,  # 4 = high confidence
-            'For C++11-compatibility, omit template arguments from make_pair'
-            ' OR use pair directly OR if appropriate, construct a pair directly')
-
-
-def CheckDefaultLambdaCaptures(filename, clean_lines, linenum, error):
-    """Check that default lambda captures are not used.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]
-
-    # A lambda introducer specifies a default capture if it starts with "[="
-    # or if it starts with "[&" _not_ followed by an identifier.
-    match = Match(r'^(.*)\[\s*(?:=|&[^\w])', line)
-    if match:
-        # Found a potential error, check what comes after the lambda-introducer.
-        # If it's not open parenthesis (for lambda-declarator) or open brace
-        # (for compound-statement), it's not a lambda.
-        line, _, pos = CloseExpression(clean_lines, linenum,
-                                       len(match.group(1)))
-        if pos >= 0 and Match(r'^\s*[{(]', line[pos:]):
-            error(
-                filename,
-                linenum,
-                'build/c++11',
-                4,  # 4 = high confidence
-                'Default lambda captures are an unapproved C++ feature.')
-
-
-def CheckRedundantVirtual(filename, clean_lines, linenum, error):
-    """Check if line contains a redundant "virtual" function-specifier.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    # Look for "virtual" on current line.
-    line = clean_lines.elided[linenum]
-    virtual = Match(r'^(.*)(\bvirtual\b)(.*)$', line)
-    if not virtual: return
-
-    # Ignore "virtual" keywords that are near access-specifiers.  These
-    # are only used in class base-specifier and do not apply to member
-    # functions.
-    if (Search(r'\b(public|protected|private)\s+$', virtual.group(1)) or
-            Match(r'^\s+(public|protected|private)\b', virtual.group(3))):
-        return
-
-    # Ignore the "virtual" keyword from virtual base classes.  Usually
-    # there is a column on the same line in these cases (virtual base
-    # classes are rare in google3 because multiple inheritance is rare).
-    if Match(r'^.*[^:]:[^:].*$', line): return
-
-    # Look for the next opening parenthesis.  This is the start of the
-    # parameter list (possibly on the next line shortly after virtual).
-    # TODO(unknown): doesn't work if there are virtual functions with
-    # decltype() or other things that use parentheses, but csearch suggests
-    # that this is rare.
-    end_col = -1
-    end_line = -1
-    start_col = len(virtual.group(2))
-    for start_line in xrange(linenum, min(linenum + 3, clean_lines.NumLines())):
-        line = clean_lines.elided[start_line][start_col:]
-        parameter_list = Match(r'^([^(]*)\(', line)
-        if parameter_list:
-            # Match parentheses to find the end of the parameter list
-            (_, end_line, end_col) = CloseExpression(
-                clean_lines, start_line,
-                start_col + len(parameter_list.group(1)))
-            break
-        start_col = 0
-
-    if end_col < 0:
-        return  # Couldn't find end of parameter list, give up
-
-    # Look for "override" or "final" after the parameter list
-    # (possibly on the next few lines).
-    for i in xrange(end_line, min(end_line + 3, clean_lines.NumLines())):
-        line = clean_lines.elided[i][end_col:]
-        match = Search(r'\b(override|final)\b', line)
-        if match:
-            error(filename, linenum, 'readability/inheritance', 4,
-                  ('"virtual" is redundant since function is '
-                   'already declared as "%s"' % match.group(1)))
-
-        # Set end_col to check whole lines after we are done with the
-        # first line.
-        end_col = 0
-        if Search(r'[^\w]\s*$', line):
-            break
-
-
-def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error):
-    """Check if line contains a redundant "override" or "final" virt-specifier.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    # Look for closing parenthesis nearby.  We need one to confirm where
-    # the declarator ends and where the virt-specifier starts to avoid
-    # false positives.
-    line = clean_lines.elided[linenum]
-    declarator_end = line.rfind(')')
-    if declarator_end >= 0:
-        fragment = line[declarator_end:]
-    else:
-        if linenum > 1 and clean_lines.elided[linenum - 1].rfind(')') >= 0:
-            fragment = line
-        else:
-            return
-
-    # Check that at most one of "override" or "final" is present, not both
-    if Search(r'\boverride\b', fragment) and Search(r'\bfinal\b', fragment):
-        error(filename, linenum, 'readability/inheritance', 4,
-              ('"override" is redundant since function is '
-               'already declared as "final"'))
-
-
-# Returns true if we are at a new block, and it is directly
-# inside of a namespace.
-def IsBlockInNameSpace(nesting_state, is_forward_declaration):
-    """Checks that the new block is directly in a namespace.
-
-  Args:
-    nesting_state: The _NestingState object that contains info about our state.
-    is_forward_declaration: If the class is a forward declared class.
-  Returns:
-    Whether or not the new block is directly in a namespace.
-  """
-    if is_forward_declaration:
-        if len(nesting_state.stack) >= 1 and (
-                isinstance(nesting_state.stack[-1], _NamespaceInfo)):
-            return True
-        else:
-            return False
-
-    return (len(nesting_state.stack) > 1 and
-            nesting_state.stack[-1].check_namespace_indentation and
-            isinstance(nesting_state.stack[-2], _NamespaceInfo))
-
-
-def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item,
-                                    raw_lines_no_comments, linenum):
-    """This method determines if we should apply our namespace indentation check.
-
-  Args:
-    nesting_state: The current nesting state.
-    is_namespace_indent_item: If we just put a new class on the stack, True.
-      If the top of the stack is not a class, or we did not recently
-      add the class, False.
-    raw_lines_no_comments: The lines without the comments.
-    linenum: The current line number we are processing.
-
-  Returns:
-    True if we should apply our namespace indentation check. Currently, it
-    only works for classes and namespaces inside of a namespace.
-  """
-
-    is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments,
-                                                       linenum)
-
-    if not (is_namespace_indent_item or is_forward_declaration):
-        return False
-
-    # If we are in a macro, we do not want to check the namespace indentation.
-    if IsMacroDefinition(raw_lines_no_comments, linenum):
-        return False
-
-    return IsBlockInNameSpace(nesting_state, is_forward_declaration)
-
-
-# Call this method if the line is directly inside of a namespace.
-# If the line above is blank (excluding comments) or the start of
-# an inner namespace, it cannot be indented.
-def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum,
-                                    error):
-    line = raw_lines_no_comments[linenum]
-    if Match(r'^\s+', line):
-        error(filename, linenum, 'runtime/indentation_namespace', 4,
-              'Do not indent within a namespace')
-
-
-def ProcessLine(filename,
-                file_extension,
-                clean_lines,
-                line,
-                include_state,
-                function_state,
-                nesting_state,
-                error,
-                extra_check_functions=[]):
-    """Processes a single line in the file.
-
-  Args:
-    filename: Filename of the file that is being processed.
-    file_extension: The extension (dot not included) of the file.
-    clean_lines: An array of strings, each representing a line of the file,
-                 with comments stripped.
-    line: Number of line being processed.
-    include_state: An _IncludeState instance in which the headers are inserted.
-    function_state: A _FunctionState instance which counts function lines, etc.
-    nesting_state: A NestingState instance which maintains information about
-                   the current stack of nested blocks being parsed.
-    error: A callable to which errors are reported, which takes 4 arguments:
-           filename, line number, error level, and message
-    extra_check_functions: An array of additional check functions that will be
-                           run on each source line. Each function takes 4
-                           arguments: filename, clean_lines, line, error
-  """
-    raw_lines = clean_lines.raw_lines
-    ParseNolintSuppressions(filename, raw_lines[line], line, error)
-    nesting_state.Update(filename, clean_lines, line, error)
-    CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line,
-                                 error)
-    if nesting_state.InAsmBlock(): return
-    CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
-    CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
-    CheckStyle(filename, clean_lines, line, file_extension, nesting_state,
-               error)
-    CheckLanguage(filename, clean_lines, line, file_extension, include_state,
-                  nesting_state, error)
-    CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
-    CheckForNonStandardConstructs(filename, clean_lines, line, nesting_state,
-                                  error)
-    CheckVlogArguments(filename, clean_lines, line, error)
-    CheckPosixThreading(filename, clean_lines, line, error)
-    CheckInvalidIncrement(filename, clean_lines, line, error)
-    CheckMakePairUsesDeduction(filename, clean_lines, line, error)
-    CheckDefaultLambdaCaptures(filename, clean_lines, line, error)
-    CheckRedundantVirtual(filename, clean_lines, line, error)
-    CheckRedundantOverrideOrFinal(filename, clean_lines, line, error)
-    for check_fn in extra_check_functions:
-        check_fn(filename, clean_lines, line, error)
-
-
-def FlagCxx11Features(filename, clean_lines, linenum, error):
-    """Flag those c++11 features that we only allow in certain places.
-
-  Args:
-    filename: The name of the current file.
-    clean_lines: A CleansedLines instance containing the file.
-    linenum: The number of the line to check.
-    error: The function to call with any errors found.
-  """
-    line = clean_lines.elided[linenum]
-
-    # Flag unapproved C++11 headers.
-    include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line)
-    if include and include.group(1) in (
-            'cfenv',
-            'condition_variable',
-            'fenv.h',
-            'future',
-            'mutex',
-            'thread',
-            'chrono',
-            'ratio',
-            'regex',
-            'system_error', ):
-        error(filename, linenum, 'build/c++11', 5,
-              ('<%s> is an unapproved C++11 header.') % include.group(1))
-
-    # The only place where we need to worry about C++11 keywords and library
-    # features in preprocessor directives is in macro definitions.
-    if Match(r'\s*#', line) and not Match(r'\s*#\s*define\b', line): return
-
-    # These are classes and free functions.  The classes are always
-    # mentioned as std::*, but we only catch the free functions if
-    # they're not found by ADL.  They're alphabetical by header.
-    for top_name in (
-            # type_traits
-            'alignment_of',
-            'aligned_union', ):
-        if Search(r'\bstd::%s\b' % top_name, line):
-            error(filename, linenum, 'build/c++11', 5, (
-                'std::%s is an unapproved C++11 class or function.  Send c-style '
-                'an example of where it would make your code more readable, and '
-                'they may let you use it.') % top_name)
-
-
-def ProcessFileData(filename,
-                    file_extension,
-                    lines,
-                    error,
-                    extra_check_functions=[]):
-    """Performs lint checks and reports any errors to the given error function.
-
-  Args:
-    filename: Filename of the file that is being processed.
-    file_extension: The extension (dot not included) of the file.
-    lines: An array of strings, each representing a line of the file, with the
-           last element being empty if the file is terminated with a newline.
-    error: A callable to which errors are reported, which takes 4 arguments:
-           filename, line number, error level, and message
-    extra_check_functions: An array of additional check functions that will be
-                           run on each source line. Each function takes 4
-                           arguments: filename, clean_lines, line, error
-  """
-    lines = (['// marker so line numbers and indices both start at 1'] + lines +
-             ['// marker so line numbers end in a known way'])
-
-    include_state = _IncludeState()
-    function_state = _FunctionState()
-    nesting_state = NestingState()
-
-    ResetNolintSuppressions()
-
-    CheckForCopyright(filename, lines, error)
-
-    RemoveMultiLineComments(filename, lines, error)
-    clean_lines = CleansedLines(lines)
-
-    if file_extension == 'h':
-        CheckForHeaderGuard(filename, clean_lines, error)
-
-    for line in xrange(clean_lines.NumLines()):
-        ProcessLine(filename, file_extension, clean_lines, line, include_state,
-                    function_state, nesting_state, error, extra_check_functions)
-        FlagCxx11Features(filename, clean_lines, line, error)
-    nesting_state.CheckCompletedBlocks(filename, error)
-
-    CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)
-
-    # Check that the .cc file has included its header if it exists.
-    if file_extension == 'cc':
-        CheckHeaderFileIncluded(filename, include_state, error)
-
-    # We check here rather than inside ProcessLine so that we see raw
-    # lines rather than "cleaned" lines.
-    CheckForBadCharacters(filename, lines, error)
-
-    CheckForNewlineAtEOF(filename, lines, error)
-
-
-def ProcessConfigOverrides(filename):
-    """ Loads the configuration files and processes the config overrides.
-
-  Args:
-    filename: The name of the file being processed by the linter.
-
-  Returns:
-    False if the current |filename| should not be processed further.
-  """
-
-    abs_filename = os.path.abspath(filename)
-    cfg_filters = []
-    keep_looking = True
-    while keep_looking:
-        abs_path, base_name = os.path.split(abs_filename)
-        if not base_name:
-            break  # Reached the root directory.
-
-        cfg_file = os.path.join(abs_path, "CPPLINT.cfg")
-        abs_filename = abs_path
-        if not os.path.isfile(cfg_file):
-            continue
-
-        try:
-            with open(cfg_file) as file_handle:
-                for line in file_handle:
-                    line, _, _ = line.partition('#')  # Remove comments.
-                    if not line.strip():
-                        continue
-
-                    name, _, val = line.partition('=')
-                    name = name.strip()
-                    val = val.strip()
-                    if name == 'set noparent':
-                        keep_looking = False
-                    elif name == 'filter':
-                        cfg_filters.append(val)
-                    elif name == 'exclude_files':
-                        # When matching exclude_files pattern, use the base_name of
-                        # the current file name or the directory name we are processing.
-                        # For example, if we are checking for lint errors in /foo/bar/baz.cc
-                        # and we found the .cfg file at /foo/CPPLINT.cfg, then the config
-                        # file's "exclude_files" filter is meant to be checked against "bar"
-                        # and not "baz" nor "bar/baz.cc".
-                        if base_name:
-                            pattern = re.compile(val)
-                            if pattern.match(base_name):
-                                sys.stderr.write(
-                                    'Ignoring "%s": file excluded by "%s". '
-                                    'File path component "%s" matches '
-                                    'pattern "%s"\n' %
-                                    (filename, cfg_file, base_name, val))
-                                return False
-                    elif name == 'linelength':
-                        global _line_length
-                        try:
-                            _line_length = int(val)
-                        except ValueError:
-                            sys.stderr.write('Line length must be numeric.')
-                    else:
-                        sys.stderr.write(
-                            'Invalid configuration option (%s) in file %s\n' %
-                            (name, cfg_file))
-
-        except IOError:
-            sys.stderr.write(
-                "Skipping config file '%s': Can't open for reading\n" %
-                cfg_file)
-            keep_looking = False
-
-    # Apply all the accumulated filters in reverse order (top-level directory
-    # config options having the least priority).
-    for filter in reversed(cfg_filters):
-        _AddFilters(filter)
-
-    return True
-
-
-def ProcessFile(filename, vlevel, extra_check_functions=[]):
-    """Does google-lint on a single file.
-
-  Args:
-    filename: The name of the file to parse.
-
-    vlevel: The level of errors to report.  Every error of confidence
-    >= verbose_level will be reported.  0 is a good default.
-
-    extra_check_functions: An array of additional check functions that will be
-                           run on each source line. Each function takes 4
-                           arguments: filename, clean_lines, line, error
-  """
-
-    _SetVerboseLevel(vlevel)
-    _BackupFilters()
-
-    if not ProcessConfigOverrides(filename):
-        _RestoreFilters()
-        return
-
-    lf_lines = []
-    crlf_lines = []
-    try:
-        # Support the UNIX convention of using "-" for stdin.  Note that
-        # we are not opening the file with universal newline support
-        # (which codecs doesn't support anyway), so the resulting lines do
-        # contain trailing '\r' characters if we are reading a file that
-        # has CRLF endings.
-        # If after the split a trailing '\r' is present, it is removed
-        # below.
-        if filename == '-':
-            lines = codecs.StreamReaderWriter(sys.stdin,
-                                              codecs.getreader('utf8'),
-                                              codecs.getwriter('utf8'),
-                                              'replace').read().split('\n')
-        else:
-            lines = codecs.open(filename, 'r', 'utf8',
-                                'replace').read().split('\n')
-
-        # Remove trailing '\r'.
-        # The -1 accounts for the extra trailing blank line we get from split()
-        for linenum in range(len(lines) - 1):
-            if lines[linenum].endswith('\r'):
-                lines[linenum] = lines[linenum].rstrip('\r')
-                crlf_lines.append(linenum + 1)
-            else:
-                lf_lines.append(linenum + 1)
-
-    except IOError:
-        sys.stderr.write("Skipping input '%s': Can't open for reading\n" %
-                         filename)
-        _RestoreFilters()
-        return
-
-    # Note, if no dot is found, this will give the entire filename as the ext.
-    file_extension = filename[filename.rfind('.') + 1:]
-
-    # When reading from stdin, the extension is unknown, so no cpplint tests
-    # should rely on the extension.
-    if filename != '-' and file_extension not in _valid_extensions:
-        sys.stderr.write('Ignoring %s; not a valid file name '
-                         '(%s)\n' % (filename, ', '.join(_valid_extensions)))
-    else:
-        ProcessFileData(filename, file_extension, lines, Error,
-                        extra_check_functions)
-
-        # If end-of-line sequences are a mix of LF and CR-LF, issue
-        # warnings on the lines with CR.
-        #
-        # Don't issue any warnings if all lines are uniformly LF or CR-LF,
-        # since critique can handle these just fine, and the style guide
-        # doesn't dictate a particular end of line sequence.
-        #
-        # We can't depend on os.linesep to determine what the desired
-        # end-of-line sequence should be, since that will return the
-        # server-side end-of-line sequence.
-        if lf_lines and crlf_lines:
-            # Warn on every line with CR.  An alternative approach might be to
-            # check whether the file is mostly CRLF or just LF, and warn on the
-            # minority, we bias toward LF here since most tools prefer LF.
-            for linenum in crlf_lines:
-                Error(filename, linenum, 'whitespace/newline', 1,
-                      'Unexpected \\r (^M) found; better to use only \\n')
-
-    sys.stdout.write('Done processing %s\n' % filename)
-    _RestoreFilters()
-
-
-def PrintUsage(message):
-    """Prints a brief usage string and exits, optionally with an error message.
-
-  Args:
-    message: The optional error message.
-  """
-    sys.stderr.write(_USAGE)
-    if message:
-        sys.exit('\nFATAL ERROR: ' + message)
-    else:
-        sys.exit(1)
-
-
-def PrintCategories():
-    """Prints a list of all the error-categories used by error messages.
-
-  These are the categories used to filter messages via --filter.
-  """
-    sys.stderr.write(''.join('  %s\n' % cat for cat in _ERROR_CATEGORIES))
-    sys.exit(0)
-
-
-def ParseArguments(args):
-    """Parses the command line arguments.
-
-  This may set the output format and verbosity level as side-effects.
-
-  Args:
-    args: The command line arguments:
-
-  Returns:
-    The list of filenames to lint.
-  """
-    try:
-        (opts, filenames) = getopt.getopt(args, '', [
-            'help', 'output=', 'verbose=', 'counting=', 'filter=', 'root=',
-            'linelength=', 'extensions=', 'write-success='
-        ])
-    except getopt.GetoptError:
-        PrintUsage('Invalid arguments.')
-
-    verbosity = _VerboseLevel()
-    output_format = _OutputFormat()
-    filters = ''
-    counting_style = ''
-
-    for (opt, val) in opts:
-        if opt == '--help':
-            PrintUsage(None)
-        elif opt == '--output':
-            if val not in ('emacs', 'vs7', 'eclipse'):
-                PrintUsage(
-                    'The only allowed output formats are emacs, vs7 and eclipse.'
-                )
-            output_format = val
-        elif opt == '--verbose':
-            verbosity = int(val)
-        elif opt == '--filter':
-            filters = val
-            if not filters:
-                PrintCategories()
-        elif opt == '--counting':
-            if val not in ('total', 'toplevel', 'detailed'):
-                PrintUsage(
-                    'Valid counting options are total, toplevel, and detailed')
-            counting_style = val
-        elif opt == '--root':
-            global _root
-            _root = val
-        elif opt == '--linelength':
-            global _line_length
-            try:
-                _line_length = int(val)
-            except ValueError:
-                PrintUsage('Line length must be digits.')
-        elif opt == '--extensions':
-            global _valid_extensions
-            try:
-                _valid_extensions = set(val.split(','))
-            except ValueError:
-                PrintUsage('Extensions must be comma seperated list.')
-        elif opt == '--write-success':
-            global _write_success
-            _write_success = val
-
-    if not filenames:
-        PrintUsage('No files were specified.')
-
-    _SetOutputFormat(output_format)
-    _SetVerboseLevel(verbosity)
-    _SetFilters(filters)
-    _SetCountingStyle(counting_style)
-
-    return filenames
-
-
-def main():
-    filenames = ParseArguments(sys.argv[1:])
-
-    # Change stderr to write with replacement characters so we don't die
-    # if we try to print something containing non-ASCII characters.
-    sys.stderr = codecs.StreamReaderWriter(sys.stderr,
-                                           codecs.getreader('utf8'),
-                                           codecs.getwriter('utf8'), 'replace')
-
-    _cpplint_state.ResetErrorCounts()
-    for filename in filenames:
-        ProcessFile(filename, _cpplint_state.verbose_level)
-    _cpplint_state.PrintErrorCounts()
-
-    if _cpplint_state.error_count == 0 and _write_success is not None:
-        with open(_write_success, 'a'):
-            os.utime(_write_success, None)
-
-    sys.exit(_cpplint_state.error_count > 0)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index 33e0ec4ee226126374413382fe8fcbdebdf50f9e..9899eee8841147a509b7997fd905a1b68bc098da 100755
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -259,6 +259,7 @@ function check_style() {
     	eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
     fi
 
+    pip install cpplint
     # set up go environment for running gometalinter
     mkdir -p $GOPATH/src/github.com/PaddlePaddle/
     ln -sf ${PADDLE_ROOT} $GOPATH/src/github.com/PaddlePaddle/Paddle
@@ -414,10 +415,11 @@ function assert_api_not_changed() {
     source .env/bin/activate
     pip install ${PADDLE_ROOT}/build/python/dist/*whl
     python ${PADDLE_ROOT}/tools/print_signatures.py paddle.fluid,paddle.reader > new.spec
+
     if [ "$1" == "cp35-cp35m" ] || [ "$1" == "cp36-cp36m" ] || [ "$1" == "cp37-cp37m" ]; then
         # Use sed to make python2 and python3 sepc keeps the same
         sed -i 's/arg0: str/arg0: unicode/g' new.spec
-        sed -i "s/\(.*Transpiler.*\).__init__ ArgSpec(args=\['self'].*/\1.__init__ /g" new.spec
+        sed -i "s/\(.*Transpiler.*\).__init__ (ArgSpec(args=\['self'].*/\1.__init__ /g" new.spec
     fi
     # ComposeNotAligned has significant difference between py2 and py3
     sed -i '/.*ComposeNotAligned.*/d' new.spec
@@ -431,8 +433,8 @@ function assert_api_spec_approvals() {
         BRANCH="develop"
     fi
 
-    API_FILES=("cmake/external"
-               "paddle/fluid/API.spec"
+    API_FILES=("paddle/fluid/API.spec"
+               "python/paddle/fluid/parallel_executor.py"
                "paddle/fluid/framework/operator.h"
                "paddle/fluid/framework/tensor.h"
                "paddle/fluid/framework/lod_tensor.h"
@@ -451,12 +453,21 @@ function assert_api_spec_approvals() {
       echo "checking ${API_FILE} change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}"
       if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then
           # NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable.
-          APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
-          python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803`
+          if [ "$API_FILE" == "paddle/fluid/API.spec" ];then
+            APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
+            python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 2887803 35982308`
+          else
+            APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
+            python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 2887803`
+          fi
           echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
           if [ "${APPROVALS}" == "FALSE" ]; then
+            if [ "$API_FILE" == "paddle/fluid/API.spec" ];then
+              echo "You must have panyx0718 and shanyi15 approval for the api change! ${API_FILE}"
+            else
               echo "You must have panyx0718 approval for the api change! ${API_FILE}"
-              exit 1
+            fi
+            exit 1
           fi
       fi
     done
@@ -471,19 +482,6 @@ function assert_api_spec_approvals() {
             exit 1
         fi
     fi
-
-    pip install ${PADDLE_ROOT}/build/opt/paddle/share/wheels/*.whl
-    CHECK_DOCK_MD5=`python ${PADDLE_ROOT}/tools/check_doc_approval.py`
-    if [ "True" != ${CHECK_DOCK_MD5} ]; then
-        APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
-        python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 35982308`
-        echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
-        if [ "${APPROVALS}" == "FALSE" ]; then
-            echo "You must have shanyi15 approval for the api doc change! "
-            exit 1
-        fi
-        echo ${CHECK_DOCK_MD5} >/root/.cache/doc_md5.txt
-    fi
 }
 
 
diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index a9c92efb7218213e1865d4757f1bda2a19b07e93..d12f04a6abefecbb8e3e43fd2f0b87e43264b07f 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -125,14 +125,13 @@ def __bootstrap__():
     os.environ['OMP_NUM_THREADS'] = str(num_threads)
     sysstr = platform.system()
     read_env_flags = [
-        'check_nan_inf', 'benchmark', 'eager_delete_scope', 'use_mkldnn',
-        'use_ngraph', 'initial_cpu_memory_in_mb', 'init_allocated_mem',
-        'free_idle_memory', 'paddle_num_threads', "dist_threadpool_size",
-        'eager_delete_tensor_gb', 'fast_eager_deletion_mode',
-        'allocator_strategy', 'reader_queue_speed_test_mode',
-        'print_sub_graph_dir', 'pe_profile_fname', 'warpctc_dir',
-        'inner_op_parallelism', 'enable_parallel_graph',
-        'multiple_of_cupti_buffer_size'
+        'check_nan_inf', 'benchmark', 'eager_delete_scope', 'use_ngraph',
+        'initial_cpu_memory_in_mb', 'init_allocated_mem', 'free_idle_memory',
+        'paddle_num_threads', "dist_threadpool_size", 'eager_delete_tensor_gb',
+        'fast_eager_deletion_mode', 'allocator_strategy',
+        'reader_queue_speed_test_mode', 'print_sub_graph_dir',
+        'pe_profile_fname', 'warpctc_dir', 'inner_op_parallelism',
+        'enable_parallel_graph', 'multiple_of_cupti_buffer_size'
     ]
     if 'Darwin' not in sysstr:
         read_env_flags.append('use_pinned_memory')
@@ -140,6 +139,9 @@ def __bootstrap__():
     if os.name != 'nt':
         read_env_flags.append('cpu_deterministic')
 
+    if core.is_compiled_with_mkldnn():
+        read_env_flags.append('use_mkldnn')
+
     if core.is_compiled_with_dist():
         read_env_flags.append('rpc_deadline')
         read_env_flags.append('rpc_server_profile_path')
diff --git a/python/paddle/fluid/compiler.py b/python/paddle/fluid/compiler.py
index ab4011383824c67a81feba7f2a20d3aff5f4fc8f..1b7bdfc336a6851d189795a6e65a42b3e92834e9 100644
--- a/python/paddle/fluid/compiler.py
+++ b/python/paddle/fluid/compiler.py
@@ -17,7 +17,6 @@ import os
 import six
 import sys
 from .. import compat as cpt
-from . import framework
 
 from . import core
 from . import framework
@@ -36,6 +35,30 @@ def _place_obj(place):
     return p
 
 
+def _is_pserver_mode(main_program):
+    """Tell whether the program's global block holds a "send" or "recv" op.
+
+    A falsy main_program falls back to framework.default_main_program().
+    """
+    main = main_program or framework.default_main_program()
+    return any(op.type in ("send", "recv") for op in main.global_block().ops)
+
+
+def get_available_places(use_cuda):
+    """Return the CUDAPlace list (use_cuda) or the CPUPlace list to run on."""
+    if use_cuda:
+        selected = os.getenv("FLAGS_selected_gpus")
+        # No explicit selection means every device index core reports.
+        ids = [int(s) for s in selected.split(",")] if selected \
+            else list(six.moves.range(core.get_cuda_device_count()))
+        places = [core.CUDAPlace(i) for i in ids]
+    else:
+        count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
+        places = [core.CPUPlace() for _ in six.moves.range(count)]
+    assert places, "no place for execution"
+    return places
+
+
 class CompiledProgram(object):
     """
     Compiles to Graph for execution.
@@ -127,8 +150,7 @@ class CompiledProgram(object):
             self._exec_strategy = ExecutionStrategy()
         if self._build_strategy is None:
             self._build_strategy = BuildStrategy()
-        self._build_strategy.is_distribution = framework.is_pserver_mode(
-            self._program)
+        self._build_strategy.is_distribution = _is_pserver_mode(self._program)
         return self
 
     def with_inference_optimize(self, config):
@@ -153,9 +175,9 @@ class CompiledProgram(object):
     def _with_distributed(self):
         raise NotImplementedError()
 
-    def _compile_data_parallel(self):
+    def _compile_data_parallel(self, use_cuda=False, scope=None):
         if self._share_vars_from:
-            if self._scope:
+            if scope:
                 sys.stderr.write("share_vars_from is set, scope is ignored.\n")
             if not self._share_vars_from._is_data_parallel:
                 raise ValueError("share_vars_from is not data parallel. Cannot "
@@ -166,23 +188,11 @@ class CompiledProgram(object):
                     "var to share.")
             self._local_scopes = self._share_vars_from._executor.local_scopes()
         else:
+            assert scope is not None, ""
             self._local_scopes = []
 
-        self._exec_strategy.use_cuda = isinstance(self._place, core.CUDAPlace)
-        if self._exec_strategy.use_cuda:
-            gpus_env = os.getenv("FLAGS_selected_gpus")
-            if gpus_env:
-                gpus = [int(s) for s in gpus_env.split(",")]
-            else:
-                gpus = [
-                    i for i in six.moves.range(core.get_cuda_device_count())
-                ]
-            self._places = [core.CUDAPlace(i) for i in gpus]
-        else:
-            cpu_num = int(
-                os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
-            self._places = [core.CPUPlace() for _ in six.moves.range(cpu_num)]
-        assert self._places, "no place for execution"
+        self._exec_strategy.use_cuda = use_cuda
+        self._places = get_available_places(self._exec_strategy.use_cuda)
 
         if self._exec_strategy.num_threads == 0:
             if self._exec_strategy.use_cuda:
@@ -197,9 +207,11 @@ class CompiledProgram(object):
         # FIXME(dzhwinter): enable_inplace should be after memory_optimize
         # if turn on python memory optimize, turn off the inplace_pass.
         if self._build_strategy.memory_optimize is None:
-            self._build_strategy.memory_optimize = False if self._program and self._program._is_mem_optimized else True
+            self._build_strategy.memory_optimize = False \
+                if self._program and self._program._is_mem_optimized else True
         if self._build_strategy.enable_inplace is None:
-            self._build_strategy.enable_inplace = False if self._program and self._program._is_mem_optimized else True
+            self._build_strategy.enable_inplace = False \
+                if self._program and self._program._is_mem_optimized else True
 
         # TODO(wuyi): trainer endpoings should be passed in through
         # build_strategy, not program.xxx.
@@ -221,12 +233,12 @@ class CompiledProgram(object):
 
         places = list(map(_place_obj, self._places))
 
-        return core.ParallelExecutor(
-            places,
-            set(self._persistable_vars),
-            cpt.to_text(self._loss_name)
-            if self._loss_name else six.u(''), self._scope, self._local_scopes,
-            self._exec_strategy, self._build_strategy, self._graph)
+        return core.ParallelExecutor(places,
+                                     set(self._persistable_vars),
+                                     cpt.to_text(self._loss_name)
+                                     if self._loss_name else six.u(''), scope,
+                                     self._local_scopes, self._exec_strategy,
+                                     self._build_strategy, self._graph)
 
     def _compile_inference(self):
         return core.create_paddle_predictor(self._infer_config)
@@ -253,7 +265,9 @@ class CompiledProgram(object):
         self._scope = scope
         self._place = place
         if self._is_data_parallel:
-            self._executor = self._compile_data_parallel()
+            self._executor = self._compile_data_parallel(
+                use_cuda=isinstance(self._place, core.CUDAPlace),
+                scope=self._scope)
         elif self._is_inference:
             self._executor = self._compile_inference()
         else:
diff --git a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
index 18b58e6f388bbe9495333b12f32d63b74fddcb3a..622add48430c63a0c4293457127a49dd8d851e35 100644
--- a/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
+++ b/python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
@@ -17,7 +17,9 @@ import numpy as np
 import six
 from ..... import compat as cpt
 from .... import core
+from .... import Executor
 from ....framework import IrGraph
+from ....framework import IrNode
 from ....framework import Program
 from ....initializer import Constant
 from .... import unique_name
@@ -31,7 +33,7 @@ __all__ = [
 class QuantizationTransformPass(object):
     def __init__(self,
                  scope=None,
-                 program_exe=None,
+                 place=None,
                  weight_bits=8,
                  activation_bits=8,
                  activation_quantize_type='abs_max',
@@ -45,7 +47,7 @@ class QuantizationTransformPass(object):
             scope(fluid.Scope): When activation use 'range_abs_max' as the quantize
             type, this pass will create some new parameters. The scope is used to
             initialize these new parameters.
-            program_exe(fluid.Executor): program_exe is used to initialize new
+            place(fluid.CPUPlace|fluid.CUDAPlace): place is used to initialize new
             parameters described above.
             weight_bits (int): quantization bit number for weights,
                 the bias is not quantized.
@@ -71,13 +73,13 @@ class QuantizationTransformPass(object):
             from paddle.fluid import core
 
             graph = IrGraph(core.Graph(program.desc), for_test=False)
-            exe = fluid.Executor(fluid.CPUPlace())
+            place = fluid.CPUPlace()
             transform_pass = QuantizationTransformPass(fluid.global_scope(),
-            exe)
+            place)
             transform_pass.apply(graph)
         """
         self._scope = scope
-        self._program_exe = program_exe
+        self._place = place
         self._weight_bits = weight_bits
         self._activation_bits = activation_bits
 
@@ -118,7 +120,7 @@ class QuantizationTransformPass(object):
         self._is_test = graph.is_test()
         # marked the variable which has been dequantized.
         dequantized_vars = collections.OrderedDict()
-        persistable_vars = [p.name() for p in graph.all_persistable_vars()]
+        persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
 
         def _transform_forward(graph, op):
             for var_node in op.inputs:
@@ -149,7 +151,7 @@ class QuantizationTransformPass(object):
 
         if not self._is_test:
             self._create_global_step(graph)
-        ops = graph.all_ops()
+        ops = graph.all_op_nodes()
         # The process of _transform_forward and _transform_backward is needed in two for loops.
         # The loop for transforming the forward graph:
         for op in ops:
@@ -163,8 +165,8 @@ class QuantizationTransformPass(object):
         if len(self._need_initialized) > 0:
             assert self._scope is not None, \
             'The scope cannot be set None when activation_quantize_type equals to range_abs_max.'
-            assert self._program_exe is not None, \
-            'The program_exe cannot be set None when activation_quantize_type equals to range_abs_max.'
+            assert self._place is not None, \
+            'The place cannot be set None when activation_quantize_type equals to range_abs_max.'
             init_program = Program()
             for var_desc, initializer in six.iteritems(self._need_initialized):
                 var = init_program.global_block().create_var(
@@ -175,7 +177,8 @@ class QuantizationTransformPass(object):
                     lod_level=var_desc.lod_level(),
                     persistable=var_desc.persistable())
                 initializer(var, init_program.global_block())
-            self._program_exe.run(program=init_program, scope=self._scope)
+            exe = Executor(self._place)
+            exe.run(program=init_program, scope=self._scope)
 
         return graph
 
@@ -183,11 +186,11 @@ class QuantizationTransformPass(object):
         if self._weight_quantize_type == 'range_abs_max' or \
                 self._activation_quantize_type == 'range_abs_max':
             counter_name = cpt.to_text('@STEP_COUNTER@')
-            for node in graph.all_vars():
+            for node in graph.all_var_nodes():
                 if node.name() == counter_name:
                     self._global_step = node
             if self._global_step is None:
-                global_step_in = graph.create_param_node(
+                global_step_in = graph.create_persistable_node(
                     name=counter_name,
                     var_type=core.VarDesc.VarType.LOD_TENSOR,
                     shape=[1],
@@ -228,14 +231,14 @@ class QuantizationTransformPass(object):
 
         quant_var_node = graph.create_var_node(
             name=self._quantized_var_name(var_node.name()),
-            var_type=var_node.var().type(),
-            shape=var_node.var().shape(),
-            var_dtype=var_node.var().dtype())
+            var_type=var_node.type(),
+            shape=var_node.shape(),
+            var_dtype=var_node.dtype())
         scale_var_node = graph.create_var_node(
             name=self._quantized_scale_name(var_node.name()),
-            var_type=var_node.var().type(),
-            shape=var_node.var().shape(),
-            var_dtype=var_node.var().dtype())
+            var_type=var_node.type(),
+            shape=var_node.shape(),
+            var_dtype=var_node.dtype())
         quant_op_node = graph.create_op_node(
             op_type='fake_quantize_abs_max',
             attrs={
@@ -258,15 +261,15 @@ class QuantizationTransformPass(object):
 
         quant_var_node = graph.create_var_node(
             name=self._quantized_var_name(var_node.name()),
-            var_type=var_node.var().type(),
-            shape=var_node.var().shape(),
-            var_dtype=var_node.var().dtype())
+            var_type=var_node.type(),
+            shape=var_node.shape(),
+            var_dtype=var_node.dtype())
 
-        scale_in_node = graph.create_param_node(
+        scale_in_node = graph.create_persistable_node(
             name=self._quantized_scale_name(var_node.name()),
             var_type=core.VarDesc.VarType.LOD_TENSOR,
             shape=[1],
-            var_dtype=var_node.var().dtype())
+            var_dtype=var_node.dtype())
         self._need_initialized[scale_in_node.var()] = Constant(value=0.001)
 
         scale_out_node = graph.create_var_node_from_desc(scale_in_node.var())
@@ -275,11 +278,11 @@ class QuantizationTransformPass(object):
 
         if not self._is_test:
             # The name of scales_var_node maybe 'scales_0', 'scales_1', etc.
-            scales_node = graph.create_param_node(
+            scales_node = graph.create_persistable_node(
                 name=unique_name.generate('scales'),
                 var_type=core.VarDesc.VarType.LOD_TENSOR,
                 shape=[self._window_size],
-                var_dtype=var_node.var().dtype())
+                var_dtype=var_node.dtype())
             self._need_initialized[scales_node.var()] = Constant(value=0)
             inputs['Iter'] = self._global_step
             outputs['OutScales'] = scales_node
@@ -314,9 +317,9 @@ class QuantizationTransformPass(object):
 
         dequant_var_node = graph.create_var_node(
             name=self._dequantized_var_name(var_node.name()),
-            var_type=var_node.var().type(),
-            shape=var_node.var().shape(),
-            var_dtype=var_node.var().dtype())
+            var_type=var_node.type(),
+            shape=var_node.shape(),
+            var_dtype=var_node.dtype())
         max_range = (1 << (quant_bits - 1)) - 1
         dequant_op_node = graph.create_op_node(
             op_type='fake_dequantize_max_abs',
@@ -400,22 +403,22 @@ class QuantizationFreezePass(object):
         Args:
             graph(IrGraph): the applied graph.
         """
-        persistable_vars = [p.name() for p in graph.all_persistable_vars()]
-        ops = graph.all_ops()
+        persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
+        ops = graph.all_op_nodes()
         for op_node in ops:
             op_name = op_node.name()
             if op_name in self._fake_quant_op_names:
-                input_arg_name = op_node.op().input('X')[0]
+                input_arg_name = op_node.input('X')[0]
                 if input_arg_name in persistable_vars:
                     if self._weight_quantize_type == 'abs_max':
                         param = self._load_var(input_arg_name)
                         scale_v = np.max(np.abs(param))
                     else:
-                        scale_v = self._load_var(op_node.op().output('OutScale')
-                                                 [0])[0]
+                        scale_v = self._load_var(
+                            op_node.output('OutScale')[0])[0]
                     self._var_scale_map[input_arg_name] = scale_v
                 else:
-                    scale_v = graph.var_node(op_node.op().output('OutScale')[0])
+                    scale_v = graph.var_node(op_node.output('OutScale')[0])
                     self._var_scale_map[input_arg_name] = scale_v
                 if input_arg_name in persistable_vars:
                     self._remove_fake_quant_and_dequant_op(graph, op_node)
@@ -425,13 +428,13 @@ class QuantizationFreezePass(object):
                                                     self._weight_bits)
                     self._restore_var(input_arg_name, quantized_param_v)
 
-        ops = graph.all_ops()
+        ops = graph.all_op_nodes()
         for op_node in ops:
             op_name = op_node.name()
             if op_name in self._fake_dequant_op_names:
                 self._remove_fake_quant_and_dequant_op(graph, op_node)
 
-        ops = graph.all_ops()
+        ops = graph.all_op_nodes()
         for op_node in ops:
             op_name = op_node.name()
             if op_name in self._quantizable_ops:
@@ -451,8 +454,8 @@ class QuantizationFreezePass(object):
         return graph
 
     def _remove_fake_quant_and_dequant_op(self, graph, op_node):
-        k = op_node.op().output('Out')[0]
-        v = op_node.op().input('X')[0]
+        k = op_node.output('Out')[0]
+        v = op_node.input('X')[0]
         if v not in self._op_input_rename_map:
             self._op_input_rename_map[k] = v
         else:
@@ -462,7 +465,7 @@ class QuantizationFreezePass(object):
     def _insert_post_dequant_op(self, graph, op_node):
         max_range = None
         scale_var_node = None
-        persistable_vars = [p.name() for p in graph.all_persistable_vars()]
+        persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
         for var_node in op_node.inputs:
             name = var_node.name()
             if name in self._op_input_rename_map:
@@ -480,7 +483,7 @@ class QuantizationFreezePass(object):
                         original_var_name)
                 max_range = param_range * act_range / scale_v
             else:
-                assert isinstance(scale_v, core.Node)
+                assert isinstance(scale_v, IrNode)
                 scale_var_node = self._var_scale_map[original_var_name]
 
         if len(op_node.outputs) != 1:
@@ -490,9 +493,9 @@ class QuantizationFreezePass(object):
         output_var_node = op_node.outputs[0]
         dequant_var_node = graph.create_var_node(
             name=self._dequantized_var_name(output_var_node.name()),
-            var_type=output_var_node.var().type(),
-            shape=output_var_node.var().shape(),
-            var_dtype=output_var_node.var().dtype())
+            var_type=output_var_node.type(),
+            shape=output_var_node.shape(),
+            var_dtype=output_var_node.dtype())
         dequant_op_node = graph.create_op_node(
             op_type='fake_dequantize_max_abs',
             attrs={
@@ -517,14 +520,19 @@ class QuantizationFreezePass(object):
 
     def _remove_unused_var_nodes(self, graph):
         all_used_vars = set()
-        ops = graph.all_ops()
+        ops = graph.all_op_nodes()
         for op_node in ops:
             for input_node in op_node.inputs:
                 all_used_vars.add(input_node)
             for output_node in op_node.outputs:
                 all_used_vars.add(output_node)
 
-        all_unused_vars = graph.all_vars() - all_used_vars
+        all_used_vars = {n.node for n in all_used_vars}
+        all_unused_vars = {
+            n
+            for n in filter(lambda node: node.node not in all_used_vars,
+                            graph.all_var_nodes())
+        }
         graph.safe_remove_nodes(all_unused_vars)
 
     def _original_var_name(self, var_name):
@@ -583,8 +591,8 @@ class ConvertToInt8Pass(object):
         Args:
             graph(IrGraph): the applied graph.
         """
-        persistable_vars = [p.name() for p in graph.all_persistable_vars()]
-        ops = graph.all_ops()
+        persistable_vars = [p.name() for p in graph.all_persistable_nodes()]
+        ops = graph.all_op_nodes()
         input_map = {}
         for op_node in ops:
             op_name = op_node.name()
@@ -605,10 +613,10 @@ class ConvertToInt8Pass(object):
 
     def _convert_to_int8(self, graph, var_node):
         int8_var_node_name = var_node.name() + ".int8"
-        int8_var_node = graph.create_param_node(
+        int8_var_node = graph.create_persistable_node(
             name=cpt.to_text(int8_var_node_name),
-            var_type=var_node.var().type(),
-            shape=var_node.var().shape(),
+            var_type=var_node.type(),
+            shape=var_node.shape(),
             var_dtype=core.VarDesc.VarType.INT8)
         array = self._load_var(var_node.name())
         self._scope.var(int8_var_node_name)
@@ -624,14 +632,19 @@ class ConvertToInt8Pass(object):
 
     def _remove_unused_var_nodes(self, graph):
         all_used_vars = set()
-        ops = graph.all_ops()
+        ops = graph.all_op_nodes()
         for op_node in ops:
             for input_node in op_node.inputs:
                 all_used_vars.add(input_node)
             for output_node in op_node.outputs:
                 all_used_vars.add(output_node)
 
-        all_unused_vars = graph.all_vars() - all_used_vars
+        all_used_vars = {n.node for n in all_used_vars}
+        all_unused_vars = {
+            n
+            for n in filter(lambda node: node.node not in all_used_vars,
+                            graph.all_var_nodes())
+        }
         graph.safe_remove_nodes(all_unused_vars)
 
 
@@ -655,11 +668,11 @@ class TransformForMobilePass(object):
         Args:
             graph(IrGraph): the graph will be transformed.
         """
-        ops = graph.all_ops()
+        ops = graph.all_op_nodes()
         for op_node in ops:
             name = op_node.name()
             if name in self._fake_quant_op_names:
-                op_node.op().set_type('quantize')
+                op_node.set_type('quantize')
                 quant_node = graph.create_op_node_from_desc(op_node.op())
                 for input_node in op_node.inputs:
                     graph.link_to(input_node, quant_node)
@@ -667,7 +680,7 @@ class TransformForMobilePass(object):
                     graph.link_to(quant_node, output_node)
                 graph.safe_remove_nodes(op_node)
             if name in self._fake_dequant_op_names:
-                op_node.op().set_type('dequantize')
+                op_node.set_type('dequantize')
                 dequant_node = graph.create_op_node_from_desc(op_node.op())
                 for input_node in op_node.inputs:
                     graph.link_to(input_node, dequant_node)
diff --git a/python/paddle/fluid/contrib/slim/tests/test_graph.py b/python/paddle/fluid/contrib/slim/tests/test_graph.py
index 75e0c95b5c3cc06d66eab9de0b85e5d7ed110837..2d2f1384dec65ee19dcade8a46f80bd3f9eb7013 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_graph.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_graph.py
@@ -61,16 +61,16 @@ class TestGraph(unittest.TestCase):
             opt.minimize(loss)
         graph = IrGraph(core.Graph(main.desc), for_test=False)
         marked_nodes = set()
-        for op in graph.all_ops():
+        for op in graph.all_op_nodes():
             if op.name().find('conv2d') > -1:
                 marked_nodes.add(op)
         graph.draw('.', 'residual', marked_nodes)
         self.assertFalse(graph.has_circle())
         self.assertEqual(graph.graph_num(), 1)
         nodes = graph.topology_sort()
-        self.assertEqual(len(nodes), len(graph.all_ops()))
+        self.assertEqual(len(nodes), len(graph.all_op_nodes()))
         nodes_map = graph.build_adjacency_list()
-        self.assertEqual(len(nodes_map), len(graph.all_ops()))
+        self.assertEqual(len(nodes_map), len(graph.all_op_nodes()))
         nodes_num = len(graph.all_nodes())
         graph.safe_remove_nodes(marked_nodes)
         self.assertEqual(len(graph.all_nodes()), nodes_num - len(marked_nodes))
diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
index 2f291132f3049af21420f863972792c1a862b9ad..254b73a124734f3693f4757801f0f544d6aa6f27 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
@@ -130,15 +130,16 @@ class TestQuantizationTransformPass(unittest.TestCase):
             loss = linear_fc(3)
             opt = fluid.optimizer.Adam(learning_rate=0.001)
             opt.minimize(loss)
-        exe = fluid.Executor(fluid.CPUPlace())
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
         graph = IrGraph(core.Graph(main.desc), for_test=False)
         transform_pass = QuantizationTransformPass(
             scope=fluid.global_scope(),
-            program_exe=exe,
+            place=place,
             activation_quantize_type=quant_type)
         transform_pass.apply(graph)
         marked_nodes = set()
-        for op in graph.all_ops():
+        for op in graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 marked_nodes.add(op)
         graph.draw('.', 'quantize_fc_' + quant_type, marked_nodes)
@@ -146,7 +147,7 @@ class TestQuantizationTransformPass(unittest.TestCase):
         self.check_program(transform_pass, program)
         val_graph = IrGraph(core.Graph(program.desc), for_test=False)
         val_marked_nodes = set()
-        for op in val_graph.all_ops():
+        for op in val_graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 val_marked_nodes.add(op)
         val_graph.draw('.', 'val_fc_' + quant_type, val_marked_nodes)
@@ -166,15 +167,16 @@ class TestQuantizationTransformPass(unittest.TestCase):
             loss = residual_block(2)
             opt = fluid.optimizer.Adam(learning_rate=0.001)
             opt.minimize(loss)
-        exe = fluid.Executor(fluid.CPUPlace())
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
         graph = IrGraph(core.Graph(main.desc), for_test=False)
         transform_pass = QuantizationTransformPass(
             scope=fluid.global_scope(),
-            program_exe=exe,
+            place=place,
             activation_quantize_type=quant_type)
         transform_pass.apply(graph)
         marked_nodes = set()
-        for op in graph.all_ops():
+        for op in graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 marked_nodes.add(op)
         graph.draw('.', 'quantize_residual_' + quant_type, marked_nodes)
@@ -182,7 +184,7 @@ class TestQuantizationTransformPass(unittest.TestCase):
         self.check_program(transform_pass, program)
         val_graph = IrGraph(core.Graph(program.desc), for_test=False)
         val_marked_nodes = set()
-        for op in val_graph.all_ops():
+        for op in val_graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 val_marked_nodes.add(op)
         val_graph.draw('.', 'val_residual_' + quant_type, val_marked_nodes)
@@ -231,17 +233,17 @@ class TestQuantizationFreezePass(unittest.TestCase):
         with fluid.scope_guard(scope):
             exe.run(startup)
         transform_pass = QuantizationTransformPass(
-            scope=scope, program_exe=exe, activation_quantize_type=quant_type)
+            scope=scope, place=place, activation_quantize_type=quant_type)
         transform_pass.apply(main_graph)
         transform_pass.apply(test_graph)
         dev_name = '_gpu_' if use_cuda else '_cpu_'
         marked_nodes = set()
-        for op in main_graph.all_ops():
+        for op in main_graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 marked_nodes.add(op)
         main_graph.draw('.', 'main' + dev_name + quant_type, marked_nodes)
         marked_nodes = set()
-        for op in test_graph.all_ops():
+        for op in test_graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 marked_nodes.add(op)
         test_graph.draw('.', 'test' + dev_name + quant_type, marked_nodes)
@@ -251,11 +253,6 @@ class TestQuantizationFreezePass(unittest.TestCase):
         iters = 5
         batch_size = 8
 
-        #train_exe = fluid.ParallelExecutor(
-        #    main_program=quantized_main_program,
-        #    use_cuda=bool(use_cuda),
-        #    loss_name=loss.name,
-        #    scope=scope)
         train_reader = paddle.batch(
             paddle.reader.shuffle(
                 paddle.dataset.mnist.train(), buf_size=500),
@@ -269,9 +266,7 @@ class TestQuantizationFreezePass(unittest.TestCase):
                 loss_v = exe.run(program=quantized_main_program,
                                  feed=feeder.feed(data),
                                  fetch_list=[loss])
-                #loss_v = train_exe.run(feed=feeder.feed(data),
-                #                       fetch_list=[loss.name])
-                #print('{}: {}'.format('loss' + dev_name + quant_type, loss_v))
+                print('{}: {}'.format('loss' + dev_name + quant_type, loss_v))
 
         test_data = next(test_reader())
         with fluid.program_guard(quantized_test_program):
@@ -287,7 +282,7 @@ class TestQuantizationFreezePass(unittest.TestCase):
         freeze_pass = QuantizationFreezePass(scope=scope, place=place)
         freeze_pass.apply(test_graph)
         marked_nodes = set()
-        for op in test_graph.all_ops():
+        for op in test_graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 marked_nodes.add(op)
         test_graph.draw('.', 'test_freeze' + dev_name + quant_type,
@@ -299,21 +294,21 @@ class TestQuantizationFreezePass(unittest.TestCase):
                                   feed=feeder.feed(test_data),
                                   fetch_list=[loss])
         self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
-        #print('{}: {}'.format('test_loss1' + dev_name + quant_type, test_loss1))
-        #print('{}: {}'.format('test_loss2' + dev_name + quant_type, test_loss2))
+        print('{}: {}'.format('test_loss1' + dev_name + quant_type, test_loss1))
+        print('{}: {}'.format('test_loss2' + dev_name + quant_type, test_loss2))
         w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
         # Maybe failed, this is due to the calculation precision
         # self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
-        #print('{}: {}'.format('w_freeze' + dev_name + quant_type,
-        #                      np.sum(w_freeze)))
-        #print('{}: {}'.format('w_quant' + dev_name + quant_type,
-        #                      np.sum(w_quant)))
+        print('{}: {}'.format('w_freeze' + dev_name + quant_type,
+                              np.sum(w_freeze)))
+        print('{}: {}'.format('w_quant' + dev_name + quant_type,
+                              np.sum(w_quant)))
 
         # Convert parameter to 8-bit.
         convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
         convert_int8_pass.apply(test_graph)
         marked_nodes = set()
-        for op in test_graph.all_ops():
+        for op in test_graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 marked_nodes.add(op)
         test_graph.draw('.', 'test_int8' + dev_name + quant_type, marked_nodes)
@@ -330,14 +325,14 @@ class TestQuantizationFreezePass(unittest.TestCase):
         w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
         self.assertEqual(w_8bit.dtype, np.int8)
         self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
-        #print('{}: {}'.format('w_8bit' + dev_name + quant_type, np.sum(w_8bit)))
-        #print('{}: {}'.format('w_freeze' + dev_name + quant_type,
-        #                      np.sum(w_freeze)))
+        print('{}: {}'.format('w_8bit' + dev_name + quant_type, np.sum(w_8bit)))
+        print('{}: {}'.format('w_freeze' + dev_name + quant_type,
+                              np.sum(w_freeze)))
 
         mobile_pass = TransformForMobilePass()
         mobile_pass.apply(test_graph)
         marked_nodes = set()
-        for op in test_graph.all_ops():
+        for op in test_graph.all_op_nodes():
             if op.name().find('quantize') > -1:
                 marked_nodes.add(op)
         test_graph.draw('.', 'test_mobile' + dev_name + quant_type,
diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py
index c0191a34deaa5ab3b53b5fe4f33cb5449a2db8b3..dfa50e721c979703165649dccfd6e42ef08e97b7 100644
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -261,45 +261,42 @@ def _as_lodtensor(data, place):
 
 class Executor(object):
     """
-    An Executor in Python, only support the single-GPU running. For multi-cards, please refer to
-    ParallelExecutor.
-    Python executor takes a program, add feed operators and fetch operators to this program according
+    An Executor in Python. It supports single/multiple-GPU running and single/multiple-CPU running.
+    Python executor takes a program, adds feed operators and fetch operators to this program according
     to feed map and fetch_list. Feed map provides input data for the program. fetch_list provides
-    the variables(or names) that user want to get after program run. Note: the executor will run all
+    the variables (or names) that the user wants to get after the program runs. Note: the executor will run all
     operators in the program but not only the operators dependent by the fetch_list.
-    It store the global variables into the global scope, and create a local scope for the temporary
-    variables. The local scope contents will be discarded after every minibatch forward/backward finished.
-    But the global scope variables will be persistent through different runs.
-    All of ops in program will be running in sequence.
+    It stores the global variables into the global scope, and creates a local scope for the temporary
+    variables. The contents of the local scope may be discarded after every minibatch forward/backward
+    pass finishes, but the global scope variables will be persistent through different runs.
 
 
     Example:
-    .. code-block:: python
-        # First create the Executor.
-        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-        exe = fluid.Executor(place)
-
-        # Run the startup program once and only once.
-        # Not need to optimize/compile the startup program.
-        exe.run(fluid.default_startup_program())
-
-        # Run the main program directly without compile.
-        loss, = exe.run(fluid.default_main_program(),
-                        feed=feed_dict,
-                        fetch_list=[loss.name])
-        # Or, compiled the program and run. See `CompiledProgram` for more detail.
-        compiled_prog = compiler.CompiledProgram(
-            fluid.default_main_program()).with_data_parallel(
-            loss_name=loss.name)
-        loss, = exe.run(compiled_prog,
-                        feed=feed_dict,
-                        fetch_list=[loss.name])
+
+        .. code-block:: python
+
+            # First create the Executor.
+            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+            exe = fluid.Executor(place)
+
+            # Run the startup program once and only once.
+            # No need to optimize/compile the startup program.
+            exe.run(fluid.default_startup_program())
+
+            # Run the main program directly without compile.
+            loss, = exe.run(fluid.default_main_program(),
+                            feed=feed_dict,
+                            fetch_list=[loss.name])
+            # Or, compile the program and run it. See `CompiledProgram` for more detail.
+            compiled_prog = compiler.CompiledProgram(
+                fluid.default_main_program()).with_data_parallel(
+                loss_name=loss.name)
+            loss, = exe.run(compiled_prog,
+                            feed=feed_dict,
+                            fetch_list=[loss.name])
 
     Args:
         place(core.CPUPlace|core.CUDAPlace(n)): indicate the executor run on which device
-
-    Note: For debugging complicated network in parallel-GPUs, you can test it on the executor.
-    They has the exactly same arguments, and expected the same results.
     """
 
     def __init__(self, place):
@@ -382,6 +379,12 @@ class Executor(object):
         ]
         return outs
 
+    '''
+    TODO(typhoonzero): Define "no longer use" meaning? Can user create
+    a new Executor for the same program and run?
+    TODO(panyx0718): Why ParallelExecutor doesn't have close?
+    '''
+
     def close(self):
         """
         Close this executor.
@@ -389,9 +392,6 @@ class Executor(object):
         You can no longer use this executor after calling this method.
         For the distributed training, this method would free the resource on PServers related to
         the current Trainer.
-        TODO(typhoonzero): Define "no longer use" meaning? Can user create
-        a new Executor for the same program and run?
-        TODO(panyx0718): Why ParallelExecutor doesn't have close?
 
         Example:
             >>> cpu = core.CPUPlace()
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 7fe3120acc756d9335e5adbc2e8cfb4270e41c02..7dc9178807c76b44c9aeb00054188ad1dbe18f0a 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -87,15 +87,6 @@ def _current_expected_place():
     return _imperative_current_expected_place_
 
 
-def is_pserver_mode(main_program):
-    main = main_program if main_program \
-        else default_main_program()
-    for op in main.global_block().ops:
-        if op.type in ["send", "recv"]:
-            return True
-    return False
-
-
 class NameScope(object):
     def __init__(self, name="", parent=None):
         self._children = dict()
@@ -393,6 +384,9 @@ class Variable(object):
             if not self._ivar:
                 self._ivar = core.VarBase(stop_gradient)
             self._ivar.desc = self.desc
+            self._ivar.block = block.desc
+            self._ivar.name = name
+            self._ivar.persistable = persistable
             if persistable:
                 self.block.vars[name] = self
         else:
@@ -721,7 +715,9 @@ class Operator(object):
                 out_arg_names = []
                 for arg in out_args:
                     out_arg_names.append(cpt.to_text(arg.name))
-                    arg.op = self
+                    # TODO(minqiyang): could we remove variable's op in static mode?
+                    if not _in_imperative_mode():
+                        arg.op = self
                 self.desc.set_output(out_proto.name, out_arg_names)
 
         if op_attrs is not None:
@@ -1200,15 +1196,6 @@ class Block(object):
         else:
             raise ValueError("Var {0} is not found recursively".format(name))
 
-    def _clear_block(self):
-        # TODO(minqiyang): move this to backward_hooks
-        self.desc._clear_block()
-
-        for name in self.vars.keys():
-            assert self.vars[name].persistable
-
-        del self.ops[:]
-
     def all_parameters(self):
         return list(self.iter_parameters())
 
@@ -1345,26 +1332,13 @@ class Block(object):
             #
             # TODO(minqiyang): add op stop_gradient support in static mode too.
             # currently, we only support stop_gradient in imperative mode.
-            self._trace_op(op, kwargs.get("stop_gradient", False))
-        self.ops.append(op)
+            _imperative_tracer().trace_op(op,
+                                          kwargs.get("stop_gradient", False))
+        else:
+            self.ops.append(op)
 
         return op
 
-    def _trace_op(self, op, stop_gradient=False):
-        backward_refs = _imperative_tracer().trace(
-            op.iop, op.inputs, op.outputs, self.desc,
-            _imperative_current_expected_place_, stop_gradient)
-
-        # TODO(minqiyang): support backward_hooks to eager remove backward_refs
-        op.backward_refs = defaultdict(list)
-        for k, v in six.iteritems(op.inputs):
-            if k in backward_refs:
-                op.backward_refs[k] = op.inputs[k]
-
-        for k, v in six.iteritems(op.outputs):
-            if k in backward_refs:
-                op.backward_refs[k] = op.outputs[k]
-
     def _insert_op(self, index, *args, **kwargs):
         """
         Insert a Operator according to the giving arguments.
@@ -1417,9 +1391,11 @@ class Block(object):
             inputs=kwargs.get("inputs", None),
             outputs=kwargs.get("outputs", None),
             attrs=kwargs.get("attrs", None))
-        self.ops.insert(0, op)
         if _in_imperative_mode():
-            self._trace_op(op, kwargs.get("stop_gradient", False))
+            _imperative_tracer().trace_op(op,
+                                          kwargs.get("stop_gradient", False))
+        else:
+            self.ops.insert(0, op)
         return op
 
     def _sync_with_cpp(self):
@@ -1566,10 +1542,397 @@ class Block(object):
         return ret_var
 
 
+class IrNode(object):
+    """
+    Python IrNode. Beneath it is a core.Node, which is used for Ir Pass.
+    """
+
+    def __init__(self, node):
+        """
+        Construct an IrNode using core.Node.
+
+        Args:
+            node(core.Node): C++ Node.
+        """
+        assert isinstance(node,
+                          core.Node), 'node must be the instance of core.Node.'
+        self.node = node
+
+    def name(self):
+        """
+        Return the node name.
+
+        Returns:
+            str: node name.
+        """
+        return self.node.name()
+
+    def node_type(self):
+        """
+        Return the node type.
+
+        Returns:
+            core.Node.Type: node type(core.Node.Type.Operation or core.Node.Type.Variable).
+        """
+        return self.node.node_type()
+
+    def var(self):
+        """
+        Return the node variable description.
+
+        Returns:
+            core.VarDesc: node variable description.
+        """
+        return self.node.var()
+
+    def op(self):
+        """
+        Return the node operator description.
+
+        Returns:
+            core.OpDesc: node operator description.
+        """
+        return self.node.op()
+
+    def id(self):
+        """
+        Return the node id.
+
+        Returns:
+            int: node id.
+        """
+        return self.node.id()
+
+    def is_op(self):
+        """
+        If the node is an operator, then return true.
+
+        Returns:
+            bool: indicate whether the node is an operator.
+        """
+        return self.node.is_op()
+
+    def is_var(self):
+        """
+        If the node is a variable, then return true.
+
+        Returns:
+            bool: indicate whether the node is a variable.
+        """
+        return self.node.is_var()
+
+    def is_ctrl_var(self):
+        """
+        If the node is a control dependence variable, then return true.
+
+        Returns:
+            bool: indicate whether the node is a control dependence variable.
+        """
+        return self.node.is_ctrl_var()
+
+    def clear_inputs(self):
+        """
+        Clear the node inputs. After executing the `clear_inputs` function,
+        the node inputs will be empty.
+        """
+        self.node.clear_inputs()
+
+    def remove_input_by_id(self, node_id):
+        """
+        Remove a node from inputs by the given node id.
+
+        Args:
+            node_id(int): the given node id.
+        """
+        self.node.remove_input(node_id)
+
+    def remove_input(self, node):
+        """
+        Remove a node from inputs.
+
+        Args:
+            node(IrNode): the node being removed.
+        """
+        self.node.remove_input(node.node)
+
+    def append_input(self, node):
+        """
+        Append a node in inputs.
+
+        Args:
+            node(IrNode): the node being appended.
+        """
+        self.node.append_input(node.node)
+
+    def clear_outputs(self):
+        """
+        Clear the node outputs. After executing the `clear_outputs` function,
+        the node outputs will be empty.
+        """
+        self.node.clear_outputs()
+
+    def remove_output_by_id(self, node_id):
+        """
+        Remove a node from outputs by the given node id.
+
+        Args:
+            node_id(int): the given node id.
+        """
+        self.node.remove_output(node_id)
+
+    def remove_output(self, node):
+        """
+        Remove a node from outputs.
+
+        Args:
+            node(IrNode): the node being removed.
+        """
+        self.node.remove_output(node.node)
+
+    def append_output(self, node):
+        """
+        Append a node to outputs.
+
+        Args:
+            node(IrNode): the node being appended.
+        """
+        self.node.append_output(node.node)
+
+    @property
+    def inputs(self):
+        """
+        Return the node inputs.
+
+        Returns:
+            list(IrNode): node inputs wrapped by IrNode.
+        """
+        return [IrNode(n) for n in self.node.inputs]
+
+    @property
+    def outputs(self):
+        """
+        Return the node outputs.
+
+        Returns:
+            list(IrNode): node outputs wrapped by IrNode.
+        """
+        return [IrNode(n) for n in self.node.outputs]
+
+
+class IrVarNode(IrNode):
+    """
+    Python IrVarNode. Beneath it is a core.Node, it inherits from IrNode.
+    """
+
+    def __init__(self, node):
+        """
+        Construct an IrVarNode using core.Node.
+
+        Args:
+            node(core.Node): C++ Node.
+        """
+        assert isinstance(node, core.Node) and node.is_var(), \
+            'node must be the instance of core.Node and it must be a variable node.'
+        super(IrVarNode, self).__init__(node)
+        self.node = node
+
+    def set_shape(self, shape):
+        """
+        Set the node variable shape.
+
+        Args:
+            shape(list): shape to be set.
+        """
+        assert self.node.var() is not None, \
+            "The node variable description cannot be None."
+        self.node.var().set_shape(shape)
+
+    def persistable(self):
+        """
+        If the variable node is a persistable variable, then return true.
+
+        Returns:
+            bool: indicate whether the variable is persistable.
+        """
+        assert self.node.var() is not None, \
+            "The node variable description cannot be None."
+        return self.node.var().persistable()
+
+    def type(self):
+        """
+        Return the variable type.
+
+        Returns:
+            core.VarDesc.VarType: the variable type.
+        """
+        assert self.node.var() is not None, \
+            "The node variable description cannot be None."
+        return self.node.var().type()
+
+    def dtype(self):
+        """
+        Return the variable data type.
+
+        Returns:
+            core.VarDesc.VarType: the variable data type.
+        """
+        assert self.node.var() is not None, \
+            "The node variable description cannot be None."
+        return self.node.var().dtype()
+
+    def shape(self):
+        """
+        Return the variable shape.
+
+        Returns:
+            list: the variable shape.
+        """
+        assert self.node.var() is not None, \
+            "The node variable description cannot be None."
+        return self.node.var().shape()
+
+    @property
+    def inputs(self):
+        """
+        Return the node inputs.
+
+        Returns:
+            list(IrOpNode): node inputs wrapped by IrOpNode.
+        """
+        return [IrOpNode(n) for n in self.node.inputs]
+
+    @property
+    def outputs(self):
+        """
+        Return the node outputs.
+
+        Returns:
+            list(IrOpNode): node outputs wrapped by IrOpNode.
+        """
+        return [IrOpNode(n) for n in self.node.outputs]
+
+
+class IrOpNode(IrNode):
+    """
+    Python IrOpNode. Beneath it is a core.Node, it inherits from IrNode.
+    """
+
+    def __init__(self, node):
+        """
+        Construct an IrOpNode using core.Node.
+
+        Args:
+            node(core.Node): C++ Node.
+        """
+        assert isinstance(node, core.Node) and node.is_op(), \
+            'node must be the instance of core.Node and it must be a operator node.'
+        super(IrOpNode, self).__init__(node)
+        self.node = node
+
+    def rename_input(self, old_input_name, new_input_name):
+        """
+        Rename the input of this node.
+
+        Args:
+            old_input_name(str): the old input name.
+            new_input_name(str): the new input name.
+        """
+        assert self.node.op() is not None, \
+            "The node operator description cannot be None."
+        self.node.op()._rename_input(old_input_name, new_input_name)
+
+    def input(self, name):
+        """
+        Get the argument name list by the parameter name for input.
+
+        Args:
+            name(str): the parameter name.
+
+        Returns:
+            list(str): the argument name list.
+        """
+        assert self.node.op() is not None, \
+            "The node operator description cannot be None."
+        return self.node.op().input(name)
+
+    def output(self, name):
+        """
+        Get the argument name list by the parameter name for output.
+
+        Args:
+            name(str): the parameter name.
+
+        Returns:
+            list(str): the argument name list.
+        """
+        assert self.node.op() is not None, \
+            "The node operator description cannot be None."
+        return self.node.op().output(name)
+
+    def set_type(self, new_type):
+        """
+        Change the operator type into new type.
+
+        Args:
+            new_type(str): new operator type to be set.
+        """
+        assert self.node.op() is not None, \
+            "The node operator description cannot be None."
+        return self.node.op().set_type(new_type)
+
+    def set_attr(self, name, val):
+        """
+        Set the value of attribute by attribute's name.
+
+        Args:
+            name(str): the attribute name.
+            val(bool|int|str|float|list): the value of the attribute.
+        """
+        self._update_desc_attr(name, val)
+
+    def _update_desc_attr(self, name, val):
+        """
+        Update the value of the op desc's attribute by attribute's name.
+        """
+        assert self.node.op() is not None, \
+            "The node operator description cannot be None."
+        desc = self.node.op()
+        if isinstance(val, Block):
+            desc.set_block_attr(name, val.desc)
+        elif isinstance(val, list) and val and \
+            all(isinstance(v, Block) for v in val):
+            desc.set_blocks_attr(name, [v.desc for v in val])
+        elif isinstance(val, core.BlockDesc) or \
+            isinstance(val, core.ProgramDesc):
+            desc.set_serialized_attr(name, val.serialize_to_string())
+        else:
+            desc._set_attr(name, val)
+
+    @property
+    def inputs(self):
+        """
+        Return the node inputs.
+
+        Returns:
+            list(IrVarNode): node inputs wrapped by IrVarNode.
+        """
+        return [IrVarNode(n) for n in self.node.inputs]
+
+    @property
+    def outputs(self):
+        """
+        Return the node outputs.
+
+        Returns:
+            list(IrVarNode): node outputs wrapped by IrVarNode.
+        """
+        return [IrVarNode(n) for n in self.node.outputs]
+
+
 class IrGraph(object):
     """
     Python IrGraph. Beneath it is a core.Graph, which is used for
-    create a c++ Ir Pass Graph. An IrGraph is just a graph view of
+    creating a c++ Ir Pass Graph. An IrGraph is just a graph view of
     a Program. In an IrGraph, both Variables and Operators are graph
     nodes.
     """
@@ -1597,15 +1960,15 @@ class IrGraph(object):
         """
         Return all nodes included in the graph as a set.
         """
-        return {node for node in self.graph.nodes()}
+        return {IrNode(node) for node in self.graph.nodes()}
 
-    def all_vars(self):
+    def all_var_nodes(self):
         """
         Return all variable nodes included in the graph as a set.
         """
-        return {node for node in self.graph.nodes() if node.is_var()}
+        return {IrVarNode(node) for node in self.graph.nodes() if node.is_var()}
 
-    def all_persistable_vars(self):
+    def all_persistable_nodes(self):
         """
         Return all persistable variable nodes included in the graph as a set.
         """
@@ -1614,13 +1977,13 @@ class IrGraph(object):
             if node.is_var() and node.var() is not None and node.var(
             ).persistable():
                 persistable_nodes.add(node)
-        return persistable_nodes
+        return {IrVarNode(p) for p in persistable_nodes}
 
-    def all_ops(self):
+    def all_op_nodes(self):
         """
         Return all operator nodes included in the graph as a set.
         """
-        return {node for node in self.graph.nodes() if node.is_op()}
+        return {IrOpNode(node) for node in self.graph.nodes() if node.is_op()}
 
     def var_node(self, name):
         """
@@ -1634,14 +1997,14 @@ class IrGraph(object):
             doesn't have a variable with the giving name.
 
         Returns:
-            core.Node: the variable node with the giving name.
+            IrVarNode: the variable node with the given name.
         """
         if not isinstance(name, six.string_types):
             raise TypeError(
                 "var require string as parameter, but get %s instead." %
                 (type(name)))
         target_var_node = None
-        var_nodes = self.all_vars()
+        var_nodes = self.all_var_nodes()
         for var_node in var_nodes:
             if var_node.name() == name:
                 target_var_node = var_node
@@ -1649,7 +2012,7 @@ class IrGraph(object):
             raise ValueError("var_node %s not in this graph" % name)
         return target_var_node
 
-    def create_param_node(self, name, var_type, shape, var_dtype):
+    def create_persistable_node(self, name, var_type, shape, var_dtype):
         """
         Create a persistable variable node in the graph. In IrGraph,
         it can not distinguish between persistable variables and parameters.
@@ -1661,14 +2024,14 @@ class IrGraph(object):
             var_dtype(core.VarDesc.VarType): the data type of the persistable variable node.
 
         Returns:
-            core.Node: the created persistable variable node.
+            IrVarNode: the created persistable variable node.
         """
         var_desc = core.VarDesc(name)
         var_desc.set_type(var_type)
         var_desc.set_shape(shape)
         var_desc.set_dtype(var_dtype)
         var_desc.set_persistable(True)
-        return self.graph.create_var_node(var_desc)
+        return IrVarNode(self.graph.create_var_node(var_desc))
 
     def create_var_node(self, name, var_type, shape, var_dtype):
         """
@@ -1682,14 +2045,14 @@ class IrGraph(object):
             var_dtype(core.VarDesc.VarType): the data type of the variable node.
 
         Returns:
-            core.Node: the created variable node.
+            IrVarNode: the created variable node.
         """
 
         var_desc = core.VarDesc(name)
         var_desc.set_type(var_type)
         var_desc.set_shape(shape)
         var_desc.set_dtype(var_dtype)
-        return self.graph.create_var_node(var_desc)
+        return IrVarNode(self.graph.create_var_node(var_desc))
 
     def create_var_node_from_desc(self, var_desc):
         """
@@ -1700,9 +2063,9 @@ class IrGraph(object):
             var_desc(core.VarDesc): the giving variable description.
 
         Returns:
-            core.Node: the created variable node.
+            IrVarNode: the created variable node.
         """
-        return self.graph.create_var_node(var_desc)
+        return IrVarNode(self.graph.create_var_node(var_desc))
 
     def create_op_node(self, op_type, attrs, inputs, outputs):
         """
@@ -1715,7 +2078,7 @@ class IrGraph(object):
             outputs(dict): the outpus of the operator node.
 
         Returns:
-            core.Node: the created operator node.
+            IrOpNode: the created operator node.
         """
         op_desc = core.OpDesc()
         op_desc.set_type(op_type)
@@ -1731,7 +2094,7 @@ class IrGraph(object):
                 var_nodes = [var_nodes]
             op_desc.set_output(output_name,
                                [var_node.name() for var_node in var_nodes])
-        return self.graph.create_op_node(op_desc)
+        return IrOpNode(self.graph.create_op_node(op_desc))
 
     def create_op_node_from_desc(self, op_desc):
         """
@@ -1741,40 +2104,40 @@ class IrGraph(object):
             op_desc(core.VarDesc): the giving operator description.
 
         Returns:
-            core.Node: the created operator node.
+            IrOpNode: the created operator node.
         """
-        return self.graph.create_op_node(op_desc)
+        return IrOpNode(self.graph.create_op_node(op_desc))
 
     def update_input_link(self, old_input_node, new_input_node, op_node):
         """
         Update the input's link of a operator node.
 
         Args:
-            old_input_node(core.Node): the old input node of the giving op_node.
-            new_input_node(core.Node): the new input node of the giving op_node.
-            op_node(core.Node): the operator node that is needed to update input's link.
+            old_input_node(IrNode): the old input node of the given op_node.
+            new_input_node(IrNode): the new input node of the given op_node.
+            op_node(IrOpNode): the operator node whose input link needs to be updated.
         """
-        assert old_input_node in self.graph.nodes() and new_input_node in \
-        self.graph.nodes() and op_node in self.graph.nodes(), \
+        assert old_input_node.node in self.graph.nodes() and new_input_node.node in \
+        self.graph.nodes() and op_node.node in self.graph.nodes(), \
         'The three arguments(old_input_node&new_input_node&op_node) must be in the graph nodes.'
-        old_input_node.outputs_remove(op_node)
-        op_node.inputs_remove(old_input_node)
-        new_input_node.outputs_append(op_node)
-        op_node.inputs_append(new_input_node)
-        op_node.op()._rename_input(old_input_node.name(), new_input_node.name())
+        old_input_node.remove_output(op_node)
+        op_node.remove_input(old_input_node)
+        new_input_node.append_output(op_node)
+        op_node.append_input(new_input_node)
+        op_node.rename_input(old_input_node.name(), new_input_node.name())
 
     def link_to(self, node_in, node_out):
         """
         Connect two nodes.
 
         Args:
-            node_in(core.Node): the input node.
-            node_out(core.Node): the output node.
+            node_in(IrNode): the input node.
+            node_out(IrNode): the output node.
         """
-        assert node_in in self.graph.nodes() and node_out in self.graph.nodes(), \
+        assert node_in.node in self.graph.nodes() and node_out.node in self.graph.nodes(), \
             'The two arguments(node_in&node_out) must be in the graph nodes.'
-        node_in.outputs_append(node_out)
-        node_out.inputs_append(node_in)
+        node_in.append_output(node_out)
+        node_out.append_input(node_in)
 
     def safe_remove_nodes(self, remove_nodes):
         """
@@ -1789,7 +2152,8 @@ class IrGraph(object):
                 remove_nodes = set(remove_nodes)
             else:
                 remove_nodes = {remove_nodes}
-        core.graph_safe_remove_nodes(self.graph, remove_nodes)
+        original_nodes = {n.node for n in remove_nodes}
+        core.graph_safe_remove_nodes(self.graph, original_nodes)
 
     def has_circle(self):
         """
@@ -1816,18 +2180,23 @@ class IrGraph(object):
         Notes: the `graph` cannot contain a circle.
 
         Returns:
-            set(core.Node): nodes in topology order.
+            list(IrNode): nodes in topology order.
         """
-        return core.topology_sort(self.graph)
+        # Return a list, not a set: a set would discard the topological order.
+        return [IrNode(n) for n in core.topology_sort(self.graph)]
 
     def build_adjacency_list(self):
         """
         Build an adjacency list of operations for the `graph`.
 
         Returns:
-            dict{core.Node: set(core.Node)}: the adjacency list.
+            dict{IrNode: set(IrNode)}: the adjacency list.
         """
-        return core.build_adjacency_list(self.graph)
+        adj_list = core.build_adjacency_list(self.graph)
+        wrapped_adj_list = dict()
+        for k, v in six.iteritems(adj_list):
+            wrapped_adj_list[IrNode(k)] = {IrNode(n) for n in v}
+        return wrapped_adj_list
 
     def draw(self, save_path, name, marked_nodes=None, remove_ctr_var=True):
         """
@@ -1837,7 +2206,7 @@ class IrGraph(object):
         Args:
             save_path(str): the save path of drawn graph.
             name(str): the name of drawn graph.
-            marked_nodes(set(core.Node)): nodes that are needed to be marked.
+            marked_nodes(set(IrNode)): nodes that need to be marked.
             Default value is None.
             remove_ctr_var(bool): If it is set True, all control variable nodes
             in the graph will be removed. Default value is True.
@@ -1852,20 +2221,22 @@ class IrGraph(object):
                 print('The {} is saved as the dot filetype.'.format(
                     dot_file_path))
 
+        remove_ctr_vars = set()
         if remove_ctr_var:
-            remove_ctr_vars = set()
-            for node in self.graph.nodes():
+            for node in self.all_var_nodes():
                 if node.is_ctrl_var():
                     remove_ctr_vars.add(node)
             self.safe_remove_nodes(remove_ctr_vars)
-        ops_num = 0
-        for node in self.graph.nodes():
-            if node.is_op():
-                ops_num += 1
-        print('Total ops num = {}.'.format(ops_num))
+        print('Total ops num = {}.'.format(len(self.all_op_nodes())))
+
         if marked_nodes is not None:
             if not isinstance(marked_nodes, set):
-                marked_nodes = set(marked_nodes)
+                if isinstance(marked_nodes, Iterable):
+                    marked_nodes = set(marked_nodes)
+                else:
+                    marked_nodes = {marked_nodes}
+            marked_nodes = {n.node for n in marked_nodes}
+            remove_ctr_vars = {n.node for n in remove_ctr_vars}
             marked_nodes = marked_nodes - remove_ctr_vars
             if self.graph.has('__graphviz__marked_node__'):
                 self.graph.erase('__graphviz__marked_node__')
diff --git a/python/paddle/fluid/imperative/__init__.py b/python/paddle/fluid/imperative/__init__.py
index 54dc794ea6392fac6f266477fe045b37001a8666..034a11e0a6049c17800c8fd5aab5bc2291320169 100644
--- a/python/paddle/fluid/imperative/__init__.py
+++ b/python/paddle/fluid/imperative/__init__.py
@@ -23,7 +23,11 @@ from .layers import *
 from . import nn
 from .nn import *
 
+from . import tracer
+from .tracer import *
+
 __all__ = []
 __all__ += layers.__all__
 __all__ += base.__all__
 __all__ += nn.__all__
+__all__ += tracer.__all__
diff --git a/python/paddle/fluid/imperative/base.py b/python/paddle/fluid/imperative/base.py
index d4525233cc681720404770ef1d0c5d3006607a2e..174f138bfa2d3cfaa433c3235c2b0f9a5650e756 100644
--- a/python/paddle/fluid/imperative/base.py
+++ b/python/paddle/fluid/imperative/base.py
@@ -16,6 +16,7 @@ import numpy as np
 
 from paddle.fluid import core
 from paddle.fluid import framework
+from .tracer import Tracer
 
 __all__ = ['enabled', 'guard', 'to_variable']
 
@@ -28,7 +29,7 @@ def enabled():
 def guard(place=None):
     train = framework.Program()
     startup = framework.Program()
-    tracer = core.Tracer(train.current_block().desc)
+    tracer = Tracer(train.current_block().desc)
 
     if place is None:
         if core.is_compiled_with_cuda():
diff --git a/python/paddle/fluid/imperative/tracer.py b/python/paddle/fluid/imperative/tracer.py
new file mode 100644
index 0000000000000000000000000000000000000000..1064ad63e7103acde9bb8106b7791441ce68849b
--- /dev/null
+++ b/python/paddle/fluid/imperative/tracer.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import six
+
+from collections import defaultdict
+from paddle.fluid import core
+from paddle.fluid import framework
+
+__all__ = ['Tracer']
+
+
+def release_op(op):
+    del framework._imperative_tracer()._ops[op._trace_id]
+
+
+class Tracer(core.Tracer):
+    """
+    Python wrapper of imperative tracer
+    """
+
+    def __init__(self, block):
+        super(Tracer, self).__init__(block)
+
+        self._ops = defaultdict()
+        self._trace_id = 0
+
+    def trace_op(self, op, stop_gradient=False):
+        # record op's trace id
+        op.iop._trace_id = self._trace_id
+
+        # trace op and save it
+        backward_refs = self.trace(op.iop, op.inputs, op.outputs, op.block.desc,
+                                   framework._current_expected_place(),
+                                   stop_gradient)
+
+        if not stop_gradient:
+            self._trace_id += 1
+            self._ops[op.iop._trace_id] = op
+
+            # register backward hooks and variables if needed
+            if len(backward_refs) > 0:
+                op.iop.register_backward_hooks(release_op)
+
+                # TODO(minqiyang): remove all inputs and outputs after separating
+                # var and grad
+                op.backward_refs = defaultdict(list)
+                for k, v in six.iteritems(op.inputs):
+                    if k in backward_refs:
+                        op.backward_refs[k] = op.inputs[k]
+
+                for k, v in six.iteritems(op.outputs):
+                    if k in backward_refs:
+                        op.backward_refs[k] = op.outputs[k]
diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index e8341be28683a25971a53a37c70533a16add1593..190e7b5608a0cdf156b449e919e108a0917a0980 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -19,6 +19,7 @@ import numpy as np
 from .wrapped_decorator import signature_safe_contextmanager
 from .core import VarDesc
 from . import unique_name
+from .imperative import base as imperative_base
 
 __all__ = [
     'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier', 'Bilinear',
@@ -165,7 +166,8 @@ class ConstantInitializer(Initializer):
                 'force_cpu': self._force_cpu or force_init_on_cpu()
             },
             stop_gradient=True)
-        var.op = op
+        if not imperative_base.enabled():
+            var.op = op
         return op
 
 
@@ -244,7 +246,8 @@ class UniformInitializer(Initializer):
                 attrs={"in_dtype": out_var.dtype,
                        "out_dtype": var.dtype})
 
-        var.op = op
+        if not imperative_base.enabled():
+            var.op = op
         return op
 
 
@@ -322,7 +325,8 @@ class NormalInitializer(Initializer):
                 outputs={"Out": var},
                 attrs={"in_dtype": out_var.dtype,
                        "out_dtype": var.dtype})
-        var.op = op
+        if not imperative_base.enabled():
+            var.op = op
         return op
 
 
@@ -400,7 +404,8 @@ class TruncatedNormalInitializer(Initializer):
                 outputs={"Out": var},
                 attrs={"in_dtype": out_var.dtype,
                        "out_dtype": var.dtype})
-        var.op = op
+        if not imperative_base.enabled():
+            var.op = op
         return op
 
 
@@ -505,7 +510,8 @@ class XavierInitializer(Initializer):
                     "seed": self._seed
                 },
                 stop_gradient=True)
-        var.op = op
+        if not imperative_base.enabled():
+            var.op = op
         return op
 
 
@@ -605,7 +611,8 @@ class MSRAInitializer(Initializer):
                     "seed": self._seed
                 },
                 stop_gradient=True)
-        var.op = op
+        if not imperative_base.enabled():
+            var.op = op
         return op
 
 
@@ -703,7 +710,8 @@ class BilinearInitializer(Initializer):
                 'shape': list(shape),
                 value_name: values
             })
-        var.op = op
+        if not imperative_base.enabled():
+            var.op = op
         return op
 
 
@@ -761,7 +769,8 @@ class NumpyArrayInitializer(Initializer):
                 value_name: values
             },
             stop_gradient=True)
-        var.op = op
+        if not imperative_base.enabled():
+            var.op = op
         return op
 
 
diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py
index 24e102b6c2612b58a9b8367ebbefcece535d58bb..1775159798414a98bede4a3db5b577fb5e47e611 100644
--- a/python/paddle/fluid/io.py
+++ b/python/paddle/fluid/io.py
@@ -468,9 +468,10 @@ def save_persistables(executor, dirname, main_program=None, filename=None):
 
             exe = fluid.Executor(fluid.CPUPlace())
             param_path = "./my_paddle_model"
+            # `prog` can be a program defined by the user
             prog = fluid.default_main_program()
             fluid.io.save_persistables(executor=exe, dirname=param_path,
-                                       main_program=None)
+                                       main_program=prog)
     """
 
     if main_program and main_program._is_distributed:
diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py
index 617704a53138bd081a2ebe318de0c89e8db4aa96..378aeb37605f1971da3fe4a926e4b36b8eae2ca4 100644
--- a/python/paddle/fluid/layers/learning_rate_scheduler.py
+++ b/python/paddle/fluid/layers/learning_rate_scheduler.py
@@ -28,10 +28,12 @@ from . import ops
 from . import tensor
 from ..initializer import init_on_cpu
 from ..framework import default_main_program, Parameter, unique_name, name_scope
+import math
 
 __all__ = [
     'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
-    'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS'
+    'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS',
+    'cosine_decay'
 ]
 
 
@@ -307,6 +309,41 @@ def piecewise_decay(boundaries, values):
     return lr
 
 
+def cosine_decay(learning_rate, step_each_epoch, epochs):
+    """
+    Applies cosine decay to the learning rate.
+
+    When training a model, it is often recommended to lower the learning rate as the
+    training progresses. By using this function, the learning rate will be decayed by
+    following the cosine decay strategy (epoch = floor(global_step / step_each_epoch)):
+
+    decayed_lr = learning_rate * 0.5 * (math.cos(epoch * math.pi / epochs) + 1)
+    
+    Args:
+        learning_rate(Variable|float): The initial learning rate.
+        step_each_epoch(int): the number of steps in an epoch.
+        epochs(int): the number of epochs.
+
+    Returns:
+        Variable: The decayed learning rate.
+
+    Examples:
+
+        .. code-block:: python
+
+            base_lr = 0.1
+            lr = fluid.layers.cosine_decay(
+                learning_rate=base_lr, step_each_epoch=10000, epochs=120)
+    """
+    with default_main_program()._lr_schedule_guard():
+        global_step = _decay_step_counter()
+
+        cur_epoch = ops.floor(global_step / step_each_epoch)
+        decayed_lr = learning_rate * 0.5 * (
+            ops.cos(cur_epoch * math.pi / epochs) + 1)
+        return decayed_lr
+
+
 def append_LARS(params_grads, learning_rate, weight_decay):
     """
     Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 250dc24bd8f028b22d04b3f8dde082c7e236e402..efb400ccc6d43df44325dc7ef88c14afe4b704c3 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1767,7 +1767,7 @@ def sequence_softmax(input, use_cudnn=False, name=None):
     return softmax_out
 
 
-def softmax(input, use_cudnn=True, name=None):
+def softmax(input, use_cudnn=False, name=None):
     """
     The input of the softmax operator is a tensor of any rank. The output tensor
     has the same shape as the input.
@@ -1795,7 +1795,8 @@ def softmax(input, use_cudnn=True, name=None):
     Args:
         input (Variable): The input variable.
         use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \
-            library is installed.
+            library is installed. To improve numerical stability, use_cudnn is set to \
+            False by default. Default: False
         name (str|None): A name for this layer(optional). If set None, the layer
             will be named automatically. Default: None.
 
@@ -3041,7 +3042,6 @@ def data_norm(input,
               param_attr=None,
               data_layout='NCHW',
               in_place=False,
-              use_mkldnn=False,
               name=None,
               moving_mean_name=None,
               moving_variance_name=None,
@@ -3075,7 +3075,6 @@ def data_norm(input,
         param_attr(ParamAttr): The parameter attribute for Parameter `scale`.
         data_layout(string, default NCHW): NCHW|NHWC
         in_place(bool, Default False): Make the input and output of batch norm reuse memory.
-        use_mkldnn(bool, Default false): ${use_mkldnn_comment}
         name(string, Default None): A name for this layer(optional). If set None, the layer
             will be named automatically.
         moving_mean_name(string, Default None): The name of moving_mean which store the global Mean.
@@ -3156,8 +3155,7 @@ def data_norm(input,
         outputs={"Y": data_norm_out,
                  "Means": means,
                  "Scales": scales},
-        attrs={"epsilon": epsilon,
-               "use_mkldnn": use_mkldnn})
+        attrs={"epsilon": epsilon})
 
     return helper.append_activation(data_norm_out)
 
@@ -5756,7 +5754,7 @@ def softmax_with_cross_entropy(logits,
                                label,
                                soft_label=False,
                                ignore_index=kIgnoreIndex,
-                               numeric_stable_mode=False,
+                               numeric_stable_mode=True,
                                return_softmax=False):
     """
     **Softmax With Cross Entropy Operator.**
@@ -5820,7 +5818,7 @@ def softmax_with_cross_entropy(logits,
                                     When soft_label is True or CPU is used,
                                     the algorithm is always numerically stable.
                                     Note that the speed may be slower when use
-                                    stable algorithm. Default: False
+                                    stable algorithm. Default: True
         return_softmax (bool): A flag indicating whether to return the softmax
                                along with the cross entropy loss. Default: False
 
@@ -6846,56 +6844,58 @@ def image_resize(input,
 
     Example:
 
-      For scale:
-      
-        if align_corners = True && out_size > 1 :
+    .. code-block:: text
 
-          scale_factor = (in_size-1.0)/(out_size-1.0)
-        
-        else:
+        For scale:
+          
+            if align_corners = True && out_size > 1 :
+
+              scale_factor = (in_size-1.0)/(out_size-1.0)
+            
+            else:
+              
+              scale_factor = float(in_size/out_size)
+            
           
-          scale_factor = float(in_size/out_size)
-        
-      
-      Nearest neighbor interpolation:
-      
-      if:
-          align_corners = False
+        Nearest neighbor interpolation:
+          
+          if:
+              align_corners = False
 
-          input : (N,C,H_in,W_in)
-          output: (N,C,H_out,W_out) where:
+              input : (N,C,H_in,W_in)
+              output: (N,C,H_out,W_out) where:
 
-          H_out = \left \lfloor {H_{in} * scale_{}factor}} \right \rfloor
-          W_out = \left \lfloor {W_{in} * scale_{}factor}} \right \rfloor
+              H_out = floor(H_{in} * scale_{factor})
+              W_out = floor(W_{in} * scale_{factor})
 
-      else:
-          align_corners = True
+          else:
+              align_corners = True
 
-          input : (N,C,H_in,W_in)
-          output: (N,C,H_out,W_out) where:
+              input : (N,C,H_in,W_in)
+              output: (N,C,H_out,W_out) where:
 
-          H_out = round(H_{in} * scale_{factor})
-          W_out = round(W_{in} * scale_{factor})
+              H_out = round(H_{in} * scale_{factor})
+              W_out = round(W_{in} * scale_{factor})
 
-      Bilinear interpolation:
+        Bilinear interpolation:
 
-      if:
-          align_corners = False , align_mode = 0
-          
-          input : (N,C,H_in,W_in)
-          output: (N,C,H_out,W_out) where:
-          
-          H_out = (H_{in}+0.5) * scale_{factor} - 0.5
-          W_out = (W_{in}+0.5) * scale_{factor} - 0.5
+          if:
+              align_corners = False , align_mode = 0
+              
+              input : (N,C,H_in,W_in)
+              output: (N,C,H_out,W_out) where:
+              
+              H_out = (H_{in}+0.5) * scale_{factor} - 0.5
+              W_out = (W_{in}+0.5) * scale_{factor} - 0.5
 
 
-      else:
-       
-          input : (N,C,H_in,W_in)
-          output: (N,C,H_out,W_out) where:
+          else:
+           
+              input : (N,C,H_in,W_in)
+              output: (N,C,H_out,W_out) where:
 
-          H_out = H_{in} * scale_{factor}
-          W_out = W_{in} * scale_{factor}
+              H_out = H_{in} * scale_{factor}
+              W_out = W_{in} * scale_{factor}
 
     For details of nearest neighbor interpolation, please refer to Wikipedia: 
     https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation.
@@ -7050,41 +7050,39 @@ def resize_bilinear(input,
     Align_corners and align_mode are optinal parameters,the calculation 
     method of interpolation can be selected by them.
 
-
-    Align_corners and align_mode are optinal parameters,the calculation method 
-    of interpolation can be selected by them.
-
     Example:
 
-      For scale:
-      
-        if align_corners = True && out_size > 1 :
+    .. code-block:: text
 
-          scale_factor = (in_size-1.0)/(out_size-1.0)
-        
-        else:
+        For scale:
           
-          scale_factor = float(in_size/out_size)     
+            if align_corners = True && out_size > 1 :
 
-    Bilinear interpolation:
+              scale_factor = (in_size-1.0)/(out_size-1.0)
+            
+            else:
+              
+              scale_factor = float(in_size/out_size)     
 
-      if:
-          align_corners = False , align_mode = 0
-          
-          input : (N,C,H_in,W_in)
-          output: (N,C,H_out,W_out) where:
-          
-          H_out = (H_{in}+0.5) * scale_{factor} - 0.5
-          W_out = (W_{in}+0.5) * scale_{factor} - 0.5
+        Bilinear interpolation:
+
+          if:
+              align_corners = False , align_mode = 0
+              
+              input : (N,C,H_in,W_in)
+              output: (N,C,H_out,W_out) where:
+              
+              H_out = (H_{in}+0.5) * scale_{factor} - 0.5
+              W_out = (W_{in}+0.5) * scale_{factor} - 0.5
 
 
-      else:
+          else:
 
-          input : (N,C,H_in,W_in)
-          output: (N,C,H_out,W_out) where:
+              input : (N,C,H_in,W_in)
+              output: (N,C,H_out,W_out) where:
 
-          H_out = H_{in} * scale_{factor}
-          W_out = W_{in} * scale_{factor}
+              H_out = H_{in} * scale_{factor}
+              W_out = W_{in} * scale_{factor}
 
 
 
@@ -7136,42 +7134,44 @@ def resize_nearest(input,
                    align_corners=True):
     """
     Resize input by performing nearest neighbor interpolation in both the
-    3rd dimention(in height direction) and the 4th dimention(in width
-    direction) based on given output shape which specified by actual_shape,
+    3rd dimension(in height direction) and the 4th dimension(in width
+    direction) based on given output shape which is specified by actual_shape,
     out_shape and scale in priority order.
 
     Example:
 
-      For scale:
-      
-        if align_corners = True && out_size > 1 :
+    .. code-block:: text
 
-          scale_factor = (in_size-1.0)/(out_size-1.0)
-        
-        else:
+        For scale:
           
-          scale_factor = float(in_size/out_size)
-        
-      
-      Nearest neighbor interpolation:
-      
-      if:
-          align_corners = False
+            if align_corners = True && out_size > 1 :
 
-          input : (N,C,H_in,W_in)
-          output: (N,C,H_out,W_out) where:
+              scale_factor = (in_size-1.0)/(out_size-1.0)
+            
+            else:
+              
+              scale_factor = float(in_size/out_size)
+            
+          
+        Nearest neighbor interpolation:
+          
+          if:
+              align_corners = False
+
+              input : (N,C,H_in,W_in)
+              output: (N,C,H_out,W_out) where:
 
-          H_out = \left \lfloor {H_{in} * scale_{}factor}} \right \rfloor
-          W_out = \left \lfloor {W_{in} * scale_{}factor}} \right \rfloor
+              H_out = floor(H_{in} * scale_{factor})
+              W_out = floor(W_{in} * scale_{factor})
 
-      else:
-          align_corners = True
+          else:
+              align_corners = True
 
-          input : (N,C,H_in,W_in)
-          output: (N,C,H_out,W_out) where:
+              input : (N,C,H_in,W_in)
+              output: (N,C,H_out,W_out) where:
 
-          H_out = round(H_{in} * scale_{factor})
-          W_out = round(W_{in} * scale_{factor})
+              H_out = round(H_{in} * scale_{factor})
+              W_out = round(W_{in} * scale_{factor})
 
 
     For details of nearest neighbor interpolation, please refer to Wikipedia:
@@ -9945,6 +9945,7 @@ def teacher_student_sigmoid_loss(input,
 
     Examples:
         .. code-block:: python
+
           cost = fluid.layers.teacher_student_sigmoid_loss(input=similarity, label=label)
     """
     helper = LayerHelper('teacher_student_sigmoid_loss', **locals())
diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py
index 9c578ef662bfdaf03141ee263fb2078064d5bfe6..84beb37c1d995161c473c3d8c11402bee0ae7319 100644
--- a/python/paddle/fluid/parallel_executor.py
+++ b/python/paddle/fluid/parallel_executor.py
@@ -13,15 +13,11 @@
 # limitations under the License.
 
 from __future__ import print_function
-import multiprocessing
 from . import core
 from . import framework
 from . import executor
-from .. import compat as cpt
-import warnings
+from . import compiler
 import sys
-import six
-import os
 
 __all__ = ['ParallelExecutor']
 
@@ -92,104 +88,31 @@ class ParallelExecutor(object):
                  num_trainers=1,
                  trainer_id=0,
                  scope=None):
-        # step1: get places, the places are used in run too.
-        self._places = []
-        if use_cuda:
-            gpus_env = os.getenv("FLAGS_selected_gpus")
-            if gpus_env:
-                gpus = [int(s) for s in gpus_env.split(",")]
-            else:
-                gpus = [
-                    i for i in six.moves.range(core.get_cuda_device_count())
-                ]
-            self._places = [core.CUDAPlace(i) for i in gpus]
-        else:
-            cpu_num = int(
-                os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
-            self._places = [core.CPUPlace() for _ in six.moves.range(cpu_num)]
-        assert self._places, "no place for execution"
+        sys.stderr.write(
+            'ParallelExecutor is deprecated. '
+            'Please use CompiledProgram and Executor. CompiledProgram '
+            'is a central place for optimization and Executor is the '
+            'unified executor. Example can be found in compiler.py.\n')
 
-        # step2: init exec_strategy
-        if exec_strategy is None:
-            exec_strategy = ExecutionStrategy()
-        exec_strategy.use_cuda = use_cuda
-        if exec_strategy.num_threads == 0:
-            if use_cuda:
-                # Experiments on se-resnext shows that too many threads hurt
-                # performance. Worth tunning for other models in the future.
-                exec_strategy.num_threads = len(self._places) * 4
-            else:
-                cpu_num = int(
-                    os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
-                exec_strategy.num_threads = cpu_num * 2
-
-        # step3: init build_strategy
         if build_strategy is None:
             build_strategy = BuildStrategy()
         build_strategy.num_trainers = num_trainers
         build_strategy.trainer_id = trainer_id
-        # FIXME(zcd): is_distribution_ is a temporary field, because in pserver mode,
-        # num_trainers is 1, so the current fields of build_strategy doesn't tell if
-        # it's distributed model.
-        build_strategy.is_distribution = framework.is_pserver_mode(
-            main_program) or num_trainers > 1
-
-        # step4: get main_program, scope, local_scopes
-        main = main_program if main_program \
-            else framework.default_main_program()
-        # FIXME(dzhwinter): enable_inplace should be after memory_optimize
-        # if turn on python memory optimize, turn off the inplace_pass.
-        if build_strategy.memory_optimize is None:
-            build_strategy.memory_optimize = False if main._is_mem_optimized else True
-        if build_strategy.enable_inplace is None:
-            build_strategy.enable_inplace = False if main._is_mem_optimized else True
-        scope = scope if scope is not None else executor.global_scope()
-
-        if share_vars_from and not isinstance(share_vars_from,
-                                              ParallelExecutor):
-            raise TypeError("share_vars_from must be ParallelExecutor.")
-
-        local_scopes = share_vars_from.executor.local_scopes()\
-            if share_vars_from else []
-
-        # step5: check trainers_endpoints, it is used for distribution.
-        trainers_endpoints = main._trainers_endpoints
-        if num_trainers > 1 and trainers_endpoints:
-            assert num_trainers == len(
-                trainers_endpoints), "num_trainers == len(endpoints)"
-            build_strategy.trainers_endpoints = trainers_endpoints
-
-        # step6: get persistable_vars, places. persistable_vars
-        # need be broadcast to other local_scope.
-        persistable_vars = set([
-            cpt.to_text(v.name) for v in [
-                var for var in main.list_vars()
-                if var.persistable and var.type != core.VarDesc.VarType.RAW
-            ]
-        ])
-
-        def place_obj(place):
-            p = core.Place()
-            p.set_place(place)
-            return p
-
-        places = list(map(place_obj, self._places))
 
-        # step7: init ParallelExecutor
-        # ParallelExecutor API will be deprecated, don't support parallel graph.
-        self._graphs = []
-        if build_strategy.async_mode:
-            for _ in range(cpu_num):
-                self._graphs.append(core.Graph(main.desc))
-        else:
-            self._graphs.append(core.Graph(main.desc))
+        self._places = compiler.get_available_places(use_cuda)
+        self._scope = scope if scope is not None else executor.global_scope()
 
-        self.executor = core.ParallelExecutor(
-            places, persistable_vars,
-            cpt.to_text(loss_name) if loss_name else six.u(''), scope,
-            local_scopes, exec_strategy, build_strategy, self._graphs)
-
-        self.scope = scope
+        main_program = main_program if main_program is not None \
+            else framework.default_main_program()
+        self._compiled_program = compiler.CompiledProgram(main_program)
+        self._compiled_program.with_data_parallel(
+            loss_name=loss_name,
+            build_strategy=build_strategy,
+            exec_strategy=exec_strategy,
+            share_vars_from=share_vars_from)
+        self._place = core.CUDAPlace(0) if use_cuda else core.CPUPlace()
+        self._executor = executor.Executor(self._place)
+        self._compiled_program._compile(place=self._place, scope=self._scope)
 
     def run(self, fetch_list, feed=None, feed_dict=None, return_numpy=True):
         """
@@ -256,56 +179,19 @@ class ParallelExecutor(object):
                 loss = pe.run(feed=feeder.feed(cur_batch),
                               fetch_list=[avg_cost.name]))
         """
+        # `feed_dict` is still part of the signature, so keep honoring it
+        # instead of silently dropping the data a caller passes through it.
         if feed is None and feed_dict is not None:
             feed = feed_dict
             print(
                 "`feed_dict` is deprecated. Please use `feed=`",
                 file=sys.stderr)
 
-        if isinstance(feed, dict):
-            feed_tensor_dict = dict()
-            for feed_name in feed:
-                feed_tensor = feed[feed_name]
-                if not isinstance(feed_tensor, core.LoDTensor):
-                    feed_tensor = core.LoDTensor()
-                    # always set to CPU place, since the tensor need to be splitted
-                    # it is fast in CPU
-                    feed_tensor.set(feed[feed_name], core.CPUPlace())
-                feed_tensor_dict[feed_name] = feed_tensor
-
-            self.executor.feed_and_split_tensor_into_local_scopes(
-                feed_tensor_dict)
-        elif isinstance(feed, list) or isinstance(feed, tuple):
-            if len(feed) != len(self._places):
-                raise ValueError(
-                    "Feed a list of tensor, the list should be the same size as places"
-                )
-
-            res = list()
-
-            for i, each in enumerate(feed):
-                if not isinstance(each, dict):
-                    raise TypeError(
-                        "Each element of feed list should be a dict")
-                res_dict = dict()
-                for feed_name in each:
-                    tensor = each[feed_name]
-                    if not isinstance(tensor, core.LoDTensor):
-                        tmp = core.LoDTensor()
-                        tmp.set(tensor, self._places[i])
-                        tensor = tmp
-                    res_dict[feed_name] = tensor
-                res.append(res_dict)
-            self.executor.feed_tensors_into_local_scopes(res)
-
-        fetch_var_name = 'fetch'
-        self.executor.run(fetch_list, fetch_var_name)
-        arr = self.scope.find_var(fetch_var_name).get_lod_tensor_array()
-
-        if return_numpy:
-            return executor.as_numpy(arr)
-
-        return [arr[i] for i in range(len(arr))]
+        return self._executor.run(program=self._compiled_program,
+                                  scope=self._scope,
+                                  feed=feed,
+                                  fetch_list=fetch_list,
+                                  return_numpy=return_numpy)
 
     @property
     def device_count(self):
diff --git a/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..079f0d22056c7a0ebe366a177f62fafad75eff61
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir_memory_optimize_net_base.py
@@ -0,0 +1,176 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+
+import six
+import unittest
+import time
+import math
+import multiprocessing
+import numpy as np
+
+import paddle
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+from paddle.fluid import compiler
+
+# open eager delete mode
+os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0'
+os.environ['FLAGS_fast_eager_deletion_mode'] = 'true'
+os.environ['CPU_NUM'] = '2'
+
+
+class BuildIrMemOptBase(unittest.TestCase):
+    """Base test case that trains a network on the IMDB dataset for a few steps."""
+
+    def check_network_convergence(self,
+                                  network,
+                                  use_cuda=True,
+                                  memory_opt=True,
+                                  use_ir_memory_optimize=True,
+                                  enable_inplace=True,
+                                  iter=5):
+        """Train `network` through a data-parallel CompiledProgram.
+
+        Args:
+            network: callable invoked as `network(data, label, dict_dim)`; its
+                return value is the cost that Adam minimizes.
+            use_cuda: run on CUDAPlace(0) when True, on CPUPlace otherwise.
+            memory_opt: apply `fluid.memory_optimize` to the main program.
+            use_ir_memory_optimize: accepted but not read by this method.
+            enable_inplace: accepted but not read by this method.
+            iter: number of steps to run; an `iter` attribute set on the
+                test instance takes precedence over this argument.
+
+        Returns:
+            `(first_loss, last_loss)` as fetched from the executor, or None
+            when the run is skipped (CUDA requested but not compiled in, or
+            `os.name == 'nt'`).
+        """
+        if use_cuda and not core.is_compiled_with_cuda():
+            print('Skip use_cuda=True because Paddle is not compiled with cuda')
+            return
+
+        if os.name == 'nt':
+            print(
+                'Skip use_parallel_executor=True because Paddle comes without parallel support on windows'
+            )
+            return
+        fluid.default_startup_program().random_seed = 100
+        fluid.default_main_program().random_seed = 100
+        batch_size = 32
+        batch_size *= fluid.core.get_cuda_device_count() if use_cuda else int(
+            os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
+
+        # build network
+        word_dict = paddle.dataset.imdb.word_dict()
+        train_reader = paddle.batch(
+            paddle.dataset.imdb.train(word_dict), batch_size=batch_size)
+
+        data = fluid.layers.data(
+            name="words", shape=[1], dtype="int64", lod_level=1)
+
+        label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+
+        cost = network(data, label, len(word_dict))
+        optimizer = fluid.optimizer.Adam(learning_rate=0.001)
+        optimizer.minimize(cost)
+        if memory_opt:
+            fluid.memory_optimize(fluid.default_main_program())
+
+        # execution
+        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+        feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
+        reader = feeder.decorate_reader(train_reader, multi_devices=True)
+        exe = fluid.Executor(place)
+        exe.run(fluid.default_startup_program())
+
+        train_cp = compiler.CompiledProgram(fluid.default_main_program())
+        train_cp = train_cp.with_data_parallel(loss_name=cost.name)
+        fetch_list = [cost.name]
+
+        begin = time.time()
+        first_loss, last_loss = None, None
+        step_id = 0
+        # An `iter` attribute on the test instance overrides the argument.
+        custom_iter = getattr(self, "iter", None)
+        if custom_iter is not None:
+            iter = custom_iter
+        for batch in reader():
+            ret = exe.run(train_cp, feed=batch, fetch_list=fetch_list)
+            print(ret)
+            step_id += 1
+            if step_id == 1:
+                first_loss = ret[0]
+            if step_id == iter:
+                last_loss = ret[0]
+                break
+        end = time.time()
+
+        print("%.4f Instance per second" % (
+            (batch_size * iter) / (end - begin)))
+
+        print(first_loss, last_loss)
+        avg_last_loss_val = np.array(last_loss).mean()
+        avg_first_loss_val = np.array(first_loss).mean()
+        if math.isnan(float(avg_last_loss_val)) or math.isnan(
+                float(avg_first_loss_val)):
+            sys.exit("got NaN loss, training failed.")
+
+        return first_loss, last_loss
+
+
+class TestIrMemOptBase(BuildIrMemOptBase):
+    """Compares GPU losses with `fluid.memory_optimize` switched on and off."""
+
+    def setUp(self):
+        # test_network returns immediately while this stays None.
+        self.network = None
+
+    def test_network(self):
+        """Run `self.network` twice and require matching first/last losses."""
+        if self.network is None or not core.is_compiled_with_cuda():
+            return
+
+        baseline_first_loss, baseline_last_loss = None, None
+        for use_cuda in [True]:
+            for use_python_mem_opt in [True, False]:
+                print(
+                    'network: {}, use_cuda: {}, use_python_mem_opt: {}, use_ir_mem_opt : {}'.
+                    format(self.network.__name__, use_cuda, use_python_mem_opt,
+                           not use_python_mem_opt))
+                with fluid.program_guard(fluid.Program(), fluid.Program()):
+                    with fluid.scope_guard(core.Scope()):
+                        losses = self.check_network_convergence(
+                            self.network,
+                            use_cuda=use_cuda,
+                            memory_opt=use_python_mem_opt)
+                        if losses is None:
+                            # The run was skipped, so there is nothing to compare.
+                            return
+                        if use_cuda is True and use_python_mem_opt is True:
+                            baseline_first_loss, baseline_last_loss = losses
+                        else:
+                            cur_first_loss, cur_last_loss = losses
+
+                            self.assertAlmostEqual(
+                                np.mean(baseline_last_loss),
+                                np.mean(cur_last_loss),
+                                delta=1e-2)
+                            self.assertAlmostEqual(
+                                np.mean(baseline_first_loss),
+                                np.mean(cur_first_loss),
+                                delta=1e-2)
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/mkldnn_op_test.py b/python/paddle/fluid/tests/unittests/mkldnn/mkldnn_op_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..871f8403f812c87ac493b82482fe01fdf61037d4
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/mkldnn/mkldnn_op_test.py
@@ -0,0 +1,79 @@
+#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+
+
+def check_if_mkldnn_primitives_exist_in_bwd(test_case, op_type, x, out,
+                                            out_grad, x_grad):
+    """Build a one-op MKL-DNN program plus its grad op and verify `x@GRAD`.
+
+    The program is executed twice on CPU. NOTE(review): presumably so that
+    the second pass hits primitives created by the first -- confirm.
+
+    Args:
+        test_case: object whose `assertTrue` reports the comparison result.
+        op_type: type of the forward operator to append.
+        x: numpy array fed as variable 'x'.
+        out: numpy array; only its shape is read, to declare variable 'out'.
+        out_grad: numpy array fed as variable 'out@GRAD'.
+        x_grad: expected 'x@GRAD', compared with np.allclose (atol=1e-4).
+    """
+    tensors = {'x': x, 'out': out, 'out@GRAD': out_grad, 'x@GRAD': x_grad}
+
+    def expect_close(expected, actual, msg, atol=1e-4):
+        is_close = np.allclose(np.array(expected), actual, atol=atol)
+        test_case.assertTrue(is_close, msg)
+
+    cpu = core.CPUPlace()
+    prog = fluid.Program()
+    with fluid.program_guard(prog):
+        main_block = prog.global_block()
+        for var_name, value in tensors.items():
+            main_block.create_var(
+                name=var_name, dtype=np.float32, shape=value.shape)
+
+        fwd_op = main_block.append_op(
+            type=op_type,
+            inputs={'X': main_block.var('x')},
+            outputs={'Out': main_block.var('out')},
+            attrs={'use_mkldnn': True})
+
+        # Derive the grad op from the forward op and append a copy to the block.
+        grad_descs, _ = core.get_grad_op_desc(fwd_op.desc, set(), [])
+        bwd_desc = grad_descs[0]
+        main_block.desc.append_op().copy_from(bwd_desc)
+        for out_name in bwd_desc.output_arg_names():
+            main_block.desc.var(out_name.encode('ascii'))
+        bwd_desc.infer_var_type(main_block.desc)
+        bwd_desc.infer_shape(main_block.desc)
+        for out_name in bwd_desc.output_arg_names():
+            main_block.desc.find_var(out_name.encode('ascii')).set_dtype(
+                core.VarDesc.VarType.FP32)
+
+        exe = fluid.Executor(cpu)
+
+        # Do at least 2 iterations
+        for _ in range(2):
+            fetched = exe.run(
+                prog,
+                feed={name: tensors[name]
+                      for name in ['x', 'out@GRAD']},
+                fetch_list=['x@GRAD', 'out'])
+
+        expect_close(x_grad, fetched[0], 'x@GRAD')
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
index 0f301de47f53f3fcacd38d1415ebdbd7b4efc8f1..7099387b887003a205c0dfb4c8e9c83f89e29494 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
@@ -19,7 +19,7 @@ import numpy as np
 import paddle.fluid.core as core
 from paddle.fluid.tests.unittests.op_test import OpTest
 from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs
-import paddle.fluid as fluid
+from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd
 
 
 class TestMKLDNNReluDim2(TestRelu):
@@ -98,62 +98,24 @@ class TestMKLDNNAbsDim4(TestAbs):
 
 
 # Check if primitives already exist in backward
-class TestMKLDNNReluPrimitivesAlreadyExist(unittest.TestCase):
-    def __assert_close(self, tensor, np_array, msg, atol=1e-4):
-        self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg)
-
-    def test_check_forward_backward(self):
-        place = core.CPUPlace()
+class TestMKLDNNAbsPrimitivesAlreadyExist(unittest.TestCase):
+    def setUp(self):
+        super(TestMKLDNNAbsPrimitivesAlreadyExist, self).setUp()
 
         np.random.seed(123)
-        x = np.random.uniform(-1, 1, [2, 2]).astype(np.float32)
-        out = np.abs(x)
-
-        out_grad = np.random.random_sample(x.shape).astype(np.float32)
-        x_grad = out_grad * np.sign(x)  # Abs grad calculation
-
-        var_dict = {'x': x, 'out': out, 'out@GRAD': out_grad, 'x@GRAD': x_grad}
-        var_names = list(var_dict.keys())
-        ground_truth = {name: var_dict[name] for name in var_names}
-
-        program = fluid.Program()
-        with fluid.program_guard(program):
-            block = program.global_block()
-            for name in ground_truth:
-                block.create_var(
-                    name=name, dtype='float32', shape=ground_truth[name].shape)
-
-            relu_op = block.append_op(
-                type="abs",
-                inputs={"X": block.var('x'), },
-                outputs={"Out": block.var('out')},
-                attrs={"use_mkldnn": True})
-
-            # Generate backward op_desc
-            grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
-                relu_op.desc, set(), [])
-            grad_op_desc = grad_op_desc_list[0]
-            new_op_desc = block.desc.append_op()
-            new_op_desc.copy_from(grad_op_desc)
-            for var_name in grad_op_desc.output_arg_names():
-                block.desc.var(var_name.encode("ascii"))
-            grad_op_desc.infer_var_type(block.desc)
-            grad_op_desc.infer_shape(block.desc)
-            for arg in grad_op_desc.output_arg_names():
-                grad_var = block.desc.find_var(arg.encode("ascii"))
-                grad_var.set_dtype(core.VarDesc.VarType.FP32)
-
-            exe = fluid.Executor(place)
-
-            # Do at least 2 iterations
-            for i in range(2):
-                out = exe.run(
-                    program,
-                    feed={name: var_dict[name]
-                          for name in ['x', 'out@GRAD']},
-                    fetch_list=['x@GRAD'])
-
-            self.__assert_close(x_grad, out[0], "x@GRAD")
+        self.op_type = 'abs'
+        self.x = np.random.uniform(-1, 1, [2, 2]).astype(np.float32)
+        self.out = np.abs(self.x)
+        self.out_grad = np.random.random_sample(self.x.shape).astype(np.float32)
+        self.x_grad = self.__abs_bwd(self.x, self.out_grad)
+
+    # Abs grad calculation
+    def __abs_bwd(self, x, out_grad):
+        return out_grad * np.sign(x)
+
+    def test_check(self):
+        check_if_mkldnn_primitives_exist_in_bwd(
+            self, self.op_type, self.x, self.out, self.out_grad, self.x_grad)
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py
index 0542eef80070cbf281ee013c28b7092a2dd17eaa..28b670d7ab3267a03157b7e617504eb9a35656aa 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_mkldnn_op.py
@@ -15,44 +15,139 @@
 from __future__ import print_function
 
 import unittest
+import numpy as np
 
-from paddle.fluid.tests.unittests.test_conv2d_op import TestConv2dOp, TestWithPad, TestWithStride, TestWithGroup, TestWith1x1, TestWithInput1x1Filter1x1
+import paddle.fluid.core as core
+from paddle.fluid.tests.unittests.op_test import OpTest
+from paddle.fluid.tests.unittests.test_conv2d_op import TestConv2dOp
 
 
-class TestMKLDNN(TestConv2dOp):
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-        self.data_format = "NCHW"
+def conv2d_bias_naive(out, bias):
+    _, num_channels, _, _ = out.shape  # unpacking requires a 4-D array
 
+    for ch in range(num_channels):  # add bias[ch] to channel ch, in place
+        out[:, ch, :, :] = out[:, ch, :, :] + bias[ch]
+    return out
 
-class TestMKLDNNWithPad(TestWithPad):
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-        self.data_format = "NCHW"
 
+def conv2d_residual_naive(out, residual):
+    # Element-wise residual add; shapes must match exactly (no broadcasting).
+    assert out.shape == residual.shape
+    return np.add(out, residual)
 
-class TestMKLDNNWithStride(TestWithStride):
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-        self.data_format = "NCHW"
 
+class TestConv2dMKLDNNOp(TestConv2dOp):
+    def init_group(self):
+        self.groups = 1
 
-class TestMKLDNNWithGroup(TestWithGroup):
     def init_kernel_type(self):
-        self.use_mkldnn = True
         self.data_format = "NCHW"
+        self.use_mkldnn = True
+        self._cpu_only = True
 
+    def init_test_case(self):
+        self.pad = [0, 0]
+        self.stride = [1, 1]
+        self.input_size = [2, 3, 5, 5]  # NCHW
+        assert np.mod(self.input_size[1], self.groups) == 0
+        f_c = self.input_size[1] // self.groups
+        self.filter_size = [6, f_c, 3, 3]
 
-class TestMKLDNNWith1x1(TestWith1x1):
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-        self.data_format = "NCHW"
+    def setUp(self):
+        self.fuse_bias = False  # fusion defaults; presumably overridden by init_test_case
+        self.bias_size = None
+        self.fuse_relu = False
+        self.fuse_residual_connection = False
+        self.input_residual_size = None
+        TestConv2dOp.setUp(self)
 
+        output = self.outputs['Output']
 
-class TestMKLDNNWithInput1x1Filter1x1(TestWithInput1x1Filter1x1):
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-        self.data_format = "NCHW"
+        # MKL-DNN only supports either conv-sum-relu or conv-relu fusion.
+        if self.fuse_bias and self.bias_size is not None:
+            bias = np.random.random(self.bias_size).astype(self.dtype)
+            output = conv2d_bias_naive(output, bias)
+            output = output.astype(self.dtype)
+            self.attrs['fuse_bias'] = self.fuse_bias
+            self.inputs['Bias'] = OpTest.np_dtype_to_fluid_dtype(bias)
+
+        if self.fuse_residual_connection and self.input_residual_size is not None:
+            input_residual = np.random.random(self.input_residual_size).astype(
+                self.dtype)
+            output = conv2d_residual_naive(output, input_residual)
+
+            self.attrs[
+                'fuse_residual_connection'] = self.fuse_residual_connection
+            self.inputs['ResidualData'] = OpTest.np_dtype_to_fluid_dtype(
+                input_residual)
+        # Fused ReLU clamps negatives to zero; the result stays in self.dtype.
+        if self.fuse_relu:
+            output = np.maximum(output, 0).astype(self.dtype)
+
+        output = output.astype(self.dtype)
+        # Always publish the fuse_* attrs, whether or not a fusion is enabled.
+        self.attrs['fuse_bias'] = self.fuse_bias
+        self.attrs['fuse_relu'] = self.fuse_relu
+        self.attrs['fuse_residual_connection'] = self.fuse_residual_connection
+
+        self.outputs['Output'] = output
+
+
+class TestWithFuse(TestConv2dMKLDNNOp):
+    def init_test_case(self):
+        TestConv2dMKLDNNOp.init_test_case(self)
+        self.pad = [1, 1]
+        self.fuse_bias = True
+        self.bias_size = [6]
+        self.fuse_residual_connection = True
+        self.input_residual_size = [2, 6, 5, 5]
+
+    def test_check_grad(self):
+        pass
+
+    def test_check_grad_no_filter(self):
+        pass
+
+    def test_check_grad_no_input(self):
+        pass
+
+
+class TestWithPadWithBias(TestConv2dMKLDNNOp):
+    def init_test_case(self):
+        TestConv2dMKLDNNOp.init_test_case(self)
+        self.pad = [1, 1]
+        self.input_size = [2, 3, 6, 6]
+
+
+class TestWithStride(TestConv2dMKLDNNOp):
+    def init_test_case(self):
+        TestConv2dMKLDNNOp.init_test_case(self)
+        self.pad = [1, 1]
+        self.stride = [2, 2]
+        self.input_size = [2, 3, 6, 6]
+
+
+class TestWithGroup(TestConv2dMKLDNNOp):
+    def init_group(self):
+        self.groups = 3
+
+
+class TestWith1x1(TestConv2dMKLDNNOp):
+    def init_test_case(self):
+        TestConv2dMKLDNNOp.init_test_case(self)
+        self.filter_size = [6, 3, 1, 1]
+
+
+class TestWithInput1x1Filter1x1(TestConv2dMKLDNNOp):
+    def init_test_case(self):
+        TestConv2dMKLDNNOp.init_test_case(self)
+        self.input_size = [2, 3, 1, 1]  # NCHW
+        assert np.mod(self.input_size[1], self.groups) == 0
+        f_c = self.input_size[1] // self.groups
+        self.filter_size = [6, f_c, 1, 1]
+
+    def init_group(self):
+        self.groups = 3
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py
index 6de43dd46e5d184ec934f2d85e0c87137e9702e0..feb2a563eeaed7a83a82ec56ec08a0ed8664d126 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py
@@ -18,6 +18,24 @@ import unittest
 from paddle.fluid.tests.unittests.test_pool2d_op import TestPool2D_Op, TestCase1, TestCase2, TestCase3, TestCase4, TestCase5
 
 
+def create_test_mkldnn_use_ceil_class(parent):
+    class TestMKLDNNPool2DUseCeilCase(parent):
+        def init_kernel_type(self):
+            self.use_mkldnn = True
+
+        def init_ceil_mode(self):
+            self.ceil_mode = True
+    # Rename the subclass after its parent and publish it in module globals.
+    derived = TestMKLDNNPool2DUseCeilCase
+    derived.__name__ = "{0}_{1}".format(parent.__name__, "MKLDNNCeilModeCast")
+    globals()[derived.__name__] = derived
+
+
+create_test_mkldnn_use_ceil_class(TestPool2D_Op)
+create_test_mkldnn_use_ceil_class(TestCase1)
+create_test_mkldnn_use_ceil_class(TestCase2)
+
+
 def create_test_mkldnn_class(parent):
     class TestMKLDNNCase(parent):
         def init_kernel_type(self):
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_mkldnn_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..748b77f2bf48f450426d3ea918138a7db8df78f0
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_softmax_mkldnn_op.py
@@ -0,0 +1,57 @@
+#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+from paddle.fluid.tests.unittests.op_test import OpTest
+import paddle.fluid.core as core
+from paddle.fluid.tests.unittests.test_softmax_op import TestSoftmaxOp, stable_softmax
+from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd
+
+
+class TestSoftmaxMKLDNNOp(TestSoftmaxOp):
+    def init_kernel_type(self):
+        self.use_mkldnn = True
+
+
+class TestSoftmaxMKLDNNOp2(TestSoftmaxMKLDNNOp):
+    def get_x_shape(self):
+        return [2, 3, 4, 5]
+
+
+# Check if primitives already exist in backward
+class TestSoftmaxMKLDNNPrimitivesAlreadyExist(unittest.TestCase):
+    def setUp(self):
+        super(TestSoftmaxMKLDNNPrimitivesAlreadyExist, self).setUp()
+        # Fixed seed so x and out_grad are reproducible across runs.
+        np.random.seed(123)
+        self.op_type = 'softmax'
+        self.x = np.random.uniform(-1, 1, 2).astype(np.float32)
+        self.out = stable_softmax(self.x)
+        self.out_grad = np.random.random_sample(self.x.shape).astype(np.float32)
+        self.x_grad = self.__softmax_bwd(self.out, self.out_grad)
+
+    # Reference softmax gradient: out * (out_grad - dot(out, out_grad)).
+    def __softmax_bwd(self, out, out_grad):
+        return out * (out_grad - np.dot(out, out_grad))
+
+    def test_check(self):
+        check_if_mkldnn_primitives_exist_in_bwd(
+            self, self.op_type, self.x, self.out, self.out_grad, self.x_grad)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_adam_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_adam_ngraph_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef2aedf65f4c0cc182738c7a7a538095f8f628d5
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ngraph/test_adam_ngraph_op.py
@@ -0,0 +1,21 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+from paddle.fluid.tests.unittests.test_adam_op import TestAdamOp1, TestAdamOp2, TestAdamOpMultipleSteps, TestSparseAdamOp
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_concat_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_concat_ngraph_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..a223d73a7416c3564d5d4ef5ca4f3e1b42595a0d
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ngraph/test_concat_ngraph_op.py
@@ -0,0 +1,21 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+from paddle.fluid.tests.unittests.test_concat_op import TestConcatOp, TestConcatOp2, TestConcatOp3
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py
index 55c43ef115a316cc0fe5bb336b7a766a956c1496..d5a838540994abcd1407fd258e723218670bfb58 100644
--- a/python/paddle/fluid/tests/unittests/test_activation_op.py
+++ b/python/paddle/fluid/tests/unittests/test_activation_op.py
@@ -26,6 +26,7 @@ class TestActivation(OpTest):
         self.op_type = "exp"
         self.dtype = np.float32
         self.init_dtype()
+        self.init_kernel_type()
 
         x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
         out = np.exp(x)
@@ -44,6 +45,9 @@ class TestActivation(OpTest):
     def init_dtype(self):
         self.dtype = np.float32
 
+    def init_kernel_type(self):
+        pass
+
 
 class TestSigmoid(TestActivation):
     def setUp(self):
@@ -601,6 +605,25 @@ class TestSwish(TestActivation):
         self.check_grad(['X'], 'Out', max_relative_error=0.008)
 
 
+#------------------ Test Cudnn Activation----------------------
+def create_test_act_cudnn_class(parent, atol=1e-3, grad_atol=1e-3):
+    @unittest.skipIf(not core.is_compiled_with_cuda(),
+                     "core is not compiled with CUDA")
+    class TestActCudnn(parent):
+        def init_kernel_type(self):
+            self.attrs = {"use_cudnn": True}
+    # NOTE(review): atol/grad_atol are accepted but not used in this factory.
+    cls_name = "{0}_{1}".format(parent.__name__, "cudnn")
+    TestActCudnn.__name__ = cls_name
+    globals()[cls_name] = TestActCudnn
+
+
+create_test_act_cudnn_class(TestRelu)
+create_test_act_cudnn_class(TestRelu6)
+create_test_act_cudnn_class(TestSigmoid)
+create_test_act_cudnn_class(TestTanh)
+
+
 #------------------ Test Fp16 ----------------------
 def create_test_act_fp16_class(parent,
                                atol=1e-3,
diff --git a/python/paddle/fluid/tests/unittests/test_alloc_continuous_space_op.py b/python/paddle/fluid/tests/unittests/test_alloc_continuous_space_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d5fe114bad2b2bae73cf18e17ebd7af288a91da
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_alloc_continuous_space_op.py
@@ -0,0 +1,74 @@
+#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+
+from op_test import OpTest
+
+
+class TestAllocContinuousSpace(OpTest):
+    # Checks alloc_continuous_space: 'Output' mirrors the inputs and
+    # 'FusedOutput' is their flattened concatenation (or a constant fill).
+    def setUp(self):
+        self.op_type = "alloc_continuous_space"
+        self.init_dtype()  # use the hook so subclasses can change the dtype
+        attrs = self.init_attr()
+        self.copy_data = attrs["copy_data"]
+        self.constant = attrs["constant"]
+        self.set_constant = attrs["set_constant"]
+        self.Inputs = self.init_input()
+        self.FusedOutput = self.init_output(self.Inputs, self.set_constant,
+                                            self.constant)
+        self.inputs = {'Input': self.Inputs}
+        self.attrs = attrs
+        self.outputs = {'Output': self.Inputs, 'FusedOutput': self.FusedOutput}
+
+    def init_dtype(self):
+        self.dtype = np.float32
+
+    def init_input(self):
+        # Six named arrays of mixed rank; drawn in order x1..x6.
+        shapes = [[20, 3], [20], [1], [200, 30], [30], [1]]
+        return [("x%d" % (idx + 1), np.random.random(shape).astype(self.dtype))
+                for idx, shape in enumerate(shapes)]
+
+    def init_attr(self):
+        # Default scenario: copy_data enabled, constant fill disabled.
+        return {"copy_data": True, "set_constant": False, "constant": 0.0}
+
+    def init_output(self, input_list, set_constant, constant):
+        # Expected fused buffer: every input flattened, concatenated in order.
+        output = np.concatenate([item[1].flatten() for item in input_list])
+        if set_constant:
+            # With set_constant the whole buffer is expected to equal constant.
+            output = np.ones((len(output))) * constant
+        return output
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestAllocContinuousSpace2(TestAllocContinuousSpace):
+    def init_attr(self):
+        return {"copy_data": False, "set_constant": True, "constant": 0.5}
+
+    def test_check_output(self):
+        self.check_output(no_check_set=["Output"])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py
index bc3c422f2f68b79b2d938e25625093b2ce8977bb..910f53a91a7b5ca1413adf9505ed2c3ad3d56dad 100644
--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py
@@ -56,6 +56,8 @@ def train(network, use_cuda, use_parallel_executor, batch_size=32, pass_num=2):
         train_reader, multi_devices=use_parallel_executor)
 
     exe = fluid.Executor(place)
+    fluid.default_startup_program().random_seed = 1
+    fluid.default_main_program().random_seed = 1
     exe.run(fluid.default_startup_program())
 
     train_cp = compiler.CompiledProgram(fluid.default_main_program())
diff --git a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py
index 820ad4af88e9dc49cbe57ac182e1ba0402725f3d..4582b2a0eed401235835374d4cd58782d8d3a68f 100644
--- a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py
+++ b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py
@@ -35,7 +35,7 @@ class TestFakeQuantizeOp(OpTest):
         self.check_output()
 
 
-class TestFakeQuantizeOp(OpTest):
+class TestFakeQuantizeRangeAbsMaxOp(OpTest):
     def setUp(self):
         self.op_type = "fake_quantize_range_abs_max"
         self.attrs = {
@@ -43,8 +43,10 @@ class TestFakeQuantizeOp(OpTest):
             'window_size': int(1),
             'is_test': False
         }
+        x = (np.random.random((8, 16, 7, 7)) - 0.5) * 10
+        x = x.astype("float32")
         self.inputs = {
-            'X': np.random.random((8, 16, 7, 7)).astype("float32"),
+            'X': x,
             'Iter': np.zeros(1).astype("int64"),
             'InScale': np.zeros(1).astype("float32")
         }
@@ -62,5 +64,36 @@ class TestFakeQuantizeOp(OpTest):
         self.check_output()
 
 
+class TestFakeQuantizeRangeAbsMaxOp2(OpTest):
+    def setUp(self):
+        self.op_type = "fake_quantize_range_abs_max"
+        self.attrs = {
+            'bit_length': int(8),
+            'window_size': int(1),
+            'is_test': True
+        }
+        x = (np.random.random((8, 16, 7, 7)) - 0.5) * 10
+        x = x.astype("float32")
+        scale = np.max(np.abs(x)).astype("float32") - 1.0  # scale < max|x|, so np.clip below really clips
+        out_scales = np.zeros(self.attrs['window_size']).astype("float32")
+        out_scales[0] = scale
+        # The given InScale is fed to the op; expected Out clips and rounds x with it.
+        self.inputs = {
+            'X': x,
+            'Iter': np.zeros(1).astype("int64"),
+            'InScale': scale.astype("float32")
+        }
+        xs = np.clip(x, -scale, scale)
+        qs = np.round(xs / scale * ((1 << (self.attrs['bit_length'] - 1)) - 1))
+        self.outputs = {
+            'Out': qs,
+            'OutScale': scale.astype("float32"),
+            'OutScales': out_scales,
+        }
+
+    def test_check_output(self):
+        self.check_output(no_check_set=set(['OutScale', 'OutScales']))  # only 'Out' is compared
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_imperative.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py
similarity index 100%
rename from python/paddle/fluid/tests/unittests/test_imperative.py
rename to python/paddle/fluid/tests/unittests/test_imperative_basic.py
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
index 0d0a3bbe0bd47fe0e01761f8b42c92b884a5680a..7afbf61472a3d09ba5e34731d3a3ebbb8076e310 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import print_function
+
 import contextlib
 import unittest
 import numpy as np
@@ -142,8 +144,6 @@ class TestImperativeMnist(unittest.TestCase):
                     sgd.minimize(avg_loss)
                     mnist.clear_gradients()
 
-                    fluid.default_main_program().global_block()._clear_block()
-
                     dy_param_value = {}
                     for param in mnist.parameters():
                         dy_param_value[param.name] = param._numpy()
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
index c8e42d5ede57896b0d5c09a2334709ced2d16a3f..878c27d9344111d18e1ff27a1d4f41f8ae0df4b0 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
@@ -243,7 +243,9 @@ class TestImperativePtbRnn(unittest.TestCase):
             dy_loss = None
             last_hidden = None
             last_cell = None
-            for i in range(2):
+            batch_num = 50
+
+            for i in range(batch_num):
                 x_data = np.arange(12).reshape(4, 3).astype('int64')
                 y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                 x_data = x_data.reshape((-1, num_steps, 1))
@@ -302,7 +304,7 @@ class TestImperativePtbRnn(unittest.TestCase):
             static_loss_value = None
             static_last_cell_value = None
             static_last_hidden_value = None
-            for i in range(2):
+            for i in range(batch_num):
                 x_data = np.arange(12).reshape(4, 3).astype('int64')
                 y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                 x_data = x_data.reshape((-1, num_steps, 1))
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
index 4892495e1108e6d2a7e96cab88dc7668e360d79f..94ac3933151ac612ea9d308f0e28c73f0c067abf 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
@@ -231,7 +231,7 @@ class TestImperativeResnet(unittest.TestCase):
         seed = 90
 
         batch_size = train_parameters["batch_size"]
-        batch_num = 2
+        batch_num = 20
         with fluid.imperative.guard():
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
@@ -286,8 +286,6 @@ class TestImperativeResnet(unittest.TestCase):
                 optimizer.minimize(avg_loss)
                 resnet.clear_gradients()
 
-                fluid.default_main_program().global_block()._clear_block()
-
                 dy_param_value = {}
                 for param in resnet.parameters():
                     dy_param_value[param.name] = param._numpy()
diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py
new file mode 100644
index 0000000000000000000000000000000000000000..30b6d6106cdc46cfed201e5bb44a0c80d7e8ca3d
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_nlp.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# NLP models stack many ops that operate on LoD data, which makes them a classic test case for optimization passes.
+
+from __future__ import print_function
+
+import paddle.fluid as fluid
+import unittest
+from ir_memory_optimize_net_base import TestIrMemOptBase
+
+
+def lstm_net(data,
+             label,
+             dict_dim,
+             emb_dim=128,
+             hid_dim=128,
+             hid_dim2=96,
+             class_dim=2,
+             emb_lr=30.0):
+    emb = fluid.layers.embedding(
+        input=data,
+        size=[dict_dim, emb_dim],
+        param_attr=fluid.ParamAttr(learning_rate=emb_lr))
+    fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
+    # Only the first dynamic_lstm result (lstm_h) is used; `c` is ignored.
+    lstm_h, c = fluid.layers.dynamic_lstm(
+        input=fc0, size=hid_dim * 4, is_reverse=False)
+    lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
+    lstm_max_tanh = fluid.layers.tanh(lstm_max)
+    fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
+    prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(x=cost)  # scalar loss returned to the test harness
+    return avg_cost
+
+
+class TestIrMemOptRNN(TestIrMemOptBase):
+    def setUp(self):
+        self.network = lstm_net
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py
index c0f480e34dcac3351ba3008ad632a29943afdb81..50d998990f9bbba0d35241f5e53d05675ca08c28 100644
--- a/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py
+++ b/python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py
@@ -13,21 +13,44 @@
 # limitations under the License.
 
 import os
+import sys
 import unittest
+from timeit import default_timer as timer
+import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
+import paddle.dataset.wmt16 as wmt16
 
 os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0"
 os.environ[
     'RECORDIO_FILENAME'] = '/tmp/ir_memory_optimize_transformer.wmt16.recordio'
 
-from test_parallel_executor_transformer import TestTransformer
-from test_parallel_executor_transformer import transformer
+from test_parallel_executor_transformer import transformer, ModelHyperParams, transformer_model, transformer, prepare_batch_input
+from parallel_executor_test_base import TestParallelExecutorBase
 
 
 # NOTE(dzhwinter): test diferent strategy colisions.
 # open the eager delete tensor strategy by default.
-class TestTransformerWithIR(TestTransformer):
+class TestTransformerWithIR(TestParallelExecutorBase):
+    @classmethod
+    def setUpClass(cls):
+        os.environ['CPU_NUM'] = str(4)
+        reader = paddle.batch(
+            wmt16.train(ModelHyperParams.src_vocab_size,
+                        ModelHyperParams.trg_vocab_size),
+            batch_size=transformer_model.batch_size)
+
+        with fluid.recordio_writer.create_recordio_writer(
+                os.environ.get("RECORDIO_FILENAME")) as writer:
+            for batch in reader():
+                for tensor in prepare_batch_input(
+                        batch, ModelHyperParams.src_pad_idx,
+                        ModelHyperParams.trg_pad_idx, ModelHyperParams.n_head):
+                    t = fluid.LoDTensor()
+                    t.set(tensor, fluid.CPUPlace())
+                    writer.append_tensor(t)
+                writer.complete_append_tensor()
+
     def test_main(self):
         if core.is_compiled_with_cuda():
             # check python transpiler
@@ -35,13 +58,15 @@ class TestTransformerWithIR(TestTransformer):
                 transformer,
                 use_cuda=True,
                 memory_opt=True,
-                use_ir_memory_optimize=False)
+                use_ir_memory_optimize=False,
+                iter=2)
             # check IR memory optimize
             self.check_network_convergence(
                 transformer,
                 use_cuda=True,
                 memory_opt=False,
-                use_ir_memory_optimize=True)
+                use_ir_memory_optimize=True,
+                iter=2)
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py
index 0d3e6d73e0149fe633b8f1de9041068c2e3bb293..5212d97dfbc16e463e5f68456a3d735ac6679ae1 100644
--- a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py
+++ b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py
@@ -82,6 +82,13 @@ def piecewise_decay(global_step, boundaries, values):
     return values[len(values) - 1]
 
 
+def cosine_decay(global_step, learning_rate, step_each_epoch, epochs):
+    # Reference cosine schedule: the rate only changes at epoch boundaries.
+    epoch_idx = math.floor(global_step / step_each_epoch)
+    cos_factor = math.cos(epoch_idx * math.pi / epochs) + 1
+    return learning_rate * 0.5 * cos_factor
+
+
 class TestLearningRateDecay(unittest.TestCase):
     def check_decay(self, python_decay_fn, fluid_decay_fn, kwargs):
         places = [fluid.CPUPlace()]
@@ -149,6 +156,11 @@ class TestLearningRateDecay(unittest.TestCase):
                 "boundaries": [3, 6, 9],
                 "values": [0.1, 0.2, 0.3, 0.4]
             }),
+            (cosine_decay, layers.cosine_decay, {
+                "learning_rate": 0.1,
+                "step_each_epoch": 100,
+                "epochs": 120
+            }),
         ]
 
         for py_decay_fn, fluid_decay_fn, kwargs in decay_fns:
diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op.py b/python/paddle/fluid/tests/unittests/test_sgd_op.py
index b46e4bfb86bd5dc9c74375693328f2506281be3e..162e6d1938c8174d342d8e4af1e4b6c424afc521 100644
--- a/python/paddle/fluid/tests/unittests/test_sgd_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sgd_op.py
@@ -24,17 +24,28 @@ from op_test import OpTest
 class TestSGDOp(OpTest):
     def setUp(self):
         self.op_type = "sgd"
-        w = np.random.random((102, 105)).astype("float32")
-        g = np.random.random((102, 105)).astype("float32")
+        self.conf()
+        w = np.random.random((self.h, self.w)).astype("float32")
+        g = np.random.random((self.h, self.w)).astype("float32")
         lr = np.array([0.1]).astype("float32")
 
         self.inputs = {'Param': w, 'Grad': g, 'LearningRate': lr}
         self.outputs = {'ParamOut': w - lr * g}
 
+    def conf(self):
+        self.h = 102
+        self.w = 105
+
     def test_check_output(self):
         self.check_output()
 
 
+class TestSGDOpCase8X(TestSGDOp):
+    def conf(self):
+        self.h = 10
+        self.w = 64
+
+
 class TestSparseSGDOp(unittest.TestCase):
     def check_with_place(self, place):
         scope = core.Scope()
@@ -42,12 +53,12 @@ class TestSparseSGDOp(unittest.TestCase):
         # create and initialize Grad Variable   
         height = 10
         rows = [0, 4, 7]
-        row_numel = 12
+        self.conf()
 
         grad_selected_rows = scope.var('Grad').get_selected_rows()
         grad_selected_rows.set_height(height)
         grad_selected_rows.set_rows(rows)
-        np_array = np.ones((len(rows), row_numel)).astype("float32")
+        np_array = np.ones((len(rows), self.row_numel)).astype("float32")
         np_array[0, 0] = 2.0
         np_array[2, 8] = 4.0
 
@@ -56,7 +67,7 @@ class TestSparseSGDOp(unittest.TestCase):
 
         # create and initialize Param Variable
         param = scope.var('Param').get_tensor()
-        param_array = np.full((height, row_numel), 5.0).astype("float32")
+        param_array = np.full((height, self.row_numel), 5.0).astype("float32")
         param.set(param_array, place)
 
         # create and initialize LeraningRate Variable
@@ -98,6 +109,14 @@ class TestSparseSGDOp(unittest.TestCase):
         for place in places:
             self.check_with_place(place)
 
+    def conf(self):
+        self.row_numel = 12
+
+
+class TestSparseSGDOpCase8X(TestSparseSGDOp):
+    def conf(self):
+        self.row_numel = 16
+
 
 class TestSGDOpOptimizeSelectedRows(unittest.TestCase):
     def check_with_place(self, place):
diff --git a/python/paddle/fluid/tests/unittests/test_softmax_op.py b/python/paddle/fluid/tests/unittests/test_softmax_op.py
index 40c3135183a128cd9b7324ce27da798fa2d93afd..5c56de6779d238064f03a65b54f3c73a77119f60 100644
--- a/python/paddle/fluid/tests/unittests/test_softmax_op.py
+++ b/python/paddle/fluid/tests/unittests/test_softmax_op.py
@@ -144,15 +144,5 @@ class TestSoftmaxFP16CUDNNOp2(TestSoftmaxFP16CUDNNOp):
         return [2, 3, 4, 5]
 
 
-class TestSoftmaxMKLDNNOp(TestSoftmaxOp):
-    def init_kernel_type(self):
-        self.use_mkldnn = True
-
-
-class TestSoftmaxMKLDNNOp2(TestSoftmaxMKLDNNOp):
-    def get_x_shape(self):
-        return [2, 3, 4, 5]
-
-
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/transpiler/inference_transpiler.py b/python/paddle/fluid/transpiler/inference_transpiler.py
index cc7f5ec90c26c87b7c5514c900e853be9e16d6eb..8a527e72fb9ac806254d2c055fc283c938cc55b4 100644
--- a/python/paddle/fluid/transpiler/inference_transpiler.py
+++ b/python/paddle/fluid/transpiler/inference_transpiler.py
@@ -15,6 +15,7 @@
 from __future__ import print_function
 
 import os
+import sys
 import numpy as np
 from .. import core
 from ..framework import Program
@@ -50,6 +51,9 @@ class InferenceTranspiler(object):
             place (Place): inference place
             scope (Scope|None): inference Scope
         '''
+        sys.stderr.write("InferenceTranspiler is deprecated since it's not "
+                         "safe. Users should be "
+                         "responsible for constructing the inference program\n")
         if not isinstance(program, Program):
             raise TypeError("program should be as Program type")
         if not isinstance(place, core.CPUPlace) and not isinstance(
diff --git a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py
index ee8cde441ffc63ebd923bd579a7f44d1e2218cf0..c434423bae76c2ebdd7bdeb164350d6ec66621c8 100755
--- a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py
+++ b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py
@@ -15,6 +15,7 @@
 from __future__ import print_function
 
 import six
+import sys
 from collections import defaultdict, MutableSet
 from .. import core
 from ... import compat as cpt
@@ -509,6 +510,8 @@ def memory_optimize(input_program,
     Returns:
         None
     """
+    sys.stderr.write('memory_optimize is deprecated. '
+                     'Use CompiledProgram and Executor\n')
 
     def to_name_str(var):
         if isinstance(var, Variable):
diff --git a/python/paddle/utils/plot.py b/python/paddle/utils/plot.py
index 08889c0313fc24151cde6ca7b662d81eb53c9d7b..ee651f2f0cd6f2e594a4e74c896baa924f70bbf5 100644
--- a/python/paddle/utils/plot.py
+++ b/python/paddle/utils/plot.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import os
+import six
 
 
 class PlotData(object):
@@ -60,9 +61,9 @@ class Ploter(object):
 
     def append(self, title, step, value):
         """
-	    Feed data
-	    
-            Args:
+        Feed data
+
+        Args:
                 title: assign the group data to this subtitle.
                 step: the x_axis of data.
                 value: the y_axis of data.
@@ -71,9 +72,9 @@ class Ploter(object):
                 .. code-block:: python
                 plot_curve = Ploter("Curve 1","Curve 2")
                 plot_curve.append(title="Curve 1",step=1,value=1)
-	"""
-        assert isinstance(title, basestring)
-        assert self.__plot_data__.has_key(title)
+        """
+        assert isinstance(title, six.string_types)
+        assert title in self.__plot_data__
         data = self.__plot_data__[title]
         assert isinstance(data, PlotData)
         data.append(step, value)
@@ -89,7 +90,7 @@ class Ploter(object):
                 .. code-block:: python
                 plot_curve = Ploter()
                 plot_cure.plot()
-	"""
+        """
         if self.__plot_is_disabled__():
             return
 
diff --git a/python/paddle/utils/preprocess_img.py b/python/paddle/utils/preprocess_img.py
index a322f7b769a2a32df516a4b8ea04289a7f882ff2..fc67949dfe0ef21487de29678781aa2bfd93f354 100644
--- a/python/paddle/utils/preprocess_img.py
+++ b/python/paddle/utils/preprocess_img.py
@@ -122,7 +122,7 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
     def create_dataset_from_list(self, path):
         data = []
         label_set = []
-        for line in open(file_list):
+        for line in open(path):
             items = line.rstrip.split()
             image_path = items[0]
             label_name = items[1]
@@ -141,7 +141,7 @@ class ImageClassificationDatasetCreater(preprocess_util.DatasetCreater):
         path: the path of the image dataset.
         """
         if self.from_list:
-            return create_dataset_from_list(path)
+            return self.create_dataset_from_list(path)
         label_set = preprocess_util.get_label_set_from_dir(path)
         data = []
         for l_name in list(label_set.keys()):
diff --git a/tools/check_doc_approval.py b/tools/check_doc_approval.py
deleted file mode 100644
index 44fdf58b49a1715696e8c28746282c38fb3c7763..0000000000000000000000000000000000000000
--- a/tools/check_doc_approval.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-import ast
-import hashlib
-import importlib
-import paddle.fluid
-
-files = [
-    "paddle.fluid", "paddle.fluid.average", "paddle.fluid.backward",
-    "paddle.fluid.clip", "paddle.fluid.data_feeder", "paddle.fluid.executor",
-    "paddle.fluid.initializer", "paddle.fluid.io", "paddle.fluid.layers",
-    "paddle.fluid.metrics", "paddle.fluid.nets", "paddle.fluid.optimizer",
-    "paddle.fluid.profiler", "paddle.fluid.recordio_writer",
-    "paddle.fluid.regularizer", "paddle.fluid.transpiler"
-]
-
-
-def md5(doc):
-    hash = hashlib.md5()
-    hash.update(str(doc))
-    return hash.hexdigest()
-
-
-def get_module():
-    for fi in files:
-        fi_lib = importlib.import_module(fi)
-        doc_function = getattr(fi_lib, "__all__")
-        for api in doc_function:
-            api_name = fi + "." + api
-            try:
-                doc_module = getattr(eval(api_name), "__doc__")
-            except:
-                pass
-            doc_md5_code = md5(doc_module)
-            doc_dict[api_name] = doc_md5_code
-
-
-def doc_md5_dict(doc_md5_path):
-    with open(doc_md5_path, "rb") as f:
-        doc_md5 = f.read()
-        doc_md5_dict = ast.literal_eval(doc_md5)
-    return doc_md5_dict
-
-
-def check_doc_md5():
-    for k, v in doc_dict.items():
-        try:
-            if doc_ci_dict[k] != v:
-                return doc_dict
-        except:
-            return doc_dict
-    return True
-
-
-if __name__ == "__main__":
-    doc_dict = {}
-    doc_ci_dict = {}
-    doc_md5_file = "/root/.cache/doc_md5.txt"
-    if not os.path.exists(doc_md5_file):
-        os.mknod(doc_md5_file)
-    else:
-        doc_ci_dict = doc_md5_dict(doc_md5_file)
-    get_module()
-    if not os.path.getsize(doc_md5_file):
-        with open(doc_md5_file, 'w') as f:
-            f.write(str(doc_dict))
-        check_dic = True
-        print(check_dic)
-    else:
-        check_dic = check_doc_md5()
-        print(check_dic)
diff --git a/tools/codestyle/cpplint_pre_commit.hook b/tools/codestyle/cpplint_pre_commit.hook
index aa14d3a2a12208eda11e82d88bc582eb3d2f5893..658008d852123b6eab06d1f13d61ba896e7e9c98 100755
--- a/tools/codestyle/cpplint_pre_commit.hook
+++ b/tools/codestyle/cpplint_pre_commit.hook
@@ -1,10 +1,22 @@
 #!/bin/bash
 
 TOTAL_ERRORS=0
-
+if [[ ! $TRAVIS_BRANCH ]]; then
+  # install cpplint on local machine.
+  if [[ ! $(which cpplint) ]]; then
+    pip install cpplint
+  fi
+  # Collect the staged, non-deleted files on the local machine.
+  files=$(git diff --cached --name-status | awk '$1 != "D" {print $2}')
+else
+  # Collect the non-deleted files that differ between $TRAVIS_BRANCH and HEAD on Travis CI.
+  branch_ref=$(git rev-parse "$TRAVIS_BRANCH")
+  head_ref=$(git rev-parse HEAD)
+  files=$(git diff --name-status $branch_ref $head_ref | awk '$1 != "D" {print $2}')
+fi
 # The trick to remove deleted files: https://stackoverflow.com/a/2413151
-for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'); do
-    if [[ $file =~ ^(paddle/legacy/api/.*|paddle/legacy/capi/.*|paddle/contrib/.*|paddle/legacy/cuda/.*|paddle/legacy/function/.*|paddle/legacy/gserver/.*|paddle/legacy/math/.*|paddle/legacy/optimizer/.*|paddle/legacy/parameter/.*|paddle/legacy/pserver/.*|paddle/legacy/trainer/.*|paddle/legacy/utils/.*|paddle/testing/TestUtil.*|patches/grpc/.*) ]]; then
+for file in $files; do
+    if [[ $file =~ ^(patches/grpc/.*) ]]; then
         continue;
     else
         cpplint --filter=-readability/fn_size $file;
@@ -13,4 +25,3 @@ for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'); do
 done
 
 exit $TOTAL_ERRORS
-
diff --git a/tools/diff_api.py b/tools/diff_api.py
index 97c739ed2a5627ad9fd326f206976a4579dc26a3..ec51711d68a155dabdf3125d43fc35bab0b0c944 100644
--- a/tools/diff_api.py
+++ b/tools/diff_api.py
@@ -26,4 +26,10 @@ for each_diff in result:
         print(each_diff)
 
 if error:
+    print(
+        '''If you modify/add/delete the API files, including code and comment, please follow these steps in order to pass the CI:
+    1. cd ${paddle_path}, compile paddle;
+    2. pip install build/python/dist/(build whl package);
+    3. run "python tools/print_signatures.py paddle.fluid,paddle.reader > paddle/fluid/API.spec"'''
+    )
     sys.exit(1)
diff --git a/tools/print_signatures.py b/tools/print_signatures.py
index 7e61dde0a446cf5bfe656105ffd2472f03576f05..c56f30f724ca9f183d6c5cac427411b7711739a4 100644
--- a/tools/print_signatures.py
+++ b/tools/print_signatures.py
@@ -24,12 +24,19 @@ import inspect
 import collections
 import sys
 import pydoc
+import hashlib
 
 member_dict = collections.OrderedDict()
 
 experimental_namespace = {"paddle.fluid.imperative"}
 
 
+def md5(doc):
+    hash = hashlib.md5()
+    hash.update(str(doc).encode('utf-8'))
+    return hash.hexdigest()
+
+
 def visit_member(parent_name, member):
     cur_name = ".".join([parent_name, member.__name__])
     if inspect.isclass(member):
@@ -39,7 +46,10 @@ def visit_member(parent_name, member):
                 visit_member(cur_name, value)
     elif callable(member):
         try:
-            member_dict[cur_name] = inspect.getargspec(member)
+            doc = ('document', md5(member.__doc__))
+            args = inspect.getargspec(member)
+            all = (args, doc)
+            member_dict[cur_name] = all
         except TypeError:  # special for PyBind method
             member_dict[cur_name] = "  ".join([
                 line.strip() for line in pydoc.render_doc(member).split('\n')