Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into rewrite_allocation

test=develop

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into rewrite_allocation
test=develop
c8f6e70a · Yu Yang · e5c4cf61 · 9be99b14 · c8f6e70a · c8f6e70a
252 changed file
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -43,6 +43,7 @@
 | qingqing01 | Qing-Qing Dang |
 | reyoung | Yang Yu |
 | Superjom | Chun-Wei Yan |
+| tensor-tang | Jian Tang |
 | tianbingsz | Tian-Bing Xu |
 | tpatejko | Tomasz Patejko |
 | typhoonzero | Yi Wu |

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,6 +26,11 @@ message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
        "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
 if(WIN32)
    set(CMAKE_STATIC_LIBRARY_PREFIX lib)
+    add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
+    set(CMAKE_C_FLAGS_DEBUG   "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
+    set(CMAKE_C_FLAGS_RELEASE  "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
+    set(CMAKE_CXX_FLAGS_DEBUG  "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
+    set(CMAKE_CXX_FLAGS_RELEASE   "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
 endif(WIN32)
 if(NOT CMAKE_CROSSCOMPILING)
@@ -41,6 +46,7 @@ option(WITH_GPU         "Compile PaddlePaddle with NVIDIA GPU"          ${CUDA_F
 option(WITH_AMD_GPU     "Compile PaddlePaddle with AMD GPU"             OFF)
 option(WITH_AVX         "Compile PaddlePaddle with AVX intrinsics"      ${AVX_FOUND})
 option(WITH_MKL         "Compile PaddlePaddle with MKL support."        ${AVX_FOUND})
+option(WITH_NGRAPH      "Compile PaddlePaddle with nGraph support."     OFF)
 option(WITH_DSO         "Compile PaddlePaddle with dynamic linked CUDA" ON)
 option(WITH_TESTING     "Compile PaddlePaddle with unit testing"        OFF)
 option(WITH_SWIG_PY     "Compile PaddlePaddle with inference api"       ON)
@@ -65,6 +71,8 @@ option(WITH_ARM_FP16    "Use half precision support on armv8.2-a cpu"   OFF)
 option(WITH_CONTRIB     "Compile the third-party contributation"        OFF)
 option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF)
 option(WITH_ANAKIN      "Compile with Anakin library"                   OFF)
+option(ANAKIN_BUILD_FAT_BIN "Build anakin cuda fat-bin lib for all device plantform, ignored when WITH_ANAKIN=OFF" OFF)
+option(ANAKIN_BUILD_CROSS_PLANTFORM "Build anakin lib for any nvidia device plantform. ignored when WITH_ANAKIN=OFF" ON)
 option(WITH_GRPC     "Use grpc as the default rpc framework"            ${WITH_DISTRIBUTE})
 option(WITH_BRPC_RDMA     "Use brpc rdma as the rpc protocal"           OFF)
 option(ON_INFER         "Turn on inference optimization."               OFF)
@@ -103,6 +111,8 @@ if(ANDROID OR IOS)
        "Disable RDMA when cross-compiling for Android and iOS" FORCE)
    set(WITH_MKL OFF CACHE STRING
        "Disable MKL when cross-compiling for Android and iOS" FORCE)
+    set(WITH_NGRAPH OFF CACHE STRING
+        "Disable nGraph when cross-compiling for Android and iOS" FORCE)
    set(WITH_GOLANG OFF CACHE STRING
        "Disable golang when cross-compiling for Android and iOS" FORCE)
@@ -171,6 +181,7 @@ include(external/protobuf)  # download, build, install protobuf
 include(external/python)    # download, build, install python
 include(external/openblas)  # download, build, install openblas
 include(external/mkldnn)    # download, build, install mkldnn
+include(external/ngraph)    # download, build, install nGraph
 include(external/swig)      # download, build, install swig
 include(external/boost)     # download boost
 include(external/any)       # download libn::any

--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -157,6 +157,9 @@ list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
 if(NOT WITH_DSO)
    # TODO(panyx0718): CUPTI only allows DSO?
    list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUPTI_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
+    if(WIN32)
+      set_property(GLOBAL PROPERTY CUDA_MODULES ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
+    endif(WIN32)
 endif(NOT WITH_DSO)
 # setting nvcc arch flags
@@ -196,10 +199,12 @@ elseif(CMAKE_BUILD_TYPE  STREQUAL "MinSizeRel")
    list(APPEND CUDA_NVCC_FLAGS  ${CMAKE_CXX_FLAGS_RELEASE})
 endif()
 else(NOT WIN32)
-if(CMAKE_BUILD_TYPE STREQUAL "Release")
+if(CMAKE_BUILD_TYPE  STREQUAL "Debug")
+    list(APPEND CUDA_NVCC_FLAGS  "-g -G")
+elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
  list(APPEND CUDA_NVCC_FLAGS "-O3 -DNDEBUG")
 else()
-  message(FATAL "Windows only support Release build now. Please set visual studio build type to Release, x64 build.")
+  message(FATAL_ERROR "Windows only support Release or Debug build now. Please set visual studio build type to Release/Debug, x64 build.")
 endif()
 endif(NOT WIN32)

--- a/cmake/cudnn.cmake
+++ b/cmake/cudnn.cmake
@@ -2,7 +2,12 @@ if(NOT WITH_GPU)
    return()
 endif()
-set(CUDNN_ROOT "/usr" CACHE PATH "CUDNN ROOT")
+if(WIN32)
+    set(CUDNN_ROOT ${CUDA_TOOLKIT_ROOT_DIR})
+else(WIN32)
+    set(CUDNN_ROOT "/usr" CACHE PATH "CUDNN ROOT")
+endif(WIN32)
 find_path(CUDNN_INCLUDE_DIR cudnn.h
    PATHS ${CUDNN_ROOT} ${CUDNN_ROOT}/include
    $ENV{CUDNN_ROOT} $ENV{CUDNN_ROOT}/include ${CUDA_TOOLKIT_INCLUDE}

--- a/cmake/external/anakin.cmake
+++ b/cmake/external/anakin.cmake
@@ -58,19 +58,21 @@ ExternalProject_Add(
                        -DPROTOBUF_ROOT=${THIRD_PARTY_PATH}/install/protobuf
                        -DMKLML_ROOT=${THIRD_PARTY_PATH}/install/mklml
                        -DENABLE_OP_TIMER=${ANAKIN_ENABLE_OP_TIMER}
+                        -DBUILD_FAT_BIN=${ANAKIN_BUILD_FAT_BIN}
+                        -DBUILD_CROSS_PLANTFORM=${ANAKIN_BUILD_CROSS_PLANTFORM}
                        ${EXTERNAL_OPTIONAL_ARGS}
    CMAKE_CACHE_ARGS    -DCMAKE_INSTALL_PREFIX:PATH=${ANAKIN_INSTALL_DIR}
 )
 message(STATUS "Anakin for inference is enabled")
 message(STATUS "Anakin is set INCLUDE:${ANAKIN_INCLUDE} LIBRARY:${ANAKIN_LIBRARY}")
+add_dependencies(extern_anakin protobuf mklml)
 add_library(anakin_shared SHARED IMPORTED GLOBAL)
 set_property(TARGET anakin_shared PROPERTY IMPORTED_LOCATION ${ANAKIN_SHARED_LIB})
-add_dependencies(anakin_shared extern_anakin protobuf mklml)
+add_dependencies(anakin_shared extern_anakin)
 add_library(anakin_saber SHARED IMPORTED GLOBAL)
 set_property(TARGET anakin_saber PROPERTY IMPORTED_LOCATION ${ANAKIN_SABER_LIB})
-add_dependencies(anakin_saber extern_anakin protobuf mklml)
+add_dependencies(anakin_saber extern_anakin)
 list(APPEND external_project_dependencies anakin_shared anakin_saber)
--- a/cmake/external/boost.cmake
+++ b/cmake/external/boost.cmake
@@ -28,34 +28,28 @@ if((NOT DEFINED BOOST_TAR) OR (NOT DEFINED BOOST_URL))
    set(BOOST_TAR "boost_1_41_0" CACHE STRING "" FORCE)
    set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)
 endif()
-IF (WIN32)
-    MESSAGE(WARNING, "In windows, boost can not be downloaded automaticlly, please build it manually and put it at " ${THIRD_PARTY_PATH}install/boost)
+MESSAGE(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}")
-else()
-    MESSAGE(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}")
-ENDIF(WIN32)
 set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost)
 set(BOOST_DOWNLOAD_DIR  "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}")
-set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}/${BOOST_TAR}" CACHE PATH "boost include directory." FORCE)
-set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM 1)
+set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}" CACHE PATH "boost include directory." FORCE)
+set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM 1)
 include_directories(${BOOST_INCLUDE_DIR})
-if (NOT WIN32)
 ExternalProject_Add(
    ${BOOST_PROJECT}
    ${EXTERNAL_PROJECT_LOG_ARGS}
    DOWNLOAD_DIR          ${BOOST_DOWNLOAD_DIR}
-    DOWNLOAD_COMMAND      wget --no-check-certificate ${BOOST_URL} -c -q -O ${BOOST_TAR}.tar.gz
+    URL      ${BOOST_URL}
-    && tar zxf ${BOOST_TAR}.tar.gz
    DOWNLOAD_NO_PROGRESS  1
    PREFIX                ${BOOST_SOURCES_DIR}
    CONFIGURE_COMMAND     ""
    BUILD_COMMAND         ""
    INSTALL_COMMAND       ""
    UPDATE_COMMAND        ""
-)
+    )
-endif(NOT WIN32)
 if (${CMAKE_VERSION} VERSION_LESS "3.3.0" OR NOT WIN32)
    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/boost_dummy.c)

--- a/cmake/external/gflags.cmake
+++ b/cmake/external/gflags.cmake
@@ -35,7 +35,12 @@ ExternalProject_Add(
    CMAKE_ARGS      -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
                    -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
                    -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+                    -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
+                    -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
                    -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+                    -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
+                    -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
+                    -DBUILD_STATIC_LIBS=ON
                    -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
                    -DCMAKE_POSITION_INDEPENDENT_CODE=ON
                    -DBUILD_TESTING=OFF
@@ -48,8 +53,8 @@ ExternalProject_Add(
 IF(WIN32)
  IF(NOT EXISTS "${GFLAGS_INSTALL_DIR}/lib/libgflags.lib")
    add_custom_command(TARGET extern_gflags POST_BUILD
-    COMMAND cmake -E rename ${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib ${GFLAGS_INSTALL_DIR}/lib/libgflags.lib
+            COMMAND ${CMAKE_COMMAND} -E copy ${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib ${GFLAGS_INSTALL_DIR}/lib/libgflags.lib
-  )
+            )
  ENDIF()
 ENDIF(WIN32)
 ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)

--- a/cmake/external/glog.cmake
+++ b/cmake/external/glog.cmake
@@ -46,7 +46,11 @@ ExternalProject_Add(
    CMAKE_ARGS      -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
                    -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
                    -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+                    -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
+                    -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
                    -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+                    -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
+                    -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
                    -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
                    -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib
                    -DCMAKE_POSITION_INDEPENDENT_CODE=ON
@@ -63,7 +67,7 @@ ExternalProject_Add(
 IF(WIN32)
  IF(NOT EXISTS "${GLOG_INSTALL_DIR}/lib/libglog.lib")
    add_custom_command(TARGET extern_glog POST_BUILD
-    COMMAND cmake -E rename ${GLOG_INSTALL_DIR}/lib/glog.lib ${GLOG_INSTALL_DIR}/lib/libglog.lib
+    COMMAND ${CMAKE_COMMAND} -E copy ${GLOG_INSTALL_DIR}/lib/glog.lib ${GLOG_INSTALL_DIR}/lib/libglog.lib
  )
  ENDIF()
 ENDIF(WIN32)

--- a/cmake/external/mkldnn.cmake
+++ b/cmake/external/mkldnn.cmake
@@ -37,7 +37,6 @@ SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
 SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib")
 INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR}) # For MKLDNN code to include internal headers.
-INCLUDE_DIRECTORIES(${THIRD_PARTY_PATH}/install) # For Paddle code to include mkldnn.h
 IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
    SET(MKLDNN_DEPENDS   ${MKLML_PROJECT})

--- a/cmake/external/ngraph.cmake
+++ b/cmake/external/ngraph.cmake
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# The `ngraph` interface target is declared before any of the checks below, so
+# it exists even when this file returns early with nGraph disabled.
+add_library(ngraph INTERFACE)
+# nGraph is forced off on Windows and macOS.
+IF(WIN32 OR APPLE)
+    MESSAGE(WARNING
+        "Windows or Mac is not supported with nGraph in Paddle yet. "
+        "Force WITH_NGRAPH=OFF")
+    SET(WITH_NGRAPH OFF CACHE STRING "Disable nGraph in Windows and MacOS" FORCE)
+ENDIF()
+# Test the variables by name rather than through ${...} expansion: an empty or
+# undefined WITH_MKLDNN would otherwise expand to nothing and leave a malformed
+# IF() expression.
+IF(WITH_NGRAPH AND NOT WITH_MKLDNN)
+    MESSAGE(WARNING
+        "nGraph needs mkl-dnn to be enabled. "
+        "Force WITH_NGRAPH=OFF")
+    SET(WITH_NGRAPH OFF CACHE STRING "Disable nGraph if mkl-dnn is disabled" FORCE)
+ENDIF()
+# Nothing else in this file applies when nGraph is disabled.
+IF(NOT WITH_NGRAPH)
+    return()
+ENDIF()
+INCLUDE(ExternalProject)
+SET(NGRAPH_PROJECT         "extern_ngraph")
+SET(NGRAPH_VERSION         "0.9")
+SET(NGRAPH_GIT_TAG         "f9fd9d4cc318dc59dd4b68448e7fbb5f67a28bd0")
+SET(NGRAPH_SOURCES_DIR     ${THIRD_PARTY_PATH}/ngraph)
+SET(NGRAPH_INSTALL_DIR     ${THIRD_PARTY_PATH}/install/ngraph)
+SET(NGRAPH_INC_DIR         ${NGRAPH_INSTALL_DIR}/include)
+SET(NGRAPH_SHARED_LIB_NAME libngraph.so.${NGRAPH_VERSION})
+SET(NGRAPH_CPU_LIB_NAME    libcpu_backend.so)
+SET(NGRAPH_TBB_LIB_NAME    libtbb.so.2)
+SET(NGRAPH_GIT_REPO        "https://github.com/NervanaSystems/ngraph.git")
+ExternalProject_Add(
+    ${NGRAPH_PROJECT}
+    ${EXTERNAL_PROJECT_LOG_ARGS}
+    DEPENDS             ${MKLDNN_PROJECT} ${MKLML_PROJECT}
+    GIT_REPOSITORY      ${NGRAPH_GIT_REPO}
+    GIT_TAG             ${NGRAPH_GIT_TAG}
+    PREFIX              ${NGRAPH_SOURCES_DIR}
+    UPDATE_COMMAND      ""
+    CMAKE_ARGS          -DCMAKE_INSTALL_PREFIX=${NGRAPH_INSTALL_DIR}
+    CMAKE_ARGS          -DNGRAPH_UNIT_TEST_ENABLE=FALSE
+    CMAKE_ARGS          -DNGRAPH_TOOLS_ENABLE=FALSE
+    CMAKE_ARGS          -DNGRAPH_INTERPRETER_ENABLE=FALSE
+    CMAKE_ARGS          -DNGRAPH_DEX_ONLY=TRUE
+    CMAKE_ARGS          -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+    CMAKE_ARGS          -DMKLDNN_INCLUDE_DIR=${MKLDNN_INC_DIR}
+    CMAKE_ARGS          -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/lib
+)
+if(UNIX AND NOT APPLE)
+    include(GNUInstallDirs)
+    SET(NGRAPH_LIB_DIR ${NGRAPH_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR})
+else()
+    SET(NGRAPH_LIB_DIR ${NGRAPH_INSTALL_DIR}/lib)
+endif()
+MESSAGE(STATUS "nGraph lib will be installed at: ${NGRAPH_LIB_DIR}")
+SET(NGRAPH_SHARED_LIB      ${NGRAPH_LIB_DIR}/${NGRAPH_SHARED_LIB_NAME})
+SET(NGRAPH_CPU_LIB         ${NGRAPH_LIB_DIR}/${NGRAPH_CPU_LIB_NAME})
+SET(NGRAPH_TBB_LIB         ${NGRAPH_LIB_DIR}/${NGRAPH_TBB_LIB_NAME})
+# Workaround for nGraph expecting mklml to be in mkldnn install directory.
+ExternalProject_Add_Step(
+    ${NGRAPH_PROJECT}
+    PrepareMKL
+    COMMAND ${CMAKE_COMMAND} -E create_symlink ${MKLML_LIB} ${MKLDNN_INSTALL_DIR}/lib/libmklml_intel.so
+    COMMAND ${CMAKE_COMMAND} -E create_symlink ${MKLML_IOMP_LIB} ${MKLDNN_INSTALL_DIR}/lib/libiomp5.so
+    DEPENDEES download
+    DEPENDERS configure
+)
+add_dependencies(ngraph ${NGRAPH_PROJECT})
+target_compile_definitions(ngraph INTERFACE -DPADDLE_WITH_NGRAPH)
+target_include_directories(ngraph INTERFACE ${NGRAPH_INC_DIR})
+target_link_libraries(ngraph INTERFACE ${NGRAPH_SHARED_LIB})
+LIST(APPEND external_project_dependencies ngraph)
--- a/cmake/external/openblas.cmake
+++ b/cmake/external/openblas.cmake
@@ -17,12 +17,8 @@ IF(USE_EIGEN_FOR_BLAS)
 ENDIF(USE_EIGEN_FOR_BLAS)
 INCLUDE(cblas)
-# IF(WIN32 AND NOT ${CBLAS_FOUND})
 IF(NOT ${CBLAS_FOUND})
    INCLUDE(ExternalProject)
    SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
@@ -34,6 +30,7 @@ IF(NOT ${CBLAS_FOUND})
        CACHE FILEPATH "openblas library." FORCE)
    ADD_DEFINITIONS(-DPADDLE_USE_OPENBLAS)
    IF (WIN32)
        SET(CBLAS_FOUND true)
        MESSAGE(WARNING, "In windows, openblas only support msvc build, please build it manually and put it at " ${CBLAS_INSTALL_DIR})

--- a/cmake/external/protobuf.cmake
+++ b/cmake/external/protobuf.cmake
@@ -30,66 +30,61 @@ UNSET_VAR(PROTOBUF_LITE_LIBRARY)
 UNSET_VAR(PROTOBUF_LIBRARY)
 UNSET_VAR(PROTOBUF_INCLUDE_DIR)
 UNSET_VAR(Protobuf_PROTOC_EXECUTABLE)
+function(protobuf_generate_python SRCS)
+    # shameless copy from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake
+    if(NOT ARGN)
+        message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called without any proto files")
+        return()
+    endif()
-if(NOT COMMAND protobuf_generate_python)  # before cmake 3.4, protobuf_genrerate_python is not defined.
+    if(PROTOBUF_GENERATE_CPP_APPEND_PATH)
-    function(protobuf_generate_python SRCS)
+        # Create an include path for each file specified
-        # shameless copy from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake
-        if(NOT ARGN)
-            message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called without any proto files")
-            return()
-        endif()
-        if(PROTOBUF_GENERATE_CPP_APPEND_PATH)
-            # Create an include path for each file specified
-            foreach(FIL ${ARGN})
-                get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
-                get_filename_component(ABS_PATH ${ABS_FIL} PATH)
-                list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
-                if(${_contains_already} EQUAL -1)
-                    list(APPEND _protobuf_include_path -I ${ABS_PATH})
-                endif()
-            endforeach()
-        else()
-            set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
-        endif()
-        if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS)
-            set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}")
-        endif()
-        if(DEFINED Protobuf_IMPORT_DIRS)
-            foreach(DIR ${Protobuf_IMPORT_DIRS})
-                get_filename_component(ABS_PATH ${DIR} ABSOLUTE)
-                list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
-                if(${_contains_already} EQUAL -1)
-                    list(APPEND _protobuf_include_path -I ${ABS_PATH})
-                endif()
-            endforeach()
-        endif()
-        set(${SRCS})
        foreach(FIL ${ARGN})
            get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
-            get_filename_component(FIL_WE ${FIL} NAME_WE)
+            get_filename_component(ABS_PATH ${ABS_FIL} PATH)
-            if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH)
+            list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
-                get_filename_component(FIL_DIR ${FIL} DIRECTORY)
+            if(${_contains_already} EQUAL -1)
-                if(FIL_DIR)
+                list(APPEND _protobuf_include_path -I ${ABS_PATH})
-                    set(FIL_WE "${FIL_DIR}/${FIL_WE}")
-                endif()
            endif()
+        endforeach()
+    else()
+        set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
+    endif()
+    if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS)
+        set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}")
+    endif()
-            list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py")
+    if(DEFINED Protobuf_IMPORT_DIRS)
-            add_custom_command(
+        foreach(DIR ${Protobuf_IMPORT_DIRS})
-                    OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py"
+            get_filename_component(ABS_PATH ${DIR} ABSOLUTE)
-                    COMMAND  ${Protobuf_PROTOC_EXECUTABLE} --python_out ${CMAKE_CURRENT_BINARY_DIR} ${_protobuf_include_path} ${ABS_FIL}
+            list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
-                    DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
+            if(${_contains_already} EQUAL -1)
-                    COMMENT "Running Python protocol buffer compiler on ${FIL}"
+                list(APPEND _protobuf_include_path -I ${ABS_PATH})
-                    VERBATIM )
+            endif()
        endforeach()
+    endif()
-        set(${SRCS} ${${SRCS}} PARENT_SCOPE)
+    set(${SRCS})
-    endfunction()
+    foreach(FIL ${ARGN})
-endif()
+        get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
+        get_filename_component(FIL_WE ${FIL} NAME_WE)
+        if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH)
+            get_filename_component(FIL_DIR ${FIL} DIRECTORY)
+            if(FIL_DIR)
+                set(FIL_WE "${FIL_DIR}/${FIL_WE}")
+            endif()
+        endif()
+        list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py")
+        add_custom_command(
+                OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py"
+                COMMAND  ${PROTOBUF_PROTOC_EXECUTABLE} --python_out ${CMAKE_CURRENT_BINARY_DIR} ${_protobuf_include_path} ${ABS_FIL}
+                DEPENDS ${ABS_FIL} ${PROTOBUF_PROTOC_EXECUTABLE}
+                COMMENT "Running Python protocol buffer compiler on ${FIL}"
+                VERBATIM )
+    endforeach()
+    set(${SRCS} ${${SRCS}} PARENT_SCOPE)
+endfunction()
 # Print and set the protobuf library information,
 # finish this cmake process and exit from this file.
@@ -126,6 +121,7 @@ macro(PROMPT_PROTOBUF_LIB)
    # FIND_Protobuf.cmake uses `Protobuf_PROTOC_EXECUTABLE`.
    # make `protobuf_generate_cpp` happy.
    SET(Protobuf_PROTOC_EXECUTABLE ${PROTOBUF_PROTOC_EXECUTABLE})
    FOREACH(dep ${protobuf_DEPS})
        ADD_DEPENDENCIES(protobuf ${dep})
        ADD_DEPENDENCIES(protobuf_lite ${dep})
@@ -144,7 +140,6 @@ endmacro()
 set(PROTOBUF_ROOT "" CACHE PATH "Folder contains protobuf")
 IF (WIN32)
    SET(PROTOBUF_ROOT ${THIRD_PARTY_PATH}/install/protobuf)
-    MESSAGE(WARNING, "In windows, protobuf only support msvc build, please build it manually and put it at " ${PROTOBUF_ROOT})
 ENDIF(WIN32)
 if (NOT "${PROTOBUF_ROOT}" STREQUAL "")
@@ -192,13 +187,20 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
        SET(OPTIONAL_ARGS
            "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
            "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
-            "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}"
            "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}"
+            "-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}"
+            "-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}"
+            "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}"
+            "-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}"
+            "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}"
            "-Dprotobuf_WITH_ZLIB=ON"
            "-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}"
            ${EXTERNAL_OPTIONAL_ARGS})
        SET(OPTIONAL_CACHE_ARGS "-DZLIB_ROOT:STRING=${ZLIB_ROOT}")
    ENDIF()
+    IF(WIN32)
+        SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} "-DCMAKE_GENERATOR_PLATFORM=x64")
+    ENDIF()
    SET(PROTOBUF_REPO "https://github.com/google/protobuf.git")
    SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546")

--- a/cmake/external/python.cmake
+++ b/cmake/external/python.cmake
@@ -21,6 +21,48 @@ INCLUDE(python_module)
 FIND_PACKAGE(PythonInterp ${PY_VERSION})
 FIND_PACKAGE(PythonLibs ${PY_VERSION})
+if(WIN32)
+    execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+"from distutils import sysconfig as s;import sys;import struct;
+print(sys.prefix);
+print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
+"
+            RESULT_VARIABLE _PYTHON_SUCCESS
+            OUTPUT_VARIABLE _PYTHON_VALUES
+            ERROR_VARIABLE _PYTHON_ERROR_VALUE)
+    if(NOT _PYTHON_SUCCESS MATCHES 0)
+        set(PYTHONLIBS_FOUND FALSE)
+        return()
+    endif()
+    # Convert the process output into a list
+    string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES})
+    string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
+    list(GET _PYTHON_VALUES 0 PYTHON_PREFIX)
+    list(GET _PYTHON_VALUES 1 PYTHON_LIBRARY_SUFFIX)
+    # Make sure all directory separators are '/'
+    string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX})
+    set(PYTHON_LIBRARY
+            "${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
+    # when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the
+    # original python installation. They may be found relative to PYTHON_INCLUDE_DIR.
+    if(NOT EXISTS "${PYTHON_LIBRARY}")
+        get_filename_component(_PYTHON_ROOT ${PYTHON_INCLUDE_DIR} DIRECTORY)
+        set(PYTHON_LIBRARY
+                "${_PYTHON_ROOT}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
+    endif()
+    # raise an error if the python libs are still not found.
+    if(NOT EXISTS "${PYTHON_LIBRARY}")
+        message(FATAL_ERROR "Python libraries not found")
+    endif()
+    SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}")
+endif(WIN32)
 # Fixme: Maybe find a static library. Get SHARED/STATIC by FIND_PACKAGE.
 ADD_LIBRARY(python SHARED IMPORTED GLOBAL)
 SET_PROPERTY(TARGET python PROPERTY IMPORTED_LOCATION ${PYTHON_LIBRARIES})

--- a/cmake/external/xxhash.cmake
+++ b/cmake/external/xxhash.cmake
@@ -14,23 +14,52 @@ ELSE()
  ENDIF(APPLE)
 ENDIF()
-ExternalProject_Add(
+if(WIN32)
-    extern_xxhash
+  ExternalProject_Add(
-    ${EXTERNAL_PROJECT_LOG_ARGS}
+          extern_xxhash
-    GIT_REPOSITORY  "https://github.com/Cyan4973/xxHash"
+          ${EXTERNAL_PROJECT_LOG_ARGS}
-    GIT_TAG         "v0.6.5"
+          GIT_REPOSITORY  "https://github.com/Cyan4973/xxHash"
-    PREFIX          ${XXHASH_SOURCE_DIR}
+          GIT_TAG         "v0.6.5"
-    DOWNLOAD_NAME   "xxhash"
+          PREFIX          ${XXHASH_SOURCE_DIR}
-    UPDATE_COMMAND  ""
+          DOWNLOAD_NAME   "xxhash"
-    CONFIGURE_COMMAND ""
+          UPDATE_COMMAND  ""
-    BUILD_IN_SOURCE 1
+          BUILD_IN_SOURCE 1
-    PATCH_COMMAND
+          PATCH_COMMAND
-    BUILD_COMMAND     ${BUILD_CMD}
+          CONFIGURE_COMMAND
-    INSTALL_COMMAND   export PREFIX=${XXHASH_INSTALL_DIR}/ && make install
+          ${CMAKE_COMMAND} ${XXHASH_SOURCE_DIR}/src/extern_xxhash/cmake_unofficial
-    TEST_COMMAND      ""
+          -DCMAKE_INSTALL_PREFIX:PATH=${XXHASH_INSTALL_DIR}
-)
+          -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
+          -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
+          -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+          -DBUILD_XXHSUM=OFF
+          -DCMAKE_GENERATOR_PLATFORM=x64
+          -DBUILD_SHARED_LIBS=OFF
+          ${OPTIONAL_CACHE_ARGS}
+          TEST_COMMAND      ""
+  )
+else()
+  ExternalProject_Add(
+      extern_xxhash
+      ${EXTERNAL_PROJECT_LOG_ARGS}
+      GIT_REPOSITORY  "https://github.com/Cyan4973/xxHash"
+      GIT_TAG         "v0.6.5"
+      PREFIX          ${XXHASH_SOURCE_DIR}
+      DOWNLOAD_NAME   "xxhash"
+      UPDATE_COMMAND  ""
+      CONFIGURE_COMMAND ""
+      BUILD_IN_SOURCE 1
+      PATCH_COMMAND
+      BUILD_COMMAND     ${BUILD_CMD}
+      INSTALL_COMMAND   export PREFIX=${XXHASH_INSTALL_DIR}/ && make install
+      TEST_COMMAND      ""
+  )
+endif()
-set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/libxxhash.a")
+if (WIN32)
+  set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/xxhash.lib")
+else()
+  set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/libxxhash.a")
+endif ()
 INCLUDE_DIRECTORIES(${XXHASH_INCLUDE_DIR})
 add_library(xxhash STATIC IMPORTED GLOBAL)

--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -266,7 +266,11 @@ function(cc_library TARGET_NAME)
      if("${cc_library_DEPS};" MATCHES "python;")
        list(REMOVE_ITEM cc_library_DEPS python)
        add_dependencies(${TARGET_NAME} python)
-        target_link_libraries(${TARGET_NAME} "-Wl,-undefined,dynamic_lookup")
+        if(WIN32)
+          target_link_libraries(${TARGET_NAME} ${PYTHON_LIBRARIES})
+        else()
+          target_link_libraries(${TARGET_NAME} "-Wl,-undefined,dynamic_lookup")
+        endif(WIN32)
      endif()
      target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
      add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
@@ -288,6 +292,45 @@ function(cc_library TARGET_NAME)
  endif(cc_library_SRCS)
 endfunction(cc_library)
+# sep_library(<target> [STATIC|SHARED] [SRCS <src>...] [DEPS <dep>...])
+#
+# The link operation under Windows may exceed the maximum command-line length.
+# Breaking the link command into multiple link operations fixes that, e.g.
+# original:
+#     lib /out:target.lib a.lib b.lib c.lib d.lib
+# after:
+#    1. lib /out:dummy_lib_1.lib a.lib b.lib
+#    2. lib /out:dummy_lib_2.lib c.lib d.lib
+#    3. lib /out:target.lib dummy_lib_1.lib dummy_lib_2.lib
+#
+# DEPS are grouped, in order, into intermediate static libraries named
+# <target>_dummy_lib_<n> (created through cc_library). <target> itself is then
+# created through cc_library from SRCS, depending only on those intermediate
+# libraries. The result is SHARED when SHARED is given, STATIC otherwise.
+function(sep_library TARGET_NAME)
+  set(options STATIC static SHARED shared)
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(sep_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  # A function scope starts out seeing the caller's variables, so reset both
+  # accumulators explicitly instead of appending to whatever was inherited.
+  set(dummy_list "")
+  set(${TARGET_NAME}_dummy_list "")
+  set(dummy_index 1)
+  set(dummy_offset 1)
+  # the dummy target would be consisted of limit size libraries
+  set(dummy_limit 50)
+  list(LENGTH sep_library_DEPS sep_all_len)
+  foreach(v ${sep_library_DEPS})
+    list(APPEND dummy_list ${v})
+    list(LENGTH dummy_list listlen)
+    # Flush the current batch once it grows past the limit, or when the last
+    # dependency has been reached.
+    if ((${listlen} GREATER ${dummy_limit}) OR (${dummy_offset} EQUAL ${sep_all_len}))
+      message("create dummy library ${TARGET_NAME}_dummy_lib_${dummy_index} for ${TARGET_NAME}")
+      cc_library(${TARGET_NAME}_dummy_lib_${dummy_index} STATIC DEPS ${dummy_list})
+      # Start the next batch from an empty list.
+      set(dummy_list "")
+      list(APPEND ${TARGET_NAME}_dummy_list ${TARGET_NAME}_dummy_lib_${dummy_index})
+      MATH(EXPR dummy_index "${dummy_index}+1")
+    endif()
+    MATH(EXPR dummy_offset "${dummy_offset}+1")
+  endforeach()
+  if(sep_library_SHARED)
+    cc_library(${TARGET_NAME} SHARED SRCS ${sep_library_SRCS} DEPS ${${TARGET_NAME}_dummy_list})
+  else()
+    cc_library(${TARGET_NAME} STATIC SRCS ${sep_library_SRCS} DEPS ${${TARGET_NAME}_dummy_list})
+  endif()
+endfunction(sep_library)
 function(cc_binary TARGET_NAME)
  set(options "")
  set(oneValueArgs "")

--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -22,175 +22,196 @@ function(copy TARGET)
    list(LENGTH copy_lib_SRCS copy_lib_SRCS_len)
    list(LENGTH copy_lib_DSTS copy_lib_DSTS_len)
-    if(NOT ${copy_lib_SRCS_len} EQUAL ${copy_lib_DSTS_len})
+    if (NOT ${copy_lib_SRCS_len} EQUAL ${copy_lib_DSTS_len})
        message(FATAL_ERROR "${TARGET} source numbers are not equal to destination numbers")
-    endif()
+    endif ()
    math(EXPR len "${copy_lib_SRCS_len} - 1")
    add_custom_target(${TARGET} DEPENDS ${copy_lib_DEPS})
-    foreach(index RANGE ${len})
+    foreach (index RANGE ${len})
        list(GET copy_lib_SRCS ${index} src)
        list(GET copy_lib_DSTS ${index} dst)
-        add_custom_command(TARGET ${TARGET} PRE_BUILD
+        if (WIN32)
-          COMMAND mkdir -p "${dst}"
+            # windows cmd shell will not expand wildcard automatically.
-          COMMAND cp -r "${src}" "${dst}"
+            # below expand the files,libs and copy them by rules.
-          COMMENT "copying ${src} -> ${dst}")
+            file(GLOB header_files ${src} "*.h")
-    endforeach()
+            file(GLOB static_lib_files ${src} "*.lib")
+            file(GLOB dll_lib_files ${src} "*.dll")
+            set(src_files ${header_files} ${static_lib_files} ${dll_lib_files})
+            if (NOT "${src_files}" STREQUAL "")
+                list(REMOVE_DUPLICATES src_files)
+            endif ()
+            add_custom_command(TARGET ${TARGET} PRE_BUILD
+                    COMMAND ${CMAKE_COMMAND} -E make_directory "${dst}"
+                    )
+            foreach (src_file ${src_files})
+                add_custom_command(TARGET ${TARGET} PRE_BUILD
+                        COMMAND ${CMAKE_COMMAND} -E copy "${src_file}" "${dst}"
+                        COMMENT "copying ${src_file} -> ${dst}")
+            endforeach ()
+        else (WIN32) # not windows
+            add_custom_command(TARGET ${TARGET} PRE_BUILD
+                    COMMAND mkdir -p "${dst}"
+                    COMMAND cp -r "${src}" "${dst}"
+                    COMMENT "copying ${src} -> ${dst}")
+        endif (WIN32) # not windows
+    endforeach ()
 endfunction()
 # third party
 set(dst_dir "${FLUID_INSTALL_DIR}/third_party/eigen3")
 copy(eigen3_lib
-  SRCS ${EIGEN_INCLUDE_DIR}/Eigen/Core ${EIGEN_INCLUDE_DIR}/Eigen/src ${EIGEN_INCLUDE_DIR}/unsupported/Eigen
+        SRCS ${EIGEN_INCLUDE_DIR}/Eigen/Core ${EIGEN_INCLUDE_DIR}/Eigen/src ${EIGEN_INCLUDE_DIR}/unsupported/Eigen
-  DSTS ${dst_dir}/Eigen ${dst_dir}/Eigen ${dst_dir}/unsupported
+        DSTS ${dst_dir}/Eigen ${dst_dir}/Eigen ${dst_dir}/unsupported
-  DEPS eigen3
+        DEPS eigen3
-)
+        )
 set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/gflags")
 copy(gflags_lib
-  SRCS ${GFLAGS_INCLUDE_DIR} ${GFLAGS_LIBRARIES}
+        SRCS ${GFLAGS_INCLUDE_DIR} ${GFLAGS_LIBRARIES}
-  DSTS ${dst_dir} ${dst_dir}/lib
+        DSTS ${dst_dir} ${dst_dir}/lib
-  DEPS gflags
+        DEPS gflags
-)
+        )
 set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/glog")
 copy(glog_lib
-  SRCS ${GLOG_INCLUDE_DIR} ${GLOG_LIBRARIES}
+        SRCS ${GLOG_INCLUDE_DIR} ${GLOG_LIBRARIES}
-  DSTS ${dst_dir} ${dst_dir}/lib
+        DSTS ${dst_dir} ${dst_dir}/lib
-  DEPS glog
+        DEPS glog
-)
+        )
 set(dst_dir "${FLUID_INSTALL_DIR}/third_party/boost/")
 copy(boost_lib
-  SRCS ${BOOST_INCLUDE_DIR}/boost
+        SRCS ${BOOST_INCLUDE_DIR}/boost
-  DSTS ${dst_dir}
+        DSTS ${dst_dir}
-  DEPS boost
+        DEPS boost
-)
+        )
 set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/xxhash")
 copy(xxhash_lib
-  SRCS ${XXHASH_INCLUDE_DIR} ${XXHASH_LIBRARIES}
+        SRCS ${XXHASH_INCLUDE_DIR} ${XXHASH_LIBRARIES}
-  DSTS ${dst_dir} ${dst_dir}/lib
+        DSTS ${dst_dir} ${dst_dir}/lib
-  DEPS xxhash
+        DEPS xxhash
-)
+        )
-if(NOT PROTOBUF_FOUND)
+if (NOT PROTOBUF_FOUND)
    set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/protobuf")
    copy(protobuf_lib
-      SRCS ${PROTOBUF_INCLUDE_DIR} ${PROTOBUF_LIBRARY}
+            SRCS ${PROTOBUF_INCLUDE_DIR} ${PROTOBUF_LIBRARY}
-      DSTS ${dst_dir} ${dst_dir}/lib
+            DSTS ${dst_dir} ${dst_dir}/lib
-      DEPS extern_protobuf
+            DEPS extern_protobuf
-    )
+            )
-endif()
+endif ()
-if(NOT CBLAS_FOUND)
+if (NOT CBLAS_FOUND)
    set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/openblas")
    copy(openblas_lib
-      SRCS ${CBLAS_INSTALL_DIR}/lib ${CBLAS_INSTALL_DIR}/include
+            SRCS ${CBLAS_INSTALL_DIR}/lib ${CBLAS_INSTALL_DIR}/include
-      DSTS ${dst_dir} ${dst_dir}
+            DSTS ${dst_dir} ${dst_dir}
-      DEPS extern_openblas
+            DEPS extern_openblas
-    )
+            )
 elseif (WITH_MKLML)
    set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/mklml")
    copy(mklml_lib
-      SRCS ${MKLML_LIB} ${MKLML_IOMP_LIB} ${MKLML_INC_DIR}
+            SRCS ${MKLML_LIB} ${MKLML_IOMP_LIB} ${MKLML_INC_DIR}
-      DSTS ${dst_dir}/lib ${dst_dir}/lib ${dst_dir}
+            DSTS ${dst_dir}/lib ${dst_dir}/lib ${dst_dir}
-      DEPS mklml
+            DEPS mklml
-    )
+            )
-endif()
+endif ()
-if(WITH_MKLDNN)
+if (WITH_MKLDNN)
-  set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/mkldnn")
+    set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/mkldnn")
-  copy(mkldnn_lib
+    copy(mkldnn_lib
-    SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB}
+            SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB}
-    DSTS ${dst_dir} ${dst_dir}/lib
+            DSTS ${dst_dir} ${dst_dir}/lib
-    DEPS mkldnn
+            DEPS mkldnn
-  )
+            )
-endif()
+endif ()
 if (NOT WIN32)
-if(NOT MOBILE_INFERENCE AND NOT RPI)
+    if (NOT MOBILE_INFERENCE AND NOT RPI)
-  set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappy")
+        set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappy")
-  copy(snappy_lib
+        copy(snappy_lib
-    SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES}
+                SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES}
-    DSTS ${dst_dir} ${dst_dir}/lib
+                DSTS ${dst_dir} ${dst_dir}/lib
-    DEPS snappy)
+                DEPS snappy)
-  set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappystream")
+        set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappystream")
-  copy(snappystream_lib
+        copy(snappystream_lib
-    SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES}
+                SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES}
-    DSTS ${dst_dir} ${dst_dir}/lib
+                DSTS ${dst_dir} ${dst_dir}/lib
-    DEPS snappystream)
+                DEPS snappystream)
-  set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/zlib")
+        set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/zlib")
-  copy(zlib_lib
+        copy(zlib_lib
-    SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES}
+                SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES}
-    DSTS ${dst_dir} ${dst_dir}/lib
+                DSTS ${dst_dir} ${dst_dir}/lib
-    DEPS zlib)
+                DEPS zlib)
-endif()
+    endif ()
-endif(NOT WIN32)
+endif (NOT WIN32)
 # paddle fluid module
 set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
 set(dst_dir "${FLUID_INSTALL_DIR}/paddle/fluid")
 set(module "framework")
 if (NOT WIN32)
-set(framework_lib_deps framework_py_proto)
+    set(framework_lib_deps framework_py_proto)
-endif(NOT WIN32)
+endif (NOT WIN32)
 copy(framework_lib DEPS ${framework_lib_deps}
-  SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
+        SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
-       ${src_dir}/${module}/ir/*.h
+        ${src_dir}/${module}/ir/*.h
-  DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module} ${dst_dir}/${module}/ir
+        DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module} ${dst_dir}/${module}/ir
-)
+        )
 set(module "memory")
 copy(memory_lib
-  SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/detail/*.h
+        SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/detail/*.h
-  DSTS ${dst_dir}/${module} ${dst_dir}/${module}/detail
+        DSTS ${dst_dir}/${module} ${dst_dir}/${module}/detail
-)
+        )
 set(inference_deps paddle_fluid_shared paddle_fluid)
 set(module "inference/api")
 if (WITH_ANAKIN AND WITH_MKL)
    copy(anakin_inference_lib DEPS paddle_inference_api inference_anakin_api
-        SRCS
+            SRCS
-        ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/libinference_anakin_api* # compiled anakin api
+            ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/libinference_anakin_api* # compiled anakin api
-        ${ANAKIN_INSTALL_DIR} # anakin release
+            ${ANAKIN_INSTALL_DIR} # anakin release
-        DSTS ${FLUID_INSTALL_DIR}/third_party/install/anakin ${FLUID_INSTALL_DIR}/third_party/install/anakin)
+            DSTS ${FLUID_INSTALL_DIR}/third_party/install/anakin ${FLUID_INSTALL_DIR}/third_party/install/anakin)
-     list(APPEND inference_deps anakin_inference_lib)
+    list(APPEND inference_deps anakin_inference_lib)
-endif()
+endif ()
 set(module "inference")
 copy(inference_lib DEPS ${inference_deps}
  SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*
-       ${src_dir}/${module}/api/paddle_inference_api.h
+       ${src_dir}/${module}/api/paddle_*.h
       ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h
  DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
-)
+        )
 set(module "platform")
 copy(platform_lib DEPS profiler_py_proto
-  SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h
+        SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h
-  DSTS ${dst_dir}/${module} ${dst_dir}/${module}/dynload ${dst_dir}/${module}/details
+        DSTS ${dst_dir}/${module} ${dst_dir}/${module}/dynload ${dst_dir}/${module}/details
-)
+        )
 set(module "string")
 copy(string_lib
-  SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/tinyformat/*.h
+        SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/tinyformat/*.h
-  DSTS ${dst_dir}/${module} ${dst_dir}/${module}/tinyformat
+        DSTS ${dst_dir}/${module} ${dst_dir}/${module}/tinyformat
-)
+        )
 set(module "pybind")
 copy(pybind_lib
-  SRCS ${CMAKE_CURRENT_BINARY_DIR}/paddle/fluid/${module}/pybind.h
+        SRCS ${CMAKE_CURRENT_BINARY_DIR}/paddle/fluid/${module}/pybind.h
-  DSTS ${dst_dir}/${module}
+        DSTS ${dst_dir}/${module}
-)
+        )
 # CMakeCache Info
 copy(cmake_cache
-  SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt
+        SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt
-  DSTS ${FLUID_INSTALL_DIR})
+        DSTS ${FLUID_INSTALL_DIR})
 # This command generates a complete fluid library for both train and inference
 add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
@@ -198,14 +219,14 @@ add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
 # Following commands generate a inference-only fluid library
 # third_party, version.txt and CMakeCache.txt are the same position with ${FLUID_INSTALL_DIR}
 copy(third_party DEPS fluid_lib_dist
-  SRCS ${FLUID_INSTALL_DIR}/third_party ${FLUID_INSTALL_DIR}/CMakeCache.txt
+        SRCS ${FLUID_INSTALL_DIR}/third_party ${FLUID_INSTALL_DIR}/CMakeCache.txt
-  DSTS ${FLUID_INFERENCE_INSTALL_DIR} ${FLUID_INFERENCE_INSTALL_DIR}
+        DSTS ${FLUID_INFERENCE_INSTALL_DIR} ${FLUID_INFERENCE_INSTALL_DIR}
-)
+        )
-# only need libpaddle_fluid.so/a and paddle_inference_api.h for inference-only library
+# only need libpaddle_fluid.so/a and paddle_*.h for inference-only library
 copy(inference_api_lib DEPS fluid_lib_dist
  SRCS ${FLUID_INSTALL_DIR}/paddle/fluid/inference/libpaddle_fluid.*
-       ${FLUID_INSTALL_DIR}/paddle/fluid/inference/paddle_inference_api.h
+       ${FLUID_INSTALL_DIR}/paddle/fluid/inference/paddle_*.h
  DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include
 )
@@ -213,20 +234,20 @@ add_custom_target(inference_lib_dist DEPENDS third_party inference_api_lib)
 # paddle fluid version
 function(version version_file)
-  execute_process(
+    execute_process(
-    COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1
+            COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1
-    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
+            WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
-    OUTPUT_VARIABLE PADDLE_GIT_COMMIT)
+            OUTPUT_VARIABLE PADDLE_GIT_COMMIT)
-  file(WRITE ${version_file}
+    file(WRITE ${version_file}
-    "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
+            "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
-    "WITH_MKL: ${WITH_MKL}\n"
+            "WITH_MKL: ${WITH_MKL}\n"
-    "WITH_MKLDNN: ${WITH_MKLDNN}\n"
+            "WITH_MKLDNN: ${WITH_MKLDNN}\n"
-    "WITH_GPU: ${WITH_GPU}\n")
+            "WITH_GPU: ${WITH_GPU}\n")
-  if(WITH_GPU)
+    if (WITH_GPU)
-    file(APPEND ${version_file}
+        file(APPEND ${version_file}
-      "CUDA version: ${CUDA_VERSION}\n"
+                "CUDA version: ${CUDA_VERSION}\n"
-      "CUDNN version: v${CUDNN_MAJOR_VERSION}\n")
+                "CUDNN version: v${CUDNN_MAJOR_VERSION}\n")
-  endif()
+    endif ()
 endfunction()
 version(${FLUID_INSTALL_DIR}/version.txt)
 version(${FLUID_INFERENCE_INSTALL_DIR}/version.txt)
--- a/cmake/tensorrt.cmake
+++ b/cmake/tensorrt.cmake
@@ -34,4 +34,5 @@ if(TENSORRT_FOUND)
        "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
    include_directories(${TENSORRT_INCLUDE_DIR})
    list(APPEND EXTERNAL_LIBS ${TENSORRT_LIBRARY})
+    add_definitions(-DPADDLE_WITH_TENSORRT)
 endif()
--- a/doc/v2/dev/contribute_to_paddle_en.md
+++ b/doc/v2/dev/contribute_to_paddle_en.md
 ../../../CONTRIBUTING.md
\ No newline at end of file
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -103,7 +103,7 @@ paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 's
 paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None))
-paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode'], varargs=None, keywords=None, defaults=(False, -100, False))
+paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, False, False))
 paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.autoincreased_step_counter ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1))
@@ -184,6 +184,7 @@ paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name']
 paddle.fluid.layers.grid_sampler ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.log_loss ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None))
 paddle.fluid.layers.add_position_encoding ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.bilinear_tensor_product ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None))
 paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
 paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
 paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
@@ -273,6 +274,7 @@ paddle.fluid.layers.hard_shrink ArgSpec(args=['x', 'threshold'], varargs=None, k
 paddle.fluid.layers.cumsum ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.thresholded_relu ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.prior_box ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False))
+paddle.fluid.layers.density_prior_box ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, None))
 paddle.fluid.layers.multi_box_head ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False))
 paddle.fluid.layers.bipartite_match ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.target_assign ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None))

--- a/paddle/fluid/CMakeLists.txt
+++ b/paddle/fluid/CMakeLists.txt
@@ -4,11 +4,12 @@ add_subdirectory(framework)
 add_subdirectory(operators)
 add_subdirectory(string)
-if (NOT WIN32)
 add_subdirectory(pybind)
+if (NOT WIN32)
 add_subdirectory(recordio)
 endif(NOT WIN32)
 # NOTE: please add subdirectory inference at last.
 add_subdirectory(inference)
 add_subdirectory(train)
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -136,20 +136,32 @@ cc_library(version SRCS version.cc)
 cc_test(version_test SRCS version_test.cc DEPS version)
 cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version)
+cc_library(ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto)
+if(NOT WIN32)
+cc_library(ngraph_operator SRCS ngraph_operator.cc DEPS ngraph_bridge operator op_info device_context tensor scope glog
+  shape_inference data_transform lod_tensor profiler)
+endif(NOT WIN32)
 cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
 nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
-if (NOT WIN32)
 py_proto_compile(framework_py_proto SRCS framework.proto)
 # Generate an empty __init__.py to make framework_py_proto as a valid python module.
 add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(framework_py_proto framework_py_proto_init)
-add_custom_command(TARGET framework_py_proto POST_BUILD
+if (NOT WIN32)
-    COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto
+  add_custom_command(TARGET framework_py_proto POST_BUILD
-    COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/
+      COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto
-    COMMENT "Copy generated python proto into directory paddle/fluid/proto."
+      COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/
-    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+      COMMENT "Copy generated python proto into directory paddle/fluid/proto."
+      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+else(NOT WIN32)
+  string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/")
+  add_custom_command(TARGET framework_py_proto POST_BUILD
+          COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto
+          COMMAND copy /Y *.py ${proto_dstpath}
+          COMMENT "Copy generated python proto into directory paddle/fluid/proto."
+          WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 endif(NOT WIN32)
 cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
@@ -163,10 +175,14 @@ if(WITH_DISTRIBUTE)
  set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
  set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
 else()
-  cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass)
+  if(NOT WIN32)
+    cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass ngraph_operator)
+  else(NOT WIN32)
+    cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass)
+  endif(NOT WIN32)
  cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op)
 endif()
 if (NOT WIN32)
 cc_library(parallel_executor SRCS parallel_executor.cc DEPS
        threaded_ssa_graph_executor scope_buffered_ssa_graph_executor

--- a/paddle/fluid/framework/details/build_strategy.cc
+++ b/paddle/fluid/framework/details/build_strategy.cc
@@ -79,9 +79,15 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
  BuildStrategy strategy_;
 };
-std::shared_ptr<ir::PassBuilder> BuildStrategy::CreatePassesFromStrategy()
+std::shared_ptr<ir::PassBuilder> BuildStrategy::CreatePassesFromStrategy(
-    const {
+    bool finalize_strategy) const {
+  if (is_finalized_) {
+    return pass_builder_;
+  }
  pass_builder_.reset(new ParallelExecutorPassBuilder(*this));
+  if (finalize_strategy) {
+    is_finalized_ = true;
+  }
  return pass_builder_;
 }
@@ -95,10 +101,8 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
 #else
    const bool use_cuda) const {
 #endif
-  // Create a default one if not initialized by user.
+  // Create a default one if not finalized by user.
-  if (!pass_builder_) {
+  CreatePassesFromStrategy(false);
-    CreatePassesFromStrategy();
-  }
  std::unique_ptr<ir::Graph> graph(new ir::Graph(main_program));

--- a/paddle/fluid/framework/details/build_strategy.h
+++ b/paddle/fluid/framework/details/build_strategy.h
@@ -75,12 +75,20 @@ struct BuildStrategy {
  bool remove_unnecessary_lock_{false};
+  // NOTE:
+  // Before you add new options, think if it's a general strategy that works
+  // with other strategies. If not, the strategy should be created through
+  // CreatePassesFromStrategy and the pass can be managed separately.
  // User normally doesn't need to call this API.
  // The PassBuilder allows for more customized insert, remove of passes
  // from python side.
  // A new PassBuilder is created based on configs defined above and
  // passes are owned by the PassBuilder.
-  std::shared_ptr<ir::PassBuilder> CreatePassesFromStrategy() const;
+  std::shared_ptr<ir::PassBuilder> CreatePassesFromStrategy(
+      bool finalize_strategy) const;
+  bool IsFinalized() const { return is_finalized_; }
  // Apply the passes built by the pass_builder_. The passes will be
  // applied to the Program and output an ir::Graph.
@@ -97,6 +105,7 @@ struct BuildStrategy {
 #endif
 private:
+  mutable bool is_finalized_ = false;
  mutable std::shared_ptr<ir::PassBuilder> pass_builder_;
 };

--- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc
@@ -30,8 +30,8 @@ FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor(
      local_scopes_(local_scopes),
      places_(places),
      graph_(std::move(graph)),
-      pool_(strategy.num_threads_ +
+      pool_(strategy.num_threads_),
-            1),  // add one more thread for generate op_deps
+      prepare_pool_(1),  // add one more thread for generate op_deps
      fetch_ctxs_(places) {
  for (auto &op : ir::FilterByNodeWrapper<OpHandleBase>(*graph_)) {
    int dep = static_cast<int>(op->NotReadyInputSize());
@@ -160,7 +160,7 @@ void FastThreadedSSAGraphExecutor::RunOpAsync(
  });
 }
 void FastThreadedSSAGraphExecutor::PrepareAtomicOpDeps() {
-  atomic_op_deps_ = pool_.enqueue([&] {
+  atomic_op_deps_ = prepare_pool_.enqueue([&] {
    auto *op_deps = new std::unordered_map<OpHandleBase *, std::atomic<int>>;
    for (auto &pair : op_deps_) {
      (*op_deps)[pair.first] = pair.second;

--- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h
@@ -46,6 +46,7 @@ class FastThreadedSSAGraphExecutor : public SSAGraphExecutor {
  std::vector<OpHandleBase *> bootstrap_ops_;
  ::ThreadPool pool_;
+  ::ThreadPool prepare_pool_;
  platform::DeviceContextPool fetch_ctxs_;
  std::atomic<int> remaining_;

--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/framework/lod_rank_table.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
+#include "paddle/fluid/framework/ngraph_operator.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/operators/detail/macros.h"
@@ -25,6 +26,7 @@ limitations under the License. */
 DECLARE_bool(benchmark);
 DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run");
+DEFINE_bool(use_ngraph, false, "Use NGRAPH to run");
 namespace paddle {
 namespace framework {
@@ -81,6 +83,24 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
  }
 }
+// Replaces every op interval reported by FusedOperator::FusedOpIntervals with
+// a single FusedOperator when compiled with PADDLE_WITH_NGRAPH; without that
+// macro it only logs a warning and leaves ctx->ops_ untouched.
+static void EnableFusedOp(ExecutorPrepareContext* ctx) {
+#ifdef PADDLE_WITH_NGRAPH
+  VLOG(3) << "use_ngraph=True";
+  auto intervals = FusedOperator::FusedOpIntervals(&ctx->ops_);
+  // Overwrite the first slot of each interval with the newly created fused op.
+  for (auto& interval : intervals) {
+    auto* fused_op = new FusedOperator(ctx->prog_, ctx->block_id_,
+                                       interval.at(0), interval.at(1));
+    *interval[0] = std::unique_ptr<OperatorBase>(fused_op);
+  }
+  // Erase the remaining slots of each interval, visiting intervals last-first.
+  // NOTE(review): presumably the reverse order keeps the positions stored for
+  // earlier intervals usable while erasing from ctx->ops_ - confirm.
+  for (auto it = intervals.rbegin(); it != intervals.rend(); ++it) {
+    ctx->ops_.erase(it->at(0) + 1, it->at(1));
+  }
+#else
+  LOG(WARNING)
+      << "'NGRAPH' is not supported, Please re-compile with WITH_NGRAPH option";
+#endif
+}
 Executor::Executor(const platform::Place& place) : place_(place) {}
 void Executor::Close() {
@@ -338,6 +358,7 @@ std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
  for (auto& op_desc : block.AllOps()) {
    ctx->ops_.push_back(OpRegistry::CreateOp(*op_desc));
  }
+  if (FLAGS_use_ngraph) EnableFusedOp(ctx.get());
  return ctx;
 }
@@ -359,6 +380,7 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
 void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
                                  bool create_local_scope, bool create_vars,
                                  bool keep_kids) {
+  PADDLE_ENFORCE_NOT_NULL(scope);
  Scope* local_scope = scope;
  if (create_vars) {
    if (create_local_scope) {
@@ -473,6 +495,5 @@ void Executor::EnableMKLDNN(const ProgramDesc& program) {
      << "'MKLDNN' is not supported, Please re-compile with WITH_MKLDNN option";
 #endif
 }
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/garbage_collector.h
+++ b/paddle/fluid/framework/garbage_collector.h
@@ -29,7 +29,7 @@ template <typename T>
 class GarbageCollector {
 public:
  GarbageCollector(const platform::Place &place, size_t max_memory_size)
-      : max_memory_size_(std::max(max_memory_size, static_cast<size_t>(1))) {
+      : max_memory_size_((std::max)(max_memory_size, static_cast<size_t>(1))) {
    garbages_.reset(new std::deque<T *>());
    dev_ctx_ = platform::DeviceContextPool::Instance().Get(place);
  }

--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -5,6 +5,7 @@ file(APPEND ${pass_file} "\#include \"paddle/fluid/framework/ir/pass.h\"\n")
 # Usage: pass_library(target inference) will append to paddle_inference_pass.h
+unset(INFER_IR_PASSES CACHE) # clear the global variable
 function(pass_library TARGET DEST)
    set(options "")
    set(oneValueArgs "")
@@ -15,10 +16,11 @@ function(pass_library TARGET DEST)
    if (${DEST} STREQUAL "base" OR ${DEST} STREQUAL "inference")
        message(STATUS "add pass ${TARGET} ${DEST}")
        file(APPEND ${pass_file} "USE_PASS(${TARGET});\n")
-        set(PASS_LIBRARY ${TARGET} ${PASS_LIBRARY} PARENT_SCOPE)
+        set(INFER_IR_PASSES ${INFER_IR_PASSES} ${TARGET} CACHE INTERNAL "")
    endif()
 endfunction()
 cc_library(node SRCS node.cc DEPS proto_desc)
 cc_library(graph SRCS graph.cc DEPS node pretty_log)
 cc_library(graph_helper SRCS graph_helper.cc DEPS graph)

--- a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
@@ -91,10 +91,10 @@ void FindWhileOp(Graph* graph) {
 #undef OP_SET_IN
 #undef OP_SET_OUT
-  auto* X = graph->RetriveNode(34);
+  auto* X = graph->RetrieveNode(34);
-  auto* LSTMOUT = graph->RetriveNode(81);
+  auto* LSTMOUT = graph->RetrieveNode(81);
-  auto* cell_init = graph->RetriveNode(6);
+  auto* cell_init = graph->RetrieveNode(6);
-  auto* hidden_init = graph->RetriveNode(8);
+  auto* hidden_init = graph->RetrieveNode(8);
  auto* lstm_op = graph->CreateOpNode(&op_desc);
  PrepareParameters(graph, param);
@@ -211,12 +211,12 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
  VLOG(30) << "LSTMWeight resized to " << out->dims();
  float* out_data = out->mutable_data<float>(platform::CPUPlace());
-  std::array<const float*, 4> tensors(
+  std::array<const float*, 4> tensors{
-      {{W_forget_w0.data<float>(), W_input_w0.data<float>(),
+      W_forget_w0.data<float>(), W_input_w0.data<float>(),
-        W_output_w0.data<float>(), W_cell_w0.data<float>()}});
+      W_output_w0.data<float>(), W_cell_w0.data<float>()};
-  std::array<const float*, 4> tensors1(
+  std::array<const float*, 4> tensors1{
-      {{W_forget_w1.data<float>(), W_input_w1.data<float>(),
+      W_forget_w1.data<float>(), W_input_w1.data<float>(),
-        W_output_w1.data<float>(), W_cell_w1.data<float>()}});
+      W_output_w1.data<float>(), W_cell_w1.data<float>()};
  for (int row = 0; row < D; row++) {
    for (int col = 0; col < 4; col++) {
@@ -238,9 +238,9 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
 void PrepareLSTMBias(const LoDTensor& B_forget, const LoDTensor& B_input,
                     const LoDTensor& B_output, const LoDTensor& B_cell,
                     LoDTensor* out) {
-  std::array<const float*, 4> tensors(
+  std::array<const float*, 4> tensors{
-      {{B_forget.data<float>(), B_input.data<float>(), B_output.data<float>(),
+      B_forget.data<float>(), B_input.data<float>(), B_output.data<float>(),
-        B_cell.data<float>()}});
+      B_cell.data<float>()};
  PADDLE_ENFORCE_EQ(B_forget.dims().size(), 1);
  int D = B_forget.dims()[0];

--- a/paddle/fluid/framework/ir/fc_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/fc_fuse_pass.cc
@@ -57,6 +57,7 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
    desc.SetInput("W", std::vector<std::string>({fc_Y_in}));
    desc.SetInput("Bias", std::vector<std::string>({fc_bias_in}));
    desc.SetOutput("Out", std::vector<std::string>({fc_out_out}));
+    desc.SetAttr("in_num_col_dims", mul->Op()->GetAttr("x_num_col_dims"));
    desc.SetType("fc");
    auto fc_node = g->CreateOpNode(&desc);  // OpDesc will be copied.
    GraphSafeRemoveNodes(graph.get(), {mul, elementwise_add, mul_out});

--- a/paddle/fluid/framework/ir/fc_fuse_pass_tester.cc
+++ b/paddle/fluid/framework/ir/fc_fuse_pass_tester.cc
@@ -29,6 +29,7 @@ void SetOp(ProgramDesc* prog, const std::string& type,
  if (type == "mul") {
    op->SetInput("X", {inputs[0]});
    op->SetInput("Y", {inputs[1]});
+    op->SetAttr("x_num_col_dims", {1});
  } else if (type == "elementwise_add") {
    op->SetInput("X", inputs);
  }

--- a/paddle/fluid/framework/ir/graph.cc
+++ b/paddle/fluid/framework/ir/graph.cc
@@ -84,8 +84,6 @@ void CheckProgram(const ProgramDesc &program) {
 Graph::Graph(const ProgramDesc &program) : program_(program) {
  CheckProgram(program_);
-  // Make the nodes id start from 0.
-  Node::ResetId();
  auto var_nodes = InitFromProgram(program_);
  ResolveHazard(var_nodes);
 }

--- a/paddle/fluid/framework/ir/graph.h
+++ b/paddle/fluid/framework/ir/graph.h
@@ -116,13 +116,17 @@ class Graph {
  // Create a normal variable with non-null VarDesc.
  ir::Node *CreateVarNode(VarDesc *var_desc) {
    PADDLE_ENFORCE(var_desc);
-    return AddNode(new ir::Node(var_desc));
+    auto *x = AddNode(new ir::Node(var_desc));
+    x->SetId(num_node_created_++);
+    return x;
  }
  // Create a normal runnable operator with OpDesc.
  ir::Node *CreateOpNode(OpDesc *op_desc) {
    PADDLE_ENFORCE(op_desc);
-    return AddNode(new ir::Node(op_desc));
+    auto *x = AddNode(new ir::Node(op_desc));
+    x->SetId(num_node_created_++);
+    return x;
  }
  // Create a control dependency var that connects 2 operations. The
@@ -132,13 +136,17 @@ class Graph {
    // TODO(panyx0718): control var name should be really unique.
    const std::string name = string::Sprintf(
        "%s@%llu", ir::Node::kControlDepVarName, node_set_.size());
-    return AddNode(new ir::Node(name, ir::Node::Type::kVariable));
+    auto *x = AddNode(new ir::Node(name, ir::Node::Type::kVariable));
+    x->SetId(num_node_created_++);
+    return x;
  }
  // A more free style way of creating a graph node. Mostly use for test
  // or "copy" from another node. Avoid using it if possible.
  ir::Node *CreateEmptyNode(const std::string &name, ir::Node::Type type) {
-    return AddNode(new ir::Node(name, type));
+    auto *x = AddNode(new ir::Node(name, type));
+    x->SetId(num_node_created_++);
+    return x;
  }
  // Clear all node information of the graph and return the ownership of the
@@ -160,7 +168,7 @@ class Graph {
  }
  // NOTE low performance, but simple and secure.
-  Node *RetriveNode(int id) {
+  Node *RetrieveNode(int id) {
    for (auto &node : nodes_) {
      if (node.second->id() == id) {
        return node.second.get();
@@ -169,6 +177,7 @@ class Graph {
    return nullptr;
  }
+  const ProgramDesc &program() const { return program_; }
  std::map<std::string, std::vector<ir::Node *>> InitFromProgram(
      const ProgramDesc &program);
@@ -190,6 +199,7 @@ class Graph {
  std::map<std::string, std::function<void(void)>> attr_dels_;
  std::map<ir::Node *, std::unique_ptr<ir::Node>> nodes_;
  std::unordered_set<ir::Node *> node_set_;
+  size_t num_node_created_{0};  // help to generate a unique node id.
 };
 bool IsControlDepVar(const ir::Node &var);

--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -167,10 +167,12 @@ struct HitGroup {
  bool Match(Node *node, PDNode *pat) {
    if (nodes_.count(node)) {
-      if (!roles.count(pat)) return false;
+      if (roles.count(pat) && roles[pat] == node) return true;
-      return roles[pat] == node;
+      return false;
+    } else {
+      if (roles.count(pat) && roles[pat] != node) return false;
+      return true;
    }
-    return !roles.count(pat) || roles.at(pat) == node;
  }
  void Register(Node *node, PDNode *pat) {
@@ -198,7 +200,6 @@ GraphPatternDetector::DetectPatterns() {
  std::vector<GraphPatternDetector::subgraph_t> result;
  std::vector<HitGroup> init_groups;
  std::array<std::vector<HitGroup>, 2> bi_records;
-  // PADDLE_ENFORCE(!pattern_.edges().empty(), "At least one edge is needed");
  auto *first_pnode = pattern_.edges().empty() ? pattern().nodes().front().get()
                                               : pattern_.edges().front().first;
  if (!pdnodes2nodes_.count(first_pnode)) return result;
@@ -228,11 +229,12 @@ GraphPatternDetector::DetectPatterns() {
        VLOG(80) << "check " << source->id() << " -- " << target->id();
        // TODO(Superjomn) add some prune strategies.
        for (const auto &group : pre_groups) {
-          HitGroup new_group = group;
+          if (IsNodesLink(source, target)) {
-          if (IsNodesLink(source, target) &&
+            HitGroup new_group = group;
-              new_group.Match(source, edge.first)) {
+            bool flag = new_group.Match(source, edge.first) &&
-            new_group.Register(source, edge.first);
+                        new_group.Match(target, edge.second);
-            if (new_group.Match(target, edge.second)) {
+            if (flag) {
+              new_group.Register(source, edge.first);
              new_group.Register(target, edge.second);
              cur_groups.push_back(new_group);
              // TODO(Superjomn) need to unique
@@ -261,14 +263,16 @@ GraphPatternDetector::DetectPatterns() {
  return result;
 }
-bool GraphItemCMP(const std::pair<PDNode *, Node *> &a,
+struct GraphItemLessThan {
+  bool operator()(const std::pair<PDNode *, Node *> &a,
                  const std::pair<PDNode *, Node *> &b) {
-  if (a.first != b.first) {
+    if (a.first != b.first) {
-    return a.first < b.first;
+      return a.first < b.first;
-  } else {
+    } else {
-    return a.second < b.second;
+      return a.second < b.second;
+    }
  }
-}
+};
 // TODO(Superjomn) enhance the function as it marks unique unique as duplicates
 // see https://github.com/PaddlePaddle/Paddle/issues/13550
@@ -282,7 +286,7 @@ void GraphPatternDetector::UniquePatterns(
  for (auto &g : *subgraphs) {
    // Sort the items in the sub-graph, and transform to a string key.
    std::vector<std::pair<PDNode *, Node *>> sorted_keys(g.begin(), g.end());
-    std::sort(sorted_keys.begin(), sorted_keys.end(), GraphItemCMP);
+    std::sort(sorted_keys.begin(), sorted_keys.end(), GraphItemLessThan());
    std::stringstream ss;
    for (auto &item : sorted_keys) {
      ss << item.first << ":" << item.second;

--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -310,8 +310,8 @@ void GraphSafeRemoveNodes(Graph* graph,
                          const std::unordered_set<const Node*>& nodes);
 // Some pre-defined patterns those can be reused in multiple passes.
-// The related Fluid Layer or Op should be one pattern here for better reusage
+// The related Fluid Layer or Op should be one pattern here for better re-usage
-// accross different fusion.
+// across different fusion.
 namespace patterns {
 struct KeyCounter {

--- a/paddle/fluid/framework/ir/graph_to_program_pass.cc
+++ b/paddle/fluid/framework/ir/graph_to_program_pass.cc
@@ -35,10 +35,11 @@ std::unique_ptr<Graph> GraphToProgramPass::ApplyImpl(
      new proto::ProgramDesc(*program.Proto()));
  auto block = program_pb->mutable_blocks(kRootBlockIndex);
+  block->set_idx(kRootBlockIndex);
  block->clear_vars();
  std::unordered_set<std::string> visited_vars;
  for (ir::Node* n : graph->Nodes()) {
-    if (n->NodeType() == ir::Node::Type::kVariable) {
+    if (n->IsVar()) {
      if (n->Var() && visited_vars.count(n->Var()->Name()) == 0) {
        visited_vars.insert(n->Var()->Name());
        block->add_vars()->MergeFrom(*n->Var()->Proto());

--- a/paddle/fluid/framework/ir/graph_traits.cc
+++ b/paddle/fluid/framework/ir/graph_traits.cc
@@ -66,6 +66,76 @@ NodesDFSIterator &NodesDFSIterator::operator=(const NodesDFSIterator &other) {
 }
 Node *NodesDFSIterator::operator->() { return stack_.top(); }
+inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) {
+  return node.inputs.size() == n;
+}
+// Builds the complete topological order eagerly, starting from `source`.
+// Every node in `source` must have no inputs. A node is appended to `sorted_`
+// once all of its inputs have been emitted; its not-yet-emitted outputs then
+// become candidates. If a full sweep over the candidates emits nothing, the
+// remaining nodes wait on an input that can never be emitted (a cycle, or an
+// input that is unreachable from `source`), so fail instead of spinning
+// forever.
+NodesTSIterator::NodesTSIterator(const std::vector<Node *> &source) {
+  PADDLE_ENFORCE(!source.empty(),
+                 "Start points of topological sorting should not be empty!");
+  // CHECK all the inputs' in-degree is 0
+  for (auto *node : source) {
+    PADDLE_ENFORCE(CheckNodeIndegreeEquals(*node, 0));
+  }
+  std::unordered_set<Node *> visited;
+  std::unordered_set<Node *> to_visit{source.begin(), source.end()};
+  while (!to_visit.empty()) {
+    // Iterate over a snapshot because `to_visit` is mutated inside the loop.
+    std::vector<Node *> queue(to_visit.begin(), to_visit.end());
+    bool progressed = false;
+    for (auto *p : queue) {
+      const bool all_inputs_emitted =
+          std::all_of(p->inputs.begin(), p->inputs.end(),
+                      [&](Node *x) -> bool { return visited.count(x) != 0; });
+      if (!all_inputs_emitted) {
+        continue;
+      }
+      sorted_.push_back(p);
+      for (auto *out : p->outputs) {
+        if (!visited.count(out)) {
+          to_visit.insert(out);
+        }
+      }
+      to_visit.erase(p);
+      visited.insert(p);
+      progressed = true;
+    }
+    PADDLE_ENFORCE(progressed,
+                   "Topological sorting cannot make progress: the graph has a "
+                   "cycle or a node whose input is unreachable from the start "
+                   "points.");
+  }
+}
+NodesTSIterator::NodesTSIterator(const NodesTSIterator &other)
+    : sorted_(other.sorted_), cursor_(other.cursor_) {}
+Node &NodesTSIterator::operator*() {
+  PADDLE_ENFORCE_LT(cursor_, sorted_.size());
+  return *sorted_[cursor_];
+}
+NodesTSIterator &NodesTSIterator::operator++() {
+  if (++cursor_ >= sorted_.size()) {
+    sorted_.clear();
+    cursor_ = 0;
+  }
+  return *this;
+}
+NodesTSIterator &NodesTSIterator::operator=(const NodesTSIterator &other) {
+  cursor_ = other.cursor_;
+  sorted_ = other.sorted_;
+  return *this;
+}
+bool NodesTSIterator::operator==(const NodesTSIterator &other) {
+  return sorted_ == other.sorted_ && cursor_ == other.cursor_;
+}
+Node *NodesTSIterator::operator->() {
+  PADDLE_ENFORCE_LT(cursor_, sorted_.size());
+  return sorted_[cursor_];
+}
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/ir/graph_traits.h
+++ b/paddle/fluid/framework/ir/graph_traits.h
@@ -62,6 +62,32 @@ struct NodesDFSIterator
  std::unordered_set<Node *> visited_;
 };
+// Topological sorting iterator on nodes.
+struct NodesTSIterator
+    : public std::iterator<std::forward_iterator_tag, Node *> {
+  NodesTSIterator() = default;
+  NodesTSIterator(const std::vector<Node *> &source);
+  NodesTSIterator(NodesTSIterator &&other)
+      : sorted_(std::move(other.sorted_)), cursor_(other.cursor_) {
+    other.cursor_ = 0;
+  }
+  NodesTSIterator(const NodesTSIterator &other);
+  Node &operator*();
+  NodesTSIterator &operator++();
+  // TODO(Superjomn) current implementation just compare the first
+  // element, need to compare the graph and all the elements in the queue and
+  // set.
+  NodesTSIterator &operator=(const NodesTSIterator &other);
+  bool operator==(const NodesTSIterator &other);
+  bool operator!=(const NodesTSIterator &other) { return !(*this == other); }
+  Node *operator->();
+ private:
+  std::vector<Node *> sorted_;
+  size_t cursor_{0};
+};
 /*
 * GraphTraits contains some graph traversal algorithms.
 *
@@ -76,6 +102,14 @@ struct GraphTraits {
                                            NodesDFSIterator());
  }
+  // Returns a range that visits the nodes of `g` in topological order. The
+  // range ends at a default-constructed NodesTSIterator. The sort is computed
+  // once, when the begin iterator is constructed from the start points.
+  static iterator_range<NodesTSIterator> TS(const Graph &g) {
+    auto start_points = ExtractStartPoints(g);
+    PADDLE_ENFORCE(!start_points.empty());
+    return iterator_range<NodesTSIterator>(NodesTSIterator(start_points),
+                                           NodesTSIterator());
+  }
 private:
  // The nodes those have no input will be treated as start points.
  static std::vector<Node *> ExtractStartPoints(const Graph &g) {

--- a/paddle/fluid/framework/ir/node.cc
+++ b/paddle/fluid/framework/ir/node.cc
@@ -17,8 +17,12 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 namespace ir {
+// msvc15 does not support constexpr correctly.
+#if !defined(_WIN32)
 constexpr char Node::kControlDepVarName[];
-int Node::count_ = 0;
+#else
+const char Node::kControlDepVarName[] = "__control_var";
+#endif
 std::unique_ptr<Node> CreateNodeForTest(const std::string& name,
                                        Node::Type type) {

--- a/paddle/fluid/framework/ir/node.h
+++ b/paddle/fluid/framework/ir/node.h
@@ -55,7 +55,11 @@ class Node {
  }
  enum class Type { kOperation, kVariable };
+#if !defined(_WIN32)  // msvc not support constexpr correctly.
  static constexpr char kControlDepVarName[] = "__control_var";
+#else
+  static const char kControlDepVarName[];
+#endif
  Type NodeType() const { return type_; }
@@ -115,37 +119,30 @@ class Node {
  int id_;
 private:
+  // The ID can only be set by a Graph.
+  void SetId(int id) { id_ = id; }
  friend class Graph;
  friend std::unique_ptr<Node> CreateNodeForTest(const std::string& name,
                                                 Node::Type type);
  explicit Node(const std::string& name, Type type)
-      : name_(name),
+      : name_(name), var_desc_(nullptr), op_desc_(nullptr), type_(type) {}
-        var_desc_(nullptr),
-        op_desc_(nullptr),
-        type_(type),
-        id_(count_++) {}
  explicit Node(VarDesc* var_desc)
      : name_(var_desc->Name()),
        var_desc_(new VarDesc(*var_desc)),
        op_desc_(nullptr),
-        type_(Type::kVariable),
+        type_(Type::kVariable) {}
-        id_(count_++) {}
  explicit Node(OpDesc* op_desc)
      : name_(op_desc->Type()),
        var_desc_(nullptr),
        op_desc_(new OpDesc(*op_desc, op_desc->Block())),
-        type_(Type::kOperation),
+        type_(Type::kOperation) {}
-        id_(count_++) {}
  Node() = delete;
-  static int count_;
-  // Please don't use this API or make this public.
-  static void ResetId() { count_ = 0; }
  boost::any wrapper_;
  std::function<void(void)> wrapper_deleter_;
  std::type_index wrapper_type_ = std::type_index(typeid(void));

--- a/paddle/fluid/framework/ir/pass.h
+++ b/paddle/fluid/framework/ir/pass.h
@@ -93,6 +93,7 @@ class Pass {
 protected:
  virtual std::unique_ptr<Graph> ApplyImpl(std::unique_ptr<Graph> graph) const {
    LOG(FATAL) << "Calling virtual Pass not implemented.";
+    return graph;
  }
 private:
@@ -196,26 +197,26 @@ struct PassRegistrar : public Registrar {
                msg)
 // Register a new pass that can be applied on the IR.
-#define REGISTER_PASS(pass_type, pass_class)                          \
+#define REGISTER_PASS(pass_type, pass_class)                \
-  STATIC_ASSERT_PASS_GLOBAL_NAMESPACE(                                \
+  STATIC_ASSERT_PASS_GLOBAL_NAMESPACE(                      \
-      __reg_pass__##pass_type,                                        \
+      __reg_pass__##pass_type,                              \
-      "REGISTER_PASS must be called in global namespace");            \
+      "REGISTER_PASS must be called in global namespace");  \
-  static ::paddle::framework::ir::PassRegistrar<pass_class>           \
+  static ::paddle::framework::ir::PassRegistrar<pass_class> \
-      __pass_registrar_##pass_type##__(#pass_type);                   \
+      __pass_registrar_##pass_type##__(#pass_type);         \
-  int TouchPassRegistrar_##pass_type() {                              \
+  int TouchPassRegistrar_##pass_type() {                    \
-    __pass_registrar_##pass_type##__.Touch();                         \
+    __pass_registrar_##pass_type##__.Touch();               \
-    return 0;                                                         \
+    return 0;                                               \
-  }                                                                   \
+  }                                                         \
-  static ::paddle::framework::ir::PassRegistrar<pass_class>           \
+  static ::paddle::framework::ir::PassRegistrar<pass_class> \
-      &__pass_tmp_registrar_##pass_type##__ __attribute__((unused)) = \
+      &__pass_tmp_registrar_##pass_type##__ UNUSED =        \
          __pass_registrar_##pass_type##__
-#define USE_PASS(pass_type)                                           \
+#define USE_PASS(pass_type)                           \
-  STATIC_ASSERT_PASS_GLOBAL_NAMESPACE(                                \
+  STATIC_ASSERT_PASS_GLOBAL_NAMESPACE(                \
-      __use_pass_itself_##pass_type,                                  \
+      __use_pass_itself_##pass_type,                  \
-      "USE_PASS must be called in global namespace");                 \
+      "USE_PASS must be called in global namespace"); \
-  extern int TouchPassRegistrar_##pass_type();                        \
+  extern int TouchPassRegistrar_##pass_type();        \
-  static int use_pass_itself_##pass_type##_ __attribute__((unused)) = \
+  static int use_pass_itself_##pass_type##_ UNUSED =  \
      TouchPassRegistrar_##pass_type()
 }  // namespace ir

--- a/paddle/fluid/framework/naive_executor.cc
+++ b/paddle/fluid/framework/naive_executor.cc
@@ -57,60 +57,58 @@ static void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
  }
 }
-void NaiveExecutor::Prepare(Scope *parent_scope,
+void NaiveExecutor::Prepare(Scope *scope, const ProgramDesc &program_desc,
-                            const ProgramDesc &program_desc, int block_id,
+                            int block_id, bool with_feed_fetch_ops) {
-                            bool with_feed_fetch_ops) {
+  if (!scope) {
-  if (!parent_scope) {
    scope_ = new framework::Scope;
  } else {
-    scope_ = &parent_scope->NewScope();
+    scope_ = scope;
  }
-  CreateVariables(program_desc, scope_, block_id);
+  VLOG(3) << "NaiveExecutor init with scope " << scope;
  CreateOps(program_desc, block_id, with_feed_fetch_ops);
 }
 void NaiveExecutor::Run() {
  for (auto &op : ops_) {
-    VLOG(40) << "run " << op->Type();
+    VLOG(3) << std::this_thread::get_id() << " run " << op->Type()
+            << " on scope " << scope_;
    op->Run(*scope_, place_);
  }
 }
-void NaiveExecutor::CreateVariables(const ProgramDesc &desc, Scope *scope,
+void NaiveExecutor::CreateVariables(const ProgramDesc &desc, int block_id,
-                                    int block_id) {
+                                    bool persistable, Scope *scope) {
-  PADDLE_ENFORCE(scope);
+  PADDLE_ENFORCE_NOT_NULL(scope);
  auto &global_block = desc.Block(block_id);
-  const Scope *ancestor_scope = scope;
+  const auto *anc = scope;
-  while (ancestor_scope->parent()) {
+  PADDLE_ENFORCE(anc->parent() != anc);
-    ancestor_scope = ancestor_scope->parent();
+  while (anc->parent()) {
+    anc = anc->parent();
  }
-  if (ancestor_scope != scope) {
+  for (auto &var : global_block.AllVars()) {
-    for (auto &var : global_block.AllVars()) {
+    if (var->Name() == framework::kEmptyVarName) {
-      if (var->Name() == framework::kEmptyVarName) {
+      continue;
-        continue;
+    }
-      }
-      // Create persistable vars in ancestor scope.
+    if (persistable == var->Persistable()) {
-      if (var->Persistable()) {
+      if (persistable) {
-        auto *ptr = const_cast<Scope *>(ancestor_scope)->Var(var->Name());
+        if (!anc->FindVar(var->Name())) {
-        InitializeVariable(ptr, var->GetType());
+          auto *ptr = const_cast<Scope *>(anc)->Var(var->Name());
-        VLOG(30) << "Create Variable " << var->Name()
+          VLOG(3) << scope << " Create persistable variable " << var->Name()
-                 << " global, which pointer is " << ptr;
+                  << ", which pointer is " << ptr;
-      } else {  // Create temporary variables in local scope.
+          InitializeVariable(ptr, var->GetType());
-        auto *ptr = scope->Var(var->Name());
+        }
+      } else {
+        auto *ptr = const_cast<Scope *>(scope)->Var(var->Name());
+        VLOG(3) << scope << " Create variable " << var->Name()
+                << ", which pointer is " << ptr;
        InitializeVariable(ptr, var->GetType());
-        VLOG(30) << "Create Variable " << var->Name()
-                 << " locally, which pointer is " << ptr;
      }
    }
-  } else {
-    for (auto &var : global_block.AllVars()) {
-      auto *ptr = scope->Var(var->Name());
-      InitializeVariable(ptr, var->GetType());
-      VLOG(30) << "Create variable " << var->Name() << ", which pointer is "
-               << ptr;
-    }
  }
 }

--- a/paddle/fluid/framework/naive_executor.h
+++ b/paddle/fluid/framework/naive_executor.h
@@ -35,8 +35,14 @@ class NaiveExecutor {
  // Create child scope.
  // Create variables.
  // @with_feed_fetch_ops: whether to work with the feed and fetch operators.
-  void Prepare(Scope* parent_scope, const ProgramDesc& program_desc,
+  void Prepare(Scope* scope, const ProgramDesc& program_desc, int block_id,
-               int block_id, bool with_feed_fetch_ops);
+               bool with_feed_fetch_ops);
+  // Create variables beforehand.
+  // Create parameters if persistable is true, or create the temporary variables
+  // instead.
+  void CreateVariables(const ProgramDesc& desc, int block_id, bool persistable,
+                       Scope* scope);
  // Run all the operators.
  void Run();
@@ -49,8 +55,6 @@ class NaiveExecutor {
  void CleanFeedFetchOps();
 protected:
-  void CreateVariables(const ProgramDesc& desc, Scope* scope, int block_id);
  void CreateOps(const ProgramDesc& desc, int block_id,
                 bool with_feed_fetch_ops);

--- a/paddle/fluid/framework/naive_executor_test.cc
+++ b/paddle/fluid/framework/naive_executor_test.cc
@@ -39,7 +39,7 @@ TEST(NaiveExecutor, Basic) {
  auto place = platform::CPUPlace();
  NaiveExecutor exe(place);
-  exe.Prepare(nullptr, program, 0, false /*with feed fetch ops*/);
+  exe.Prepare(nullptr, program, 0, false);
  auto* a_tensor = exe.FindTensor("a");
  auto* b_tensor = exe.FindTensor("b");
  auto* c_tensor = exe.FindTensor("c");

--- a/paddle/fluid/inference/analysis/graph_traits.cc
+++ b/paddle/fluid/inference/analysis/graph_traits.cc
 /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
+    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/inference/analysis/graph_traits.h"
+#ifdef PADDLE_WITH_NGRAPH
+#include <algorithm>
+#include <functional>
+#include "paddle/fluid/framework/ngraph_bridge.h"
+#include "ngraph/ngraph.hpp"
+namespace paddle {
+namespace framework {
+std::map<std::string,
+         std::function<void(const std::shared_ptr<OperatorBase>&,
+                            std::shared_ptr<std::unordered_map<
+                                std::string, std::shared_ptr<ngraph::Node>>>)>>
+    NgraphBridge::NG_NODE_MAP = {};
+// Dispatches `op` to the builder registered for its type in NG_NODE_MAP,
+// passing along the shared variable-name -> ngraph node map.
+// The lookup uses find() rather than operator[]: operator[] would insert an
+// empty std::function for an unregistered type, which both fails when invoked
+// and leaves a bogus entry behind in NG_NODE_MAP.
+void NgraphBridge::build_graph(const std::shared_ptr<OperatorBase>& op) {
+  auto& op_type = op->Type();
+  auto builder = NG_NODE_MAP.find(op_type);
+  if (builder == NG_NODE_MAP.end()) {
+    PADDLE_THROW("Op type %s is not registered in NG_NODE_MAP", op_type);
+  }
+  builder->second(op, ngb_node_map);
+}
+}  // namespace framework
+}  // namespace paddle
+#endif
--- a/paddle/fluid/inference/analysis/node_tester.cc
+++ b/paddle/fluid/inference/analysis/node_tester.cc
@@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
+    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
@@ -12,44 +12,47 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/inference/analysis/node.h"
+#pragma once
-#include <gtest/gtest.h>
+#ifdef PADDLE_WITH_NGRAPH
+#include <algorithm>
+#include <map>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/platform/enforce.h"
+#include "ngraph/ngraph.hpp"
 namespace paddle {
-namespace inference {
+namespace framework {
-namespace analysis {
+class NgraphBridge {
-TEST(NodeAttr, bool) {
+ public:
-  AnyAttr x;
+  static std::map<
-  x.Bool() = true;
+      std::string,
-  ASSERT_EQ(x.Bool(), true);
+      std::function<void(const std::shared_ptr<OperatorBase>&,
-}
+                         std::shared_ptr<std::unordered_map<
+                             std::string, std::shared_ptr<ngraph::Node>>>)>>
-TEST(NodeAttr, int32) {
+      NG_NODE_MAP;
-  AnyAttr x;
-  x.Int32() = 32;
+  explicit NgraphBridge(
-  ASSERT_EQ(x.Int32(), 32);
+      std::shared_ptr<
-}
+          std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
+          var_node_map)
-TEST(NodeAttr, string) {
+      : ngb_node_map(var_node_map) {}
-  AnyAttr x;
-  x.String() = "Hello";
+  void build_graph(const std::shared_ptr<OperatorBase>& op);
-  ASSERT_EQ(x.String(), "Hello");
-}
+ private:
+  std::shared_ptr<
-TEST(Node, Attr) {
+      std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
-  // Node is an abstract class, use Value instead for they share the same Attr
+      ngb_node_map;
-  // logic.
+};
-  NodeMap nodes;
-  auto* node = nodes.Create(Node::Type::kValue);
+}  // namespace framework
-  node->attr("v0").Int32() = 2008;
-  ASSERT_EQ(node->attr("v0").Int32(), 2008);
-  node->attr("str").String() = "hello world";
-  ASSERT_EQ(node->attr("str").String(), "hello world");
-}
-}  // namespace analysis
-}  // namespace inference
 }  // namespace paddle
+#endif
--- a/paddle/fluid/framework/ngraph_operator.cc
+++ b/paddle/fluid/framework/ngraph_operator.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef PADDLE_WITH_NGRAPH
+#include <glog/logging.h>
+#include <algorithm>
+#include <map>
+#include "paddle/fluid/framework/feed_fetch_type.h"
+#include "paddle/fluid/framework/ngraph_operator.h"
+#include "paddle/fluid/framework/shape_inference.h"
+#include "paddle/fluid/framework/var_desc.h"
+#include "paddle/fluid/framework/var_type.h"
+namespace paddle {
+namespace framework {
+static std::map<proto::VarType::Type, ngraph::element::Type> pd2ng_type_map = {
+    {proto::VarType::FP32, ngraph::element::f32},
+    {proto::VarType::FP64, ngraph::element::f64},
+    {proto::VarType::INT32, ngraph::element::i32},
+    {proto::VarType::INT64, ngraph::element::i64},
+    {proto::VarType::BOOL, ngraph::element::boolean},
+};
+typedef enum {                /* nGraph support state on ops          */
+               FULL_TRAIN,    /* Support full ops for train           */
+               PARTIAL_TRAIN, /* Support partial ops for train        */
+               FULL_TEST,     /* Support full list of ops for test    */
+               PARTIAL_TEST   /* Support partial list of ops for test */
+} op_state;
+class NgraphOperator {
+ public:
+  explicit NgraphOperator(const Scope& scope, const platform::Place& place,
+                          const std::vector<std::shared_ptr<OperatorBase>>& ops,
+                          const std::unordered_map<
+                              std::string, ngraph::element::Type>& var_type_map,
+                          const std::unordered_set<std::string>& persist,
+                          const std::unordered_set<std::string>& fetches,
+                          const std::unordered_set<std::string>& post_op_inputs,
+                          op_state ng_op_state)
+      : scope_(scope),
+        place_(place),
+        fused_ops_(ops),
+        var_type_map_(var_type_map),
+        persistables_(persist),
+        fetches_(fetches),
+        post_op_inputs_(post_op_inputs),
+        ng_op_state_(ng_op_state) {}
+  void Run(const Scope& scope, const platform::Place& place) const;
+ private:
+  static std::unordered_map<std::string, std::shared_ptr<ngraph::Function>>
+      func_cache;
+  const Scope& scope_;
+  const platform::Place& place_;
+  std::vector<std::shared_ptr<OperatorBase>> fused_ops_;
+  std::unordered_map<std::string, ngraph::element::Type> var_type_map_;
+  std::unordered_set<std::string> persistables_;
+  std::unordered_set<std::string> fetches_;
+  std::unordered_set<std::string> post_op_inputs_;
+  op_state ng_op_state_;
+};
+// Splits `ops` into maximal runs of consecutive operators whose type has an
+// entry in NgraphBridge::NG_NODE_MAP. Only the operators located after the
+// first run of feed ops and before the first fetch op that follows are
+// examined. Each returned interval is a two-element vector
+// {first, one-past-last} of iterators into `*ops`.
+// Returns an empty result when `ops` is empty, contains no feed op, contains
+// no fetch op after the feed ops, or has nothing between feed and fetch.
+std::vector<std::vector<std::vector<std::unique_ptr<OperatorBase>>::iterator>>
+FusedOperator::FusedOpIntervals(
+    std::vector<std::unique_ptr<paddle::framework::OperatorBase>>* ops) {
+  std::vector<std::vector<std::vector<std::unique_ptr<OperatorBase>>::iterator>>
+      intervals;
+  if (ops->empty()) {
+    return intervals;
+  }
+  size_t size = ops->size();
+  size_t left = 0;
+  // Skip everything in front of the first feed op. `ops` is a pointer, so its
+  // elements are reached through `->`.
+  while (left < size && ops->at(left)->Type() != kFeedOpType) {
+    ++left;
+  }
+  if (left == size) {
+    return intervals;
+  }
+  // Skip the run of feed ops itself.
+  while (left < size && ops->at(left)->Type() == kFeedOpType) {
+    ++left;
+  }
+  size_t right = left;
+  while (right < size && ops->at(right)->Type() != kFetchOpType) {
+    ++right;
+  }
+  if (right == size) {
+    return intervals;
+  }
+  if (left >= right) return intervals;
+  // [left, right) are the indices between the feed ops and the fetch op.
+  size_t pivot = left;
+  while (pivot < right) {
+    auto op_type = ops->at(pivot)->Type();
+    if (paddle::framework::NgraphBridge::NG_NODE_MAP.find(op_type) ==
+        paddle::framework::NgraphBridge::NG_NODE_MAP.end()) {
+      ++pivot;
+    } else {
+      // Extend the run for as long as the op type stays in NG_NODE_MAP.
+      size_t start = pivot;
+      while (pivot < right &&
+             (paddle::framework::NgraphBridge::NG_NODE_MAP.find(
+                  ops->at(pivot)->Type()) !=
+              paddle::framework::NgraphBridge::NG_NODE_MAP.end())) {
+        ++pivot;
+      }
+      std::vector<std::vector<std::unique_ptr<OperatorBase>>::iterator>
+          interval = {ops->begin() + start, ops->begin() + pivot};
+      intervals.push_back(interval);
+    }
+  }  // end while
+  return intervals;
+}
+// Moves the operators in [start, end) into fused_ops_, then records every
+// input variable name of the operators from `end` up to (not including) the
+// next fetch op in post_op_inputs_, and finally calls Process().
+// The program and block id are stored in pdesc_ / block_, the members that
+// Process() and RunImpl() read.
+// NOTE(review): the iterators are dereferenced outside [start, end) --
+// `start - 1`, and `end` onwards until a fetch op is met -- so the caller must
+// guarantee that an op precedes `start` and that a fetch op exists at or
+// after `end`. Presumably the range comes from FusedOpIntervals(); confirm.
+FusedOperator::FusedOperator(
+    const ProgramDesc& prog, size_t block_id,
+    std::vector<std::unique_ptr<OperatorBase>>::iterator start,
+    std::vector<std::unique_ptr<OperatorBase>>::iterator end,
+    const std::string& type, const VariableNameMap& inputs,
+    const VariableNameMap& outputs, const AttributeMap& attrs)
+    : OperatorBase(type, inputs, outputs, attrs),
+      pdesc_(prog),
+      block_(block_id) {
+  for (std::vector<std::unique_ptr<OperatorBase>>::iterator it = start;
+       it != end; ++it) {
+    fused_ops_.push_back(std::move(*it));
+  }
+  for (std::vector<std::unique_ptr<OperatorBase>>::iterator it = end;
+       (*it)->Type() != kFetchOpType; ++it) {
+    for (auto& var_name_item : (*it)->Inputs()) {
+      for (auto& var_name : var_name_item.second) {
+        post_op_inputs_.insert(var_name);
+      }
+    }
+  }
+  // The fused range covers everything between the feed and the fetch op.
+  // NOTE(review): RunImpl() reads a flag named `is_full`; confirm which of
+  // the two names the class actually declares.
+  if ((*(start - 1))->Type() == kFeedOpType && (*end)->Type() == kFetchOpType) {
+    is_complete = true;
+  }
+  Process();
+}
+// Scans the block description once and caches what RunImpl hands over to
+// NgraphOperator: the ngraph element type of each tensor-like variable, the
+// names of persistable variables, and the names of the fetch targets.
+void FusedOperator::Process() {
+  auto& block_desc = pdesc_.Block(block_);
+  for (auto& var_desc : block_desc.AllVars()) {
+    // Only tensor-like variables are of interest.
+    const auto var_kind = var_desc->GetType();
+    if (var_kind != proto::VarType::SELECTED_ROWS &&
+        var_kind != proto::VarType::LOD_TENSOR &&
+        var_kind != proto::VarType::LOD_TENSOR_ARRAY) {
+      continue;
+    }
+    const std::string name = var_desc->Name();
+    if (name == framework::kEmptyVarName) {
+      continue;
+    }
+    // The "feed" and "fetch" variables get no element type entry.
+    const bool is_feed_or_fetch = (name == "fetch" || name == "feed");
+    if (!is_feed_or_fetch) {
+      auto found = pd2ng_type_map.find(var_desc->GetDataType());
+      if (found == pd2ng_type_map.end()) {
+        PADDLE_THROW("Data type of var %s not found in pd2ng_type_map", name);
+      }
+      var_type_map_[name] = found->second;
+    }
+    if (var_desc->Persistable()) {
+      persistables_.insert(name);
+    }
+  }
+  // The first "X" input of every fetch op is a fetch target.
+  for (auto* op_desc : block_desc.AllOps()) {
+    if (op_desc->Type() != kFetchOpType) {
+      continue;
+    }
+    fetches_.insert(op_desc->Input("X")[0]);
+  }
+}
+// Executes the fused operators through an NgraphOperator.
+//
+// The state passed to NgraphOperator is derived from two facts: whether the
+// block contains any op whose type includes "_grad" (TRAIN vs. TEST), and
+// whether is_full_ is set (FULL vs. PARTIAL).
+void FusedOperator::RunImpl(const Scope& scope,
+                            const platform::Place& place) const {
+  op_state ng_op_state = PARTIAL_TEST;
+  auto& bdesc = pdesc_.Block(block_);
+  // A single gradient op is enough to treat the block as a training program.
+  for (auto* op : bdesc.AllOps()) {
+    if (op->Type().find("_grad") != std::string::npos) {
+      ng_op_state = PARTIAL_TRAIN;
+      break;
+    }
+  }
+  if (is_full_) {
+    ng_op_state = ng_op_state == PARTIAL_TEST ? FULL_TEST : FULL_TRAIN;
+  }
+  NgraphOperator ngraph_op(scope, place, fused_ops_, var_type_map_,
+                           persistables_, fetches_, post_op_inputs_,
+                           ng_op_state);
+  ngraph_op.Run(scope, place);
+}
+}  // namespace framework
+}  // namespace paddle
+#endif
--- a/paddle/fluid/framework/ngraph_operator.h
+++ b/paddle/fluid/framework/ngraph_operator.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#ifdef PADDLE_WITH_NGRAPH
+#include <algorithm>
+#include <atomic>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+#include "paddle/fluid/framework/attribute.h"
+#include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/ngraph_bridge.h"
+#include "paddle/fluid/framework/op_info.h"
+#include "paddle/fluid/framework/op_kernel_type.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/variant.h"
+#include "ngraph/ngraph.hpp"
+namespace paddle {
+namespace framework {
+// An operator that wraps a consecutive run of operators and executes them
+// together through an NgraphOperator.
+class FusedOperator : public OperatorBase {
+ public:
+  // Returns the iterator ranges within `ops` that can be fused. Each inner
+  // vector holds a {start, end} iterator pair.
+  static std::vector<
+      std::vector<std::vector<std::unique_ptr<OperatorBase>>::iterator>>
+  FusedOpIntervals(
+      std::vector<std::unique_ptr<paddle::framework::OperatorBase>>* ops);
+  // Fuses the operators in [start, end); `prog` and `block_id` identify the
+  // block description those operators belong to.
+  explicit FusedOperator(
+      const ProgramDesc& prog, size_t block_id,
+      std::vector<std::unique_ptr<OperatorBase>>::iterator start,
+      std::vector<std::unique_ptr<OperatorBase>>::iterator end,
+      const std::string& type = "fused_op", const VariableNameMap& inputs = {},
+      const VariableNameMap& outputs = {}, const AttributeMap& attrs = {});
+  // Runs the fused operators via an NgraphOperator.
+  void RunImpl(const Scope& scope, const platform::Place& place) const final;
+ private:
+  // Copy of the program description the fused operators come from.
+  const ProgramDesc pdesc_;
+  // Index of the block inside pdesc_.
+  size_t block_;
+  // The operators wrapped by this fused operator.
+  std::vector<std::shared_ptr<OperatorBase>> fused_ops_;
+  // Variable name -> ngraph element type.
+  std::unordered_map<std::string, ngraph::element::Type> var_type_map_;
+  // Names of persistable variables in the block.
+  std::unordered_set<std::string> persistables_;
+  // Names of the variables consumed by fetch ops.
+  std::unordered_set<std::string> fetches_;
+  // Names of the inputs of the operators that run after the fused range.
+  std::unordered_set<std::string> post_op_inputs_;
+  // Selects the FULL_* (rather than PARTIAL_*) state used in RunImpl.
+  bool is_full_ = false;
+  // Fills the lookup members above from the block description.
+  void Process();
+};
+}  // namespace framework
+}  // namespace paddle
+#endif
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -150,14 +150,17 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
 #endif
  }
-  // The profile has a process-wide mutex, results in serious performance issue
+// The profile has a process-wide mutex, results in serious performance issue
-  // in concurrency scenerio. Here use an `if` to fix this issue.
+// in concurrency scenario. Here use an `if` to fix this issue.
-  // Please not remove the `if`, ask @Superjomn if there are any concern.
+// Please do not remove the `if`; ask @Superjomn if there are any concerns.
+#ifndef _WIN32
  if (platform::IsProfileEnabled()) {
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    platform::RecordEvent record_event(Type(), pool.Get(place));
    RunImpl(scope, place);
-  } else {
+  } else
+#endif
+  {
    RunImpl(scope, place);
  }
  VLOG(30) << place << " " << DebugStringEx(&scope);

--- a/paddle/fluid/framework/scope.cc
+++ b/paddle/fluid/framework/scope.cc
@@ -15,7 +15,9 @@ limitations under the License. */
 #include "paddle/fluid/framework/scope.h"
 #include <memory>  // for unique_ptr
+#include <queue>
 #include <set>
+#include <unordered_set>
 #include "glog/logging.h"
 #include "paddle/fluid/framework/threadpool.h"
 #include "paddle/fluid/string/printf.h"
@@ -36,6 +38,16 @@ DEFINE_double(
    "Memory size threshold (GB) when the garbage collector clear tensors."
    "Disabled when this value is less than 0");
+// In the inference scenario, a scope will not be written by two threads at
+// the same time, but it may be read by multiple threads concurrently, and
+// the mutex would cause a serious performance issue.
+// So the mutex is disabled when `ON_INFER` is defined.
+#ifdef ON_INFER
+#define SCOPE_LOCK_GUARD
+#else
+#define SCOPE_LOCK_GUARD std::lock_guard<std::mutex> lock(mutex_);
+#endif
 namespace paddle {
 namespace framework {
@@ -49,18 +61,18 @@ int64_t GetEagerDeletionThreshold() {
 Scope::~Scope() { DropKids(); }
 Scope& Scope::NewScope() const {
-  std::lock_guard<std::mutex> lock(mutex_);
+  SCOPE_LOCK_GUARD
  kids_.push_back(new Scope(this));
  return *kids_.back();
 }
 Variable* Scope::Var(const std::string& name) {
-  std::lock_guard<std::mutex> lock(mutex_);
+  SCOPE_LOCK_GUARD
  return VarInternal(name);
 }
 Variable* Scope::Var(std::string* name) {
-  std::lock_guard<std::mutex> lock(mutex_);
+  SCOPE_LOCK_GUARD
  auto new_name = string::Sprintf("%p.%d", this, vars_.size());
  if (name != nullptr) {
    *name = new_name;
@@ -69,34 +81,34 @@ Variable* Scope::Var(std::string* name) {
 }
 Variable* Scope::FindVar(const std::string& name) const {
-  std::lock_guard<std::mutex> lock(mutex_);
+  SCOPE_LOCK_GUARD
  return FindVarInternal(name);
 }
 Variable* Scope::FindLocalVar(const std::string& name) const {
-  std::lock_guard<std::mutex> lock(mutex_);
+  SCOPE_LOCK_GUARD
  return FindVarLocally(name);
 }
 const Scope* Scope::FindScope(const Variable* var) const {
-  std::lock_guard<std::mutex> lock(mutex_);
+  SCOPE_LOCK_GUARD
  return FindScopeInternal(var);
 }
 void Scope::DropKids() {
-  std::lock_guard<std::mutex> lock(mutex_);
+  SCOPE_LOCK_GUARD
  for (Scope* s : kids_) delete s;
  kids_.clear();
 }
 bool Scope::HasKid(const Scope* scope) const {
-  std::lock_guard<std::mutex> lock(mutex_);
+  SCOPE_LOCK_GUARD
  auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
  return it != this->kids_.end();
 }
 std::vector<std::string> Scope::LocalVarNames() const {
-  std::lock_guard<std::mutex> lock(mutex_);
+  SCOPE_LOCK_GUARD
  std::vector<std::string> known_vars;
  known_vars.reserve(this->vars_.size());
  for (auto& p : vars_) {
@@ -106,9 +118,10 @@ std::vector<std::string> Scope::LocalVarNames() const {
 }
 void Scope::DeleteScope(Scope* scope) const {
-  std::lock_guard<std::mutex> lock(mutex_);
+  SCOPE_LOCK_GUARD
  auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
-  PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope);
+  PADDLE_ENFORCE(it != this->kids_.end(), "%p Cannot find %p as kid scope",
+                 this, scope);
  this->kids_.erase(it);
  // When making memory benchmark on Fluid, we have to delete scope sync.
  if (FLAGS_benchmark || FLAGS_eager_delete_scope) {
@@ -119,7 +132,7 @@ void Scope::DeleteScope(Scope* scope) const {
 }
 void Scope::EraseVars(const std::vector<std::string>& var_names) {
-  std::lock_guard<std::mutex> lock(mutex_);
+  SCOPE_LOCK_GUARD
  std::set<std::string> var_set(var_names.begin(), var_names.end());
  for (auto it = vars_.begin(); it != vars_.end();) {
    if (var_set.find(it->first) != var_set.end()) {
@@ -132,12 +145,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
 void Scope::Rename(const std::string& origin_name,
                   const std::string& new_name) const {
-  std::lock_guard<std::mutex> lock(mutex_);
+  SCOPE_LOCK_GUARD
  RenameInternal(origin_name, new_name);
 }
 std::string Scope::Rename(const std::string& origin_name) const {
-  std::lock_guard<std::mutex> lock(mutex_);
+  SCOPE_LOCK_GUARD
  auto new_name = string::Sprintf("%p.%d", this, vars_.size());
  RenameInternal(origin_name, new_name);
  return new_name;
@@ -189,5 +202,46 @@ Variable* Scope::FindVarLocally(const std::string& name) const {
  return nullptr;
 }
+// Renders the scope tree rooted at `root` as text: first the scope addresses
+// level by level (one separator line after each level), then, for every
+// scope in visiting order, the names of its local variables.
+// Returns an empty string when `root` is null.
+std::string GenScopeTreeDebugInfo(Scope* root) {
+  if (root == nullptr) return "";
+  std::stringstream out;
+  std::vector<Scope*> visited;
+  std::queue<Scope*> pending;
+  pending.push(root);
+  // Breadth-first walk; at the top of each iteration `pending` holds exactly
+  // the scopes of one level.
+  while (!pending.empty()) {
+    for (size_t remaining = pending.size(); remaining > 0; --remaining) {
+      Scope* current = pending.front();
+      pending.pop();
+      out << current << " ";
+      visited.push_back(current);
+      for (auto* child : current->kids()) {
+        pending.push(child);
+      }
+    }
+    // end of a level
+    out << "\n------------------------------------------\n";
+  }
+  out << "\nDetails:\n\n";
+  for (Scope* scope : visited) {
+    out << "====\n" << scope << ":\n";
+    for (auto& var_name : scope->LocalVarNames()) {
+      out << "  - " << var_name << "\n";
+    }
+  }
+  return out.str();
+}
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/scope.h
+++ b/paddle/fluid/framework/scope.h
@@ -78,11 +78,11 @@ class Scope {
  /// Drop all kids scopes belonged to this scope.
  void DropKids();
-  std::list<Scope*>& kids() const { return kids_; }
  /// Find if a scope exists in the kid scopes
  bool HasKid(const Scope* scope) const;
+  const std::list<Scope*>& kids() const { return kids_; }
  // enumerate all the variables current contains.
  std::vector<std::string> LocalVarNames() const;
@@ -118,12 +118,17 @@ class Scope {
  // Scope in `kids_` are owned by this class.
  mutable std::list<Scope*> kids_;
-  Scope const* parent_{nullptr};
+  const Scope* parent_{nullptr};
  DISABLE_COPY_AND_ASSIGN(Scope);
 private:
  mutable std::mutex mutex_;
 };
+// Generate a debug string describing the inheritance structure of scopes;
+// quite naive.
+std::string GenScopeTreeDebugInfo(Scope*);
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/selected_rows.cc
+++ b/paddle/fluid/framework/selected_rows.cc
@@ -63,6 +63,26 @@ struct TensorCopyVisitor {
  int64_t size_;
 };
+// Visitor that fills `size` consecutive elements of `dst`, starting at
+// element offset `dst_offset`, with `value` converted to the visited type.
+struct TensorFillVisitor {
+  TensorFillVisitor(framework::Tensor* dst, int64_t dst_offset, int64_t size,
+                    float value)
+      : dst_(dst), dst_offset_(dst_offset), size_(size), value_(value) {}
+  template <typename T>
+  void apply() const {
+    // TODO(qiao): support other place
+    platform::CPUPlace cpu;
+    auto* tensor_data = dst_->mutable_data<T>(cpu);
+    auto* start = tensor_data + dst_offset_;
+    auto* end = start + size_;
+    // Honour the requested fill value instead of always writing zero.
+    std::fill(start, end, static_cast<T>(value_));
+  }
+  framework::Tensor* dst_;
+  int64_t dst_offset_;
+  int64_t size_;
+  float value_;
+};
 void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows,
                       const platform::DeviceContext& dev_ctx) {
  {  // the 1st field, uint32_t version
@@ -120,7 +140,17 @@ bool SelectedRows::HasKey(int64_t key) const {
                                                                   : true;
 }
-int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown) {
+int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown,
+                                     bool is_test) {
+  if (is_test) {
+    auto iter = id_to_index_.find(key);
+    if (iter == id_to_index_.end()) {
+      return -1;
+    } else {
+      return iter->second;
+    }
+  }
  rwlock_->RDLock();
  auto iter = id_to_index_.find(key);
  if (iter == id_to_index_.end()) {
@@ -172,7 +202,7 @@ void SelectedRows::SyncIndex() {
 }
 void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
-                       bool auto_grown) {
+                       bool auto_grown, bool is_test) {
  PADDLE_ENFORCE(value->IsInitialized(),
                 "The value tensor should be initialized.");
  if (ids.numel() == 0) {
@@ -183,11 +213,19 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
                      "output tensor should have the same shape with table "
                      "except the dims[0].");
    for (int i = 0; i < ids.numel(); ++i) {
-      int64_t index = AutoGrownIndex(ids.data<int64_t>()[i], auto_grown);
+      auto id = ids.data<int64_t>()[i];
-      framework::VisitDataType(
+      int64_t index = AutoGrownIndex(id, auto_grown, is_test);
-          framework::ToDataType(value_->type()),
+      if (index < 0) {
-          TensorCopyVisitor(value, i * value_width, *value_.get(),
+        VLOG(5) << "id " << id << " not in the table, return 0";
-                            index * value_width, value_width));
+        framework::VisitDataType(
+            framework::ToDataType(value_->type()),
+            TensorFillVisitor(value, i * value_width, value_width, 0.0));
+      } else {
+        framework::VisitDataType(
+            framework::ToDataType(value_->type()),
+            TensorCopyVisitor(value, i * value_width, *value_.get(),
+                              index * value_width, value_width));
+      }
    }
  }
 }

--- a/paddle/fluid/framework/selected_rows.h
+++ b/paddle/fluid/framework/selected_rows.h
@@ -105,7 +105,7 @@ class SelectedRows {
   * the value
   */
  void Get(const framework::Tensor& ids, framework::Tensor* value,
-           bool auto_grown = false);
+           bool auto_grown = false, bool is_test = false);
  /*
   * @brief Get the index of the key from id_to_index_ map. If the key not
@@ -118,7 +118,7 @@ class SelectedRows {
   *
   * @return index of the key.
   */
-  int64_t AutoGrownIndex(int64_t key, bool auto_grown);
+  int64_t AutoGrownIndex(int64_t key, bool auto_grown, bool is_test = false);
  void SyncIndex();

--- a/paddle/fluid/framework/selected_rows_test.cc
+++ b/paddle/fluid/framework/selected_rows_test.cc
@@ -84,10 +84,14 @@ TEST(SelectedRows, SparseTable) {
      data[i * embedding_width + j] = static_cast<float>(i);
    }
  }
-  ASSERT_EQ(table.AutoGrownIndex(10, true), 0);
+  ASSERT_EQ(table.AutoGrownIndex(10, true, false), 0);
-  ASSERT_EQ(table.AutoGrownIndex(8, true), 1);
+  ASSERT_EQ(table.AutoGrownIndex(8, true, false), 1);
-  ASSERT_EQ(table.AutoGrownIndex(8, true), 1);
+  ASSERT_EQ(table.AutoGrownIndex(8, true, false), 1);
-  ASSERT_EQ(table.AutoGrownIndex(6, true), 2);
+  ASSERT_EQ(table.AutoGrownIndex(6, true, false), 2);
+  for (int64_t i = 11; i < 20; i++) {
+    ASSERT_EQ(table.AutoGrownIndex(i, true, true), -1);
+    ASSERT_TRUE(!table.HasKey(i));
+  }
  ASSERT_TRUE(table.HasKey(10));
  ASSERT_TRUE(table.HasKey(8));
  ASSERT_TRUE(table.HasKey(6));

--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -16,9 +16,21 @@ cc_library(paddle_fluid_api
    DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) 
 get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
+get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)
+get_property(fluid_third_partys GLOBAL PROPERTY FLUID_THRID_PARTYS)
+if (WIN32)
+list(APPEND fluid_third_partys gflags glog protobuf cblas)
+endif(WIN32)
 # paddle_fluid_origin exclude inference api interface
-cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
+if(WIN32)
+  sep_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
+  if(WITH_GPU AND NOT WITH_DSO)
+    target_link_libraries(paddle_fluid_origin ${cuda_modules})
+  endif(WITH_GPU AND NOT WITH_DSO)
+else(WIN32)
+  cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
+endif(WIN32)
 add_subdirectory(api)
@@ -27,13 +39,17 @@ set(SHARED_INFERENCE_SRCS
    io.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc)
-if (WITH_GPU AND TENSORRT_FOUND)
-  set(STATIC_INFERENCE_APIS ${STATIC_INFERENCE_APIS} paddle_inference_tensorrt_subgraph_engine)
-  set(SHARED_INFERENCE_SRCS ${SHARED_INFERENCE_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/api/api_tensorrt_subgraph_engine.cc)
-endif()
-# Create static library
+if(WIN32)
-cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array)
+  sep_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array
+    analysis_config paddle_pass_builder)
+  if(WITH_GPU AND NOT WITH_DSO)
+    target_link_libraries(paddle_fluid ${cuda_modules})
+  endif(WITH_GPU AND NOT WITH_DSO)
+else(WIN32)
+  cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array
+    analysis_config paddle_pass_builder)
+endif(WIN32)
 if(NOT APPLE)
  # TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
@@ -42,11 +58,20 @@ if(NOT APPLE)
 endif()
 # Create shared library
-cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
+if(WIN32)
-    DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array)
+  sep_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
+          DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array analysis_config paddle_pass_builder)
+  target_link_libraries(paddle_fluid_shared shlwapi)
+  # Without DSO loading, link the CUDA modules into the shared library itself.
+  if(WITH_GPU AND NOT WITH_DSO)
+    target_link_libraries(paddle_fluid_shared ${cuda_modules})
+  endif(WITH_GPU AND NOT WITH_DSO)
+else(WIN32)
+  cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
+      DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array analysis_config paddle_pass_builder)
+endif()
 set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
-if(NOT APPLE)
+if(NOT APPLE AND NOT WIN32)
  # TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac.
  set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_fluid.map")
  set_target_properties(paddle_fluid_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}")

--- a/paddle/fluid/inference/analysis/CMakeLists.txt
+++ b/paddle/fluid/inference/analysis/CMakeLists.txt
-cc_library(ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass)
+unset(analysis_deps CACHE)
-set(analysis_deps
+set(analysis_deps # analysis_deps can be extended across the project
-        framework_proto proto_desc ir_pass_manager graph pass paddle_fluid_api executor pretty_log)
+        framework_proto proto_desc graph pass paddle_fluid_api executor pretty_log
+        ir_pass_manager
+        CACHE INTERNAL "")
-cc_library(analysis SRCS pass_manager.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc
+add_subdirectory(ir_passes)
+add_subdirectory(passes)
+cc_library(ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass ${INFER_IR_PASSES})
+cc_library(argument SRCS argument.cc DEPS scope proto_desc)
+cc_library(analysis_pass SRCS analysis_pass.cc DEPS proto_desc)
+cc_library(analysis SRCS
  analyzer.cc
  helper.cc
-  # passes
+  analysis_pass
-  analysis_pass.cc
+  DEPS ${analysis_deps}
-  fluid_to_data_flow_graph_pass.cc
+  )
-  data_flow_graph_to_fluid_pass.cc
-  dfg_graphviz_draw_pass.cc
-  tensorrt_subgraph_pass.cc
-  tensorrt_subgraph_node_mark_pass.cc
-  fluid_to_ir_pass.cc
-  model_store_pass.cc
-  DEPS ${analysis_deps})
-cc_test(test_node SRCS node_tester.cc DEPS analysis)
 cc_test(test_dot SRCS dot_tester.cc DEPS analysis)
-cc_binary(inference_analyzer SRCS analyzer_main.cc DEPS analysis paddle_fluid)
 function(inference_analysis_test TARGET)
  if(WITH_TESTING)
@@ -34,13 +35,3 @@ function(inference_analysis_test TARGET)
 endfunction(inference_analysis_test)
 inference_analysis_test(test_analyzer SRCS analyzer_tester.cc EXTRA_DEPS paddle_inference_api)
-inference_analysis_test(test_data_flow_graph SRCS data_flow_graph_tester.cc)
-inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc)
-inference_analysis_test(test_fluid_to_ir_pass SRCS fluid_to_ir_pass_tester.cc)
-inference_analysis_test(test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc)
-inference_analysis_test(test_subgraph_splitter SRCS subgraph_splitter_tester.cc)
-inference_analysis_test(test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc)
-inference_analysis_test(test_tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass_tester.cc)
-inference_analysis_test(test_pass_manager SRCS pass_manager_tester.cc)
-inference_analysis_test(test_tensorrt_subgraph_node_mark_pass SRCS tensorrt_subgraph_node_mark_pass_tester.cc)
-inference_analysis_test(test_model_store_pass SRCS model_store_pass_tester.cc)
--- a/paddle/fluid/inference/analysis/analysis_pass.h
+++ b/paddle/fluid/inference/analysis/analysis_pass.h
@@ -19,42 +19,36 @@ limitations under the License. */
 #include <string>
 #include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/inference/analysis/argument.h"
-#include "paddle/fluid/inference/analysis/data_flow_graph.h"
 #include "paddle/fluid/inference/analysis/helper.h"
-#include "paddle/fluid/inference/analysis/node.h"
 namespace paddle {
 namespace inference {
 namespace analysis {
+/*
+ * AnalysisPass is a pass used to control the IR passes.
+ */
 class AnalysisPass {
 public:
  AnalysisPass() = default;
  virtual ~AnalysisPass() = default;
-  // Mutable Pass.
-  virtual bool Initialize(Argument *argument) { return false; }
-  // Readonly Pass.
-  virtual bool Initialize(const Argument &argument) { return false; }
-  // Virtual method overriden by subclasses to do any necessary clean up after
+  // Run on a single Graph.
-  // all passes have run.
+  void Run(Argument* argument) { RunImpl(argument); }
-  virtual bool Finalize() { return false; }
-  // Create a debugger Pass that draw the DFG by graphviz toolkit.
-  virtual AnalysisPass *CreateGraphvizDebugerPass() const { return nullptr; }
-  // Run on a single DataFlowGraph.
-  virtual void Run(DataFlowGraph *x) = 0;
  // Human-readable short representation.
  virtual std::string repr() const = 0;
  // Human-readable long description.
  virtual std::string description() const { return "No DOC"; }
-};
-// GraphPass processes on any GraphType.
+ protected:
-class DataFlowGraphPass : public AnalysisPass {};
+  // User should implement these.
+  virtual void RunImpl(Argument* argument) = 0;
+  Argument* argument_{nullptr};
+};
 }  // namespace analysis
 }  // namespace inference

--- a/paddle/fluid/inference/analysis/analyzer.cc
+++ b/paddle/fluid/inference/analysis/analyzer.cc
@@ -15,138 +15,23 @@
 #include "paddle/fluid/inference/analysis/analyzer.h"
 #include <string>
 #include <vector>
+#include "paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.h"
-#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
+#include "paddle/fluid/inference/analysis/passes/passes.h"
-#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
-#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
-#include "paddle/fluid/inference/analysis/fluid_to_ir_pass.h"
-#include "paddle/fluid/inference/analysis/model_store_pass.h"
-#include "paddle/fluid/inference/analysis/pass_manager.h"
-#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h"
-#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
-DEFINE_bool(IA_enable_tensorrt_subgraph_engine, false,
-            "Enable subgraph to TensorRT engine for acceleration");
-DEFINE_bool(IA_enable_ir, false, "Turn on IR support");
-DEFINE_string(IA_graphviz_log_root, "./",
-              "Graphviz debuger for data flow graphs.");
-DEFINE_string(IA_output_storage_path, "", "optimized model output path");
 namespace paddle {
 namespace inference {
 namespace analysis {
-class DfgPassManagerImpl final : public DfgPassManager {
+Analyzer::Analyzer() {}
- public:
-  DfgPassManagerImpl() {
-    // TODO(Superjomn) set the key with pass reprs.
-    if (!FLAGS_IA_enable_ir) {
-      AddPass("fluid-to-data-flow-graph", new FluidToDataFlowGraphPass);
-    } else {
-      AddPass("fluid-to-ir-pass", new FluidToIrPass);
-    }
-    TryAddTensorRtPass();
-    AddPass("data-flow-graph-to-fluid", new DataFlowGraphToFluidPass);
-    if (!FLAGS_IA_output_storage_path.empty()) {
-      AddPass("model-store-pass", new ModelStorePass);
-    }
-  }
-  std::string repr() const override { return "dfg-pass-manager"; }
+void Analyzer::Run(Argument *argument) { RunIrAnalysis(argument); }
-  std::string description() const override { return "DFG pass manager."; }
- private:
+void Analyzer::RunIrAnalysis(Argument *argument) {
-  void AddPass(const std::string& name, AnalysisPass* pass) {
+  std::vector<std::string> passes({"ir_analysis_compose_pass"});
-    VLOG(30) << "Adding pass " << name;
-    Register(name, pass);
-    AddGraphvizDebugerPass(pass);
-  }
-  void TryAddTensorRtPass() {
+  for (auto &pass : passes) {
-    if (FLAGS_IA_enable_tensorrt_subgraph_engine) {
+    PassRegistry::Global().Retreive(pass)->Run(argument);
-      auto trt_teller = [&](const Node* node) {
-        std::unordered_set<std::string> teller_set(
-            {"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid",
-             "depthwise_conv2d", "batch_norm", "concat", "tanh", "pad",
-             "elementwise_add", "dropout"});
-        if (!node->IsFunction()) return false;
-        const auto* func = static_cast<const Function*>(node);
-        if (teller_set.count(func->func_type())) {
-          return true;
-        } else {
-          return false;
-        }
-      };
-      AddPass("tensorrt-subgraph-marker",
-              new TensorRTSubgraphNodeMarkPass(trt_teller));
-      AddPass("tensorrt-subgraph", new TensorRTSubGraphPass(trt_teller));
-    }
-  }
-  // Add the graphviz debuger pass if the parent pass has one.
-  void AddGraphvizDebugerPass(AnalysisPass* pass) {
-    auto* debuger_pass = pass->CreateGraphvizDebugerPass();
-    if (debuger_pass) {
-      Register(debuger_pass->repr(), debuger_pass);
-    }
  }
-};
-Analyzer::Analyzer() { Register("manager1", new DfgPassManagerImpl); }
-void Analyzer::Run(Argument* argument) {
-  std::vector<std::string> passes;
-  passes.push_back("graph_viz_pass");  // add graphviz for debug.
-#ifdef PADDLE_WITH_MKLDNN
-  if (use_mkldnn_) {
-    VLOG(30) << "Adding MKL-DNN placement pass";
-    passes.push_back("mkldnn_placement_pass");
-  }
-#endif
-  // infer_clean_graph_pass should be the first default pass
-  // after mkldnn_placement_pass.
-  passes.push_back("infer_clean_graph_pass");
-  passes.push_back("graph_viz_pass");  // add graphviz for debug.
-  for (auto& pass : ir_passes_) {
-    // skip mkldnn pass when use_mkldnn_ = false;
-    bool skip_pass = (!use_mkldnn_) && pass.find("mkldnn") != std::string::npos;
-    if (!disabled_ir_passes_.count(pass) && !skip_pass) {
-      passes.push_back(pass);
-      passes.push_back("graph_viz_pass");  // add graphviz for debug.
-    }
-  }
-  argument->Set(kFluidToIrPassesAttr, new std::vector<std::string>(passes));
-  for (auto& x : data_) {
-    PADDLE_ENFORCE(x->Initialize(argument));
-    x->RunAll();
-    PADDLE_ENFORCE(x->Finalize());
-  }
-}
-Analyzer& Analyzer::IncludeAllIrPasses() {
-  ir_passes_ = all_ir_passes_;
-  return *this;
-}
-Analyzer& Analyzer::DisableIrPasses(const std::vector<std::string>& passes) {
-  disabled_ir_passes_.insert(passes.begin(), passes.end());
-  return *this;
-}
-Analyzer& Analyzer::IncludeIrPasses(const std::vector<std::string>& passes) {
-  ir_passes_ = passes;
-  return *this;
-}
-Analyzer& Analyzer::SetUseMkldnn(bool use_mkldnn) {
-  use_mkldnn_ = use_mkldnn;
-  return *this;
 }
 }  // namespace analysis

--- a/paddle/fluid/inference/analysis/analyzer.h
+++ b/paddle/fluid/inference/analysis/analyzer.h
@@ -40,56 +40,21 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/inference/analysis/analysis_pass.h"
 #include "paddle/fluid/inference/analysis/flags.h"
-#include "paddle/fluid/inference/analysis/pass_manager.h"
 namespace paddle {
 namespace inference {
 namespace analysis {
-class Analyzer : public OrderedRegistry<PassManager> {
+class Analyzer final {
 public:
-  // Register all the pass-managers.
  Analyzer();
  void Run(Argument* argument);
-  Analyzer& DisableIrPasses(const std::vector<std::string>& passes);
-  Analyzer& IncludeIrPasses(const std::vector<std::string>& passes);
-  Analyzer& IncludeAllIrPasses();
-  Analyzer& SetUseMkldnn(bool use_mkldnn);
  DISABLE_COPY_AND_ASSIGN(Analyzer);
- private:
+ protected:
-  // All avaiable IR passes.
+  void RunIrAnalysis(Argument* argument);
-  // The bigger fuse comes first, so that the small operators prefer to be
-  // merged in a larger fuse op. The small fusion will not break the pattern of
-  // larger fusion.
-  const std::vector<std::string> all_ir_passes_{{
-      // Manual update the passes here.
-      "attention_lstm_fuse_pass",       //
-      "seqconv_eltadd_relu_fuse_pass",  //
-      "embedding_fc_lstm_fuse_pass",    //
-      "fc_lstm_fuse_pass",              //
-      "mul_lstm_fuse_pass",             //
-      "fc_gru_fuse_pass",               //
-      "mul_gru_fuse_pass",              //
-      "seq_concat_fc_fuse_pass",        //
-      "fc_fuse_pass",                   //
-      "conv_bn_fuse_pass",              //
-      "conv_eltwiseadd_bn_fuse_pass",   //
-#ifdef PADDLE_WITH_MKLDNN
-      "depthwise_conv_mkldnn_pass",             //
-      "conv_bias_mkldnn_fuse_pass",             //
-      "conv_relu_mkldnn_fuse_pass",             //
-      "conv_elementwise_add_mkldnn_fuse_pass",  //
-#endif
-  }};
-  std::unordered_set<std::string> disabled_ir_passes_;
-  // Ir passes to run
-  std::vector<std::string> ir_passes_;
-  bool use_mkldnn_;
 };
 }  // namespace analysis

--- a/paddle/fluid/inference/analysis/analyzer_tester.cc
+++ b/paddle/fluid/inference/analysis/analyzer_tester.cc
@@ -27,21 +27,21 @@ namespace analysis {
 using namespace framework;  // NOLINT
 TEST(Analyzer, analysis_without_tensorrt) {
+  // Smoke test: points the Argument at FLAGS_inference_model_dir, requests
+  // only "infer_clean_graph_pass", and runs the Analyzer. The body contains
+  // no assertions, so it only checks that Run() completes.
-  FLAGS_IA_enable_tensorrt_subgraph_engine = false;
  Argument argument;
-  argument.fluid_model_dir.reset(new std::string(FLAGS_inference_model_dir));
+  argument.SetModelDir(FLAGS_inference_model_dir);
+  argument.SetIrAnalysisPasses({"infer_clean_graph_pass"});
  Analyzer analyser;
  analyser.Run(&argument);
 }
 TEST(Analyzer, analysis_with_tensorrt) {
+  // Smoke test: same as the test above, plus the TensorRT limits (max batch
+  // size 3, workspace size 1 << 20). There are no assertions.
+  // NOTE(review): the removed FLAGS_IA_enable_tensorrt_subgraph_engine = true
+  // has no replacement here (SetUseTensorRT is never called) -- confirm that
+  // this test still exercises the TensorRT path.
-  FLAGS_IA_enable_tensorrt_subgraph_engine = true;
  Argument argument;
-  argument.Set<int>("minimum_subgraph_size", new int(0));
+  argument.SetTensorRtMaxBatchSize(3);
-  argument.Set<int>("max_batch_size", new int(3));
+  argument.SetTensorRtWorkspaceSize(1 << 20);
-  argument.Set<int>("workspace_size", new int(1 << 20));
+  argument.SetModelDir(FLAGS_inference_model_dir);
-  argument.Set<std::string>("precision_mode", new std::string("FP32"));
+  argument.SetIrAnalysisPasses({"infer_clean_graph_pass"});
-  argument.fluid_model_dir.reset(new std::string(FLAGS_inference_model_dir));
  Analyzer analyser;
  analyser.Run(&argument);
 }

--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -24,13 +24,16 @@
 #pragma once
 #include <string>
+#include <vector>
+#include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/inference/analysis/data_flow_graph.h"
+#include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/platform/variant.h"
 namespace paddle {
 namespace inference {
 namespace analysis {
+using framework::ir::Graph;
 /*
 * The argument definition of both Pass and PassManagers.
@@ -39,75 +42,99 @@ namespace analysis {
 */
 struct Argument {
  Argument() = default;
-  explicit Argument(const std::string& fluid_model_dir)
+  // Convenience constructor: equivalent to default construction followed by
+  // SetModelDir(model_dir).
+  explicit Argument(const std::string& model_dir) { SetModelDir(model_dir); }
-      : fluid_model_dir(new std::string(fluid_model_dir)) {}
-  // The directory of the trained model.
+  // Type-erased pointer used as storage by DECL_ARGUMENT_UNIQUE_FIELD; the
+  // std::function deleter carries the concrete type (or is a no-op for
+  // pointers that are not owned).
+  // NOTE(review): this header uses std::unique_ptr, std::function,
+  // std::unordered_map and std::unordered_set, but the visible includes add
+  // only <string> and <vector> -- confirm <memory>, <functional>,
+  // <unordered_map> and <unordered_set> are included (directly or not).
+  using unique_ptr_t = std::unique_ptr<void, std::function<void(void*)>>;
-  std::unique_ptr<std::string> fluid_model_dir;
+  // Value type of the fusion_statis field declared below.
+  using fusion_statis_t = std::unordered_map<std::string, int>;
-  // The path of `__model__` and `param`, this is used when the file name of
-  // model and param is changed.
+  // Returns true when `key` is currently in valid_fields_, i.e. the setter of
+  // the field with that name has been called (and, for unique fields, the
+  // field has not been released since).
+  bool Has(const std::string& key) const { return valid_fields_.count(key); }
-  std::unique_ptr<std::string> fluid_model_program_path;
-  std::unique_ptr<std::string> fluid_model_param_path;
+  // Declares a field stored by value as `field___` and generates:
+  //   type__& field__()          - getter; enforces that the field was set.
+  //   void SetField(const T& x)  - copies x in and records the field as set.
+  //   bool field___valid()       - whether the field was set.
+  //   type__* field___ptr()      - address of the storage; does NOT check
+  //                                that the field was set.
+  // The expansion starts with `public:` and ends with `private:`, so the
+  // access level after every use of this macro is private.
+#define DECL_ARGUMENT_FIELD(field__, Field, type__) \
+ public:                                            \
-  // The graph that process by the Passes or PassManagers.
+  type__& field__() {                               \
-  std::unique_ptr<DataFlowGraph> main_dfg;
+    PADDLE_ENFORCE(Has(#field__));                  \
+    return field__##_;                              \
-  // The original program desc.
+  }                                                 \
-  std::unique_ptr<framework::proto::ProgramDesc> origin_program_desc;
+  void Set##Field(const type__& x) {                \
+    field__##_ = x;                                 \
-  // The processed program desc.
+    valid_fields_.insert(#field__);                 \
-  std::unique_ptr<framework::proto::ProgramDesc> transformed_program_desc;
+  }                                                 \
+  DECL_ARGUMENT_FIELD_VALID(field__);               \
-  // The output storage path of ModelStorePass.
+  type__* field__##_ptr() { return &field__##_; }   \
-  std::unique_ptr<std::string> model_output_store_path;
+                                                    \
+ private:                                           \
-  // Support for any other attributes.
+  type__ field__##_;
-  template <typename T>
-  void Set(const std::string& key, T* data) {
+  // Generates `bool field___valid()`, which forwards to Has("field__"). It is
+  // used inside both field-declaring macros; being defined after the first of
+  // them is fine because macros are expanded at the point of use.
+#define DECL_ARGUMENT_FIELD_VALID(field__) \
-    PADDLE_ENFORCE_NOT_NULL(data);
+  bool field__##_valid() { return Has(#field__); }
-    PADDLE_ENFORCE(!attrs_.count(key), "Duplicate set Argument's attr [%s]",
-                   key);
+  // Declares a field stored behind a unique_ptr_t and generates:
+  //   type__& field__()            - getter; enforces non-null and set.
+  //   void SetField(T* x)          - takes ownership of x (deleted as T).
+  //   void SetFieldNotOwned(T* x)  - stores x with a no-op deleter.
+  //   bool field___valid()         - whether the field is currently set.
+  //   type__* field___ptr()        - raw pointer; enforces the field is set.
+  //   type__* ReleaseField()       - enforces the field is set, marks it as
+  //                                  unset and returns the released pointer.
+  // As with DECL_ARGUMENT_FIELD, the expansion ends with `private:`.
+#define DECL_ARGUMENT_UNIQUE_FIELD(field__, Field, type__)                \
-    attrs_[key] = data;
+ public:                                                                  \
-    attr_deleters_[key] = [data, key]() {
+  type__& field__() {                                                     \
-      VLOG(30) << "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
+    PADDLE_ENFORCE_NOT_NULL(field__##_);                                  \
-      VLOG(30) << "argument delete attr: " << key;
+    PADDLE_ENFORCE(Has(#field__));                                        \
-      delete data;
+    return *static_cast<type__*>(field__##_.get());                       \
-    };
+  }                                                                       \
-  }
+  void Set##Field(type__* x) {                                            \
+    field__##_ =                                                          \
-  bool Has(const std::string& name) const { return attrs_.count(name); }
+        unique_ptr_t(x, [](void* x) { delete static_cast<type__*>(x); }); \
+    valid_fields_.insert(#field__);                                       \
-  template <typename T>
+  }                                                                       \
-  T* Release(const std::string& key) {
+  void Set##Field##NotOwned(type__* x) {                                  \
-    PADDLE_ENFORCE(attrs_.count(key));
+    valid_fields_.insert(#field__);                                       \
-    auto* res = boost::any_cast<T*>(attrs_.at(key));
+    field__##_ = unique_ptr_t(x, [](void* x) {});                         \
-    attrs_.erase(key);
+  }                                                                       \
-    attr_deleters_.erase(key);
+  DECL_ARGUMENT_FIELD_VALID(field__);                                     \
-    return res;
+  type__* field__##_ptr() {                                               \
-  }
+    PADDLE_ENFORCE(Has(#field__));                                        \
+    return static_cast<type__*>(field__##_.get());                        \
-  template <typename T>
+  }                                                                       \
-  T& Get(const std::string& key) {
+  type__* Release##Field() {                                              \
-    PADDLE_ENFORCE(Has(key));
+    PADDLE_ENFORCE(Has(#field__));                                        \
-    return *boost::any_cast<T*>(attrs_.at(key));
+    valid_fields_.erase(#field__);                                        \
-  }
+    return static_cast<type__*>(field__##_.release());                    \
+  }                                                                       \
-  ~Argument() {
+                                                                          \
-    for (auto& item : attr_deleters_) {
+ private:                                                                 \
-      item.second();
+  unique_ptr_t field__##_;
-    }
-  }
+  // Model path
+  DECL_ARGUMENT_FIELD(model_dir, ModelDir, std::string);
+  // Model specified with program and parameters files.
+  DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string);
+  DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
+  // The overall graph to work on.
+  DECL_ARGUMENT_UNIQUE_FIELD(main_graph, MainGraph, framework::ir::Graph);
+  // The overall Scope to work on.
+  DECL_ARGUMENT_UNIQUE_FIELD(scope, Scope, framework::Scope);
+  DECL_ARGUMENT_UNIQUE_FIELD(main_program, MainProgram, framework::ProgramDesc);
+  // The ir passes to perform in analysis phase.
+  DECL_ARGUMENT_FIELD(ir_analysis_passes, IrAnalysisPasses,
+                      std::vector<std::string>);
+  DECL_ARGUMENT_FIELD(use_gpu, UseGPU, bool);
+  DECL_ARGUMENT_FIELD(use_tensorrt, UseTensorRT, bool);
+  DECL_ARGUMENT_FIELD(tensorrt_node_teller, TensorRtNodeTeller,
+                      std::function<bool(const framework::ir::Node*)>);
+  DECL_ARGUMENT_FIELD(tensorrt_max_batch_size, TensorRtMaxBatchSize, int);
+  DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int);
+  // The program transformed by IR analysis phase.
+  DECL_ARGUMENT_UNIQUE_FIELD(ir_analyzed_program, IrAnalyzedProgram,
+                             framework::proto::ProgramDesc);
+  DECL_ARGUMENT_FIELD(fusion_statis, FusionStatis, fusion_statis_t);
 private:
-  std::unordered_map<std::string, boost::any> attrs_;
+  // Names of the fields that are currently set; maintained by the generated
+  // setters and Release functions and queried through Has().
+  std::unordered_set<std::string> valid_fields_;
-  std::unordered_map<std::string, std::function<void()>> attr_deleters_;
 };
-#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
+// Enforces that the field named `fieldname__` has been set on `argument__`,
+// which must be a pointer(-like) expression to an Argument. The macro
+// parameter is parenthesized so that expressions such as `&arg` expand to
+// `(&arg)->Has(...)` rather than `&arg->Has(...)`. The expansion keeps its
+// trailing `;` so existing call sites work with or without their own.
+#define ARGUMENT_CHECK_FIELD(argument__, fieldname__) \
-#define ANALYSIS_ARGUMENT_CHECK_FIELD(field__)               \
+  PADDLE_ENFORCE((argument__)->Has(#fieldname__),     \
-  if (UNLIKELY(!(field__))) {                                \
+                 "the argument field [%s] should be set", #fieldname__);
-    LOG(ERROR) << "field " << #field__ << " should be set."; \
-    return false;                                            \
-  }
 }  // namespace analysis
 }  // namespace inference

--- a/paddle/fluid/inference/analysis/data_flow_graph.cc
+++ b/paddle/fluid/inference/analysis/data_flow_graph.cc
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/fluid/inference/analysis/data_flow_graph.h"
-#include "paddle/fluid/inference/analysis/dot.h"
-#include "paddle/fluid/inference/analysis/node.h"
-namespace paddle {
-namespace inference {
-namespace analysis {
-using ir_node_t = framework::ir::Node;
-using ir_graph_t = framework::ir::Graph;
-// It is a better idea that the inputs and outputs of this graph is set manually
-// before, but there must be a Pass that helps to prune the unnecessary ops that
-// do not contribute to the given targets, so in this pass, analysis and get the
-// inputs and outputs is OK.
-void DataFlowGraph::Build() {
-  inputs_.clear();
-  outputs_.clear();
-  std::unordered_set<Node *> ins;
-  std::unordered_set<Node *> outs;
-  for (auto &node : nodes.nodes()) {
-    for (auto *in : node->inlinks) {
-      ins.insert(in);
-    }
-    for (auto *out : node->outlinks) {
-      outs.insert(out);
-    }
-  }
-  // The nodes that in ins but not in outs is the graph's inputs
-  // similarly, the nodes that in outs but not in ins is the graphs' outputs
-  for (auto *in : ins) {
-    if (!outs.count(in)) {
-      inputs_.push_back(in);
-    }
-  }
-  for (auto *out : outs) {
-    if (!ins.count(out)) {
-      outputs_.push_back(out);
-    }
-  }
-  Clean();
-}
-void DataFlowGraph::Build(const framework::proto::ProgramDesc &prog) {
-  // insert vars
-  // The `var2id` keeps a map from a variable's name to its Node-id, the Node-id
-  // will keep updating to its latest alias during the graph-building.
-  std::unordered_map<std::string, size_t> var2id;
-  auto &main_block = prog.blocks(framework::kRootBlockIndex);
-  for (int i = 0; i < main_block.vars_size(); i++) {
-    const auto &var = main_block.vars(i);
-    auto *v = nodes.Create(Node::Type::kValue);
-    v->SetName(var.name());
-    v->SetPbDesc(const_cast<void *>(static_cast<const void *>(&var)));
-    v->SetPbMsg(var.SerializeAsString());
-    var2id[var.name()] = v->id();
-  }
-  // The variables in a SSA can only write once, so if a variable is written
-  // multiple times(quite common in our ProgramDesc design), multiple alias
-  // Nodes of this variable will be created, and each will just write once.
-  // An set that keep all the names of the variables(the original, not alias)
-  // that have been written(as outputs). Once an Op's output variable hit the
-  // set, it should create a new alias and update the global alias for this
-  // variable. And that make a Data Flow Graph a SSA.
-  std::unordered_set<Node *> unique_written_vars;
-  for (int i = 0; i < main_block.ops_size(); i++) {
-    const auto &op = main_block.ops(i);
-    auto *o = nodes.Create(Node::Type::kFunction);
-    o->SetName(op.type());
-    static_cast<Function *>(o)->SetFuncType(op.type());
-    // Link to the original protobuf message's memory, make it easier to
-    // generate from a data flow graph to fluid ProgramDesc.
-    o->SetPbDesc(const_cast<void *>(static_cast<const void *>(&op)));
-    o->SetPbMsg(op.SerializeAsString());
-    // set inputs and outputs
-    for (int j = 0; j < op.inputs_size(); j++) {
-      auto &in_var = op.inputs(j);
-      for (int k = 0; k < in_var.arguments_size(); k++) {
-        auto *in = nodes.GetMutable(var2id.at(in_var.arguments(k)));
-        in->outlinks.push_back(o);
-        o->inlinks.push_back(in);
-        unique_written_vars.insert(in);
-      }
-    }
-    for (int j = 0; j < op.outputs_size(); j++) {
-      auto &out_var = op.outputs(j);
-      for (int k = 0; k < out_var.arguments_size(); k++) {
-        auto *out = nodes.GetMutable(var2id[out_var.arguments(k)]);
-        if (unique_written_vars.count(out)) {
-          // Loop found, for example, a = op(a), use SSA, change to a1 = op(a).
-          auto *out_alias = nodes.Create(Node::Type::kValue);
-          out_alias->SetName(out->name());
-          out_alias->SetPbDesc(out->pb_desc());
-          out_alias->SetPbMsg(out->pb_msg());
-          var2id[out_alias->name()] =
-              out_alias->id();  // update variable's alias Node
-          LOG(INFO) << "loop found in graph, create SSA alias node ["
-                    << out_alias->repr() << "] for [" << out->repr() << "]";
-          out = out_alias;
-        }
-        out->inlinks.push_back(o);
-        o->outlinks.push_back(out);
-      }
-    }
-  }
-  // Analysis and extract the inputs and outputs of this graph.
-  Build();
-}
-void DataFlowGraph::Build(const framework::ir::Graph &graph) {
-  // Create nodes
-  std::unordered_map<ir_node_t *, Node *> ir_node_map;
-  for (auto *ir_node : graph.Nodes()) {
-    Node *x{nullptr};
-    if (ir_node->IsOp()) {
-      PADDLE_ENFORCE(ir_node->Op());
-      VLOG(40) << "get op " << ir_node << " " << ir_node->Name();
-      x = nodes.Create(Node::Type::kFunction);
-      x->attr("ir_node").Pointer() = ir_node;
-      PADDLE_ENFORCE(ir_node->Op()->Proto());
-      x->SetName(ir_node->Op()->Proto()->type());
-      x->SetPbMsg(ir_node->Op()->Proto()->SerializeAsString());
-    } else if (ir_node->IsVar()) {
-      // Not create a Node for IR ControlDepVar, considering Inference currently
-      // just used in single thread scenerio.
-      VLOG(40) << "get var " << ir_node->Name();
-      x = nodes.Create(Node::Type::kValue);
-      x->attr("ir_node").Pointer() = ir_node;
-      x->SetName(ir_node->Name());
-      // x->SetPbMsg(ir_node->Var()->Proto()->SerializeAsString());
-    } else {
-      PADDLE_THROW("Failed to create an Node from IR, unknown type");
-    }
-    ir_node_map.emplace(ir_node, x);
-  }
-  VLOG(40) << "finish creating Nodes";
-  VLOG(40) << "to create edge";
-  // Create links
-  for (auto *ir_node : graph.Nodes()) {
-    auto it = ir_node_map.find(ir_node);
-    // Skip ControlDepVar.
-    if (it == ir_node_map.end()) continue;
-    auto *node = it->second;
-    for (auto *x : ir_node->inputs) {
-      if (!ir_node_map.count(x)) continue;
-      node->inlinks.push_back(ir_node_map.at(x));
-    }
-    for (auto *x : ir_node->outputs) {
-      if (!ir_node_map.count(x)) continue;
-      node->outlinks.push_back(ir_node_map.at(x));
-    }
-  }
-  Build();
-  PADDLE_ENFORCE(!inputs_.empty(),
-                 "Can't deduce any inputs from the graph, Is the graph empty?");
-  ir_graph = &graph;
-  VLOG(30) << "finished build from IR";
-}
-void DataFlowGraph::Clean() {
-  for (auto &node : nodes.nodes()) {
-    std::unordered_set<Node *> inlinks_set(node->inlinks.begin(),
-                                           node->inlinks.end());
-    std::unordered_set<Node *> outlinks_set(node->outlinks.begin(),
-                                            node->outlinks.end());
-    if (inlinks_set.size() < node->inlinks.size()) {
-      node->inlinks.assign(inlinks_set.begin(), inlinks_set.end());
-    }
-    if (outlinks_set.size() < node->outlinks.size()) {
-      node->outlinks.assign(outlinks_set.begin(), outlinks_set.end());
-    }
-  }
-}
-std::string DataFlowGraph::DotString() const {
-  Dot dot;
-  // Add nodes
-  for (size_t i = 0; i < nodes.size(); i++) {
-    const Node &node = nodes.Get(i);
-    dot.AddNode(node.repr(), node.dot_attrs());
-  }
-  // Add edges
-  for (size_t i = 0; i < nodes.size(); i++) {
-    const Node &node = nodes.Get(i);
-    for (auto &in : node.inlinks) {
-      dot.AddEdge(in->repr(), node.repr(), {});
-    }
-  }
-  return dot.Build();
-}
-std::string DataFlowGraph::HumanReadableInfo(bool show_values,
-                                             bool show_functions) const {
-  std::stringstream values, functions;
-  for (auto &n : nodes.nodes()) {
-    if (show_values && n->IsValue()) {
-      values << n->repr() << "\n";
-    }
-    if (show_functions && n->IsFunction()) {
-      functions << n->repr() << "\n";
-    }
-  }
-  return "Values:\n" + values.str() + "\n\n" + "Functions:\n" + functions.str();
-}
-//
-// NodesBFSIterator
-//
-GraphTraits<DataFlowGraph>::NodesBFSIterator::NodesBFSIterator(
-    const std::vector<Node *> &source)
-    : queue_(source.begin(), source.end()) {}
-GraphTraits<DataFlowGraph>::NodesBFSIterator::NodesBFSIterator(
-    GraphTraits<DataFlowGraph>::NodesBFSIterator &&other) noexcept
-    : queue_(std::move(other.queue_)),
-      visited_(std::move(other.visited_)) {}
-GraphTraits<DataFlowGraph>::NodesBFSIterator::NodesBFSIterator(
-    const GraphTraits<DataFlowGraph>::NodesBFSIterator &other)
-    : queue_(other.queue_), visited_(other.visited_) {}
-Node &GraphTraits<DataFlowGraph>::NodesBFSIterator::operator*() {
-  PADDLE_ENFORCE(!queue_.empty());
-  return *queue_.front();
-}
-Node *GraphTraits<DataFlowGraph>::NodesBFSIterator::operator->() {
-  PADDLE_ENFORCE(!queue_.empty());
-  return queue_.front();
-}
-GraphTraits<DataFlowGraph>::NodesBFSIterator &
-GraphTraits<DataFlowGraph>::NodesBFSIterator::operator=(
-    const GraphTraits<DataFlowGraph>::NodesBFSIterator &other) {
-  queue_ = other.queue_;
-  visited_ = other.visited_;
-  return *this;
-}
-GraphTraits<DataFlowGraph>::NodesBFSIterator
-    &GraphTraits<DataFlowGraph>::NodesBFSIterator::operator++() {
-  PADDLE_ENFORCE(!queue_.empty());
-  auto *cur = queue_.front();
-  visited_.insert(cur);
-  queue_.pop_front();
-  for (auto *output : cur->outlinks) {
-    if (!visited_.count(output)) {
-      queue_.push_back(output);
-      visited_.insert(output);
-    }
-  }
-  return *this;
-}
-bool GraphTraits<DataFlowGraph>::NodesBFSIterator::operator==(
-    const GraphTraits<DataFlowGraph>::NodesBFSIterator &other) {
-  if (queue_.empty()) return other.queue_.empty();
-  if ((!queue_.empty()) && (!other.queue_.empty())) {
-    return queue_.front() == other.queue_.front() &&
-           visited_.size() == other.visited_.size();
-    // equality of queue and
-    // visited. Just a light but week implementation.
-  }
-  return false;
-}
-//
-// NodesDFSIterator
-//
-GraphTraits<DataFlowGraph>::NodesDFSIterator::NodesDFSIterator(
-    const std::vector<Node *> &source) {
-  for (auto *x : source) stack_.push(x);
-}
-GraphTraits<DataFlowGraph>::NodesDFSIterator::NodesDFSIterator(
-    GraphTraits<DataFlowGraph>::NodesDFSIterator &&other) noexcept
-    : stack_(std::move(other.stack_)),
-      visited_(std::move(other.visited_)) {}
-GraphTraits<DataFlowGraph>::NodesDFSIterator::NodesDFSIterator(
-    const GraphTraits<DataFlowGraph>::NodesDFSIterator &other)
-    : stack_(other.stack_), visited_(other.visited_) {}
-Node &GraphTraits<DataFlowGraph>::NodesDFSIterator::operator*() {
-  PADDLE_ENFORCE(!stack_.empty());
-  return *stack_.top();
-}
-GraphTraits<DataFlowGraph>::NodesDFSIterator
-    &GraphTraits<DataFlowGraph>::NodesDFSIterator::operator++() {
-  if (stack_.empty()) return *this;
-  visited_.insert(stack_.top());
-  auto *cur = stack_.top();
-  stack_.pop();
-  for (auto *x : cur->outlinks) {
-    if (!visited_.count(x)) {
-      stack_.push(x);
-      visited_.insert(x);
-    }
-  }
-  return *this;
-}
-bool GraphTraits<DataFlowGraph>::NodesDFSIterator::operator==(
-    const GraphTraits<DataFlowGraph>::NodesDFSIterator &other) {
-  if (stack_.empty()) return other.stack_.empty();
-  if ((!stack_.empty()) && (!other.stack_.empty())) {
-    return stack_.top() == other.stack_.top();
-  }
-  return false;
-}
-GraphTraits<DataFlowGraph>::NodesDFSIterator &
-GraphTraits<DataFlowGraph>::NodesDFSIterator::operator=(
-    const GraphTraits<DataFlowGraph>::NodesDFSIterator &other) {
-  stack_ = other.stack_;
-  visited_ = other.visited_;
-  return *this;
-}
-Node *GraphTraits<DataFlowGraph>::NodesDFSIterator::operator->() {
-  return stack_.top();
-}
-inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) {
-  return node.inlinks.size() == n;
-}
-GraphTraits<DataFlowGraph>::NodesTSIterator::NodesTSIterator(
-    const std::vector<Node *> &source) {
-  PADDLE_ENFORCE(!source.empty(),
-                 "Start points of topological sorting should not be empty!");
-  // CHECK all the inputs' in-degree is 0
-  for (auto *node : source) {
-    PADDLE_ENFORCE(CheckNodeIndegreeEquals(*node, 0));
-  }
-  std::unordered_set<Node *> visited;
-  std::unordered_set<Node *> to_visit{source.begin(), source.end()};
-  std::vector<Node *> inlink_visited;
-  while (!to_visit.empty()) {
-    std::vector<Node *> queue(to_visit.begin(), to_visit.end());
-    for (auto *p : queue) {
-      if (p->deleted()) {
-        visited.insert(p);
-        to_visit.erase(p);
-        continue;
-      }
-      inlink_visited.clear();
-      std::copy_if(p->inlinks.begin(), p->inlinks.end(),
-                   std::back_inserter(inlink_visited),
-                   [&](Node *x) { return visited.count(x); });
-      if (inlink_visited.size() == p->inlinks.size()) {
-        sorted_.push_back(p);
-        for (auto *_ : p->outlinks) {
-          if (!visited.count(_)) {
-            to_visit.insert(_);
-          }
-        }
-        to_visit.erase(p);
-        visited.insert(p);
-      }
-    }
-  }
-}
-GraphTraits<DataFlowGraph>::NodesTSIterator::NodesTSIterator(
-    const paddle::inference::analysis::GraphTraits<
-        DataFlowGraph>::NodesTSIterator &other)
-    : sorted_(other.sorted_), cursor_(other.cursor_) {}
-Node &GraphTraits<DataFlowGraph>::NodesTSIterator::operator*() {
-  PADDLE_ENFORCE_LT(cursor_, sorted_.size());
-  return *sorted_[cursor_];
-}
-paddle::inference::analysis::GraphTraits<DataFlowGraph>::NodesTSIterator
-    &GraphTraits<DataFlowGraph>::NodesTSIterator::operator++() {
-  if (++cursor_ >= sorted_.size()) {
-    sorted_.clear();
-    cursor_ = 0;
-  }
-  return *this;
-}
-paddle::inference::analysis::GraphTraits<DataFlowGraph>::NodesTSIterator &
-GraphTraits<DataFlowGraph>::NodesTSIterator::operator=(
-    const paddle::inference::analysis::GraphTraits<
-        DataFlowGraph>::NodesTSIterator &other) {
-  cursor_ = other.cursor_;
-  sorted_ = other.sorted_;
-  return *this;
-}
-bool GraphTraits<DataFlowGraph>::NodesTSIterator::operator==(
-    const paddle::inference::analysis::GraphTraits<
-        DataFlowGraph>::NodesTSIterator &other) {
-  return sorted_ == other.sorted_ && cursor_ == other.cursor_;
-}
-Node *GraphTraits<DataFlowGraph>::NodesTSIterator::operator->() {
-  PADDLE_ENFORCE_LT(cursor_, sorted_.size());
-  return sorted_[cursor_];
-}
-std::pair<std::vector<Node *>, std::vector<Node *>>
-ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph) {  // NOLINT
-  std::unordered_set<Node *> nodes(graph.begin(), graph.end());
-  std::unordered_set<Node *> inputs;
-  std::unordered_set<Node *> outputs;
-  // Input a Value, check whether its inlink is in the subgraph.
-  auto inlink_in_subgraph = [&](Node *n) {
-    for (auto *in : n->inlinks) {
-      if (nodes.count(in)) return true;
-    }
-    return false;
-  };
-  for (auto &node : graph) {
-    for (auto *in : node->inlinks) {
-      // The Value that is written by nodes inside a sub-graph shouldn't be the
-      // input of the sub-graph.
-      if (!nodes.count(in) && in->type() == Node::Type::kValue &&
-          !inlink_in_subgraph(in)) {
-        inputs.insert(in);
-      }
-    }
-    for (auto *out : node->outlinks) {
-      if (!nodes.count(out) && out->type() == Node::Type::kValue) {
-        outputs.insert(out);
-      }
-    }
-  }
-  return std::make_pair(std::vector<Node *>(inputs.begin(), inputs.end()),
-                        std::vector<Node *>(outputs.begin(), outputs.end()));
-}
-// Filter the Intermediate results of the subgraph node.
-void FilterRedundantOutputOfSubGraph(DataFlowGraph *graph) {
-  std::vector<Node *> op_nodes;
-  for (auto &node : GraphTraits<DataFlowGraph>(*graph).nodes_in_TS()) {
-    if (node.type() == Node::Type::kValue || node.deleted()) {
-      continue;
-    }
-    op_nodes.push_back(&node);
-  }
-  size_t op_num = op_nodes.size();
-  for (size_t i = 0; i < op_num; i++) {
-    if (op_nodes[i]->type() == Node::Type::kFunction) continue;
-    std::unordered_set<std::string> follow_up_input_names;
-    for (size_t j = i + 1; j < op_num; j++) {
-      for (auto *in : op_nodes[j]->inlinks) {
-        follow_up_input_names.insert(in->name());
-      }
-    }
-    std::vector<Node *> filtered_subgraph_outlinks;
-    for (auto *out : op_nodes[i]->outlinks) {
-      if (follow_up_input_names.count(out->name())) {
-        filtered_subgraph_outlinks.push_back(out);
-      } else {
-        out->SetDeleted();
-      }
-    }
-    // The filtered_subgraph_outlinks may be empty.
-    op_nodes[i]->outlinks = filtered_subgraph_outlinks;
-  }
-}
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
--- a/paddle/fluid/inference/analysis/data_flow_graph.h
+++ b/paddle/fluid/inference/analysis/data_flow_graph.h
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-/*
- * Data flow graph is an pass that build the basic graph. It contains a graph
- * and the iterators that enable the iteration over the graph.
- */
-#pragma once
-#include <deque>
-#include <stack>
-#include <string>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-#include "paddle/fluid/framework/ir/graph.h"
-#include "paddle/fluid/inference/analysis/graph_traits.h"
-#include "paddle/fluid/inference/analysis/node.h"
-#include "paddle/fluid/platform/enforce.h"
-namespace paddle {
-namespace inference {
-namespace analysis {
-/*
- * DataFlowGraph - A container of Value and Function Nodes.
- *
- * This is the base graph for any other type of graphs, such as SSA or CFG.
- */
-struct DataFlowGraph {
-  NodeMap nodes;
-  // inputs and outputs are deduced from the graph.
-  // Used to interact with IR.
-  const framework::ir::Graph *ir_graph{nullptr};
-  // Extract inputs and outputs of the graph.
-  void Build();
-  void Build(const framework::proto::ProgramDesc &prog);
-  // Build a graph from ir::Graph.
-  void Build(const framework::ir::Graph &graph);
-  // Get an attribute.
-  AnyAttr &Attr(const std::string &key) { return attrs_[key]; }
-  // Output a DOT graph file for debug.
-  std::string DotString() const;
-  std::string HumanReadableInfo(bool show_values = true,
-                                bool show_functions = true) const;
-  const std::vector<Node *> &inputs() const {
-    PADDLE_ENFORCE(!inputs_.empty(),
-                   "No inputs are deduced, need to Build() first.");
-    return inputs_;
-  }
-  const std::vector<Node *> &outputs() const {
-    PADDLE_ENFORCE(!outputs_.empty(),
-                   "No outputs are deduced, need to Build() first.");
-    return outputs_;
-  }
- private:
-  mutable std::vector<Node *> inputs_;
-  mutable std::vector<Node *> outputs_;
-  std::unordered_map<std::string, AnyAttr> attrs_;
-  // Remove duplicate edges and so on.
-  void Clean();
-};
-/*
- * An graph trait help to traverse the graph using BFS.
- * The BFS start from a graph's inputs, the graph should be fully-connected, so
- * that the iterator can reach the end.
- */
-template <>
-struct GraphTraits<DataFlowGraph> {
-  // BFS iterator on nodes.
-  struct NodesBFSIterator
-      : public std::iterator<std::forward_iterator_tag, Node *> {
-    NodesBFSIterator() = default;
-    explicit NodesBFSIterator(const std::vector<Node *> &source);
-    NodesBFSIterator(NodesBFSIterator &&other) noexcept;
-    // NOTE Heavy to use.
-    NodesBFSIterator(const NodesBFSIterator &other);
-    Node &operator*();
-    NodesBFSIterator &operator++();
-    Node *operator->();
-    // TODO(Superjomn) current implementation just compare the first
-    // element, need to compare the graph and all the elements in the queue and
-    // set.
-    NodesBFSIterator &operator=(const NodesBFSIterator &other);
-    bool operator==(const NodesBFSIterator &other);
-    bool operator!=(const NodesBFSIterator &other) { return !(*this == other); }
-   private:
-    std::deque<Node *> queue_;
-    std::unordered_set<Node *> visited_;
-  };
-  // DFS iterator on nodes.
-  struct NodesDFSIterator
-      : public std::iterator<std::forward_iterator_tag, Node *> {
-    NodesDFSIterator() = default;
-    NodesDFSIterator(const std::vector<Node *> &source);
-    NodesDFSIterator(NodesDFSIterator &&other) noexcept;
-    NodesDFSIterator(const NodesDFSIterator &other);
-    Node &operator*();
-    NodesDFSIterator &operator++();
-    // TODO(Superjomn) current implementation just compare the first
-    // element, need to compare the graph and all the elements in the queue and
-    // set.
-    NodesDFSIterator &operator=(const NodesDFSIterator &other);
-    bool operator==(const NodesDFSIterator &other);
-    bool operator!=(const NodesDFSIterator &other) { return !(*this == other); }
-    Node *operator->();
-   private:
-    std::stack<Node *> stack_;
-    std::unordered_set<Node *> visited_;
-  };
-  // Topological sorting iterator on nodes.
-  struct NodesTSIterator
-      : public std::iterator<std::forward_iterator_tag, Node *> {
-    NodesTSIterator() = default;
-    NodesTSIterator(const std::vector<Node *> &source);
-    NodesTSIterator(NodesTSIterator &&other)
-        : sorted_(std::move(other.sorted_)), cursor_(other.cursor_) {
-      other.cursor_ = 0;
-    }
-    NodesTSIterator(const NodesTSIterator &other);
-    Node &operator*();
-    NodesTSIterator &operator++();
-    // TODO(Superjomn) current implementation just compare the first
-    // element, need to compare the graph and all the elements in the queue and
-    // set.
-    NodesTSIterator &operator=(const NodesTSIterator &other);
-    bool operator==(const NodesTSIterator &other);
-    bool operator!=(const NodesTSIterator &other) { return !(*this == other); }
-    Node *operator->();
-   private:
-    std::vector<Node *> sorted_;
-    size_t cursor_{0};
-  };
-  explicit GraphTraits(const DataFlowGraph &graph) : graph_(graph) {}
-  // default use BFS to visit the nodes.
-  iterator_range<NodesBFSIterator> nodes() {
-    return iterator_range<NodesBFSIterator>(nodes_bfs_begin(), nodes_bfs_end());
-  }
-  iterator_range<NodesBFSIterator> nodes_in_BFS() {
-    return iterator_range<NodesBFSIterator>(nodes_bfs_begin(), nodes_bfs_end());
-  }
-  iterator_range<NodesDFSIterator> nodes_in_DFS() {
-    return iterator_range<NodesDFSIterator>(nodes_dfs_begin(), nodes_dfs_end());
-  }
-  iterator_range<NodesTSIterator> nodes_in_TS() {
-    return iterator_range<NodesTSIterator>(nodes_ts_begin(), nodes_ts_end());
-  }
- private:
-  NodesBFSIterator nodes_bfs_begin() {
-    return NodesBFSIterator(graph_.inputs());
-  }
-  NodesBFSIterator nodes_bfs_end() { return NodesBFSIterator(); }
-  NodesDFSIterator nodes_dfs_begin() {
-    return NodesDFSIterator(graph_.inputs());
-  }
-  NodesDFSIterator nodes_dfs_end() { return NodesDFSIterator(); }
-  NodesTSIterator nodes_ts_begin() { return NodesTSIterator(graph_.inputs()); }
-  NodesTSIterator nodes_ts_end() { return NodesTSIterator(); }
- private:
-  const DataFlowGraph &graph_;
-};
-// Extract the inputs and outputs of a graph. The inputs and outputs of a
-// sub-graph is the inputs nodes and output nodes that doesn't inside the
-// sub-graph.
-std::pair<std::vector<Node *>, std::vector<Node *>>
-ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph);  // NOLINT
-void FilterRedundantOutputOfSubGraph(DataFlowGraph *graph);
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
--- a/paddle/fluid/inference/analysis/data_flow_graph_tester.cc
+++ b/paddle/fluid/inference/analysis/data_flow_graph_tester.cc
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/fluid/inference/analysis/data_flow_graph.h"
-#include "paddle/fluid/framework/op_proto_maker.h"
-#include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/inference/analysis/ut_helper.h"
-namespace paddle {
-namespace inference {
-namespace analysis {
-TEST(DataFlowGraph, BFS) {
-  auto desc = LoadProgramDesc(FLAGS_inference_model_dir + "/__model__");
-  auto dfg = ProgramDescToDFG(desc);
-  dfg.Build();
-  for (auto* in : dfg.inputs()) {
-    LOG(INFO) << "inputs: " << in->name() << " "
-              << static_cast<int>(in->type());
-  }
-  for (auto* out : dfg.outputs()) {
-    LOG(INFO) << "outputs: " << out->name() << " "
-              << static_cast<int>(out->type());
-  }
-  size_t count = 0;
-  for (auto& node : GraphTraits<DataFlowGraph>(dfg).nodes()) {
-    LOG(INFO) << "visiting " << node.name();
-    ++count;
-  }
-  ASSERT_EQ(count, dfg.nodes.size());
-}
-TEST(DataFlowGraph, DFS) {
-  auto desc = LoadProgramDesc(FLAGS_inference_model_dir + "/__model__");
-  DataFlowGraph dfg;
-  dfg.Build(desc);
-  size_t count = 0;
-  for (auto& node : GraphTraits<DataFlowGraph>(dfg).nodes_in_DFS()) {
-    LOG(INFO) << "visiting " << node.name();
-    ++count;
-  }
-  ASSERT_EQ(count, dfg.nodes.size());
-}
-// Topological sorting.
-/*
- * Graph topology
- * inputs: 0, 1, 2
- * 0 -> 4
- * 0 -> 5
- * 1 -> 6
- * 2 -> 7
- * 4 -> 5
- * 4 -> 7
- * 4 -> 3
- * 7 -> 3
- */
-TEST(DataFlowGraph, TS) {
-  DataFlowGraph graph;
-  for (int i = 0; i < 8; i++) {
-    auto* node = graph.nodes.Create(Node::Type::kValue);
-    node->SetName("node-" + std::to_string(i));
-  }
-  auto add_link = [&](int i, int j) {
-    Node* source = graph.nodes.GetMutable(i);
-    Node* target = graph.nodes.GetMutable(j);
-    target->inlinks.push_back(source);
-    source->outlinks.push_back(target);
-  };
-  add_link(0, 4);
-  add_link(0, 5);
-  add_link(1, 6);
-  add_link(2, 7);
-  add_link(4, 5);
-  add_link(4, 7);
-  add_link(4, 3);
-  add_link(7, 3);
-  graph.Build();
-  auto its = GraphTraits<DataFlowGraph>(graph).nodes_in_TS();
-  std::vector<int> sorted_ids;
-  for (auto it = its.begin(); it != its.end(); ++it) {
-    LOG(INFO) << it->name();
-    sorted_ids.push_back(it->id());
-  }
-  // Assert a occurs prior to b in the sorted_ids.
-  auto assert_positive_sequence_pair = [&](int a, int b) {
-    auto a_offset = std::find(sorted_ids.begin(), sorted_ids.end(), a);
-    auto b_offset = std::find(sorted_ids.begin(), sorted_ids.end(), b);
-    ASSERT_LT(a_offset, b_offset);
-  };
-  assert_positive_sequence_pair(2, 7);
-  assert_positive_sequence_pair(7, 3);
-  assert_positive_sequence_pair(4, 3);
-  assert_positive_sequence_pair(0, 4);
-  assert_positive_sequence_pair(0, 5);
-  assert_positive_sequence_pair(1, 6);
-  assert_positive_sequence_pair(4, 5);
-  assert_positive_sequence_pair(4, 7);
-}
-TEST(DataFlowGraph, Build_ProgramDesc) {
-  auto desc = LoadProgramDesc(FLAGS_inference_model_dir + "/__model__");
-  DataFlowGraph graph;
-  graph.Build(desc);
-  ASSERT_EQ(graph.nodes.size(), 38UL);
-}
-void SetOp(framework::ProgramDesc* prog, const std::string& type,
-           const std::vector<std::string>& inputs,
-           const std::vector<std::string>& outputs) {
-  auto* op = prog->MutableBlock(0)->AppendOp();
-  op->SetType(type);
-  op->SetInput("Xs", inputs);
-  op->SetOutput("Xs", outputs);
-  op->SetAttr(framework::OpProtoAndCheckerMaker::OpRoleAttrName(),
-              static_cast<int>(framework::OpRole::kForward));
-}
-TEST(DataFlowGraph, Build_IR_Graph) {
-  framework::ProgramDesc prog;
-  for (auto& v : std::vector<std::string>({"a", "b", "c", "d", "e", "f"})) {
-    auto* var = prog.MutableBlock(0)->Var(v);
-    var->SetType(framework::proto::VarType::SELECTED_ROWS);
-    if (v == "c") {
-      var->SetPersistable(true);
-    }
-  }
-  SetOp(&prog, "OP0", std::vector<std::string>({"a"}),
-        std::vector<std::string>({"b"}));
-  SetOp(&prog, "OP1", std::vector<std::string>({"a"}),
-        std::vector<std::string>({"c"}));
-  SetOp(&prog, "mul", std::vector<std::string>({"b", "c"}),
-        std::vector<std::string>({"d"}));
-  SetOp(&prog, "elementwise_add", std::vector<std::string>({"d", "e"}),
-        std::vector<std::string>({"f"}));
-  DataFlowGraph graph;
-  framework::ir::Graph ir_graph(prog);
-  graph.Build(ir_graph);
-  ASSERT_EQ(graph.nodes.size(), ir_graph.Nodes().size());
-}
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
--- a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h
+++ b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-   http://www.apache.org/licenses/LICENSE-2.0
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License. */
-/*
- * This file implements the transformation from fluid ProgramDesc to data flow
- * graph.
- */
-#pragma once
-#include <string>
-#include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/inference/analysis/analysis_pass.h"
-#include "paddle/fluid/inference/analysis/data_flow_graph.h"
-namespace paddle {
-namespace inference {
-namespace analysis {
-class DataFlowGraphToFluidPass final : public DataFlowGraphPass {
- public:
-  DataFlowGraphToFluidPass() = default;
-  bool Initialize(Argument *argument) override;
-  bool Finalize() override;
-  void Run(DataFlowGraph *graph) override;
-  std::string repr() const override { return "DFG to fluid"; }
-  std::string description() const override {
-    return "Transform a DFG to a Fluid ProgramDesc";
-  }
-  AnalysisPass *CreateGraphvizDebugerPass() const override;
- protected:
-  // Add a Fluid Op into the ProgramDesc.
-  void AddFluidOp(Node *node);
-  // Add a EngineOp into the ProgramDesc.
-  void AddEngineOp(Node *node);
- private:
-  framework::proto::ProgramDesc *desc_;
-  Argument *argument_;
-};
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
--- a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass_tester.cc
+++ b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass_tester.cc
-//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
-#include <glog/logging.h>
-#include <google/protobuf/text_format.h>
-#include <gtest/gtest.h>
-#include "paddle/fluid/framework/executor.h"
-#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
-#include "paddle/fluid/inference/analysis/ut_helper.h"
-#include "paddle/fluid/inference/io.h"
-namespace paddle {
-namespace inference {
-namespace analysis {
-TEST(DataFlowGraph, Test) {
-  Argument argument(FLAGS_inference_model_dir);
-  FluidToDataFlowGraphPass pass0;
-  DataFlowGraphToFluidPass pass1;
-  ASSERT_TRUE(pass0.Initialize(&argument));
-  ASSERT_TRUE(pass1.Initialize(&argument));
-  pass0.Run(argument.main_dfg.get());
-  pass1.Run(argument.main_dfg.get());
-  pass0.Finalize();
-  pass1.Finalize();
-  LOG(INFO) << argument.main_dfg->nodes.size();
-}
-};  // namespace analysis
-};  // namespace inference
-};  // namespace paddle
--- a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc
+++ b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
-namespace paddle {
-namespace inference {
-namespace analysis {
-int DFG_GraphvizDrawPass::counter_{0};
-void DFG_GraphvizDrawPass::Run(DataFlowGraph *graph) {
-  auto content = Draw(graph);
-  auto dot_path = GenDotPath();
-  std::ofstream file(dot_path);
-  file.write(content.c_str(), content.size());
-  file.close();
-  auto png_path = dot_path.substr(0, dot_path.size() - 4) + ".png";
-  std::string message;
-  VLOG(30) << "draw to " << png_path;
-  ExecShellCommand("dot -Tpng " + dot_path + " -o " + png_path, &message);
-}
-std::string DFG_GraphvizDrawPass::Draw(DataFlowGraph *graph) {
-  Dot dot;
-  // Add nodes
-  for (size_t i = 0; i < graph->nodes.size(); i++) {
-    const Node &node = graph->nodes.Get(i);
-    if (config_.display_deleted_node || !node.deleted()) {
-      dot.AddNode(node.repr(), node.dot_attrs());
-    }
-  }
-  // Add edges
-  for (size_t i = 0; i < graph->nodes.size(); i++) {
-    const Node &node = graph->nodes.Get(i);
-    if (!config_.display_deleted_node && node.deleted()) continue;
-    for (auto &out : node.outlinks) {
-      if (!config_.display_deleted_node && out->deleted()) continue;
-      dot.AddEdge(node.repr(), out->repr(), {});
-    }
-  }
-  return dot.Build();
-}
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
--- a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h
+++ b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-/*
- * This file create an DFG_GraphvizDrawPass which helps to draw a data flow
- * graph's structure using graphviz.
- */
-#pragma once
-#include <fstream>
-#include <string>
-#include "paddle/fluid/inference/analysis/analysis_pass.h"
-#include "paddle/fluid/inference/analysis/dot.h"
-namespace paddle {
-namespace inference {
-namespace analysis {
-/*
- * Output a dot file and write to some place.
- */
-class DFG_GraphvizDrawPass : public DataFlowGraphPass {
- public:
-  struct Config {
-    Config(const std::string &dir, const std::string &id,
-           bool display_deleted_node = false)
-        : dir(dir), id(id), display_deleted_node(display_deleted_node) {}
-    // The directory to store the .dot or .png files.
-    const std::string dir;
-    // The identifier for this dot file.
-    const std::string id;
-    // Whether to display deleted nodes, default false.
-    const bool display_deleted_node;
-  };
-  explicit DFG_GraphvizDrawPass(const Config &config) : config_(config) {}
-  bool Initialize(Argument *argument) override { return true; }
-  void Run(DataFlowGraph *graph) override;
-  bool Finalize() override { return true; }
-  std::string repr() const override { return "DFG graphviz drawer"; }
-  std::string description() const override {
-    return "Debug a DFG by draw with graphviz";
-  }
- protected:
-  // A counter to add a number prefix to the debugger image output so that they
-  // will sort in the triggered order.
-  static int counter_;
-  // Path of the dot file to output.
-  std::string GenDotPath() const {
-    return config_.dir + "/" + std::to_string(counter_++) + "-graph_" +
-           config_.id + ".dot";
-  }
-  virtual std::string Draw(DataFlowGraph *graph);
-  Config config_;
-};
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
--- a/paddle/fluid/inference/analysis/dot_tester.cc
+++ b/paddle/fluid/inference/analysis/dot_tester.cc
@@ -16,7 +16,6 @@
 #include <gtest/gtest.h>
 #include <memory>
-#include "paddle/fluid/inference/analysis/data_flow_graph.h"
 namespace paddle {
 namespace inference {

--- a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc
+++ b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include <glog/logging.h>
-#include <string>
-#include <vector>
-#include "paddle/fluid/inference/analysis/analyzer.h"
-#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
-#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
-namespace paddle {
-namespace inference {
-namespace analysis {
-bool FluidToDataFlowGraphPass::Initialize(Argument *argument) {
-  ANALYSIS_ARGUMENT_CHECK_FIELD(argument);
-  if (argument->origin_program_desc) {
-    LOG(WARNING) << "argument's origin_program_desc is already set, might "
-                    "duplicate called";
-  }
-  if (!argument->fluid_model_program_path) {
-    ANALYSIS_ARGUMENT_CHECK_FIELD(argument->fluid_model_dir);
-    argument->fluid_model_program_path.reset(
-        new std::string(*argument->fluid_model_dir + "/__model__"));
-  }
-  ANALYSIS_ARGUMENT_CHECK_FIELD(argument->fluid_model_program_path);
-  auto program = LoadProgramDesc(*argument->fluid_model_program_path);
-  argument->origin_program_desc.reset(
-      new framework::proto::ProgramDesc(program));
-  if (!argument->main_dfg) {
-    argument->main_dfg.reset(new DataFlowGraph);
-  }
-  desc_ = argument->origin_program_desc.get();
-  return true;
-}
-bool FluidToDataFlowGraphPass::Finalize() { return true; }
-void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
-  PADDLE_ENFORCE(graph);
-  PADDLE_ENFORCE(desc_);
-  graph->Build(*desc_);
-}
-namespace {
-class DFG_DebuggerPass : public DFG_GraphvizDrawPass {
- public:
-  using Config = DFG_GraphvizDrawPass::Config;
-  explicit DFG_DebuggerPass(const Config &config)
-      : DFG_GraphvizDrawPass(config) {}
-  std::string repr() const override { return "fluid-to-dfg-debuger-pass"; }
-  bool Finalize() override { return true; }
-};
-}
-AnalysisPass *FluidToDataFlowGraphPass::CreateGraphvizDebugerPass() const {
-  return new DFG_DebuggerPass(DFG_GraphvizDrawPass::Config(
-      FLAGS_IA_graphviz_log_root, "fluid-to-dfg-debuger"));
-}
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
--- a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h
+++ b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-   http://www.apache.org/licenses/LICENSE-2.0
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License. */
-/*
- * This file implements the transformation from data flow graph to fluid
- * ProgramDesc.
- */
-#pragma once
-#include <string>
-#include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/inference/analysis/analysis_pass.h"
-#include "paddle/fluid/inference/analysis/data_flow_graph.h"
-namespace paddle {
-namespace inference {
-namespace analysis {
-/*
- * Transform a FluidDesc to a SSA.
- */
-class FluidToDataFlowGraphPass final : public DataFlowGraphPass {
- public:
-  FluidToDataFlowGraphPass() = default;
-  bool Initialize(Argument *argument) override;
-  bool Finalize() override;
-  void Run(DataFlowGraph *graph) override;
-  std::string repr() const override { return "fluid-to-data-flow-graph"; }
-  std::string description() const override {
-    return "transform a fluid ProgramDesc to a data flow graph.";
-  }
-  AnalysisPass *CreateGraphvizDebugerPass() const override;
- private:
-  framework::proto::ProgramDesc const *desc_;
-};
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
--- a/paddle/fluid/inference/analysis/fluid_to_ir_pass.h
+++ b/paddle/fluid/inference/analysis/fluid_to_ir_pass.h
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-#include <string>
-#include <vector>
-#include "paddle/fluid/framework/ir/fuse_pass_base.h"
-#include "paddle/fluid/inference/analysis/analysis_pass.h"
-#include "paddle/fluid/inference/analysis/flags.h"
-#include "paddle/fluid/inference/analysis/ir_pass_manager.h"
-namespace paddle {
-namespace inference {
-namespace analysis {
-static const char kFluidToIrPassesAttr[] = "__fluid_to_ir_passes__";
-class FluidToIrPass final : public DataFlowGraphPass {
- public:
-  FluidToIrPass() = default;
-  bool Initialize(Argument *argument) override {
-    ANALYSIS_ARGUMENT_CHECK_FIELD(argument);
-    PADDLE_ENFORCE(argument->Has(kFluidToIrPassesAttr),
-                   "argument need the attr %s", kFluidToIrPassesAttr);
-    argument_ = argument;
-    if (argument->origin_program_desc) {
-      LOG(WARNING) << "argument's origin_program_desc is already set, might "
-                      "duplicate called";
-    }
-    // set fluid model program path
-    if (!argument->fluid_model_program_path) {
-      ANALYSIS_ARGUMENT_CHECK_FIELD(argument->fluid_model_dir);
-      argument->fluid_model_program_path.reset(
-          new std::string(*argument->fluid_model_dir + "/__model__"));
-    }
-    ANALYSIS_ARGUMENT_CHECK_FIELD(argument->fluid_model_program_path);
-    // Load program.
-    auto program = LoadProgramDesc(*argument->fluid_model_program_path);
-    argument->origin_program_desc.reset(
-        new framework::proto::ProgramDesc(program));
-    // Create main data flow graph.
-    if (!argument->main_dfg) {
-      argument->main_dfg.reset(new DataFlowGraph);
-    }
-    argument->Set("ir_program_desc", new ProgramDesc(program));
-    LOG(INFO) << "Loading parameters";
-    // Load parameters to argument if needed.
-    if (argument->fluid_model_dir || (argument->fluid_model_program_path &&
-                                      argument->fluid_model_param_path)) {
-#define SAFE_GET(ATTR) std::string ATTR = argument->ATTR ? *argument->ATTR : "";
-      SAFE_GET(fluid_model_dir);
-      SAFE_GET(fluid_model_program_path);
-      SAFE_GET(fluid_model_param_path);
-#undef SAFE_GET
-      EnableParamModify(fluid_model_dir, fluid_model_program_path,
-                        fluid_model_param_path);
-    }
-    return true;
-  }
-  bool Finalize() override { return true; }
-  void Run(DataFlowGraph *graph) override {
-    // Call all the IR Passes
-    IRPassManager ir_passes(argument_->Get<ProgramDesc>("ir_program_desc"),
-                            nullptr);
-    // Pass the scope from analysis to IR if needed.
-    if (argument_->Has(framework::ir::kParamScopeAttr)) {
-      // Here the address is passed, attention that IR doesn't own the scope, so
-      // the real scope in analysis should live during the IR phase.
-      ir_passes.graph().Set(
-          framework::ir::kParamScopeAttr,
-          new framework::Scope *(&argument_->Get<framework::Scope>(
-              framework::ir::kParamScopeAttr)));
-    }
-    if (FLAGS_IA_enable_ir) {
-      const auto &ir_passes_to_apply =
-          argument_->Get<std::vector<std::string>>(kFluidToIrPassesAttr);
-      ir_passes.Apply(ir_passes_to_apply);
-    }
-    PADDLE_ENFORCE(argument_->main_dfg.get());
-    argument_->main_dfg->Build(ir_passes.graph());
-    // inherit the arguments from ir.
-    if (ir_passes.graph().Has(framework::ir::kFuseStatisAttr)) {
-      argument_->Set(
-          framework::ir::kFuseStatisAttr,
-          new std::unordered_map<std::string, int>(
-              ir_passes.graph().Get<std::unordered_map<std::string, int>>(
-                  framework::ir::kFuseStatisAttr)));
-    }
-  }
-  void EnableParamModify(const std::string &model_dir,
-                         const std::string &prog_file,
-                         const std::string &param_file);
-  std::string repr() const override { return "fluid-to-ir-pass"; }
- private:
-  // Load parameters from a single file or from a directory.
-  bool LoadParams(framework::Scope *scope, const std::string &dir,
-                  const std::string &prog_file, const std::string &param_file);
- private:
-  Argument *argument_{nullptr};
-};
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
--- a/paddle/fluid/inference/analysis/graph_traits.h
+++ b/paddle/fluid/inference/analysis/graph_traits.h
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-/*
- * This file defines the GraphTraits<X> template class that should be specified
- * by classes that want to be iteratable by generic graph iterators.
- *
- * This file also defines the marker class Inverse that is used to iterate over
- * graphs in a graph defined, inverse ordering...
- */
-#pragma once
-#include "paddle/fluid/inference/analysis/helper.h"
-namespace paddle {
-namespace inference {
-namespace analysis {
-/*
- * This class should be specialized by different graph types...
- * That's why the base class is empty.
- */
-template <typename GraphType>
-struct GraphTraits {
-  // using NodesBFSIterator = xxx
-  // NodesBFSIterator nodes_begin();
-  // NodesBFSIterator nodes_end();
-};
-/*
- * Inverse - This class is used as a marker class to tell the graph iterator to
- * iterate in a graph defined Inverse order.
- */
-template <typename GraphType>
-struct Inverse {
-  const GraphType &graph;
-  explicit Inverse(const GraphType &graph) : graph(graph) {}
-};
-/*
- * Provide a partial specialization of GraphTraits so that the inverse of an
- * inverse turns into the original graph.
- */
-template <typename GraphType>
-struct GraphTraits<Inverse<Inverse<GraphType>>> : GraphTraits<GraphType> {};
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
--- a/paddle/fluid/inference/analysis/helper.h
+++ b/paddle/fluid/inference/analysis/helper.h
@@ -26,6 +26,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/port.h"
 namespace paddle {
 namespace inference {
@@ -101,20 +102,20 @@ class OrderedRegistry {
 public:
  T *Register(const std::string &name, T *x) {
+    // Takes ownership of `x` (it is wrapped in a std::unique_ptr) and records
+    // its insertion index under `name`. The enforce below fails when `name`
+    // is already registered. Returns the stored raw pointer.
    PADDLE_ENFORCE(!dic_.count(name), "duplicate key [%s]", name);
-    dic_[name] = data_.size();
+    dic_[name] = elements_.size();
-    data_.emplace_back(std::unique_ptr<T>(x));
+    elements_.emplace_back(std::unique_ptr<T>(x));
-    return data_.back().get();
+    return elements_.back().get();
  }
  T *Lookup(const std::string &name) {
+    // Returns the element registered under `name`, or nullptr when no such
+    // name has been registered. The registry keeps ownership of the element.
    auto it = dic_.find(name);
    if (it == dic_.end()) return nullptr;
-    return data_[it->second].get();
+    return elements_[it->second].get();
  }
 protected:
  std::unordered_map<std::string, int> dic_;
-  std::vector<std::unique_ptr<T>> data_;
+  std::vector<std::unique_ptr<T>> elements_;
 };
 template <typename T>
@@ -124,20 +125,6 @@ T &GetFromScope(const framework::Scope &scope, const std::string &name) {
  return *var->GetMutable<T>();
 }
-static void ExecShellCommand(const std::string &cmd, std::string *message) {
-  char buffer[128];
-  std::shared_ptr<FILE> pipe(popen(cmd.c_str(), "r"), pclose);
-  if (!pipe) {
-    LOG(ERROR) << "error running command: " << cmd;
-    return;
-  }
-  while (!feof(pipe.get())) {
-    if (fgets(buffer, 128, pipe.get()) != nullptr) {
-      *message += buffer;
-    }
-  }
-}
 static framework::proto::ProgramDesc LoadProgramDesc(
    const std::string &model_path) {
  std::ifstream fin(model_path, std::ios::in | std::ios::binary);

--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -18,6 +18,8 @@
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/inference/analysis/argument.h"
+#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
 #include "paddle/fluid/string/pretty_log.h"
 namespace paddle {
@@ -27,21 +29,33 @@ using string::PrettyLogEndl;
 using string::PrettyLog;
 using string::Style;
+// Builds the IR graph from the argument's main program and creates the passes
+// listed in its `ir_analysis_passes` field. Both fields are checked with
+// ARGUMENT_CHECK_FIELD before they are read.
-IRPassManager::IRPassManager(const ProgramDesc &program,
+IRPassManager::IRPassManager(Argument *argument) {
-                             framework::Scope *scope)
+  ARGUMENT_CHECK_FIELD(argument, main_program);
-    : program_(program) {
+  graph_ = std::unique_ptr<Graph>(new Graph(argument->main_program()));
-  graph_.reset(new framework::ir::Graph(program));
+  // Forward the parameter scope to the graph when the argument carries one.
+  // The attribute holds a heap-allocated pointer to the argument's scope, not
+  // a copy of the scope itself.
+  if (argument->Has("scope")) {
-  if (scope)
+    graph_->Set(framework::ir::kParamScopeAttr,
-    graph_->Set(framework::ir::kParamScopeAttr, new framework::Scope *(scope));
+                new framework::Scope *(
+                    const_cast<framework::Scope *>(&argument->scope())));
+  }
+  ARGUMENT_CHECK_FIELD(argument, ir_analysis_passes);
+  CreatePasses(argument, argument->ir_analysis_passes());
 }
-void IRPassManager::Apply(const std::vector<std::string> &passes) {
+void IRPassManager::CreatePasses(Argument *argument,
-  // Apply all the passes
+                                 const std::vector<std::string> &passes) {
  std::string pre_pass;
  int pass_num = 0;
  for (const std::string &pass_name : passes) {
-    PrettyLogEndl(Style::H2(), "--- Running IR pass [%s]", pass_name);
    auto pass = framework::ir::PassRegistry::Instance().Get(pass_name);
+    // Set some pass attributes.
+    if (pass_name == "ir_analysis_pass") {
+      pass->Set("tensorrt_node_teller",
+                new SubgraphDetector::NodeInsideSubgraphTeller(
+                    argument->tensorrt_node_teller()));
+    }
    if (pass_name == "graph_viz_pass") {
      std::string dot_file_path = std::to_string(pass_num) + "_ir_" +
                                  (pre_pass.empty() ? "origin" : pre_pass) +
@@ -49,11 +63,47 @@ void IRPassManager::Apply(const std::vector<std::string> &passes) {
      pass->Set("graph_viz_path", new std::string(std::move(dot_file_path)));
      pass_num++;
    }
-    graph_ = pass->Apply(std::move(graph_));
+    if (pass_name == "tensorrt_subgraph_pass") {
+      PADDLE_ENFORCE(argument->tensorrt_node_teller_valid());
+      pass->SetNotOwned("tensorrt_node_teller",
+                        argument->tensorrt_node_teller_ptr());
+      pass->Set("workspace_size", new int(argument->tensorrt_workspace_size()));
+      pass->Set("max_batch_size", new int(argument->tensorrt_max_batch_size()));
+    }
+    // Only configure and store the pass here; Apply() runs it later.
    pre_pass = pass_name;
+    passes_.emplace_back(std::move(pass));
  }
 }
+// Runs every pass held in passes_, in the order they were created, over
+// `graph` and returns the transformed graph.
+//
+// When no pass has been created the graph is handed back untouched, before
+// the null check below is reached. Ownership of the graph moves in through
+// the argument and back out through the return value.
+std::unique_ptr<Graph> IRPassManager::Apply(std::unique_ptr<Graph> graph) {
+  if (passes_.empty()) {
+    return graph;
+  }
+  PADDLE_ENFORCE(graph.get());
+  // Apply all the passes
+  for (const auto &pass : passes_) {
+    PrettyLogEndl(Style::H2(), "--- Running IR pass [%s]", pass->Type());
+    // Each pass consumes the graph and yields the one the next pass works on.
+    graph = pass->Apply(std::move(graph));
+  }
+  // `graph` is a by-value parameter, so returning it by name already moves
+  // it; an explicit std::move here would be redundant.
+  return graph;
+}
+// Runs the registered "graph_to_program_pass" on `*graph` and returns the
+// resulting program description by value.
+//
+// The pass is handed a local copy of `program` (presumably it fills that copy
+// from the graph -- confirm against the pass implementation), so the caller's
+// ProgramDesc is not passed to the pass. `*graph` is replaced with the graph
+// the pass returns.
+framework::proto::ProgramDesc IRPassManager::AcquireProgram(
+    std::unique_ptr<Graph> *graph, const ProgramDesc &program) const {
+  ProgramDesc program_copy(program);
+  auto to_program_pass =
+      framework::ir::PassRegistry::Instance().Get("graph_to_program_pass");
+  // Not owned: the pass only borrows the copy, which outlives the Apply() call.
+  to_program_pass->SetNotOwned("program", &program_copy);
+  // Hand the graph over to the pass and take back the graph it returns.
+  *graph = to_program_pass->Apply(std::move(*graph));
+  return *program_copy.Proto();
+}
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
--- a/paddle/fluid/inference/analysis/ir_pass_manager.h
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.h
@@ -20,27 +20,38 @@
 * for inference.
 */
+#pragma once
+#include <string>
+#include <vector>
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/ir/pass.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/inference/analysis/argument.h"
 namespace paddle {
 namespace inference {
 namespace analysis {
 using framework::ProgramDesc;
+using framework::ir::Graph;
 class IRPassManager final {
 public:
-  IRPassManager(const ProgramDesc &program, framework::Scope *scope);
+  explicit IRPassManager(Argument *argument);
+  // Runs every pass stored in passes_ over `graph`, in order, and returns the
+  // resulting graph; when passes_ is empty the input is returned untouched.
+  std::unique_ptr<Graph> Apply(std::unique_ptr<Graph> graph);
-  void Apply(const std::vector<std::string> &passes);
+  // Applies "graph_to_program_pass" to `*graph` with a copy of `program`
+  // attached as the pass's "program" attribute, stores the pass output back
+  // into `*graph`, and returns the proto of that copy by value.
+  framework::proto::ProgramDesc AcquireProgram(
+      std::unique_ptr<Graph> *graph, const ProgramDesc &program) const;
+  // Returns *graph_ with no null check.
+  // NOTE(review): Apply() returns its result instead of storing it in graph_;
+  // confirm graph_ is populated elsewhere before this accessor is called.
  framework::ir::Graph &graph() const { return *graph_; }
 private:
-  std::unique_ptr<framework::ir::Graph> graph_;
+  void CreatePasses(Argument *argument, const std::vector<std::string> &passes);
-  ProgramDesc program_;
+  std::unique_ptr<Graph> graph_;
+  // Passes executed by Apply(), in insertion order.
+  std::vector<std::unique_ptr<framework::ir::Pass>> passes_;
 };
 }  // namespace analysis

--- a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
+++ b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt
+# Library targets for this directory, declared through the project's
+# cc_library() helper.
+cc_library(subgraph_detector
+        SRCS subgraph_detector.cc
+        DEPS proto_desc)
+cc_library(tensorrt_subgraph_pass
+        SRCS tensorrt_subgraph_pass.cc
+        DEPS subgraph_detector)
+# Append both targets to the cached analysis_deps list. CACHE INTERNAL
+# overwrites the cache entry, which makes the value visible outside this
+# directory scope.
+set(analysis_deps
+        ${analysis_deps}
+        subgraph_detector
+        tensorrt_subgraph_pass
+        CACHE INTERNAL "")
+# Append the TensorRT subgraph pass to the cached INFER_IR_PASSES list.
+set(INFER_IR_PASSES
+        ${INFER_IR_PASSES}
+        tensorrt_subgraph_pass
+        CACHE INTERNAL "")
--- a/paddle/fluid/inference/analysis/subgraph_splitter.cc
+++ b/paddle/fluid/inference/analysis/subgraph_splitter.cc
--- a/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h
+++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h
--- a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
+++ b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
--- a/paddle/fluid/inference/analysis/model_store_pass_tester.cc
+++ b/paddle/fluid/inference/analysis/model_store_pass_tester.cc
@@ -12,31 +12,24 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/fluid/inference/analysis/model_store_pass.h"
+#pragma once
+#include <paddle/fluid/framework/ir/fuse_pass_base.h>
-#include <gflags/gflags.h>
+#include "paddle/fluid/framework/ir/pass.h"
-#include <gtest/gtest.h>
-#include "paddle/fluid/inference/analysis/analyzer.h"
 namespace paddle {
 namespace inference {
 namespace analysis {
-DEFINE_string(inference_model_dir, "", "Model path");
+class TensorRtSubgraphPass : public framework::ir::FusePassBase {
+ public:
-TEST(DFG_StorePass, test) {
+  // Overrides the base-class ApplyImpl hook: takes ownership of `graph` and
+  // returns a graph.
+  std::unique_ptr<framework::ir::Graph> ApplyImpl(
-  Analyzer analyzer;
+      std::unique_ptr<framework::ir::Graph> graph) const override;
-  Argument argument(FLAGS_inference_model_dir);
-  argument.model_output_store_path.reset(
-      new std::string("./_dfg_store_pass_tmp"));
-  // disable storage in alalyzer
-  FLAGS_IA_output_storage_path = "";
-  analyzer.Run(&argument);
-  ModelStorePass pass;
+ private:
-  pass.Initialize(&argument);
+  // NOTE(review): presumably creates a TensorRT op for node `x` inside
+  // `graph`; inferred from the name only - confirm against the .cc file.
+  void CreateTensorRTOp(framework::ir::Node *x,
-  pass.Run(argument.main_dfg.get());
+                        framework::ir::Graph *graph) const;
-}
+  // NOTE(review): unlike the other members this one is not const, so it
+  // cannot be called on `this` from the const ApplyImpl()/CreateTensorRTOp();
+  // confirm that is intended.
+  void CleanIntermediateOutputs(framework::ir::Node *node);
+};
 }  // namespace analysis
 }  // namespace inference

--- a/paddle/fluid/inference/analysis/model_store_pass.cc
+++ b/paddle/fluid/inference/analysis/model_store_pass.cc
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include <stdio.h>
-#include <stdlib.h>
-#include <string>
-#include "paddle/fluid/inference/analysis/analyzer.h"
-#include "paddle/fluid/inference/analysis/argument.h"
-#include "paddle/fluid/inference/analysis/model_store_pass.h"
-namespace paddle {
-namespace inference {
-namespace analysis {
-void ModelStorePass::Run(DataFlowGraph *x) {
-  if (!argument_->fluid_model_param_path) {
-    PADDLE_ENFORCE_NOT_NULL(argument_->fluid_model_dir);
-    argument_->fluid_model_param_path.reset(
-        new std::string(*argument_->fluid_model_dir + "param"));
-  }
-  PADDLE_ENFORCE_NOT_NULL(argument_->model_output_store_path);
-  // Directly copy param file to destination.
-  std::stringstream ss;
-  // NOTE these commands only works on linux.
-  ss << "mkdir -p " << *argument_->model_output_store_path;
-  VLOG(30) << "run command: " << ss.str();
-  PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0);
-  ss.str("");
-  ss << "cp " << *argument_->fluid_model_dir << "/*"
-     << " " << *argument_->model_output_store_path;
-  VLOG(30) << "run command: " << ss.str();
-  PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0);
-  // Store program
-  PADDLE_ENFORCE_NOT_NULL(argument_->transformed_program_desc,
-                          "program desc is not transformed, should call "
-                          "DataFlowGraphToFluidPass first.");
-  VLOG(30) << "store analyzed program to "
-           << *argument_->model_output_store_path;
-  const std::string program_output_path =
-      *argument_->model_output_store_path + "/__model__";
-  std::ofstream file(program_output_path, std::ios::binary);
-  PADDLE_ENFORCE(file.is_open(), "failed to open %s to write.",
-                 program_output_path);
-  const std::string serialized_message =
-      argument_->transformed_program_desc->SerializeAsString();
-  file.write(serialized_message.c_str(), serialized_message.size());
-}
-bool ModelStorePass::Finalize() { return true; }
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
--- a/paddle/fluid/inference/analysis/node.cc
+++ b/paddle/fluid/inference/analysis/node.cc
--- a/paddle/fluid/inference/analysis/node.h
+++ b/paddle/fluid/inference/analysis/node.h
--- a/paddle/fluid/inference/analysis/pass_manager.cc
+++ b/paddle/fluid/inference/analysis/pass_manager.cc
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/fluid/inference/analysis/pass_manager.h"
-#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
-#include "paddle/fluid/string/pretty_log.h"
-namespace paddle {
-namespace inference {
-namespace analysis {
-bool PassManager::Initialize(Argument* argument) {
-  argument_ = argument;
-  for (auto& pass : data_) {
-    VLOG(30) << "Initializing pass [" << pass->repr() << "]";
-    if (!pass->Initialize(argument)) {
-      LOG(ERROR) << "Failed to initialize pass [" << pass->repr() << "]";
-      return false;
-    }
-  }
-  return true;
-}
-void DfgPassManager::RunAll() {
-  PADDLE_ENFORCE(argument_);
-  VLOG(30) << "Total " << data_.size() << " Analysys passes";
-  for (auto& pass : data_) {
-    string::PrettyLogEndl(string::Style::H1(), "* Running Analysis pass [%s]",
-                          pass->repr());
-    pass->Run(argument_->main_dfg.get());
-  }
-}
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
--- a/paddle/fluid/inference/analysis/pass_manager.h
+++ b/paddle/fluid/inference/analysis/pass_manager.h
--- a/paddle/fluid/inference/analysis/pass_manager_tester.cc
+++ b/paddle/fluid/inference/analysis/pass_manager_tester.cc
--- a/paddle/fluid/inference/analysis/passes/CMakeLists.txt
+++ b/paddle/fluid/inference/analysis/passes/CMakeLists.txt
+# Library targets for this directory, declared through the project's
+# cc_library() helper. analysis_passes depends on the two pass libraries
+# declared just before it.
+cc_library(ir_graph_build_pass
+        SRCS ir_graph_build_pass.cc
+        DEPS analysis_pass argument ir_pass_manager)
+cc_library(ir_analysis_pass
+        SRCS ir_analysis_pass.cc
+        DEPS analysis_pass argument ir_pass_manager)
+cc_library(analysis_passes
+        SRCS passes.cc
+        DEPS ir_graph_build_pass ir_analysis_pass)
+# Append the three targets to the cached analysis_deps list. CACHE INTERNAL
+# overwrites the cache entry, which makes the value visible outside this
+# directory scope.
+set(analysis_deps
+        ${analysis_deps}
+        ir_graph_build_pass ir_analysis_pass analysis_passes
+        CACHE INTERNAL "")
--- a/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc
--- a/paddle/fluid/inference/analysis/model_store_pass.h
+++ b/paddle/fluid/inference/analysis/model_store_pass.h
--- a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
--- a/paddle/fluid/inference/analysis/node_attr_flags.h
+++ b/paddle/fluid/inference/analysis/node_attr_flags.h
--- a/paddle/fluid/inference/analysis/fluid_to_ir_pass.cc
+++ b/paddle/fluid/inference/analysis/fluid_to_ir_pass.cc
--- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h
+++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h
--- a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc
+++ b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc
--- a/paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
+++ b/paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc
--- a/paddle/fluid/inference/analysis/subgraph_splitter.h
+++ b/paddle/fluid/inference/analysis/subgraph_splitter.h
--- a/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc
+++ b/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc
--- a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc
+++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc
--- a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass_tester.cc
+++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass_tester.cc
--- a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
--- a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h
+++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h
--- a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc
+++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc
--- a/paddle/fluid/inference/analysis/ut_helper.h
+++ b/paddle/fluid/inference/analysis/ut_helper.h
--- a/paddle/fluid/inference/api/CMakeLists.txt
+++ b/paddle/fluid/inference/api/CMakeLists.txt
--- a/paddle/fluid/inference/api/README.md
+++ b/paddle/fluid/inference/api/README.md
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
--- a/paddle/fluid/inference/api/analysis_predictor_tester.cc
+++ b/paddle/fluid/inference/api/analysis_predictor_tester.cc
--- a/paddle/fluid/inference/api/api.cc
+++ b/paddle/fluid/inference/api/api.cc
--- a/paddle/fluid/inference/api/api_anakin_engine.h
+++ b/paddle/fluid/inference/api/api_anakin_engine.h
--- a/paddle/fluid/inference/api/api_impl_tester.cc
+++ b/paddle/fluid/inference/api/api_impl_tester.cc
--- a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
+++ b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
--- a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc
+++ b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc
--- a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
+++ b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
--- a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
+++ b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
--- a/paddle/fluid/inference/api/demo_ci/vis_demo.cc
+++ b/paddle/fluid/inference/api/demo_ci/vis_demo.cc
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
--- a/paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc
--- a/paddle/fluid/inference/api/helper.h
+++ b/paddle/fluid/inference/api/helper.h
--- a/paddle/fluid/inference/analysis/analyzer_main.cc
+++ b/paddle/fluid/inference/analysis/analyzer_main.cc
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
--- a/paddle/fluid/inference/api/paddle_api.h
+++ b/paddle/fluid/inference/api/paddle_api.h
--- a/paddle/fluid/inference/api/paddle_inference_api.h
+++ b/paddle/fluid/inference/api/paddle_inference_api.h
--- a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h
+++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
--- a/paddle/fluid/inference/tensorrt/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/CMakeLists.txt
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
--- a/paddle/fluid/inference/tensorrt/convert/activation_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/activation_op.cc
--- a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
--- a/paddle/fluid/inference/tensorrt/convert/concat_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/concat_op.cc
--- a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
--- a/paddle/fluid/inference/tensorrt/convert/dropout_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/dropout_op.cc
--- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
--- a/paddle/fluid/inference/tensorrt/convert/fc_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/fc_op.cc
--- a/paddle/fluid/inference/tensorrt/convert/mul_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/mul_op.cc
--- a/paddle/fluid/inference/tensorrt/convert/pad_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/pad_op.cc
--- a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
--- a/paddle/fluid/inference/tensorrt/convert/softmax_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/softmax_op.cc
--- a/paddle/fluid/inference/tensorrt/convert/split_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/split_op.cc
--- a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc
+++ b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
--- a/paddle/fluid/inference/tensorrt/helper.h
+++ b/paddle/fluid/inference/tensorrt/helper.h
--- a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt
--- a/paddle/fluid/inference/tensorrt/plugin/serialize.h
+++ b/paddle/fluid/inference/tensorrt/plugin/serialize.h
--- a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
--- a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h
--- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin.cc
+++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin.cc
--- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin.h
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
--- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
--- a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
--- a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
--- a/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
--- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
--- a/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
--- a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
--- a/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
--- a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
--- a/paddle/fluid/inference/tests/api/config_printer.h
+++ b/paddle/fluid/inference/tests/api/config_printer.h
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
--- a/paddle/fluid/inference/tests/api/trt_models_tester.cc
+++ b/paddle/fluid/inference/tests/api/trt_models_tester.cc
--- a/paddle/fluid/memory/malloc.cc
+++ b/paddle/fluid/memory/malloc.cc
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
--- a/paddle/fluid/operators/auc_op.cc
+++ b/paddle/fluid/operators/auc_op.cc
--- a/paddle/fluid/operators/conv_cudnn_op.cu.cc
+++ b/paddle/fluid/operators/conv_cudnn_op.cu.cc
--- a/paddle/fluid/operators/detection/CMakeLists.txt
+++ b/paddle/fluid/operators/detection/CMakeLists.txt
--- a/paddle/fluid/operators/detection/density_prior_box_op.cc
+++ b/paddle/fluid/operators/detection/density_prior_box_op.cc
--- a/paddle/fluid/operators/detection/density_prior_box_op.h
+++ b/paddle/fluid/operators/detection/density_prior_box_op.h
--- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cu
+++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cu
--- a/paddle/fluid/operators/elementwise_op_function.h
+++ b/paddle/fluid/operators/elementwise_op_function.h
--- a/paddle/fluid/operators/fc_op.cc
+++ b/paddle/fluid/operators/fc_op.cc
--- a/paddle/fluid/operators/gather.cu.h
+++ b/paddle/fluid/operators/gather.cu.h
--- a/paddle/fluid/operators/gather.h
+++ b/paddle/fluid/operators/gather.h
--- a/paddle/fluid/operators/gather_op.cc
+++ b/paddle/fluid/operators/gather_op.cc
--- a/paddle/fluid/operators/grid_sampler_op.h
+++ b/paddle/fluid/operators/grid_sampler_op.h
--- a/paddle/fluid/operators/hash_op.cc
+++ b/paddle/fluid/operators/hash_op.cc
--- a/paddle/fluid/operators/load_op.cc
+++ b/paddle/fluid/operators/load_op.cc
--- a/paddle/fluid/operators/lookup_sparse_table_op.cc
+++ b/paddle/fluid/operators/lookup_sparse_table_op.cc
--- a/paddle/fluid/operators/lrn_op.cc
+++ b/paddle/fluid/operators/lrn_op.cc
--- a/paddle/fluid/operators/lrn_op.h
+++ b/paddle/fluid/operators/lrn_op.h
--- a/paddle/fluid/operators/math/CMakeLists.txt
+++ b/paddle/fluid/operators/math/CMakeLists.txt
--- a/paddle/fluid/operators/math/blas.h
+++ b/paddle/fluid/operators/math/blas.h
--- a/paddle/fluid/operators/math/blas_impl.h
+++ b/paddle/fluid/operators/math/blas_impl.h
--- a/paddle/fluid/operators/math/jit_code.cc
+++ b/paddle/fluid/operators/math/jit_code.cc
--- a/paddle/fluid/operators/math/jit_code.h
+++ b/paddle/fluid/operators/math/jit_code.h
--- a/paddle/fluid/operators/math/jit_kernel.h
+++ b/paddle/fluid/operators/math/jit_kernel.h
--- a/paddle/fluid/operators/math/jit_kernel_blas.cc
+++ b/paddle/fluid/operators/math/jit_kernel_blas.cc
--- a/paddle/fluid/operators/math/jit_kernel_exp.cc
+++ b/paddle/fluid/operators/math/jit_kernel_exp.cc
--- a/paddle/fluid/operators/math/jit_kernel_rnn.cc
+++ b/paddle/fluid/operators/math/jit_kernel_rnn.cc
--- a/paddle/fluid/operators/math/jit_kernel_test.cc
+++ b/paddle/fluid/operators/math/jit_kernel_test.cc
--- a/paddle/fluid/operators/math/selected_rows_functor.cc
+++ b/paddle/fluid/operators/math/selected_rows_functor.cc
--- a/paddle/fluid/operators/math/sequence_pooling_test.cc
+++ b/paddle/fluid/operators/math/sequence_pooling_test.cc
--- a/paddle/fluid/operators/math/softmax.cc
+++ b/paddle/fluid/operators/math/softmax.cc
--- a/paddle/fluid/operators/math/softmax.cu
+++ b/paddle/fluid/operators/math/softmax.cu
--- a/paddle/fluid/operators/math/softmax.h
+++ b/paddle/fluid/operators/math/softmax.h
--- a/paddle/fluid/operators/math/softmax_impl.h
+++ b/paddle/fluid/operators/math/softmax_impl.h
--- a/paddle/fluid/operators/merge_ids_op.h
+++ b/paddle/fluid/operators/merge_ids_op.h
--- a/paddle/fluid/operators/mul_op.cc
+++ b/paddle/fluid/operators/mul_op.cc
--- a/paddle/fluid/operators/nce_op.cc
+++ b/paddle/fluid/operators/nce_op.cc
--- a/paddle/fluid/operators/pad_constant_like_op.cc
+++ b/paddle/fluid/operators/pad_constant_like_op.cc
--- a/paddle/fluid/operators/reduce_max_op.cu
+++ b/paddle/fluid/operators/reduce_max_op.cu
--- a/paddle/fluid/operators/reduce_max_op.part.cu
+++ b/paddle/fluid/operators/reduce_max_op.part.cu
--- a/paddle/fluid/operators/reduce_mean_op.cu
+++ b/paddle/fluid/operators/reduce_mean_op.cu
--- a/paddle/fluid/operators/reduce_mean_op.part.cu
+++ b/paddle/fluid/operators/reduce_mean_op.part.cu
--- a/paddle/fluid/operators/reduce_min_op.cu
+++ b/paddle/fluid/operators/reduce_min_op.cu
--- a/paddle/fluid/operators/reduce_min_op.part.cu
+++ b/paddle/fluid/operators/reduce_min_op.part.cu
--- a/paddle/fluid/operators/reduce_prod_op.cu
+++ b/paddle/fluid/operators/reduce_prod_op.cu
--- a/paddle/fluid/operators/reduce_prod_op.part.cu
+++ b/paddle/fluid/operators/reduce_prod_op.part.cu
--- a/paddle/fluid/operators/reduce_sum_op.cu
+++ b/paddle/fluid/operators/reduce_sum_op.cu
--- a/paddle/fluid/operators/reduce_sum_op.part.cu
+++ b/paddle/fluid/operators/reduce_sum_op.part.cu
--- a/paddle/fluid/operators/ref_by_trainer_id_op.h
+++ b/paddle/fluid/operators/ref_by_trainer_id_op.h
--- a/paddle/fluid/operators/roi_pool_op.cc
+++ b/paddle/fluid/operators/roi_pool_op.cc
--- a/paddle/fluid/operators/scatter.cu.h
+++ b/paddle/fluid/operators/scatter.cu.h
--- a/paddle/fluid/operators/scatter.h
+++ b/paddle/fluid/operators/scatter.h
--- a/paddle/fluid/operators/sgd_op.h
+++ b/paddle/fluid/operators/sgd_op.h
--- a/paddle/fluid/operators/softmax_op.h
+++ b/paddle/fluid/operators/softmax_op.h
--- a/paddle/fluid/operators/softmax_with_cross_entropy_op.h
+++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.h
--- a/paddle/fluid/operators/split_ids_op.h
+++ b/paddle/fluid/operators/split_ids_op.h
--- a/paddle/fluid/operators/stack_op.cc
+++ b/paddle/fluid/operators/stack_op.cc
--- a/paddle/fluid/operators/stack_op.cu
+++ b/paddle/fluid/operators/stack_op.cu
--- a/paddle/fluid/operators/unpool_op.cc
+++ b/paddle/fluid/operators/unpool_op.cc
--- a/paddle/fluid/platform/dynload/mklml.h
+++ b/paddle/fluid/platform/dynload/mklml.h
--- a/paddle/fluid/platform/init.cc
+++ b/paddle/fluid/platform/init.cc
--- a/paddle/fluid/platform/nccl_helper.h
+++ b/paddle/fluid/platform/nccl_helper.h
--- a/paddle/fluid/platform/port.h
+++ b/paddle/fluid/platform/port.h
--- a/paddle/fluid/platform/variant.h
+++ b/paddle/fluid/platform/variant.h
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
--- a/paddle/fluid/string/printf.h
+++ b/paddle/fluid/string/printf.h
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
--- a/python/paddle/fluid/contrib/inferencer.py
+++ b/python/paddle/fluid/contrib/inferencer.py
--- a/python/paddle/fluid/contrib/trainer.py
+++ b/python/paddle/fluid/contrib/trainer.py
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
--- a/python/paddle/fluid/layers/ops.py
+++ b/python/paddle/fluid/layers/ops.py
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
--- a/python/paddle/fluid/tests/test_detection.py
+++ b/python/paddle/fluid/tests/test_detection.py
--- a/python/paddle/fluid/tests/unittests/dist_save_load.py
+++ b/python/paddle/fluid/tests/unittests/dist_save_load.py
--- a/python/paddle/fluid/tests/unittests/test_density_prior_box_op.py
+++ b/python/paddle/fluid/tests/unittests/test_density_prior_box_op.py
--- a/python/paddle/fluid/tests/unittests/test_dist_base.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_base.py
--- a/python/paddle/fluid/tests/unittests/test_dist_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_save_load.py
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
--- a/python/paddle/fluid/tests/unittests/test_lookup_sparse_table_op.py
+++ b/python/paddle/fluid/tests/unittests/test_lookup_sparse_table_op.py
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py
--- a/python/paddle/fluid/tests/unittests/test_pass_builder.py
+++ b/python/paddle/fluid/tests/unittests/test_pass_builder.py
--- a/python/setup.py.in
+++ b/python/setup.py.in