diff --git a/CMakeLists.txt b/CMakeLists.txt
index df00e977ebb547980e69ee421779c57717d771a9..6aa2e1715b92d73aa4e5e97d5e52ffac51451d80 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -127,6 +127,9 @@ set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
 set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING
   "A path setting fluid shared and static libraries")
 
+set(FLUID_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_inference_install_dir" CACHE STRING
+  "A path setting fluid inference shared and static libraries")
+
 if (WITH_C_API AND WITH_PYTHON)
   message(WARNING "It is suggest not embedded a python interpreter in Paddle "
     "when using C-API. It will give an unpredictable behavior when using a "
diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
index a3e682e54ac496e37ed4a33a7b30d9fdca381d9d..67cca09b64c1ed7a503a886e78347d786eae0de7 100644
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -150,16 +150,16 @@ if (WITH_ANAKIN AND WITH_MKL)
         SRCS
         ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/libinference_anakin_api* # compiled anakin api
         ${ANAKIN_INSTALL_DIR} # anakin release
-        DSTS ${dst_dir}/inference/anakin ${FLUID_INSTALL_DIR}/third_party/install/anakin)
+        DSTS ${FLUID_INSTALL_DIR}/third_party/install/anakin ${FLUID_INSTALL_DIR}/third_party/install/anakin)
   list(APPEND inference_deps anakin_inference_lib)
 endif()
 
 set(module "inference")
 copy(inference_lib DEPS ${inference_deps}
   SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*
-       ${src_dir}/${module}/api/paddle_inference_api.h ${src_dir}/${module}/api/demo_ci
+       ${src_dir}/${module}/api/paddle_inference_api.h
        ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h
-  DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
+  DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
 )
 
 set(module "platform")
@@ -188,18 +188,38 @@ copy(cmake_cache
 # This command generates a complete fluid library for both train and inference
 add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
 
+# The following commands generate an inference-only fluid library.
+# third_party, version.txt and CMakeCache.txt keep the same relative positions as in ${FLUID_INSTALL_DIR}.
+copy(third_party DEPS fluid_lib_dist
+  SRCS ${FLUID_INSTALL_DIR}/third_party ${FLUID_INSTALL_DIR}/CMakeCache.txt
+  DSTS ${FLUID_INFERENCE_INSTALL_DIR} ${FLUID_INFERENCE_INSTALL_DIR}
+)
+
+# The inference-only library needs only libpaddle_fluid.so/a and paddle_inference_api.h.
+copy(inference_api_lib DEPS fluid_lib_dist
+  SRCS ${FLUID_INSTALL_DIR}/paddle/fluid/inference/libpaddle_fluid.*
+       ${FLUID_INSTALL_DIR}/paddle/fluid/inference/paddle_inference_api.h
+  DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include
+)
+
+add_custom_target(inference_lib_dist DEPENDS third_party inference_api_lib)
+
 # paddle fluid version
-execute_process(
-  COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1
-  WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
-  OUTPUT_VARIABLE PADDLE_GIT_COMMIT)
-set(version_file ${FLUID_INSTALL_DIR}/version.txt)
-file(WRITE ${version_file}
-  "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
-  "WITH_MKL: ${WITH_MKL}\n"
-  "WITH_GPU: ${WITH_GPU}\n")
-if(WITH_GPU)
-  file(APPEND ${version_file}
-    "CUDA version: ${CUDA_VERSION}\n"
-    "CUDNN version: v${CUDNN_MAJOR_VERSION}\n")
-endif()
+function(version version_file)
+  execute_process(
+    COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1
+    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
+    OUTPUT_VARIABLE PADDLE_GIT_COMMIT)
+  file(WRITE ${version_file}
+    "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
+    "WITH_MKL: ${WITH_MKL}\n"
+    "WITH_MKLDNN: ${WITH_MKLDNN}\n"
+    "WITH_GPU: ${WITH_GPU}\n")
+  if(WITH_GPU)
+    file(APPEND ${version_file}
+      "CUDA version: ${CUDA_VERSION}\n"
+      "CUDNN version: v${CUDNN_MAJOR_VERSION}\n")
+  endif()
+endfunction()
+version(${FLUID_INSTALL_DIR}/version.txt)
+version(${FLUID_INFERENCE_INSTALL_DIR}/version.txt)
diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
index ec8471ef960a2fc44af23c52be09cd678fab3f70..03f0f726eb61c2619c7719a865383090f86b5b7f 100644
--- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
+++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt
@@ -77,7 +77,7 @@ endif(NOT WIN32)
 link_directories("${PADDLE_LIB}/third_party/install/protobuf/lib")
 link_directories("${PADDLE_LIB}/third_party/install/glog/lib")
 link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
-link_directories("${PADDLE_LIB}/paddle/fluid/inference")
+link_directories("${PADDLE_LIB}/paddle/lib")
 
 add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
 
@@ -97,10 +97,10 @@ endif()
 # Note: libpaddle_inference_api.so/a must put before libpaddle_fluid.so/a
 if(WITH_STATIC_LIB)
   set(DEPS
-      ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
+      ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
 else()
   set(DEPS
-      ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
+      ${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
 endif()
 
 if (NOT WIN32)
diff --git a/paddle/fluid/inference/api/demo_ci/run.sh b/paddle/fluid/inference/api/demo_ci/run.sh
index 65c95f0834a9356fc14faed8342f5d1e474edf8f..67994aad70a40c0e0c8a311914d4ea40b96eaf1e 100755
--- a/paddle/fluid/inference/api/demo_ci/run.sh
+++ b/paddle/fluid/inference/api/demo_ci/run.sh
@@ -5,12 +5,13 @@ TEST_GPU_CPU=$3 # test both GPU/CPU mode or only CPU mode
 DATA_DIR=$4 # dataset
 TENSORRT_INCLUDE_DIR=$5 # TensorRT header file dir, defalut to /usr/local/TensorRT/include
 TENSORRT_LIB_DIR=$6 # TensorRT lib file dir, default to /usr/local/TensorRT/lib
+inference_install_dir=${PADDLE_ROOT}/build/fluid_inference_install_dir
 
 cd `dirname $0`
 current_dir=`pwd`
 if [ $2 == ON ]; then
   # You can export yourself if move the install path
-  MKL_LIB=${PADDLE_ROOT}/build/fluid_install_dir/third_party/install/mklml/lib
+  MKL_LIB=${inference_install_dir}/third_party/install/mklml/lib
   export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${MKL_LIB}
 fi
 if [ $3 == ON ]; then
@@ -55,7 +56,7 @@ cd build
 for WITH_STATIC_LIB in ON OFF; do
   # -----simple_on_word2vec-----
   rm -rf *
-  cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \
+  cmake .. -DPADDLE_LIB=${inference_install_dir} \
     -DWITH_MKL=$TURN_ON_MKL \
     -DDEMO_NAME=simple_on_word2vec \
     -DWITH_GPU=$TEST_GPU_CPU \
@@ -75,7 +76,7 @@ for WITH_STATIC_LIB in ON OFF; do
   fi
   # ---------vis_demo---------
   rm -rf *
-  cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \
+  cmake .. -DPADDLE_LIB=${inference_install_dir} \
     -DWITH_MKL=$TURN_ON_MKL \
     -DDEMO_NAME=vis_demo \
     -DWITH_GPU=$TEST_GPU_CPU \
@@ -98,7 +99,7 @@ for WITH_STATIC_LIB in ON OFF; do
   # --------tensorrt mobilenet------
   if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then
     rm -rf *
-    cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \
+    cmake .. -DPADDLE_LIB=${inference_install_dir} \
       -DWITH_MKL=$TURN_ON_MKL \
       -DDEMO_NAME=trt_mobilenet_demo \
       -DWITH_GPU=$TEST_GPU_CPU \
diff --git a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
index 8058d7e881025b1d806efe187d4523adadff367d..5446fd4d4256c10442a53ea09a447cf308cbd681 100644
--- a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
+++ b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc
@@ -23,7 +23,7 @@ limitations under the License. */
 
 #include <memory>
 #include <thread>  //NOLINT
-#include "paddle/fluid/inference/paddle_inference_api.h"
+#include "paddle/include/paddle_inference_api.h"
 
 DEFINE_string(dirname, "", "Directory of the inference model.");
 DEFINE_bool(use_gpu, false, "Whether use gpu.");
@@ -42,8 +42,7 @@ void Main(bool use_gpu) {
   config.use_gpu = use_gpu;
   config.fraction_of_gpu_memory = 0.15;
   config.device = 0;
-  auto predictor =
-      CreatePaddlePredictor(config);
+  auto predictor = CreatePaddlePredictor(config);
 
   for (int batch_id = 0; batch_id < 3; batch_id++) {
     //# 2. Prepare input.
@@ -85,8 +84,7 @@ void MainThreads(int num_threads, bool use_gpu) {
   config.use_gpu = use_gpu;
   config.fraction_of_gpu_memory = 0.15;
   config.device = 0;
-  auto main_predictor =
-      CreatePaddlePredictor(config);
+  auto main_predictor = CreatePaddlePredictor(config);
 
   std::vector<std::thread> threads;
   for (int tid = 0; tid < num_threads; ++tid) {
diff --git a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
index ffb12b5871f088f15e43a1b0ff7e2a8b2f5fd079..4a8404f21c6ec6a1647e964ac3538b4b49151009 100644
--- a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
+++ b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
@@ -18,7 +18,7 @@ limitations under the License. */
 
 #include <gflags/gflags.h>
 #include <glog/logging.h>  // use glog instead of CHECK to avoid importing other paddle header files.
-#include "paddle/fluid/inference/demo_ci/utils.h"
+#include "utils.h"  // NOLINT
 
 DECLARE_double(fraction_of_gpu_memory_to_use);
 DEFINE_string(modeldir, "", "Directory of the inference model.");
diff --git a/paddle/fluid/inference/api/demo_ci/utils.h b/paddle/fluid/inference/api/demo_ci/utils.h
index 4792c97fe7d0a3f9c904774ad4a8e580cefcf237..d70c6aea791219a40c3164b51499f9d5e562be71 100644
--- a/paddle/fluid/inference/api/demo_ci/utils.h
+++ b/paddle/fluid/inference/api/demo_ci/utils.h
@@ -18,7 +18,7 @@
 #include <iostream>
 #include <string>
 #include <vector>
-#include "paddle/fluid/inference/paddle_inference_api.h"
+#include "paddle/include/paddle_inference_api.h"
 
 namespace paddle {
 namespace demo {
diff --git a/paddle/fluid/inference/api/demo_ci/vis_demo.cc b/paddle/fluid/inference/api/demo_ci/vis_demo.cc
index db61786e2fefda29256d84b5357028ec9c39b014..8d546e3e9c740c10bcf2984e073c956e3612625c 100644
--- a/paddle/fluid/inference/api/demo_ci/vis_demo.cc
+++ b/paddle/fluid/inference/api/demo_ci/vis_demo.cc
@@ -18,7 +18,7 @@ limitations under the License. */
 
 #include <gflags/gflags.h>
 #include <glog/logging.h>  // use glog instead of CHECK to avoid importing other paddle header files.
-#include "paddle/fluid/inference/demo_ci/utils.h"
+#include "utils.h"  // NOLINT
 
 #ifdef PADDLE_WITH_CUDA
 DECLARE_double(fraction_of_gpu_memory_to_use);
@@ -34,12 +34,13 @@ DEFINE_bool(use_gpu, false, "Whether use gpu.");
 
 namespace paddle {
 namespace demo {
+using contrib::AnalysisConfig;
 /*
- * Use the native fluid engine to inference the demo.
+ * Use the native and analysis fluid engines to run inference on the demo.
  */
 void Main(bool use_gpu) {
-  std::unique_ptr<PaddlePredictor> predictor;
-  NativeConfig config;
+  std::unique_ptr<PaddlePredictor> predictor, analysis_predictor;
+  AnalysisConfig config;
   config.param_file = FLAGS_modeldir + "/__params__";
   config.prog_file = FLAGS_modeldir + "/__model__";
   config.use_gpu = use_gpu;
@@ -49,8 +50,8 @@ void Main(bool use_gpu) {
   }
 
   VLOG(3) << "init predictor";
-  predictor =
-      CreatePaddlePredictor(config);
+  predictor = CreatePaddlePredictor(config);
+  analysis_predictor = CreatePaddlePredictor(config);
 
   VLOG(3) << "begin to process data";
   // Just a single batch of data.
@@ -68,7 +69,7 @@ void Main(bool use_gpu) {
   input.dtype = PaddleDType::FLOAT32;
 
   VLOG(3) << "run executor";
-  std::vector<PaddleTensor> output;
+  std::vector<PaddleTensor> output, analysis_output;
   predictor->Run({input}, &output, 1);
 
   VLOG(3) << "output.size " << output.size();
@@ -77,6 +78,10 @@ void Main(bool use_gpu) {
 
   // compare with reference result
   CheckOutput(FLAGS_refer, tensor);
+
+  // the analysis_output differs slightly from the native output;
+  // TODO(luotao): add CheckOutput for analysis_output later.
+  analysis_predictor->Run({input}, &analysis_output, 1);
 }
 
 }  // namespace demo
diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
index 5b6c922f95cf6d2d0683a1e9328463fe21f6bc38..6399476680c0af83a6d26aea952c58543bdce9ae 100644
--- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
@@ -311,8 +311,7 @@ TEST(Analyzer_rnn1, ZeroCopy) {
   auto predictor = CreatePaddlePredictor(config);
 
   config.use_feed_fetch_ops = true;
-  auto native_predictor =
-      CreatePaddlePredictor(config);
+  auto native_predictor = CreatePaddlePredictor(config);
 
   config.use_feed_fetch_ops = true;  // the analysis predictor needs feed/fetch.
   auto analysis_predictor = CreatePaddlePredictor(config);
diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index 04e338653d6946041d83891676fe844e1de3ab10..62c2dac02bf6f83b7bf3c1ba307229b26df96a24 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -79,8 +79,7 @@ std::unique_ptr<PaddlePredictor> CreateTestPredictor(
   if (use_analysis) {
     return CreatePaddlePredictor(config);
   } else {
-    return CreatePaddlePredictor(
-        config);
+    return CreatePaddlePredictor(config);
   }
 }
 
diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index 87b9e7d5a21d18d63f1300c5edf272eed5b174c6..85493c10549c290330ed09b9f28accb7a980de6a 100755
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -661,6 +661,7 @@ function gen_fluid_lib() {
 EOF
       cmake .. -DWITH_DISTRIBUTE=OFF
       make -j `nproc` fluid_lib_dist
+      make -j `nproc` inference_lib_dist
     fi
 }
 
@@ -674,6 +675,8 @@ EOF
       cd ${PADDLE_ROOT}/build
       cp -r fluid_install_dir fluid
       tar -czf fluid.tgz fluid
+      cp -r fluid_inference_install_dir fluid_inference
+      tar -czf fluid_inference.tgz fluid_inference
     fi
 }
 
@@ -685,7 +688,9 @@ function test_fluid_lib() {
     ========================================
 EOF
     cd ${PADDLE_ROOT}/paddle/fluid/inference/api/demo_ci
-    ./run.sh ${PADDLE_ROOT} ${WITH_MKL:-ON} ${WITH_GPU:-OFF} ${INFERENCE_DEMO_INSTALL_DIR} ${TENSORRT_INCLUDE_DIR:-/usr/local/TensorRT/include} ${TENSORRT_LIB_DIR:-/usr/local/TensorRT/lib}
+    ./run.sh ${PADDLE_ROOT} ${WITH_MKL:-ON} ${WITH_GPU:-OFF} ${INFERENCE_DEMO_INSTALL_DIR} \
+        ${TENSORRT_INCLUDE_DIR:-/usr/local/TensorRT/include} \
+        ${TENSORRT_LIB_DIR:-/usr/local/TensorRT/lib}
     ./clean.sh
     fi
 }
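
Note: the demo_ci changes above assume the new install layout, where the public header is reachable as paddle/include/paddle_inference_api.h and libpaddle_fluid lives under paddle/lib. As a rough sketch (not part of this patch), a downstream project built against build/fluid_inference_install_dir could use a CMakeLists.txt along the following lines; the project name demo and the PADDLE_LIB cache variable are illustrative assumptions, and a real build also needs the third_party/install dependencies (gflags, glog, protobuf, mklml) exactly as demo_ci/CMakeLists.txt wires them up.

    # Hypothetical consumer of the inference-only package produced by `make inference_lib_dist`.
    # Configure with: cmake -DPADDLE_LIB=/path/to/build/fluid_inference_install_dir ..
    cmake_minimum_required(VERSION 3.0)
    project(demo CXX)

    set(PADDLE_LIB "" CACHE PATH "Path to fluid_inference_install_dir")

    # Headers are included as "paddle/include/paddle_inference_api.h" relative to the install root.
    include_directories("${PADDLE_LIB}")
    # libpaddle_fluid.so/.a now lives under paddle/lib instead of paddle/fluid/inference.
    link_directories("${PADDLE_LIB}/paddle/lib")

    add_executable(demo demo.cc)
    target_link_libraries(demo
      "${PADDLE_LIB}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX}")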