diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4793fa924042b3e457d71d209800ed8e71e3dd2d..199b3bda17f4ac22c1d657b6794446832d448440 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -70,6 +70,7 @@ lite_option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib." OFF)
 lite_option(LITE_ON_MODEL_OPTIMIZE_TOOL "Build the model optimize tool" OFF)
 # publish options
 lite_option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF)
+lite_option(LITE_BUILD_TAILOR "Enable tailoring library according to model" OFF)
 # TODO(Superjomn) Remove WITH_ANAKIN option if not needed latter.
 if(ANDROID OR IOS OR ARMLINUX)
diff --git a/cmake/lite.cmake b/cmake/lite.cmake
index 0061ddea32bf4b15a53b1e2742bae70f38c1e040..7d8641d96da86cf9a2be442b797507ac79058efa 100644
--- a/cmake/lite.cmake
+++ b/cmake/lite.cmake
@@ -241,6 +241,10 @@ set(host_kernels CACHE INTERNAL "host kernels")
 set(kernels_src_list "${CMAKE_BINARY_DIR}/kernels_src_list.txt")
 file(WRITE ${kernels_src_list} "") # clean
+if(LITE_BUILD_TAILOR)
+  set(tailored_kernels_list_path "${LITE_OPTMODEL_DIR}/.tailored_kernels_source_list")
+  file(STRINGS ${tailored_kernels_list_path} tailored_kernels_list)
+endif()
 # add a kernel for some specific device
 # device: one of (Host, ARM, X86, NPU, FPGA, OPENCL, CUDA)
 # level: one of (basic, extra)
@@ -252,6 +256,15 @@ function(add_kernel TARGET device level)
     ARGS)
   cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  if(LITE_BUILD_TAILOR)
+    foreach(src ${args_SRCS})
+      list(FIND tailored_kernels_list ${src} _index)
+      if (${_index} EQUAL -1)
+        return()
+      endif()
+    endforeach()
+  endif()
+
   if ("${level}" STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA))
     return()
   endif()
@@ -338,6 +351,10 @@ endfunction()
 set(ops CACHE INTERNAL "ops")
 set(ops_src_list "${CMAKE_BINARY_DIR}/ops_src_list.txt")
 file(WRITE ${ops_src_list} "") # clean
+if(LITE_BUILD_TAILOR)
+  set(tailored_ops_list_path "${LITE_OPTMODEL_DIR}/.tailored_ops_source_list")
+  file(STRINGS ${tailored_ops_list_path} tailored_ops_list)
+endif()
 # add an operator
 # level: one of (basic, extra)
 function(add_operator TARGET level)
@@ -348,16 +365,24 @@ function(add_operator TARGET level)
     ARGS)
   cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
   if ("${level}" STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA))
     return()
   endif()
-  set(ops "${ops};${TARGET}" CACHE INTERNAL "source")
   foreach(src ${args_SRCS})
+    if(LITE_BUILD_TAILOR)
+      list(FIND tailored_ops_list ${src} _index)
+      if (${_index} EQUAL -1)
+        return()
+      endif()
+    endif()
     file(APPEND ${ops_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
   endforeach()
+  set(ops "${ops};${TARGET}" CACHE INTERNAL "source")
+
   lite_cc_library(${TARGET} SRCS ${args_SRCS}
             DEPS ${args_DEPS}
             X86_DEPS ${args_X86_DEPS}
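
Note: with `LITE_BUILD_TAILOR=ON`, configuration expects the hidden list files produced by `model_optimize_tool` (written by `lite/api/cxx_api.cc` below) to already exist under `LITE_OPTMODEL_DIR`; `add_kernel`/`add_operator` silently skip any target whose sources are not all listed. A hypothetical direct configure invocation, showing only the two options this patch adds (the remaining cross-build options are wired up by `build.sh` later in this patch):

```sh
# illustrative; the other ARM/Android flags follow the usual Lite setup
cmake .. \
  -DLITE_BUILD_TAILOR=ON \
  -DLITE_OPTMODEL_DIR=/path/to/optimized_model
```
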
diff --git a/lite/api/cxx_api.cc b/lite/api/cxx_api.cc
index 5e2c3e59a311b9a9a678e4fe686a419e8a045350..a2b538aa77e0603f439b6b23aab875103fdbbff0 100644
--- a/lite/api/cxx_api.cc
+++ b/lite/api/cxx_api.cc
@@ -15,6 +15,7 @@
 #include "lite/api/cxx_api.h"
 #include <algorithm>
 #include <memory>
+#include <set>
 #include <string>
 #include <utility>
 #include <vector>
@@ -23,8 +24,16 @@
 namespace paddle {
 namespace lite {
 
+static const char TAILORD_OPS_SOURCE_LIST_FILENAME[] =
+    ".tailored_ops_source_list";
+static const char TAILORD_OPS_LIST_NAME[] = ".tailored_ops_list";
+static const char TAILORD_KERNELS_SOURCE_LIST_FILENAME[] =
+    ".tailored_kernels_source_list";
+static const char TAILORD_KERNELS_LIST_NAME[] = ".tailored_kernels_list";
+
 void Predictor::SaveModel(const std::string &dir,
-                          lite_api::LiteModelType model_type) {
+                          lite_api::LiteModelType model_type,
+                          bool record_info) {
   if (!program_) {
     GenRuntimeProgram();
   }
@@ -40,6 +49,83 @@ void Predictor::SaveModel(const std::string &dir,
     default:
       LOG(FATAL) << "Unknown model type";
   }
+  if (record_info) {
+    SaveOpKernelInfo(dir);
+  }
+}
+
+void Predictor::SaveOpKernelInfo(const std::string &model_dir) {
+  std::set<std::string> ops_info;
+  std::set<std::string> kernels_info;
+  const auto &instructions = program_->instructions();
+  for (auto &node : instructions) {
+    // parse the op type information
+    auto op = node.op()->op_info();
+    ops_info.insert(op->Type());
+    // parse the kernel type information
+    std::string kernel_type_str =
+        node.kernel()->op_type() + "," + TargetRepr(node.kernel()->target()) +
+        "," + PrecisionRepr(node.kernel()->precision()) + "," +
+        DataLayoutRepr(node.kernel()->layout()) + "," + node.kernel()->alias();
+    kernels_info.insert(kernel_type_str);
+  }
+
+  // get the source file names from the op types and kernel types
+  auto op2pathmap = OpKernelInfoCollector::Global().GetOp2PathDict();
+  auto kernel2pathmap = OpKernelInfoCollector::Global().GetKernel2PathDict();
+
+  // write the used op and kernel info into files
+  std::string opf_path = model_dir + "/" + TAILORD_OPS_LIST_NAME;
+  std::string opf_source_path =
+      model_dir + "/" + TAILORD_OPS_SOURCE_LIST_FILENAME;
+  std::string kpf_path = model_dir + "/" + TAILORD_KERNELS_LIST_NAME;
+  std::string kpf_source_path =
+      model_dir + "/" + TAILORD_KERNELS_SOURCE_LIST_FILENAME;
+  std::map<std::string, std::string> op2path;
+
+  std::FILE *opf = std::fopen(opf_path.c_str(), "w");
+  std::FILE *opf_source = std::fopen(opf_source_path.c_str(), "w");
+  std::FILE *kpf = std::fopen(kpf_path.c_str(), "w");
+  std::FILE *kpf_source = std::fopen(kpf_source_path.c_str(), "w");
+  std::vector<std::string> opcompile;
+  std::vector<std::string> kernelcompile;
+
+  if (nullptr == opf || nullptr == opf_source || nullptr == kpf ||
+      nullptr == kpf_source) {
+    LOG(FATAL) << "failed to create the info files in: " << model_dir;
+  }
+
+  for (auto op_info = ops_info.begin(); op_info != ops_info.end(); op_info++) {
+    fputs(op_info->c_str(), opf);
+    fputc('\n', opf);
+    std::string op_path = op2pathmap[*op_info];
+    fputs(op_path.c_str(), opf_source);
+    fputc('\n', opf_source);
+  }
+  std::fclose(opf_source);
+  std::fclose(opf);
+  LOG(INFO) << "operator information of the tailored model is stored into: "
+            << opf_path;
+
+  // write the kernel types and kernel source paths into the files
+  for (auto kernel_info = kernels_info.begin();
+       kernel_info != kernels_info.end();
+       kernel_info++) {
+    fputs(kernel_info->c_str(), kpf);
+    fputc('\n', kpf);
+    std::string kernel_path = kernel2pathmap[*kernel_info];
+    fputs(kernel_path.c_str(), kpf_source);
+    fputc('\n', kpf_source);
+    // conv_compute.cc dispatches to several specialized implementation
+    // files, which have to be kept together with it
+    if (kernel_path == "conv_compute.cc") {
+      fputs(
+          "conv_depthwise.cc\nconv_direct.cc\nconv_gemmlike.cc\nconv_"
+          "winograd.cc\n",
+          kpf_source);
+    }
+  }
+  std::fclose(kpf_source);
+  std::fclose(kpf);
+  LOG(INFO) << "kernel information of the tailored model is stored into: "
+            << kpf_path;
 }
 
 lite::Tensor *Predictor::GetInput(size_t offset) {
@@ -61,7 +147,7 @@ void Predictor::PrepareFeedFetch() {
   auto current_block = program_desc_.GetBlock<cpp::BlockDesc>(0);
   std::vector<cpp::OpDesc *> feeds;
   std::vector<cpp::OpDesc *> fetchs;
-  for (int i = 0; i < current_block->OpsSize(); i++) {
+  for (size_t i = 0; i < current_block->OpsSize(); i++) {
     auto op = current_block->GetOp<cpp::OpDesc>(i);
     if (op->Type() == "feed") {
       feeds.push_back(op);
@@ -71,11 +157,11 @@
   }
   input_names_.resize(feeds.size());
   output_names_.resize(fetchs.size());
-  for (int i = 0; i < feeds.size(); i++) {
+  for (size_t i = 0; i < feeds.size(); i++) {
     input_names_[feeds[i]->GetAttr<int>("col")] =
         feeds[i]->Output("Out").front();
   }
-  for (int i = 0; i < fetchs.size(); i++) {
+  for (size_t i = 0; i < fetchs.size(); i++) {
     output_names_[fetchs[i]->GetAttr<int>("col")] =
         fetchs[i]->Input("X").front();
   }
@@ -191,7 +277,7 @@ lite::Tensor *Predictor::GetInputByName(const std::string &name) {
   if (element == input_names_.end()) {
     LOG(ERROR) << "Model do not have input named with: [" << name
                << "], model's inputs include:";
-    for (int i = 0; i < input_names_.size(); i++) {
+    for (size_t i = 0; i < input_names_.size(); i++) {
       LOG(ERROR) << "[" << input_names_[i] << "]";
     }
     return nullptr;
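
Note: `SaveOpKernelInfo` writes four hidden files next to the optimized model; the formats follow directly from the code above. An illustrative dump for a hypothetical model that uses only `conv2d` and `fc` on ARM (real lists also contain the model's other ops, e.g. feed/fetch):

```text
.tailored_ops_list              conv2d
                                fc
.tailored_kernels_list          conv2d,kARM,kFloat,kNCHW,def
                                fc,kARM,kFloat,kNCHW,def
.tailored_ops_source_list       conv_op.cc
                                fc_op.cc
.tailored_kernels_source_list   conv_compute.cc
                                conv_depthwise.cc
                                conv_direct.cc
                                conv_gemmlike.cc
                                conv_winograd.cc
                                fc_compute.cc
```
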
diff --git a/lite/api/cxx_api.h b/lite/api/cxx_api.h
index 5a4a6919d7d2386819b05724e0f275e90a0fa119..e1d34172ba578824228e6369a8e37d60972336e9 100644
--- a/lite/api/cxx_api.h
+++ b/lite/api/cxx_api.h
@@ -89,7 +89,9 @@ class LITE_API Predictor {
   // This method is disabled in mobile, for unnecessary dependencies required.
   void SaveModel(
       const std::string& dir,
-      lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf);
+      lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf,
+      bool record_info = false);
+  void SaveOpKernelInfo(const std::string& model_dir);
 
 #ifdef LITE_WITH_TRAIN
   void Run(const std::vector<framework::Tensor>& tensors) {
@@ -137,9 +139,10 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
   std::unique_ptr<lite_api::Tensor> GetInputByName(
       const std::string& name) override;
 
-  void SaveOptimizedModel(const std::string& model_dir,
-                          lite_api::LiteModelType model_type =
-                              lite_api::LiteModelType::kProtobuf) override;
+  void SaveOptimizedModel(
+      const std::string& model_dir,
+      lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf,
+      bool record_info = false) override;
 
  private:
   Predictor raw_predictor_;
diff --git a/lite/api/cxx_api_impl.cc b/lite/api/cxx_api_impl.cc
index 3e216a40b50b8bcb3bdfdc2c0fd9aefc415764c0..db225fb78497d5c8f31f90e59c755232adc53222 100644
--- a/lite/api/cxx_api_impl.cc
+++ b/lite/api/cxx_api_impl.cc
@@ -65,8 +65,9 @@ std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetInputByName(
 }
 
 void CxxPaddleApiImpl::SaveOptimizedModel(const std::string &model_dir,
-                                          lite_api::LiteModelType model_type) {
-  raw_predictor_.SaveModel(model_dir, model_type);
+                                          lite_api::LiteModelType model_type,
+                                          bool record_info) {
+  raw_predictor_.SaveModel(model_dir, model_type, record_info);
 }
 
 }  // namespace lite
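
Note: these two files thread the new `record_info` argument from the public `PaddlePredictor` interface down to `Predictor::SaveModel`. A minimal sketch of driving it through the CXX API (model path and places are illustrative, not part of this patch):

```cpp
#include "lite/api/paddle_api.h"

using namespace paddle::lite_api;  // NOLINT

int main() {
  CxxConfig config;
  config.set_model_dir("./mobilenet_v1");
  config.set_valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
  auto predictor = CreatePaddlePredictor(config);
  // New third argument: also record the op/kernel lists used by this model.
  predictor->SaveOptimizedModel(
      "./mobilenet_v1_opt", LiteModelType::kNaiveBuffer, true);
  return 0;
}
```
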
protobuf/naive_buffer"); DEFINE_bool(display_kernels, false, "Display kernel information"); +DEFINE_bool(record_tailoring_info, + false, + "Record kernels and operators information of the optimized model " + "for tailoring compiling, information are stored into optimized " + "model path as hidden files"); DEFINE_string(optimize_out, "", "path of the output optimized model"); DEFINE_string(valid_targets, "arm", @@ -104,8 +112,14 @@ void Main() { } else { LOG(FATAL) << "Unsupported Model type :" << FLAGS_optimize_out_type; } + OpKernelInfoCollector::Global().SetKernel2path(kernel2path_map); - predictor->SaveOptimizedModel(FLAGS_optimize_out, model_type); + predictor->SaveOptimizedModel( + FLAGS_optimize_out, model_type, FLAGS_record_tailoring_info); + if (FLAGS_record_tailoring_info) { + LOG(INFO) << "Record the information of tailored model into :" + << FLAGS_optimize_out; + } } } // namespace lite_api diff --git a/lite/api/paddle_api.cc b/lite/api/paddle_api.cc index e87885c369999470bd0d1d8875cade797630388d..db545de958743b89f99efcde131e1371fdb15409 100644 --- a/lite/api/paddle_api.cc +++ b/lite/api/paddle_api.cc @@ -145,7 +145,8 @@ lod_t Tensor::lod() const { return ctensor(raw_tensor_)->lod(); } void Tensor::SetLoD(const lod_t &lod) { tensor(raw_tensor_)->set_lod(lod); } void PaddlePredictor::SaveOptimizedModel(const std::string &model_dir, - LiteModelType model_type) { + LiteModelType model_type, + bool record_info) { LOG(FATAL) << "The SaveOptimizedModel API is only supported by CxxConfig predictor."; } diff --git a/lite/api/paddle_api.h b/lite/api/paddle_api.h index 3886c462ff3192ccb522b741debe6730e3e0e4fb..3e911b62f785b2102685de94377804cf250f57e9 100644 --- a/lite/api/paddle_api.h +++ b/lite/api/paddle_api.h @@ -97,7 +97,8 @@ class LITE_API PaddlePredictor { /// CxxConfig, and the persisted model can be reused for MobileConfig. virtual void SaveOptimizedModel( const std::string& model_dir, - LiteModelType model_type = LiteModelType::kProtobuf); + LiteModelType model_type = LiteModelType::kProtobuf, + bool record_info = false); virtual ~PaddlePredictor() = default; }; diff --git a/lite/api/paddle_api_test.cc b/lite/api/paddle_api_test.cc index f87c707ddbea0d4e78d195e4529892b321027e8f..69d544c3decac9f312bc9eb03cdc6c3702c5032b 100644 --- a/lite/api/paddle_api_test.cc +++ b/lite/api/paddle_api_test.cc @@ -64,8 +64,8 @@ TEST(CxxApi, run) { EXPECT_NEAR(out[1], -28.8729, 1e-3); predictor->SaveOptimizedModel(FLAGS_model_dir + ".opt2"); - predictor->SaveOptimizedModel(FLAGS_model_dir + ".opt2.naive", - LiteModelType::kNaiveBuffer); + predictor->SaveOptimizedModel( + FLAGS_model_dir + ".opt2.naive", LiteModelType::kNaiveBuffer, true); } // Demo1 for Mobile Devices :Load model from file and run diff --git a/lite/core/CMakeLists.txt b/lite/core/CMakeLists.txt index f083c97f7ab89640f425de4807cdcd090784fd87..a5b581335047ff18c31ea9d1c03a9785e4ddf2ed 100644 --- a/lite/core/CMakeLists.txt +++ b/lite/core/CMakeLists.txt @@ -71,6 +71,8 @@ add_custom_command( COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py ${kernels_src_list} ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h + "${LITE_OPTMODEL_DIR}/.tailored_kernels_list" + LITE_BUILD_TAILOR OUTPUT kernels.h # not a real path to the output to force it execute every time. 
diff --git a/lite/core/CMakeLists.txt b/lite/core/CMakeLists.txt
index f083c97f7ab89640f425de4807cdcd090784fd87..a5b581335047ff18c31ea9d1c03a9785e4ddf2ed 100644
--- a/lite/core/CMakeLists.txt
+++ b/lite/core/CMakeLists.txt
@@ -71,6 +71,8 @@ add_custom_command(
   COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py
                  ${kernels_src_list}
                  ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h
+                 "${LITE_OPTMODEL_DIR}/.tailored_kernels_list"
+                 ${LITE_BUILD_TAILOR}
   OUTPUT kernels.h # not a real path to the output to force it execute every time.
   )
 # A trick to generate the paddle_use_ops.h
@@ -78,6 +80,8 @@ add_custom_command(
   COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_op_registry.py
                  ${ops_src_list}
                  ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h
+                 "${LITE_OPTMODEL_DIR}/.tailored_ops_list"
+                 ${LITE_BUILD_TAILOR}
   OUTPUT ops.h # not a real path to the output to force it execute every time.
   )
 # generate fake kernels for memory_optimize_tool
@@ -85,6 +89,7 @@ add_custom_command(
   COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/create_fake_kernel_registry.py
                  ${kernels_src_list}
                  ${CMAKE_BINARY_DIR}/all_kernel_faked.cc
+                 ${CMAKE_BINARY_DIR}/kernel_src_map.h
   OUTPUT all_kernel_faked.cc # not a real path to the output to force it execute every time.
   )
 add_custom_target(op_list_h DEPENDS ops.h)
diff --git a/lite/core/op_registry.h b/lite/core/op_registry.h
index 13f83c5346cf70d2bb709fe06393cf20af06a2a6..25375b8a8f795e58194d6223f617273beac3b78e 100644
--- a/lite/core/op_registry.h
+++ b/lite/core/op_registry.h
@@ -32,6 +32,43 @@
 
 using LiteType = paddle::lite::Type;
 
+class OpKernelInfoCollector {
+ public:
+  static OpKernelInfoCollector &Global() {
+    static auto *x = new OpKernelInfoCollector;
+    return *x;
+  }
+  void AddOp2path(const std::string &op_name, const std::string &op_path) {
+    size_t index = op_path.find_last_of('/');
+    if (index != std::string::npos) {
+      op2path_.insert(std::pair<std::string, std::string>(
+          op_name, op_path.substr(index + 1)));
+    }
+  }
+  void AddKernel2path(const std::string &kernel_name,
+                      const std::string &kernel_path) {
+    size_t index = kernel_path.find_last_of('/');
+    if (index != std::string::npos) {
+      kernel2path_.insert(std::pair<std::string, std::string>(
+          kernel_name, kernel_path.substr(index + 1)));
+    }
+  }
+  void SetKernel2path(
+      const std::map<std::string, std::string> &kernel2path_map) {
+    kernel2path_ = kernel2path_map;
+  }
+  const std::map<std::string, std::string> &GetOp2PathDict() {
+    return op2path_;
+  }
+  const std::map<std::string, std::string> &GetKernel2PathDict() {
+    return kernel2path_;
+  }
+
+ private:
+  std::map<std::string, std::string> op2path_;
+  std::map<std::string, std::string> kernel2path_;
+};
+
 namespace paddle {
 namespace lite {
@@ -59,7 +96,6 @@ class OpLiteRegistor : public Registor<OpClass> {
           });
         }) {}
 };
-
 template <TargetType Target, PrecisionType Precision, DataLayoutType Layout>
 using KernelRegistryForTarget = Factory<KernelLite<Target, Precision, Layout>,
                                         std::unique_ptr<KernelBase>>;
@@ -287,6 +323,7 @@ class KernelRegistor : public lite::Registor<KernelType> {
   static paddle::lite::OpLiteRegistor<OpClass> LITE_OP_REGISTER_INSTANCE( \
       op_type__)(#op_type__);                                             \
   int touch_op_##op_type__() {                                            \
+    OpKernelInfoCollector::Global().AddOp2path(#op_type__, __FILE__);     \
     return LITE_OP_REGISTER_INSTANCE(op_type__).Touch();                  \
   }
@@ -312,6 +349,9 @@ class KernelRegistor : public lite::Registor<KernelType> {
   static KernelClass LITE_KERNEL_INSTANCE(                                    \
       op_type__, target__, precision__, layout__, alias__);                   \
   int touch_##op_type__##target__##precision__##layout__##alias__() {         \
+    OpKernelInfoCollector::Global().AddKernel2path(                           \
+        #op_type__ "," #target__ "," #precision__ "," #layout__ "," #alias__, \
+        __FILE__);                                                            \
     LITE_KERNEL_INSTANCE(op_type__, target__, precision__, layout__, alias__) \
         .Touch();                                                             \
     return 0;                                                                 \
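
Note: the collector is populated as a side effect of the `touch_` functions that the `USE_LITE_OP`/`USE_LITE_KERNEL` machinery already invokes. A sketch of what one expanded kernel registration effectively records (key and path are illustrative):

```cpp
// Inside touch_conv2d_kARM_kFloat_kNCHW_def() (conceptually), __FILE__ is
// the registering source file; AddKernel2path keeps only its basename.
OpKernelInfoCollector::Global().AddKernel2path(
    "conv2d,kARM,kFloat,kNCHW,def",
    "/work/Paddle-Lite/lite/kernels/arm/conv_compute.cc");
// GetKernel2PathDict() now maps
//   "conv2d,kARM,kFloat,kNCHW,def" -> "conv_compute.cc"
```
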
diff --git a/lite/tools/build.sh b/lite/tools/build.sh
index 8463c1497ad8608358dcf3f5b561419c8af1d0a2..9423b69ee39bc2e06adc0807e4329b882f4705bd 100755
--- a/lite/tools/build.sh
+++ b/lite/tools/build.sh
@@ -17,6 +17,8 @@ BUILD_EXTRA=OFF
 BUILD_JAVA=ON
 BUILD_PYTHON=OFF
 BUILD_DIR=$(pwd)
+OPTMODEL_DIR=""
+BUILD_TAILOR=OFF
 
 readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz
 
@@ -94,6 +96,8 @@ function make_tiny_publish_so {
       -DLITE_ON_TINY_PUBLISH=ON \
       -DANDROID_STL_TYPE=$android_stl \
       -DLITE_BUILD_EXTRA=$BUILD_EXTRA \
+      -DLITE_BUILD_TAILOR=$BUILD_TAILOR \
+      -DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \
       -DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
 
   make publish_inference -j$NUM_PROC
@@ -133,6 +137,8 @@ function make_full_publish_so {
       -DLITE_SHUTDOWN_LOG=ON \
       -DANDROID_STL_TYPE=$android_stl \
       -DLITE_BUILD_EXTRA=$BUILD_EXTRA \
+      -DLITE_BUILD_TAILOR=$BUILD_TAILOR \
+      -DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \
      -DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang}
 
   make publish_inference -j4
@@ -317,6 +323,14 @@ function main {
         BUILD_DIR="${i#*=}"
         shift
         ;;
+      --opt_model_dir=*)
+        OPTMODEL_DIR="${i#*=}"
+        shift
+        ;;
+      --build_tailor=*)
+        BUILD_TAILOR="${i#*=}"
+        shift
+        ;;
       tiny_publish)
         make_tiny_publish_so $ARM_OS $ARM_ABI $ARM_LANG $ANDROID_STL
         shift
diff --git a/lite/tools/cmake_tools/create_fake_kernel_registry.py b/lite/tools/cmake_tools/create_fake_kernel_registry.py
index 9edd6d836d9c4eddbc3f9e4c1f78274abdf3b4c2..140d77320704f62dfb2492eec3ad7238fe3868ff 100644
--- a/lite/tools/cmake_tools/create_fake_kernel_registry.py
+++ b/lite/tools/cmake_tools/create_fake_kernel_registry.py
@@ -20,6 +20,7 @@ from utils import *
 
 ops_list_path = sys.argv[1]
 dest_path = sys.argv[2]
+kernelmap_path = sys.argv[3]
 
 out_lines = [
     '#pragma once',
@@ -47,6 +48,31 @@ class %s : public KernelLite<TARGET(%s), PRECISION(%s), DATALAYOUT(%s)> {
 }  // namespace paddle
 '''
 
+# create a .h file to store the kernel & source-file relationship
+kernel_src_map_lines = [
+'''
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <map>
+// ATTENTION This can only be included in a .cc file.
+
+const std::map<std::string, std::string> kernel2path_map{
+'''
+]
 
 with open(ops_list_path) as f:
@@ -99,7 +125,23 @@ with open(ops_list_path) as f:
                 out_lines.append("")
                 out_lines.append(gen_use_kernel_statement(k.op_type, k.target,
                     k.precision, k.data_layout, k.alias))
-
+                index = path.rindex('/')
+                filename = path[index + 1:]
+                map_element = '    {"%s,%s,%s,%s,%s", "%s"},' % (
+                    k.op_type,
+                    k.target,
+                    k.precision,
+                    k.data_layout,
+                    k.alias,
+                    filename.strip()
+                )
+                kernel_src_map_lines.append(map_element)
 
 with open(dest_path, 'w') as f:
     logging.info("write kernel list to %s" % dest_path)
     f.write('\n'.join(out_lines))
+
+with open(kernelmap_path, 'w') as fd:
+    logging.info("write kernel map to %s" % kernelmap_path)
+    kernel_src_map_lines.append('    {"  ", "  "}')
+    kernel_src_map_lines.append('};')
+    fd.write('\n'.join(kernel_src_map_lines))
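
Note: with the new script options, a tailored tiny_publish build might look like the following; the non-tailoring flags are the script's pre-existing options and are shown only as a plausible combination:

```sh
./lite/tools/build.sh \
  --arm_os=android \
  --arm_abi=armv8 \
  --arm_lang=gcc \
  --android_stl=c++_static \
  --build_tailor=ON \
  --opt_model_dir=/path/to/optimized_model \
  tiny_publish
```
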
diff --git a/lite/tools/cmake_tools/parse_kernel_registry.py b/lite/tools/cmake_tools/parse_kernel_registry.py
index 50d28864144de11cde129233f6b9ed2e2a4f644c..f4f0b95483687d3785168c132d30ac8a4fa87c8e 100644
--- a/lite/tools/cmake_tools/parse_kernel_registry.py
+++ b/lite/tools/cmake_tools/parse_kernel_registry.py
@@ -18,14 +18,19 @@ from ast import RegisterLiteKernelParser
 
 ops_list_path = sys.argv[1]
 dest_path = sys.argv[2]
+minkernels_list_path = sys.argv[3]
+tailored = sys.argv[4]
 
 out_lines = [
     '#pragma once',
     '#include "paddle_lite_factory_helper.h"',
     '',
 ]
-
-
+minlines = set()
+if tailored == "ON":
+    with open(minkernels_list_path) as fd:
+        for line in fd:
+            minlines.add(line.strip())
 with open(ops_list_path) as f:
     paths = set([path for path in f])
     for path in paths:
@@ -35,6 +40,15 @@ with open(ops_list_path) as f:
             kernel_parser.parse()
 
             for k in kernel_parser.kernels:
+                kernel = "%s,%s,%s,%s,%s" % (
+                    k.op_type,
+                    k.target,
+                    k.precision,
+                    k.data_layout,
+                    k.alias,
+                )
+                if tailored == "ON":
+                    if kernel not in minlines: continue
                 key = "USE_LITE_KERNEL(%s, %s, %s, %s, %s);" % (
                     k.op_type,
                     k.target,
diff --git a/lite/tools/cmake_tools/parse_op_registry.py b/lite/tools/cmake_tools/parse_op_registry.py
index 8643475f9e86e3ed57768dd1d35a3e56424652f4..db58c455a9d5863ec0c66d7783871831c73c120f 100644
--- a/lite/tools/cmake_tools/parse_op_registry.py
+++ b/lite/tools/cmake_tools/parse_op_registry.py
@@ -19,7 +19,8 @@ from ast import RegisterLiteOpParser
 
 ops_list_path = sys.argv[1]
 dest_path = sys.argv[2]
-
+minops_list_path = sys.argv[3]
+tailored = sys.argv[4]
 out_lines = [
     '#pragma once',
     '#include "paddle_lite_factory_helper.h"',
@@ -30,6 +31,11 @@ paths = set()
 for line in open(ops_list_path):
     paths.add(line.strip())
 
+if tailored == "ON":
+    minlines = set()
+    with open(minops_list_path) as fd:
+        for line in fd:
+            minlines.add(line.strip())
 for path in paths:
     str_info = open(path.strip()).read()
     op_parser = RegisterLiteOpParser(str_info)
@@ -37,6 +43,8 @@ for path in paths:
     for op in ops:
         if "_grad" in op:
             continue
+        if tailored == "ON":
+            if op not in minlines: continue
         out = "USE_LITE_OP(%s);" % op
         out_lines.append(out)
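
Note: under tailoring the two parsers emit only the registrations whose keys appear in the hidden list files, so the generated headers shrink to something like the following (op and kernel names are illustrative):

```cpp
// paddle_use_kernels.h (tailored)
USE_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);

// paddle_use_ops.h (tailored)
USE_LITE_OP(conv2d);
USE_LITE_OP(fc);
```
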