From c004b35a1a0a00283a1339ded57845dfd84cca67 Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Mon, 10 Jun 2019 15:56:47 +0800 Subject: [PATCH] init code gen for light framework(#17905) --- paddle/fluid/lite/CMakeLists.txt | 42 ++- paddle/fluid/lite/api/cxx_api_test.cc | 1 + paddle/fluid/lite/core/CMakeLists.txt | 5 +- paddle/fluid/lite/core/context.h | 5 + paddle/fluid/lite/core/hvy_tensor.h | 10 +- paddle/fluid/lite/core/kernel.h | 2 +- paddle/fluid/lite/core/lite_tensor.h | 8 + paddle/fluid/lite/core/op_lite.cc | 14 +- paddle/fluid/lite/core/op_registry.h | 9 +- paddle/fluid/lite/core/scope.cc | 8 +- paddle/fluid/lite/core/target_wrapper.h | 41 ++- paddle/fluid/lite/core/tensor.h | 30 ++- paddle/fluid/lite/core/variable.h | 2 +- paddle/fluid/lite/gen_code/CMakeLists.txt | 24 ++ paddle/fluid/lite/gen_code/gen_code.cc | 193 +++++++++++++ paddle/fluid/lite/gen_code/gen_code.h | 254 ++++++++++++++++++ paddle/fluid/lite/gen_code/gen_code_test.cc | 139 ++++++++++ .../lite/gen_code/generated_code_test.cc | 46 ++++ paddle/fluid/lite/gen_code/paddle_infer.cc | 139 ++++++++++ paddle/fluid/lite/gen_code/paddle_infer.h | 70 +++++ paddle/fluid/lite/kernels/x86/CMakeLists.txt | 2 +- paddle/fluid/lite/model_parser/cpp/op_desc.cc | 14 +- paddle/fluid/lite/model_parser/pb/op_desc.cc | 2 +- paddle/fluid/lite/operators/feed_op.cc | 4 +- paddle/fluid/lite/operators/mul_op.h | 7 +- paddle/fluid/lite/tools/build.sh | 34 ++- paddle/fluid/lite/utils/CMakeLists.txt | 2 +- paddle/fluid/lite/utils/string.cc | 19 ++ paddle/fluid/lite/utils/string.h | 77 ++++++ 29 files changed, 1145 insertions(+), 58 deletions(-) create mode 100644 paddle/fluid/lite/gen_code/CMakeLists.txt create mode 100644 paddle/fluid/lite/gen_code/gen_code.cc create mode 100644 paddle/fluid/lite/gen_code/gen_code.h create mode 100644 paddle/fluid/lite/gen_code/gen_code_test.cc create mode 100644 paddle/fluid/lite/gen_code/generated_code_test.cc create mode 100644 paddle/fluid/lite/gen_code/paddle_infer.cc create mode 100644 paddle/fluid/lite/gen_code/paddle_infer.h create mode 100644 paddle/fluid/lite/utils/string.cc create mode 100644 paddle/fluid/lite/utils/string.h diff --git a/paddle/fluid/lite/CMakeLists.txt b/paddle/fluid/lite/CMakeLists.txt index 028384cca07..2c263cc4f68 100644 --- a/paddle/fluid/lite/CMakeLists.txt +++ b/paddle/fluid/lite/CMakeLists.txt @@ -34,7 +34,7 @@ endfunction() function (lite_deps TARGET) set(options "") set(oneValueArgs "") - set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS) + set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS ARGS) cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) set(deps ${lite_deps_DEPS}) @@ -63,14 +63,35 @@ function (lite_deps TARGET) endforeach(var) endif() + if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + foreach(var ${lite_deps_LIGHT_DEPS}) + set(deps ${deps} ${var}) + endforeach(var) + endif() + + if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + foreach(var ${lite_deps_HVY_DEPS}) + set(deps ${deps} ${var}) + endforeach(var) + endif() + set(${TARGET} ${deps} PARENT_SCOPE) endfunction() +# cc_library with branch support. +# The branches: +# X86_DEPS: works only when LITE_WITH_X86 is ON. 
+# CUDA_DEPS: LITE_WITH_CUDA +# ARM_DEPS: LITE_WITH_ARM +# PROFILE_DEPS: LITE_WITH_PROFILE +# LIGHT_DEPS: LITE_WITH_LIGHT_WEIGHT_FRAMEWORK +# HVY_DEPS: NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK function(lite_cc_library TARGET) set(options "") set(oneValueArgs "") - set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS) + set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS + HVY_DEPS ARGS) cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) set(deps "") @@ -79,7 +100,10 @@ function(lite_cc_library TARGET) X86_DEPS ${args_X86_DEPS} CUDA_DEPS ${args_CUDA_DEPS} ARM_DEPS ${args_ARM_DEPS} - PROFILE_DEPS ${args_PROFILE_DEPS}) + PROFILE_DEPS ${args_PROFILE_DEPS} + LIGHT_DEPS ${args_LIGHT_DEPS} + HVY_DEPS ${args_HVY_DEPS} + ) cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS}) endfunction() @@ -87,7 +111,8 @@ endfunction() function(lite_cc_binary TARGET) set(options "") set(oneValueArgs "") - set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS) + set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS + LIGHT_DEPS HVY_DEPS ARGS) cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) set(deps "") @@ -97,6 +122,8 @@ function(lite_cc_binary TARGET) CUDA_DEPS ${args_CUDA_DEPS} ARM_DEPS ${args_ARM_DEPS} PROFILE_DEPS ${args_PROFILE_DEPS} + LIGHT_DEPS ${args_LIGHT_DEPS} + HVY_DEPS ${args_HVY_DEPS} ) cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS}) endfunction() @@ -112,7 +139,9 @@ endfunction() function(lite_cc_test TARGET) set(options "") set(oneValueArgs "") - set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS ARGS) + set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS + LIGHT_DEPS HVY_DEPS + ARGS) cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) set(deps "") @@ -122,6 +151,8 @@ function(lite_cc_test TARGET) CUDA_DEPS ${args_CUDA_DEPS} ARM_DEPS ${args_ARM_DEPS} PROFILE_DEPS ${args_PROFILE_DEPS} + LIGHT_DEPS ${args_LIGHT_DEPS} + HVY_DEPS ${args_HVY_DEPS} ) _lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ARGS ${args_ARGS}) register_test_offline("${TARGET}") @@ -137,3 +168,4 @@ add_subdirectory(kernels) add_subdirectory(model_parser) add_subdirectory(utils) add_subdirectory(api) +add_subdirectory(gen_code) diff --git a/paddle/fluid/lite/api/cxx_api_test.cc b/paddle/fluid/lite/api/cxx_api_test.cc index 05630384044..c818f33e029 100644 --- a/paddle/fluid/lite/api/cxx_api_test.cc +++ b/paddle/fluid/lite/api/cxx_api_test.cc @@ -76,6 +76,7 @@ TEST(CXXApi, save_model) { predictor.Build(FLAGS_model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)}, valid_places); + LOG(INFO) << "Save optimized model to " << FLAGS_optimized_model; predictor.SaveModel(FLAGS_optimized_model); } #endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK diff --git a/paddle/fluid/lite/core/CMakeLists.txt b/paddle/fluid/lite/core/CMakeLists.txt index 94085934112..e2ab2354e9c 100644 --- a/paddle/fluid/lite/core/CMakeLists.txt +++ b/paddle/fluid/lite/core/CMakeLists.txt @@ -8,7 +8,7 @@ lite_cc_library(target_wrapper_lite SRCS target_wrapper.cc lite_cc_library(memory_lite SRCS memory.cc DEPS target_wrapper_lite) lite_cc_library(lite_tensor SRCS lite_tensor.cc DEPS memory_lite target_wrapper_lite) if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) - cc_library(hvy_tensor SRCS hvy_tensor.cc DEPS lod_tensor) + lite_cc_library(hvy_tensor SRCS hvy_tensor.cc DEPS lod_tensor HVY_DEPS framework_proto) endif() 
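# A usage sketch (editorial, not part of the original patch) of the
# branch-aware wrappers defined in lite/CMakeLists.txt above; the target and
# dependency names here are hypothetical and only illustrate which build
# switch guards each argument list:
#
#   lite_cc_library(demo_lite SRCS demo.cc
#                   DEPS       op_lite          # always linked
#                   X86_DEPS   a_x86_kernel     # only when LITE_WITH_X86=ON
#                   CUDA_DEPS  a_cuda_kernel    # only when LITE_WITH_CUDA=ON
#                   LIGHT_DEPS lite_tensor      # only when LITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON
#                   HVY_DEPS   framework_proto) # only when LITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF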
if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) @@ -26,8 +26,7 @@ cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite}) cc_library(cpu_info_lite SRCS cpu_info.cc) cc_library(context_lite SRCS context.cc DEPS ${tensor_lite} any_lite cpu_info_lite) cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite - cpp_op_desc_lite - ${tensor_lite}) + cpp_op_desc_lite ${tensor_lite}) cc_library(types_lite SRCS types.cc) cc_library(type_system SRCS type_system.cc DEPS ${tensor_lite} target_wrapper_lite) diff --git a/paddle/fluid/lite/core/context.h b/paddle/fluid/lite/core/context.h index 4702512af3a..483f5154144 100644 --- a/paddle/fluid/lite/core/context.h +++ b/paddle/fluid/lite/core/context.h @@ -173,6 +173,11 @@ class Context { new ::paddle::framework::ExecutionContext(*x86_device_context_)); } + Context(Context&& ctx) { + x86_device_context_ = std::move(ctx.x86_device_context_); + x86_execution_context_ = std::move(ctx.x86_execution_context_); + } + // NOTE: InitOnce should only be used by ContextScheduler void InitOnce() {} diff --git a/paddle/fluid/lite/core/hvy_tensor.h b/paddle/fluid/lite/core/hvy_tensor.h index f86ca272921..21bfa2b48e2 100644 --- a/paddle/fluid/lite/core/hvy_tensor.h +++ b/paddle/fluid/lite/core/hvy_tensor.h @@ -21,6 +21,7 @@ #pragma once #include #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/lite/core/target_wrapper.h" #include "paddle/fluid/lite/core/tensor.h" namespace paddle { @@ -65,6 +66,14 @@ class TensorHvy : public TensorBase { using DDimT = DDimHvy; using LoDT = framework::LoD; + template + void Assign(DType* data, const DimT& dim) { + Resize(dim); + auto* dst = mutable_data(Target); + CopySync(dst, data, dim.production() * sizeof(DType), + IoDirection::HtoD); + } + TargetType target() const { if (platform::is_gpu_place(data_.place())) { return TARGET(kCUDA); @@ -95,7 +104,6 @@ class TensorHvy : public TensorBase { const void* raw_data() const { return data_.raw_data(); } void Resize(const DDimHvy& dims) { - LOG(INFO) << "dims.size " << dims.size(); data_.Resize(framework::make_ddim(dims.Vectorize())); } diff --git a/paddle/fluid/lite/core/kernel.h b/paddle/fluid/lite/core/kernel.h index 629da86bbdd..d7b296eec12 100644 --- a/paddle/fluid/lite/core/kernel.h +++ b/paddle/fluid/lite/core/kernel.h @@ -150,7 +150,7 @@ class KernelBase { void Torch() {} protected: - std::unique_ptr ctx_; + std::unique_ptr ctx_{nullptr}; mutable operators::param_t param_; // The corresponding op type. std::string op_type_{}; diff --git a/paddle/fluid/lite/core/lite_tensor.h b/paddle/fluid/lite/core/lite_tensor.h index 79f1352cb06..6cccdc0dd03 100644 --- a/paddle/fluid/lite/core/lite_tensor.h +++ b/paddle/fluid/lite/core/lite_tensor.h @@ -61,6 +61,14 @@ class TensorLite : public TensorBase { TensorLite() : buffer_(std::make_shared()) {} + template + void Assign(DType *data, const DimT &dim) { + Resize(dim); + auto *dst = mutable_data(Target); + CopySync(dst, data, dim.product() * sizeof(DType), + IoDirection::HtoD); + } + template const T *data() const { return static_cast(buffer_->data()); diff --git a/paddle/fluid/lite/core/op_lite.cc b/paddle/fluid/lite/core/op_lite.cc index dc22e4fb4b4..bc30a00a497 100644 --- a/paddle/fluid/lite/core/op_lite.cc +++ b/paddle/fluid/lite/core/op_lite.cc @@ -28,15 +28,23 @@ std::vector> OpLite::CreateKernels( CHECK(!op_type_.empty()) << "op_type_ should be set first"; auto pick_kernel = [&](const Place &place) { - auto ks = KernelRegistry::Global().Create( - (kernel_type.empty() ? 
op_type_ : kernel_type), place.target, - place.precision, place.layout); + auto ks = KernelRegistry::Global().Create(op_type_, place.target, + place.precision, place.layout); for (auto &&it : ks) { AttachKernel(it.get()); kernels.emplace_back(std::move(it)); } }; + if (!kernel_type.empty()) { + Place place; + std::string op_type, alias; + KernelBase::ParseKernelType(kernel_type, &op_type, &alias, &place); + pick_kernel(place); + CHECK(!kernels.empty()) << "no kernel for kernel type " << kernel_type; + return kernels; + } + std::set place_set; for (auto place : places) { place_set.insert(place); diff --git a/paddle/fluid/lite/core/op_registry.h b/paddle/fluid/lite/core/op_registry.h index 1052419ecda..49332262deb 100644 --- a/paddle/fluid/lite/core/op_registry.h +++ b/paddle/fluid/lite/core/op_registry.h @@ -91,9 +91,9 @@ class KernelRegistry final { void Register(const std::string &name, typename KernelRegistryForTarget::creator_t &&creator) { - VLOG(3) << "register for " << TargetToStr(Target) << ":" - << PrecisionToStr(Precision) << "//" - << GetKernelOffset(); + // VLOG(3) << "register for " << TargetToStr(Target) << ":" + //<< PrecisionToStr(Precision) << "//" + //<< GetKernelOffset(); using kernel_registor_t = KernelRegistryForTarget; auto &varient = registries_[GetKernelOffset()]; @@ -153,9 +153,6 @@ class KernelRegistor : public lite::Registor { public: KernelRegistor(const std::string &op_type, const std::string &alias) : Registor([=] { - VLOG(3) << "Register kernel " << op_type << " for " - << TargetToStr(target) << " " << PrecisionToStr(precision) - << " " << DataLayoutToStr(layout) << " alias " << alias; KernelRegistry::Global().Register( op_type, [=]() -> std::unique_ptr { std::unique_ptr x(new KernelType); diff --git a/paddle/fluid/lite/core/scope.cc b/paddle/fluid/lite/core/scope.cc index 053803b00a0..fbb837aedd3 100644 --- a/paddle/fluid/lite/core/scope.cc +++ b/paddle/fluid/lite/core/scope.cc @@ -17,7 +17,13 @@ namespace paddle { namespace lite { -Scope::~Scope() {} +Scope::~Scope() { + for (auto *x : kids_) { + if (x) { + delete x; + } + } +} Scope &Scope::NewScope() const { kids_.push_back(new Scope); diff --git a/paddle/fluid/lite/core/target_wrapper.h b/paddle/fluid/lite/core/target_wrapper.h index 1f0d1ecf140..1029bf5300e 100644 --- a/paddle/fluid/lite/core/target_wrapper.h +++ b/paddle/fluid/lite/core/target_wrapper.h @@ -63,7 +63,8 @@ static const std::string& TargetToStr(TargetType target) { } static const std::string& PrecisionToStr(PrecisionType precision) { - static const std::string precision2string[] = {"unk", "float", "int8", "any"}; + static const std::string precision2string[] = {"unk", "float", "int8_t", + "any"}; auto x = static_cast(precision); CHECK_LT(x, static_cast(PRECISION(NUM))); return precision2string[x]; @@ -76,6 +77,29 @@ static const std::string& DataLayoutToStr(DataLayoutType layout) { return datalayout2string[x]; } +static const std::string& TargetRepr(TargetType target) { + static const std::string target2string[] = {"kUnk", "kHost", "kX86", "kCUDA", + "kAny"}; + auto x = static_cast(target); + CHECK_LT(x, static_cast(TARGET(NUM))); + return target2string[x]; +} + +static const std::string& PrecisionRepr(PrecisionType precision) { + static const std::string precision2string[] = {"kUnk", "kFloat", "kInt8", + "kAny"}; + auto x = static_cast(precision); + CHECK_LT(x, static_cast(PRECISION(NUM))); + return precision2string[x]; +} + +static const std::string& DataLayoutRepr(DataLayoutType layout) { + static const std::string datalayout2string[] = 
{"kUnk", "kNCHW", "kAny"}; + auto x = static_cast(layout); + CHECK_LT(x, static_cast(DATALAYOUT(NUM))); + return datalayout2string[x]; +} + /* * Place specifies the execution context of a Kernel or input/output for a * kernel. It is used to make the analysis of the MIR more clear and accurate. @@ -228,5 +252,20 @@ class TargetWrapper { }; #endif // LITE_WITH_CUDA +template +void CopySync(void* dst, void* src, size_t size, IoDirection dir) { + switch (Target) { + case TARGET(kX86): + case TARGET(kHost): + case TARGET(kARM): + TargetWrapperX86::MemcpySync(dst, src, size, IoDirection::HtoH); + break; +#ifdef LITE_WITH_CUDA + case TARGET(kCUDA): + TargetWrapperCuda::MemcpySync(dst, src, size, dir); +#endif + } +} + } // namespace lite } // namespace paddle diff --git a/paddle/fluid/lite/core/tensor.h b/paddle/fluid/lite/core/tensor.h index 11b682a617c..d6980ff8898 100644 --- a/paddle/fluid/lite/core/tensor.h +++ b/paddle/fluid/lite/core/tensor.h @@ -47,7 +47,8 @@ class DDimBase { DDimBase() = default; explicit DDimBase(const std::vector &x) { self()->ConstructFrom(x); } - value_type operator[](int offset) const { return (*self())[offset]; } + value_type operator[](int offset) const { return (*const_self())[offset]; } + value_type &operator[](int offset) { return (*self())[offset]; } std::vector Vectorize() const { return self()->Vectorize(); } size_t size() const { return const_self()->size(); } bool empty() const { return const_self()->empty(); } @@ -73,18 +74,19 @@ class DDimBase { {Slice(0, col).production(), Slice(col, size()).production()})); } - friend std::ostream &operator<<(std::ostream &os, const DDimT &dims) { - if (dims.empty()) { - os << "[]"; - return os; + std::string repr() const { + std::stringstream ss; + ss << "{"; + for (size_t i = 0; i < this->size() - 1; i++) { + ss << (*this)[i] << ","; } + if (!this->empty()) ss << (*this)[size() - 1]; + ss << "}"; + return ss.str(); + } - os << "["; - for (size_t i = 0; i < dims.size() - 1; i++) { - os << dims[i] << " "; - } - if (!dims.empty()) os << dims[dims.size() - 1]; - os << "]"; + friend std::ostream &operator<<(std::ostream &os, const DDimT &dims) { + os << dims.repr(); return os; } @@ -102,6 +104,12 @@ template class TensorBase { public: TensorBase() = default; + + template + void Assign(T *data, const DimT &dim) { + self()->Assign(data, dim); + } + TargetType target() const { return self()->target(); } template diff --git a/paddle/fluid/lite/core/variable.h b/paddle/fluid/lite/core/variable.h index c83871446d2..d52a813a09c 100644 --- a/paddle/fluid/lite/core/variable.h +++ b/paddle/fluid/lite/core/variable.h @@ -24,7 +24,7 @@ namespace lite { class Variable { public: template - const T& Get() { + const T& Get() const { return blob_.get(); } diff --git a/paddle/fluid/lite/gen_code/CMakeLists.txt b/paddle/fluid/lite/gen_code/CMakeLists.txt new file mode 100644 index 00000000000..5d09dd567ea --- /dev/null +++ b/paddle/fluid/lite/gen_code/CMakeLists.txt @@ -0,0 +1,24 @@ +lite_cc_library(gen_code_lite SRCS gen_code.cc + DEPS program_lite op_lite scope + cpp_op_desc_lite + HVY_DEPS operator) +lite_cc_library(paddle_infer_gencode SRCS paddle_infer.cc DEPS program_lite utils_lite) + +lite_cc_test(test_gen_code_lite SRCS gen_code_test.cc DEPS gen_code_lite ${tensor_lite} + mul_op_lite + compatible_pb_lite + model_parser_lite + X86_DEPS mul_compute_x86 + ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) + +lite_cc_library(__generated_code__ + SRCS ${CMAKE_BINARY_DIR}/paddle/fluid/lite/gen_code/__generated_code__.cc 
+ DEPS scope_lite op_lite kernel_lite paddle_infer_gencode +) + +lite_cc_test(test_generated_code SRCS generated_code_test.cc DEPS __generated_code__ + ${ops_lite} ${host_kernels} + X86_DEPS ${x86_kernels} + ) + +add_dependencies(__generated_code__ test_gen_code_lite) diff --git a/paddle/fluid/lite/gen_code/gen_code.cc b/paddle/fluid/lite/gen_code/gen_code.cc new file mode 100644 index 00000000000..a50241bb715 --- /dev/null +++ b/paddle/fluid/lite/gen_code/gen_code.cc @@ -0,0 +1,193 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/gen_code/gen_code.h" +#include +#include +#include + +namespace paddle { +namespace lite { +namespace gencode { + +void Module::AddWeight(const std::string &name, const TensorRepr &tensor) { + auto w_name = WeightUniqueName(); + Line(string_format("// Create weight: %s", name.c_str())); + // auto* w0 = scope.Var("w0")->GetMutable(); + Line(string_format("auto* %s = scope->Var(%s)->GetMutable();", + w_name.c_str(), Repr(name).c_str())); + // lite::DDim w_ddim({1, 2}) + Line(string_format("lite::DDim %s_ddim(std::vector(%s));", + w_name.c_str(), tensor.ddim.repr().c_str())); + // std::vector w_data({}); + auto w_data_repr = DataRepr( + std::string(static_cast(tensor.raw_data), tensor.num_bytes), + tensor.dtype); + Line(string_format("std::vector<%s> %s_data({%s});", + PrecisionToStr(tensor.dtype).c_str(), w_name.c_str(), + w_data_repr.c_str())); + // w0->Assign(w0_data.data(), w0_ddim); + Line(string_format( + "%s->Assign<%s, lite::DDim, TARGET(kX86)>(%s_data.data(), %s_ddim);", + w_name.c_str(), PrecisionToStr(tensor.dtype).c_str(), w_name.c_str(), + w_name.c_str())); + Line(""); +} + +void Module::AddHeaderIncludeGenCode() { + Line(""); + Line("#include "); + Line("#include "); + Line("#include \"paddle/fluid/lite/core/compatible_tensor.h\""); + Line("#include \"paddle/fluid/lite/core/context.h\""); + Line("#include \"paddle/fluid/lite/gen_code/paddle_infer.h\""); + Line("#include \"paddle/fluid/lite/core/op_registry.h\""); + Line("#include \"paddle/fluid/lite/core/scope.h\""); + Line("#include \"paddle/fluid/lite/model_parser/cpp/op_desc.h\""); + Line(""); + Line(""); +} + +std::string Module::DataRepr(const std::string &raw_data, PrecisionType dtype) { + std::stringstream ss; + switch (dtype) { + case PRECISION(kFloat): { + const float *raw = reinterpret_cast(raw_data.c_str()); + int num_elems = raw_data.size() / sizeof(float); + if (num_elems) { + for (int i = 0; i < num_elems - 1; i++) { + ss << raw[i] << ","; + } + ss << raw[num_elems - 1]; + } + } break; + + default: + LOG(FATAL) << "Unsupported type " << PrecisionToStr(dtype); + } + return ss.str(); +} + +void Module::AddOpDescHelper(const std::string &op_id, + const cpp::OpDesc &desc) { + std::string desc_var = op_id + "_desc"; + Line(string_format("lite::cpp::OpDesc %s;", desc_var.c_str())); + auto vec_str_repr = [](const std::vector &vec) { + return Repr(vec); + }; + for (auto &item : 
desc.inputs()) { + Line(string_format("%s.SetInput(%s, %s);", desc_var.c_str(), + Repr(item.first).c_str(), + vec_str_repr(item.second).c_str())); + } + + for (auto &item : desc.outputs()) { + Line(string_format("%s.SetOutput(%s, %s);", desc_var.c_str(), + Repr(item.first).c_str(), + vec_str_repr(item.second).c_str())); + } + + auto attr_repr = [&](const std::string &name) -> std::string { + using AttrType = OpDescAPI::AttrType; + auto type = desc.GetAttrType(name); + + switch (type) { + case AttrType::INT: + return std::to_string(desc.GetAttr(name)); + case AttrType::FLOAT: + return std::to_string(desc.GetAttr(name)); + case AttrType::BOOLEAN: + return std::to_string(desc.GetAttr(name)); + case AttrType::STRING: + return "\"" + desc.GetAttr(name) + "\""; + case AttrType::STRINGS: { + std::vector tmp; + auto vals = desc.GetAttr>(name); + std::transform(vals.begin(), vals.end(), std::back_inserter(tmp), + [](const std::string &x) { return Repr(x); }); + return "{" + Join(tmp, ",") + "}"; + } + default: + LOG(FATAL) << "Unsupported attribute type: " << static_cast(type); + } + return ""; + }; + + auto attr_type_repr = [&](const std::string &name) -> std::string { + using AttrType = OpDescAPI::AttrType; + auto type = desc.GetAttrType(name); + + switch (type) { + case AttrType::INT: + return "int"; + case AttrType::FLOAT: + return "float"; + case AttrType::BOOLEAN: + return "bool"; + case AttrType::STRING: + return "std::string"; + case AttrType::STRINGS: + return "std::vector"; + default: + LOG(FATAL) << "Unsupported attribute type: " << static_cast(type); + } + + return "unk_t"; + }; + for (auto &item : desc.AttrNames()) { + // Drop the python information. + if (item == "op_callstack") continue; + auto attr_type = attr_type_repr(item); + auto attr_val = attr_repr(item); + Line(string_format("%s.SetAttr<%s>(%s, %s);", // + desc_var.c_str(), attr_type.c_str(), Repr(item).c_str(), + attr_val.c_str())); + } +} + +void Module::AddOp(const cpp::OpDesc &op) { + auto op_name = OpUniqueName(); + AddOpDescHelper(op_name, op); + + Line(string_format("// Create Op: %s", op.Type().c_str())); + + Line(string_format("auto %s = lite::LiteOpRegistry::Global().Create(\"%s\");", + op_name.c_str(), op.Type().c_str())); + + CHECK(op.HasAttr(kKernelTypeAttr)) + << "the kernel type should be specified before generate code."; + auto kernel_type = op.GetAttr(kKernelTypeAttr); + Line(string_format("%s->Attach(%s, exec_scope);", op_name.c_str(), + (op_name + "_desc").c_str())); + + // Create kernel + auto kernel_name = KernelUniqueName(); + Line(string_format( + "auto %s = std::move(%s->CreateKernels(valid_places, \"%s\").front());", + kernel_name.c_str(), op_name.c_str(), kernel_type.c_str())); + + // Set Context for kernel + // clang-format off + Line(string_format("%s->SetContext(lite::ContextScheduler::Global().NewContext(%s->target()));", kernel_name.c_str(), kernel_name.c_str())); // NOLINT + // clang-format on + + Line(string_format("ops.push_back(%s);", op_name.c_str())); + Line(string_format("kernels.push_back(std::move(%s));", kernel_name.c_str())); + + op_kinds_.insert(op.Type()); + kernel_kinds_.insert(kernel_type); +} +} // namespace gencode +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/gen_code/gen_code.h b/paddle/fluid/lite/gen_code/gen_code.h new file mode 100644 index 00000000000..1a55483f03a --- /dev/null +++ b/paddle/fluid/lite/gen_code/gen_code.h @@ -0,0 +1,254 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include +#include "paddle/fluid/lite/core/compatible_tensor.h" +#include "paddle/fluid/lite/core/framework.pb.h" +#include "paddle/fluid/lite/core/program.h" +#include "paddle/fluid/lite/core/target_wrapper.h" +#include "paddle/fluid/lite/model_parser/cpp/op_desc.h" +#include "paddle/fluid/lite/model_parser/desc_apis.h" +#include "paddle/fluid/lite/utils/string.h" + +namespace paddle { +namespace lite { +namespace gencode { + +struct TensorRepr { + TensorRepr() = default; + TensorRepr(PrecisionType dtype, const std::vector &ddim, + void *raw_data, size_t num_bytes) + : dtype(dtype), ddim(ddim), raw_data(raw_data), num_bytes(num_bytes) {} + + PrecisionType dtype; + lite::DDim ddim; + const void *raw_data; + size_t num_bytes{}; +}; + +class Module { + std::vector ops; + std::vector weights; + std::vector tmp_vars_; + std::stringstream stream_; + std::set kernel_kinds_; + std::set op_kinds_; + + int line_indent_{}; + const int indent_unit_{2}; + + public: + void NewOp(const cpp::OpDesc &desc) { ops.push_back(desc); } + void NewWeight(const TensorRepr &x) { weights.push_back(x); } + void NewTmpVar(const std::string &x) { tmp_vars_.push_back(x); } + + std::stringstream &stream() { return stream_; } + + void AddHeaderIncludeGenCode(); + + void AddNamespaceBegin() { + Line("namespace paddle {"); + Line("namespace gencode{"); + Line(""); + } + + void AddNamespaceEnd() { + Line(""); + Line("} // namespace gencode"); + Line("} // namespace paddle"); + } + + void AddInitFuncBegin() { + Line("void PaddlePredictor::Init() {"); + Line(""); + IncIndent(); + } + + void AddInitFuncEnd() { + DecIndent(); + Line(""); + Line("}"); + } + + void AddScopeDecl() { + Line("lite::Scope* scope = static_cast(raw_scope_);"); + + // clang-format off + Line("lite::Scope* exec_scope = static_cast(raw_exe_scope_);"); // NOLINT + // clang-format on + + // Create feed and fetch in exec_scope. 
+ Line(string_format("exec_scope->Var(%s);", Repr("feed").c_str())); + Line(string_format("exec_scope->Var(%s);", Repr("fetch").c_str())); + } + + void AddValidPlaceDecl() { + // clang-format off + Line("std::vector valid_places({lite::Place({TARGET(kX86), PRECISION(kFloat), DATALAYOUT(kNCHW)}), lite::Place({TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)})});"); // NOLINT + // clang-format on + } + + void AddMemberCast() { + Line("// Cast the raw members"); + // clang-format off + Line(string_format("auto& ops = *static_cast>*>(raw_ops_);")); // NOLINT + Line(string_format("auto& kernels = *static_cast>*>(raw_kernels_);")); // NOLINT + // clang-format on + Line(""); + } + + void AddWeight(const std::string &name, const TensorRepr &tensor); + + void AddTmpVar(const std::string &x) { + Line(string_format("// Create temporary variable: %s", x.c_str())); + Line(string_format("exec_scope->Var(%s);", Repr(x).c_str())); + Line(""); + } + + void AddOp(const cpp::OpDesc &op); + + void AddOpDescHelper(const std::string &op_id, const cpp::OpDesc &desc); + + void AddOpCompileDeps() { + Line(""); + Line("// Add Operator compile deps"); + for (auto &op_type : op_kinds_) { + Line(string_format("USE_LITE_OP(%s)", op_type.c_str())); + } + Line(""); + } + void AddKernelCompileDeps() { + Line("// Add Kernel compile deps"); + + std::string op_type, alias; + Place place; + for (auto &kernel_type : kernel_kinds_) { + KernelBase::ParseKernelType(kernel_type, &op_type, &alias, &place); + Line(string_format("USE_LITE_KERNEL(%s, %s, %s, %s, %s)", // + op_type.c_str(), // + TargetRepr(place.target).c_str(), + PrecisionRepr(place.precision).c_str(), + DataLayoutRepr(place.layout).c_str(), alias.c_str())); + } + } + + private: + std::string WeightUniqueName() const { + return "w_" + std::to_string(weight_counter_++); + } + std::string TmpVarUniqueName() const { + return "tmp_" + std::to_string(tmp_var_counter_++); + } + std::string OpUniqueName() const { + return "op_" + std::to_string(op_counter_++); + } + std::string KernelUniqueName() const { + return "kernel_" + std::to_string(kernel_counter_++); + } + + std::string DataRepr(const std::string &raw_data, PrecisionType dtype); + + void IncIndent() { line_indent_++; } + void DecIndent() { line_indent_--; } + + void Line(const std::string &x) { + std::string indent_str(line_indent_ * indent_unit_, ' '); + stream() << indent_str << x << "\n"; + } + + private: + mutable int weight_counter_{}; + mutable int tmp_var_counter_{}; + mutable int op_counter_{}; + mutable int kernel_counter_{}; +}; + +class ProgramCodeGenerator { + public: + ProgramCodeGenerator(const framework::proto::ProgramDesc &program, + const lite::Scope &exec_scope) + : program_(program), exec_scope_(exec_scope) { + LOG(INFO) << program.DebugString(); + } + + std::string GenCode() { + Module m; + m.AddHeaderIncludeGenCode(); + m.AddNamespaceBegin(); + m.AddInitFuncBegin(); + m.AddMemberCast(); + m.AddScopeDecl(); + m.AddValidPlaceDecl(); + + AddWeights(&m); + AddTmpVars(&m); + AddOps(&m); + + m.AddInitFuncEnd(); + m.AddNamespaceEnd(); + + m.AddOpCompileDeps(); + m.AddKernelCompileDeps(); + + return m.stream().str(); + } + + void AddWeights(Module *m) { + for (auto &var : program_.blocks(0).vars()) { + if (var.persistable()) { + auto name = var.name(); + if (name == "feed" || name == "fetch") continue; + const auto &tensor = exec_scope_.FindVar(name)->Get(); + TensorRepr repr; + TensorToRepr(tensor, &repr); + m->AddWeight(name, repr); + } + } + } + void AddTmpVars(Module *m) { + for (auto &var : 
program_.blocks(0).vars()) { + if (!var.persistable()) { + m->AddTmpVar(var.name()); + } + } + } + void AddOps(Module *m) { + for (auto &op : program_.blocks(0).ops()) { + pb::OpDesc pb_desc(op); + cpp::OpDesc cpp_desc; + TransformOpDescPbToCpp(pb_desc, &cpp_desc); + m->AddOp(cpp_desc); + } + } + + private: + void TensorToRepr(const lite::Tensor &tensor, TensorRepr *repr) { + repr->ddim = tensor.dims(); + // TODO(Superjomn) support other types. + repr->dtype = PRECISION(kFloat); + repr->raw_data = tensor.data(); + repr->num_bytes = repr->ddim.production() * sizeof(float); + } + + private: + const framework::proto::ProgramDesc &program_; + const lite::Scope &exec_scope_; +}; + +} // namespace gencode +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/gen_code/gen_code_test.cc b/paddle/fluid/lite/gen_code/gen_code_test.cc new file mode 100644 index 00000000000..96ef56e857e --- /dev/null +++ b/paddle/fluid/lite/gen_code/gen_code_test.cc @@ -0,0 +1,139 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/gen_code/gen_code.h" +#include +#include +#include +#include +#include +#include +#include "paddle/fluid/lite/core/compatible_tensor.h" +#include "paddle/fluid/lite/core/context.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/scope.h" +#include "paddle/fluid/lite/model_parser/cpp/op_desc.h" +#include "paddle/fluid/lite/model_parser/model_parser.h" + +DEFINE_string(optimized_model, "", ""); +DEFINE_string(generated_code_file, "__generated_code__.cc", ""); + +namespace paddle { +namespace lite { +namespace gencode { + +// Manually construct a program. +TEST(gen_code, manual) { + // For holding the weights. + lite::Scope scope; + // For holding the temporary variables. + auto &tmp_scope = scope.NewScope(); + + // Create weight variables. + auto *w0 = scope.Var("w0")->GetMutable(); + // Create temporary variables. + auto *a = tmp_scope.Var("x")->GetMutable(); + tmp_scope.Var("out")->GetMutable(); + + // Set weights. 
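// Editorial note on the Assign() calls below: Tensor::Assign<T, DimT, Target>,
// added to the tensor headers earlier in this patch, resizes the tensor to
// `dim`, takes a mutable_data<T>(Target) pointer, and copies the host buffer
// in with CopySync<Target>; for kX86/kHost that copy is a host-to-host memcpy.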
+ std::vector w0_data({0, 1, 2, 3}); + w0->Assign( + w0_data.data(), lite::DDim{std::vector({2, 2})}); + + std::vector a_data({0, 1, 2, 3}); + a->Assign( + a_data.data(), lite::DDim{std::vector({2, 2})}); + + std::vector valid_places({ + Place{TARGET(kX86), PRECISION(kFloat)}, + Place{TARGET(kHost), PRECISION(kFloat)}, + Place{TARGET(kHost), PRECISION(kAny)}, + }); + auto mul_op = LiteOpRegistry::Global().Create("mul"); + cpp::OpDesc mul_op_desc; + mul_op_desc.SetType("mul"); + mul_op_desc.SetInput("X", {"x"}); + mul_op_desc.SetInput("Y", {"w0"}); + mul_op_desc.SetAttr("x_num_col_dims", 1); + mul_op_desc.SetAttr("y_num_col_dims", 1); + mul_op_desc.SetOutput("Out", {"out"}); + + mul_op->Attach(mul_op_desc, &tmp_scope); + auto mul_kernel = std::move(mul_op->CreateKernels(valid_places).front()); + auto fc_ctx = ContextScheduler::Global().NewContext(TARGET(kX86)); + mul_op->CheckShape(); + mul_op->InferShape(); + mul_kernel->SetContext(std::move(fc_ctx)); + mul_kernel->Launch(); +} + +TEST(gen_code, auto_gen) { + std::vector w0_data({0, 1, 2, 3}); + TensorRepr w0(PRECISION(kFloat), std::vector({2, 2}), w0_data.data(), + w0_data.size() * sizeof(float)); + + std::vector w1_data({0.01, 1.2, 2.3, 3.4, 1.1, 2.2}); + TensorRepr w1(PRECISION(kFloat), std::vector({3, 2}), w1_data.data(), + w1_data.size() * sizeof(float)); + + cpp::OpDesc op0; + op0.SetType("mul"); + op0.SetInput("X", {"a", "b"}); + op0.SetOutput("Out", {"out0"}); + op0.SetAttr("desc", "this is a desc"); + op0.SetAttr("x_col", 1); + op0.SetAttr("y_col", 2); + op0.SetAttr(kKernelTypeAttr, "x86"); + + gencode::Module module; + module.AddHeaderIncludeGenCode(); + + module.AddNamespaceBegin(); + module.AddInitFuncBegin(); + + module.AddMemberCast(); + + module.AddWeight("w0", w0); + module.AddWeight("w1", w1); + module.AddTmpVar("a"); + module.AddTmpVar("b"); + + module.AddOp(op0); + + module.AddInitFuncEnd(); + module.AddNamespaceEnd(); + + LOG(INFO) << module.stream().str(); +} + +TEST(gen_code, optimized_program) { + lite::Scope scope; + framework::proto::ProgramDesc desc; + LoadModel(FLAGS_optimized_model, &scope, &desc); + + ProgramCodeGenerator codegen(desc, scope); + + std::ofstream file(FLAGS_generated_code_file); + + file << codegen.GenCode(); + + file.close(); +} + +} // namespace gencode +} // namespace lite +} // namespace paddle + +USE_LITE_OP(mul); +USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/gen_code/generated_code_test.cc b/paddle/fluid/lite/gen_code/generated_code_test.cc new file mode 100644 index 00000000000..e5874a2e149 --- /dev/null +++ b/paddle/fluid/lite/gen_code/generated_code_test.cc @@ -0,0 +1,46 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
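// Editorial note: this test is compiled against the __generated_code__.cc that
// test_gen_code_lite writes into the build tree (see the add_dependencies()
// wiring in gen_code/CMakeLists.txt above), so it exercises a model that was
// translated to C++ ahead of time, through the PaddlePredictor API instead of
// the model parser.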
+ +#include +#include +#include "paddle/fluid/lite/gen_code/paddle_infer.h" + +namespace paddle { +namespace lite { + +TEST(PaddlePredictor, Init) { + gencode::PaddlePredictor predictor; + predictor.Init(); +} + +TEST(PaddlePredictor, Run) { + gencode::PaddlePredictor predictor; + predictor.Init(); + + LOG(INFO) << "run the generated code"; + auto input_tensor = predictor.GetInput(0); + input_tensor->Resize(std::vector({100, 100})); + auto* data = input_tensor->mutable_data(); + for (int i = 0; i < 100 * 100; i++) { + data[i] = i; + } + + predictor.Run(); + + auto output_tensor = predictor.GetOutput(0); + LOG(INFO) << "output: " << output_tensor->data()[0]; +} + +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/gen_code/paddle_infer.cc b/paddle/fluid/lite/gen_code/paddle_infer.cc new file mode 100644 index 00000000000..ac4e99cb714 --- /dev/null +++ b/paddle/fluid/lite/gen_code/paddle_infer.cc @@ -0,0 +1,139 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/gen_code/paddle_infer.h" +#include "paddle/fluid/lite/core/compatible_tensor.h" +#include "paddle/fluid/lite/core/op_lite.h" + +namespace paddle { +namespace gencode { + +void Tensor::Resize(const Tensor::ddim_t &shape) { + CHECK(raw_mutable_tensor_); + auto *tensor = static_cast(raw_mutable_tensor_); + tensor->Resize(shape); +} + +#define FOR_EACH_TYPE(HANDLE) \ + HANDLE(int); \ + HANDLE(float); \ + HANDLE(int8_t); \ + HANDLE(int64_t); + +#define IMPL_DATA(T) \ + template <> \ + const T *Tensor::data() const { \ + CHECK(raw_tensor_); \ + const auto *tensor = static_cast(raw_tensor_); \ + return tensor->data(); \ + } +FOR_EACH_TYPE(IMPL_DATA); +#undef IMPL_DATA + +#define IMPL_MUTABLE_DATA(T) \ + template <> \ + T *Tensor::mutable_data() { \ + CHECK(raw_mutable_tensor_); \ + auto *tensor = static_cast(raw_mutable_tensor_); \ + return tensor->mutable_data(); \ + } +FOR_EACH_TYPE(IMPL_MUTABLE_DATA); +#undef IMPL_MUTABLE_DATA + +PaddlePredictor::PaddlePredictor() { + raw_ops_ = new std::vector>; + raw_kernels_ = new std::vector>; + raw_scope_ = new lite::Scope; + raw_exe_scope_ = &(static_cast(raw_scope_)->NewScope()); +} + +std::unique_ptr PaddlePredictor::GetTensor( + const std::string &id) const { + auto *exe_scope = static_cast(raw_exe_scope_); + const auto *var = exe_scope->FindVar(id); + const auto &tensor = var->Get(); + return std::unique_ptr(new Tensor(&tensor, nullptr)); +} + +std::unique_ptr PaddlePredictor::GetMutableTensor( + const std::string &id) { + auto *exe_scope = static_cast(raw_exe_scope_); + auto *var = exe_scope->FindVar(id); + auto *tensor = var->GetMutable(); + return std::unique_ptr(new Tensor(nullptr, tensor)); +} + +#define CAST_OPS \ + auto *ops = \ + static_cast> *>(raw_ops_); +#define CAST_KERNELS \ + auto *kernels = \ + static_cast> *>( \ + raw_kernels_); +#define CAST_SCOPE auto *scope = static_cast(raw_scope_); + +PaddlePredictor::~PaddlePredictor() { + CAST_OPS + CAST_KERNELS + 
CAST_SCOPE
+
+  if (ops) {
+    delete ops;
+  }
+  if (kernels) {
+    delete kernels;
+  }
+  if (scope) {
+    delete scope;
+  }
+}
+
+void PaddlePredictor::Run() {
+  CAST_OPS
+  CAST_KERNELS
+
+  CHECK(ops);
+  CHECK(kernels);
+  CHECK_EQ(ops->size(), kernels->size());
+
+  for (size_t i = 0; i < ops->size(); i++) {
+    LOG(INFO) << "Running the " << i << "-th operator";
+    ops->at(i)->InferShape();
+    kernels->at(i)->Launch();
+  }
+}
+
+std::unique_ptr<Tensor> PaddlePredictor::GetInput(size_t offset) {
+  auto *exec_scope = static_cast<lite::Scope *>(raw_exe_scope_);
+  auto *_feed_list = exec_scope->FindVar("feed");
+  CHECK(_feed_list) << "no feed variable in exec_scope";
+  auto *feed_list = _feed_list->GetMutable<std::vector<lite::Tensor>>();
+  if (offset >= feed_list->size()) {
+    feed_list->resize(offset + 1);
+  }
+
+  return std::unique_ptr<Tensor>(new Tensor(nullptr, &feed_list->at(offset)));
+}
+
+std::unique_ptr<Tensor> PaddlePredictor::GetOutput(size_t offset) {
+  auto *exec_scope = static_cast<lite::Scope *>(raw_exe_scope_);
+  auto *_fetch_list = exec_scope->FindVar("fetch");
+  CHECK(_fetch_list) << "no fetch variable in exec_scope";
+  auto &fetch_list = *_fetch_list->GetMutable<std::vector<lite::Tensor>>();
+  CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow";
+  return std::unique_ptr<Tensor>(new Tensor(&fetch_list.at(offset), nullptr));
+}
+
+}  // namespace gencode
+}  // namespace paddle
diff --git a/paddle/fluid/lite/gen_code/paddle_infer.h b/paddle/fluid/lite/gen_code/paddle_infer.h
new file mode 100644
index 00000000000..99158b0503c
--- /dev/null
+++ b/paddle/fluid/lite/gen_code/paddle_infer.h
@@ -0,0 +1,70 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace paddle {
+namespace gencode {
+
+/// Zero Copy Tensor.
+class Tensor {
+ public:
+  using ddim_t = std::vector<int64_t>;
+
+  Tensor(const void *raw_tensor, void *raw_mutable_tensor)
+      : raw_tensor_(raw_tensor), raw_mutable_tensor_(raw_mutable_tensor) {}
+
+  void Resize(const ddim_t &shape);
+  template <typename T>
+  const T *data() const;
+  template <typename T>
+  T *mutable_data();
+
+ private:
+  const void *raw_tensor_;
+  void *raw_mutable_tensor_{};
+};
+
+/*
+ * Predictor for the generated code.
+ */
+class PaddlePredictor {
+ public:
+  void Init();
+
+  std::unique_ptr<Tensor> GetTensor(const std::string &id) const;
+  std::unique_ptr<Tensor> GetMutableTensor(const std::string &id);
+
+  // Get offset-th col of feed.
+  std::unique_ptr<Tensor> GetInput(size_t offset);
+
+  std::unique_ptr<Tensor> GetOutput(size_t offset);
+
+  void Run();
+
+  PaddlePredictor();
+  ~PaddlePredictor();
+
+ private:
+  void *raw_ops_;
+  void *raw_kernels_;
+  void *raw_scope_{};
+  void *raw_exe_scope_{};  // raw_exe_scope is not owned.
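+  // Editorial note: the raw_* members are void* so this public header needs
+  // no lite framework includes; paddle_infer.cc casts them back to their
+  // concrete types (vectors of op/kernel pointers and lite::Scope) through
+  // the CAST_OPS / CAST_KERNELS / CAST_SCOPE macros above.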
+}; + +} // namespace gencode +} // namespace paddle diff --git a/paddle/fluid/lite/kernels/x86/CMakeLists.txt b/paddle/fluid/lite/kernels/x86/CMakeLists.txt index 1c2937df5be..62db7a0a226 100644 --- a/paddle/fluid/lite/kernels/x86/CMakeLists.txt +++ b/paddle/fluid/lite/kernels/x86/CMakeLists.txt @@ -25,7 +25,7 @@ set(x86_kernels relu_compute_x86 fc_compute_x86 scale_compute_x86 - softmax_compute_x86 + softmax_compute_x86 dropout_compute_x86 concat_compute_x86 ) diff --git a/paddle/fluid/lite/model_parser/cpp/op_desc.cc b/paddle/fluid/lite/model_parser/cpp/op_desc.cc index 01ee4703143..b6b854d72af 100644 --- a/paddle/fluid/lite/model_parser/cpp/op_desc.cc +++ b/paddle/fluid/lite/model_parser/cpp/op_desc.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/lite/model_parser/cpp/op_desc.h" #include +#include namespace paddle { namespace lite { @@ -44,12 +45,13 @@ FindAttr(const cpp::OpDesc& desc, const std::string& name) { return std::make_pair(it, attr_it); } -#define GET_IMPL_ONE(T, repr__) \ - template <> \ - T OpDesc::GetAttr(const std::string& name) const { \ - auto pair = FindAttr(*this, name); \ - CHECK(pair.second->second == AttrType::repr__); \ - return pair.first->second.get(); \ +#define GET_IMPL_ONE(T, repr__) \ + template <> \ + T OpDesc::GetAttr(const std::string& name) const { \ + auto pair = FindAttr(*this, name); \ + CHECK(pair.second->second == AttrType::repr__) \ + << "required type is " << #repr__ << " not match the true type"; \ + return pair.first->second.get(); \ } GET_IMPL_ONE(int32_t, INT); diff --git a/paddle/fluid/lite/model_parser/pb/op_desc.cc b/paddle/fluid/lite/model_parser/pb/op_desc.cc index 1de4fb275e4..7f84510a3fa 100644 --- a/paddle/fluid/lite/model_parser/pb/op_desc.cc +++ b/paddle/fluid/lite/model_parser/pb/op_desc.cc @@ -44,7 +44,7 @@ FindAttr(framework::proto::OpDesc *desc, const std::string &name) { } SET_IMPL_ONE(int, INT, i); SET_IMPL_ONE(float, FLOAT, f); -SET_IMPL_ONE(bool, FLOAT, f); +SET_IMPL_ONE(bool, BOOLEAN, b); template <> void OpDesc::SetAttr>(const std::string &name, diff --git a/paddle/fluid/lite/operators/feed_op.cc b/paddle/fluid/lite/operators/feed_op.cc index 8c7d33e9e59..c977adfd4b3 100644 --- a/paddle/fluid/lite/operators/feed_op.cc +++ b/paddle/fluid/lite/operators/feed_op.cc @@ -38,8 +38,8 @@ class FeedOp : public OpLite { auto feed_var_name = opdesc.Input("X").front(); auto* feed_var = scope->FindVar(feed_var_name); CHECK(feed_var); - auto& feed_tensor_list = feed_var->Get>(); - param_.feed_list = &feed_tensor_list; + auto* feed_tensor_list = feed_var->GetMutable>(); + param_.feed_list = feed_tensor_list; auto out_name = opdesc.Output("Out").front(); auto* out_var = scope->FindVar(out_name); diff --git a/paddle/fluid/lite/operators/mul_op.h b/paddle/fluid/lite/operators/mul_op.h index e21540d2c6f..7aa1581bb2a 100644 --- a/paddle/fluid/lite/operators/mul_op.h +++ b/paddle/fluid/lite/operators/mul_op.h @@ -45,10 +45,11 @@ class MulOpLite : public OpLite { CHECK(var); param_.x = var->GetMutable(); var = scope->FindVar(W); - CHECK(var); + CHECK(var) << "no var called " << W; param_.y = var->GetMutable(); - CHECK(scope->FindVar(out)); - param_.output = scope->FindVar(out)->GetMutable(); + var = scope->FindVar(out); + CHECK(var) << "no var called " << out; + param_.output = var->GetMutable(); param_.x_num_col_dims = op_desc.GetAttr("x_num_col_dims"); param_.y_num_col_dims = op_desc.GetAttr("y_num_col_dims"); diff --git a/paddle/fluid/lite/tools/build.sh b/paddle/fluid/lite/tools/build.sh index 96a378441b4..5afbc003cf7 100755 --- 
a/paddle/fluid/lite/tools/build.sh
+++ b/paddle/fluid/lite/tools/build.sh
@@ -4,15 +4,26 @@ set -ex
 TESTS_FILE="./lite_tests.txt"
 
 readonly common_flags="-DWITH_LITE=ON -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF -DWITH_PYTHON=OFF -DWITH_TESTING=ON -DLITE_WITH_ARM=OFF"
+
+# for code gen, a source file is generated after a test, but is depended on by some targets in cmake.
+# here we fake an empty file to make cmake work.
+function prepare_for_codegen {
+    # in build directory
+    mkdir -p ./paddle/fluid/lite/gen_code
+    touch ./paddle/fluid/lite/gen_code/__generated_code__.cc
+}
 
 function cmake_x86 {
+    prepare_for_codegen
     cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags}
 }
 
 function cmake_x86_for_CI {
+    prepare_for_codegen
     cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags} -DLITE_WITH_PROFILE=ON
 }
 
 function cmake_gpu {
+    prepare_for_codegen
     cmake .. " -DWITH_GPU=ON {common_flags} -DLITE_WITH_GPU=ON"
 }
@@ -31,18 +42,19 @@ function cmake_arm {
         -DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2
 }
 
-function build {
-    file=$1
-    for _test in $(cat $file); do
-        make $_test -j$(expr $(nproc) - 2)
-    done
-}
+# function build {
+#     file=$1
+#     for _test in $(cat $file); do
+#         make $_test -j$(expr $(nproc) - 2)
+#     done
+# }
 
 # It will eagerly test all lite related unittests.
 function test_lite {
     local file=$1
     echo "file: ${file}"
     for _test in $(cat $file); do
+        make $_test -j$(expr $(nproc) - 2)
        ctest -R $_test -V
     done
 }
@@ -86,7 +98,7 @@ function build_test_server {
     cd ./build
     export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/paddle/build/third_party/install/mklml/lib"
     cmake_x86_for_CI
-    build $TESTS_FILE
+    #build $TESTS_FILE
     test_lite $TESTS_FILE
 }
@@ -165,10 +177,10 @@ function main {
                 TESTS_FILE="${i#*=}"
                 shift
                 ;;
-            build)
-                build $TESTS_FILE
-                shift
-                ;;
+            # build)
+            #     build $TESTS_FILE
+            #     shift
+            #     ;;
             cmake_x86)
                 cmake_x86
                 shift
diff --git a/paddle/fluid/lite/utils/CMakeLists.txt b/paddle/fluid/lite/utils/CMakeLists.txt
index bd161555f08..08eeaa54f8e 100644
--- a/paddle/fluid/lite/utils/CMakeLists.txt
+++ b/paddle/fluid/lite/utils/CMakeLists.txt
@@ -8,4 +8,4 @@ set(utils_DEPS glog)
 lite_cc_test(test_varient SRCS varient_test.cc DEPS utils_lite)
 cc_library(any_lite SRCS any.cc)
 
-cc_library(utils_lite SRCS cp_logging.cc DEPS ${utils_DEPS} any_lite)
+cc_library(utils_lite SRCS cp_logging.cc string.cc DEPS ${utils_DEPS} any_lite)
diff --git a/paddle/fluid/lite/utils/string.cc b/paddle/fluid/lite/utils/string.cc
new file mode 100644
index 00000000000..c608c31fb9f
--- /dev/null
+++ b/paddle/fluid/lite/utils/string.cc
@@ -0,0 +1,19 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
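+// Editorial note: string.h (below) is header-only for now, so this file only
+// anchors the namespaces; it exists chiefly so the utils_lite target (see the
+// CMakeLists.txt change above) gains a string.cc translation unit.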
+
+#include "paddle/fluid/lite/utils/string.h"
+
+namespace paddle {
+namespace lite {}  // namespace lite
+}  // namespace paddle
diff --git a/paddle/fluid/lite/utils/string.h b/paddle/fluid/lite/utils/string.h
new file mode 100644
index 00000000000..3e06f93a252
--- /dev/null
+++ b/paddle/fluid/lite/utils/string.h
@@ -0,0 +1,77 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <stdarg.h>  // For va_start, etc.
+#include <algorithm>
+#include <cstring>
+#include <memory>  // For std::unique_ptr
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace paddle {
+namespace lite {
+
+static std::string string_format(const std::string fmt_str, ...) {
+  /* Reserve two times as much as the length of the fmt_str */
+  int final_n, n = (static_cast<int>(fmt_str.size())) * 2;
+  std::unique_ptr<char[]> formatted;
+  va_list ap;
+  while (1) {
+    formatted.reset(
+        new char[n]); /* Wrap the plain char array into the unique_ptr */
+    std::strcpy(&formatted[0], fmt_str.c_str());  // NOLINT
+    va_start(ap, fmt_str);
+    final_n = vsnprintf(&formatted[0], n, fmt_str.c_str(), ap);
+    va_end(ap);
+    if (final_n < 0 || final_n >= n)
+      n += abs(final_n - n + 1);
+    else
+      break;
+  }
+  return std::string(formatted.get());
+}
+
+template <typename T>
+static std::string to_string_with_precision(const T& v, const int n = 6) {
+  std::stringstream ss;
+  ss.precision(n);
+  ss << std::fixed << v;
+  return ss.str();
+}
+
+static std::string Join(const std::vector<std::string>& vec,
+                        const std::string& delim) {
+  if (vec.empty()) return "";
+
+  std::stringstream ss;
+  for (size_t i = 0; i < vec.size() - 1; i++) ss << vec[i] << delim;
+  if (!vec.empty()) {
+    ss << vec.back();
+  }
+  return ss.str();
+}
+
+static std::string Repr(const std::string& x) { return "\"" + x + "\""; }
+
+static std::string Repr(const std::vector<std::string>& v) {
+  std::vector<std::string> tmp;
+  std::transform(v.begin(), v.end(), std::back_inserter(tmp),
+                 [](const std::string& x) { return Repr(x); });
+  return "{" + Join(tmp, ",") + "}";
+}
+
+}  // namespace lite
+}  // namespace paddle
--
GitLab