diff --git a/paddle/fluid/lite/api/cxx_api.h b/paddle/fluid/lite/api/cxx_api.h
index ba2d784b942c04c169a19d4747352d9048fd6ff2..915a469a58765f102ff01c28ed9856d185311168 100644
--- a/paddle/fluid/lite/api/cxx_api.h
+++ b/paddle/fluid/lite/api/cxx_api.h
@@ -50,6 +50,7 @@ class ExecutorLite {
     optimizer_.KernelPickPreferPlace(prefer_place);
     core::KernelPickFactor factor;
     factor.ConsiderTarget();
+    factor.ConsiderPrecision();
     optimizer_.Run(std::move(program), valid_places, factor);
     program_ = optimizer_.GenRuntimeProgram();
   }
diff --git a/paddle/fluid/lite/arm/math/CMakeLists.txt b/paddle/fluid/lite/arm/math/CMakeLists.txt
index dd439bbf0f6e23b721c1f61fb5e39d821b79fb26..32f367f703e6cdf1484a2bf2e53edcf38f879357 100644
--- a/paddle/fluid/lite/arm/math/CMakeLists.txt
+++ b/paddle/fluid/lite/arm/math/CMakeLists.txt
@@ -35,6 +35,8 @@ cc_library(math_arm SRCS
     split.cc
     activation.cc
     dropout.cc
+    gemm_prepacked_int8.cc
+    gemv_arm_int8.cc
     DEPS ${lite_kernel_deps} eigen3 framework_proto_lite)
   # TODO(TJ): fix me do not deps proto
  
diff --git a/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h b/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h
index b4778aab182abf368461984bbfb9ef827b6c0fb9..29ff767e772cdd63149c965107d1c448788dc9db 100644
--- a/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h
+++ b/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h
@@ -25,7 +25,7 @@ namespace fusion {
 
 /* The model trained by fluid quantization is a simulation of real int8.
  * The quantized Ops(conv2d, mul, depthwise conv2d etc) have fake_quantop
- * in front  and fake_dequantop behind.
+ * in front and fake_dequantop behind.
  *
  * When in int8 mode, the pattern like "fake_quant + quantized_op +
  * fake_dequant"
diff --git a/paddle/fluid/lite/core/mir/pattern_matcher_high_api.cc b/paddle/fluid/lite/core/mir/pattern_matcher_high_api.cc
index 9f0b2e1f3225d708f0e71c255bad2eec71628f76..322ddb29064de5eb8771f50508d20ba9ba7f053c 100644
--- a/paddle/fluid/lite/core/mir/pattern_matcher_high_api.cc
+++ b/paddle/fluid/lite/core/mir/pattern_matcher_high_api.cc
@@ -41,7 +41,7 @@ void FuseBase::DeleteInterNodes(SSAGraph *graph) {
     }
   }
 
-  LOG(INFO) << "keys: " << key2nodes_.size();
+  VLOG(4) << "keys: " << key2nodes_.size();
   std::unordered_set<const Node *> nodes2rm;
   for (auto &matched : key2nodes_) {
     for (const auto &key : keys) {
diff --git a/paddle/fluid/lite/core/op_registry.h b/paddle/fluid/lite/core/op_registry.h
index 1052419ecda8bcad8d919c0d8f8e2ab3f969440f..fc4cd25fa56eec295c522857a67e17315ed49ba8 100644
--- a/paddle/fluid/lite/core/op_registry.h
+++ b/paddle/fluid/lite/core/op_registry.h
@@ -80,6 +80,8 @@ class KernelRegistry final {
               KernelRegistryForTarget<TARGET(kARM), PRECISION(kAny),
                                       DATALAYOUT(kAny)> *,  //
               KernelRegistryForTarget<TARGET(kARM), PRECISION(kFloat),
+                                      DATALAYOUT(kNCHW)> *,  //
+              KernelRegistryForTarget<TARGET(kARM), PRECISION(kInt8),
                                       DATALAYOUT(kNCHW)> *  //
               >;
 
diff --git a/paddle/fluid/lite/core/optimizer.h b/paddle/fluid/lite/core/optimizer.h
index ea65329b668c89405ca43f55121f2ca1790539c0..c42699ff10a6e9e926693c46b38f3cd6343a4dd0 100644
--- a/paddle/fluid/lite/core/optimizer.h
+++ b/paddle/fluid/lite/core/optimizer.h
@@ -58,7 +58,6 @@ class Optimizer {
 #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
           "lite_elementwise_add_activation_fuse_pass",  //
 #endif
-          "lite_fc_fuse_pass",              //
           "static_kernel_pick_pass",        //
           "variable_place_inference_pass",  //
           "argument_type_display_pass",     //
diff --git a/paddle/fluid/lite/core/target_wrapper.h b/paddle/fluid/lite/core/target_wrapper.h
index c4a870ab83f0c61fc4a5116f8c3dd379e8ead9db..66fbc652203dc4045aeae5eca87df856e76febbe 100644
--- a/paddle/fluid/lite/core/target_wrapper.h
+++ b/paddle/fluid/lite/core/target_wrapper.h
@@ -38,6 +38,7 @@ enum class PrecisionType : int {
   kUnk = 0,
   kFloat,
   kInt8,
+  kInt32,
   kAny,  // any precision
   NUM,   // number of fields.
 };
@@ -48,6 +49,19 @@ enum class DataLayoutType : int {
   NUM,   // number of fields.
 };
 
+static size_t PrecisionTypeLength(PrecisionType type) {
+  switch (type) {  // presumably the per-element size in bytes; TODO confirm
+    case PrecisionType::kFloat:
+      return 4;
+    case PrecisionType::kInt8:
+      return 1;
+    case PrecisionType::kInt32:
+      return 4;
+    default:  // reached for kUnk, kAny and NUM
+      return 4;  // NOTE(review): silent fallback to 4; is that intended?
+  }
+}
+
 // Some helper macro to get a specific TargetType.
 #define TARGET(item__) paddle::lite::TargetType::item__
 // Some helper macro to get a specific PrecisionType.
@@ -87,7 +101,7 @@ static const std::string& TargetRepr(TargetType target) {
 
 static const std::string& PrecisionRepr(PrecisionType precision) {
   static const std::string precision2string[] = {"kUnk", "kFloat", "kInt8",
-                                                 "kAny"};
+                                                 "kInt32", "kAny"};
   auto x = static_cast<int>(precision);
   CHECK_LT(x, static_cast<int>(PRECISION(NUM)));
   return precision2string[x];
diff --git a/paddle/fluid/lite/kernels/arm/conv_compute.cc b/paddle/fluid/lite/kernels/arm/conv_compute.cc
index 5e9ddb6271684120c8cab68e6e10bade3a3ab015..af8f8e1242a32f58727ad1658b7db2cefbc1b653 100644
--- a/paddle/fluid/lite/kernels/arm/conv_compute.cc
+++ b/paddle/fluid/lite/kernels/arm/conv_compute.cc
@@ -92,6 +92,9 @@ void ConvCompute::Run() {
   // }
 }
 
+void ConvComputeInt8::PrepareForRun() {}  // TODO: stub, sets nothing up yet
+void ConvComputeInt8::Run() {}  // TODO: stub; registered below but a no-op
+
 }  // namespace arm
 }  // namespace kernels
 }  // namespace lite
@@ -112,3 +115,23 @@ REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW,
     .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kARM))})
     .Finalize();
+
+REGISTER_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW,
+                     paddle::lite::kernels::arm::ConvComputeInt8, def)
+    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
+    .BindInput("Filter",  // kInt8 like Input; only Bias is bound as kInt32
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindOutput("Output",  // output is declared kInt8 as well
+                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .Finalize();  // NOTE(review): ConvComputeInt8::Run() is an empty stub
+
+REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kInt8, kNCHW,
+                     paddle::lite::kernels::arm::ConvComputeInt8, def)
+    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
+    .BindInput("Filter",  // kInt8 like Input; only Bias is bound as kInt32
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindOutput("Output",  // output is declared kInt8 as well
+                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .Finalize();  // NOTE(review): same stub class as the conv2d int8 kernel
diff --git a/paddle/fluid/lite/kernels/arm/conv_compute.h b/paddle/fluid/lite/kernels/arm/conv_compute.h
index 21fabf8c3e8f7983a891265135c39b96aaf42e8d..e5d5721a3b30256bd14a165400723cc4563cd942 100644
--- a/paddle/fluid/lite/kernels/arm/conv_compute.h
+++ b/paddle/fluid/lite/kernels/arm/conv_compute.h
@@ -41,6 +41,25 @@ class ConvCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
       nullptr};
 };
 
+class ConvComputeInt8 : public KernelLite<TARGET(kARM), PRECISION(kInt8)> {
+ public:  // ARM kernel declared with kInt8 precision; owns impl_ (see dtor)
+  using param_t = operators::ConvParam;  // param type handed to ImplBase
+
+  void PrepareForRun() override;  // defined in conv_compute.cc
+
+  void Run() override;  // defined in conv_compute.cc
+
+  ~ConvComputeInt8() {  // deletes impl_ (NOTE(review): unsafe if copied)
+    if (impl_ != nullptr) {  // redundant: delete on nullptr is a no-op
+      delete impl_;
+    }
+  }
+
+ private:
+  lite::arm::math::ImplBase<TARGET(kARM), PRECISION(kInt8), param_t>* impl_{
+      nullptr};  // NOTE(review): never assigned anywhere in this diff
+};
+
 }  // namespace arm
 }  // namespace kernels
 }  // namespace lite