From 4a948cfc5ed6f4b4a367d04b34106f3877e3785b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=9F=B3=E6=99=93=E4=BC=9F?=
 <39303645+Shixiaowei02@users.noreply.github.com>
Date: Thu, 19 Sep 2019 22:29:13 +0800
Subject: [PATCH] add api_full_static target and fix building errors,
 test=develop (#2064)

* add api_full_static target and fix building errors, test=develop

* fix build errors, test=develop

* fix code style, test=develop

* fix lite/model_parser/pb/var_desc.cc, test=develop

* fix building errors, test=develop

* modify lite/tools/debug/CMakeLists.txt, test=develop
---
 CMakeLists.txt                                |  1 +
 cmake/generic.cmake                           |  5 +++--
 cmake/lite.cmake                              |  6 +++---
 lite/api/CMakeLists.txt                       |  2 ++
 lite/api/benchmark.cc                         |  3 ---
 lite/api/cxx_api_impl.cc                      |  4 ++++
 lite/api/model_test.cc                        |  3 ---
 lite/backends/cuda/math/cudnn_conv.cc         |  1 -
 lite/backends/cuda/math/cudnn_conv.h          | 18 +++++++++--------
 lite/backends/x86/dynamic_loader.cc           | 20 ++++++++++---------
 lite/backends/x86/math/beam_search.cc         |  9 ++++++---
 .../x86/math/detail/activation_functions.h    |  1 +
 lite/backends/x86/math/tree2col.cc            |  3 ++-
 lite/core/arena/framework.h                   |  9 +++++----
 lite/core/context.h                           |  4 ++--
 lite/core/device_info.h                       |  4 ++--
 lite/core/mir/pass_utils.cc                   |  2 +-
 .../mir/subgraph/subgraph_program_pass.cc     |  6 ++++--
 .../subgraph/subgraph_program_pass_test.cc    |  1 -
 lite/core/mir/type_layout_cast_pass.cc        |  1 -
 lite/core/mir/type_target_cast_pass.cc        |  1 -
 lite/core/op_registry.h                       |  2 +-
 lite/fluid/data_type.cc                       |  6 +++++-
 lite/fluid/selected_rows.cc                   |  4 ++--
 lite/fluid/selected_rows.h                    |  2 +-
 lite/kernels/cuda/calib_compute_cuda_test.cc  |  2 --
 lite/kernels/cuda/conv_compute_test.cc        |  3 ---
 .../cuda/elementwise_add_compute_test.cc      |  2 --
 lite/kernels/cuda/leaky_relu_compute_test.cc  |  1 -
 .../cuda/nearest_interp_compute_test.cc       |  3 ---
 lite/kernels/cuda/transpose_compute_test.cc   |  5 -----
 lite/kernels/cuda/yolo_box_compute_test.cc    |  2 --
 lite/kernels/x86/activation_compute.h         |  1 +
 .../naive_buffer/naive_buffer_wrapper_test.cc |  2 +-
 lite/model_parser/naive_buffer/op_desc.h      |  1 +
 lite/model_parser/naive_buffer/param_desc.cc  |  1 +
 lite/model_parser/naive_buffer/var_desc.cc    |  1 +
 lite/model_parser/pb/op_desc.h                |  1 +
 lite/model_parser/pb/var_desc.cc              |  1 +
 lite/operators/gru_unit_op.cc                 |  1 -
 lite/operators/im2sequence_op.cc              |  1 -
 lite/operators/is_empty_op.cc                 |  2 +-
 lite/operators/range_op.cc                    |  2 +-
 lite/operators/yolo_box_op.cc                 |  1 -
 .../kernels/affine_channel_compute_test.cc    |  2 --
 lite/tests/kernels/box_coder_compute_test.cc  |  6 ------
 lite/tests/kernels/cast_compute_test.cc       |  8 --------
 .../kernels/conv2d_transpose_compute_test.cc  |  1 -
 .../tests/kernels/elementwise_compute_test.cc |  8 --------
 lite/tests/kernels/fc_compute_test.cc         |  2 +-
 lite/tests/kernels/gru_unit_test.cc           |  2 +-
 lite/tests/kernels/lrn_compute_test.cc        |  1 -
 lite/tests/kernels/matmul_compute_test.cc     |  6 +++---
 lite/tests/kernels/pad2d_compute_test.cc      |  2 +-
 lite/tests/kernels/prior_box_compute_test.cc  |  1 -
 lite/tests/kernels/reduce_max_compute_test.cc |  4 ++--
 .../tests/kernels/reduce_mean_compute_test.cc |  4 ++--
 .../kernels/sequence_expand_compute_test.cc   |  6 ++----
 .../kernels/sequence_pool_compute_test.cc     |  2 +-
 lite/tests/math/gemm_int8_compute_test.cc     | 20 +++++++++----------
 lite/tools/ci_build.sh                        |  2 +-
 lite/tools/debug/CMakeLists.txt               |  5 ++++-
 lite/tools/debug/model_debug_tool.cc          | 12 ++++++++---
 lite/utils/logging.h                          |  4 ++++
 lite/utils/paddle_enforce.h                   |  2 +-
 65 files changed, 117 insertions(+), 134 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3643379acb..9834de8f96 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -176,6 +176,7 @@ include(generic)            # simplify cmake module
 include(ccache)             # set ccache for compilation
 include(util)               # set unittest and link libs
 include(version)            # set PADDLE_VERSION
+include(flags)
 
 set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
 set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index c7f12914a0..1bf77867b0 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -105,8 +105,8 @@ set_property(GLOBAL PROPERTY FLUID_MODULES "")
 function(find_fluid_modules TARGET_NAME)
   get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
   string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
-  string(FIND "${__target_path}" "fluid" pos)
-  if(pos GREATER 1)
+  string(FIND "${__target_path}" "lite" pos)
+  if((pos GREATER 0) OR (pos EQUAL 0))
     get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
     set(fluid_modules ${fluid_modules} ${TARGET_NAME})
     set_property(GLOBAL PROPERTY FLUID_MODULES "${fluid_modules}")
@@ -369,6 +369,7 @@ function(cc_binary TARGET_NAME)
   endif()
   get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
   target_link_libraries(${TARGET_NAME} ${os_dependency_modules})
+  find_fluid_modules(${TARGET_NAME})
 endfunction(cc_binary)
 
 function(cc_test TARGET_NAME)
diff --git a/cmake/lite.cmake b/cmake/lite.cmake
index 18aa0f5f82..9427fffab7 100644
--- a/cmake/lite.cmake
+++ b/cmake/lite.cmake
@@ -126,12 +126,12 @@ function(lite_cc_library TARGET)
             )
 
     if (args_SHARED OR ARGS_shared)
-        cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS} SHARED)
+        cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} SHARED)
     elseif (args_MODULE OR ARGS_module)
         add_library(${TARGET} MODULE ${args_SRCS})
         add_dependencies(${TARGET} ${deps} ${args_DEPS})
     else()
-        cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
+        cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps})
     endif()
     target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
 
@@ -163,7 +163,7 @@ function(lite_cc_binary TARGET)
             LIGHT_DEPS ${args_LIGHT_DEPS}
             HVY_DEPS ${args_HVY_DEPS}
             )
-    cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
+    cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps})
     target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
     if (NOT APPLE)
         # strip binary target to reduce size
diff --git a/lite/api/CMakeLists.txt b/lite/api/CMakeLists.txt
index 32ab388c86..97a6fb3072 100644
--- a/lite/api/CMakeLists.txt
+++ b/lite/api/CMakeLists.txt
@@ -210,6 +210,8 @@ if (NOT LITE_ON_TINY_PUBLISH)
       FPGA_DEPS ${fpga_kernels})
     # The final inference library for just MobileConfig.
     bundle_static_library(paddle_api_full paddle_api_full_bundled bundle_full_api)
+    get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
+    cc_library(api_full_static SRCS DEPS paddle_api_full cxx_api paddle_api light_api  ${cxx_api_deps} ${ops} ${host_kernels} ${cuda_kernels} program tensor memory naive_buffer types ${fluid_modules} protobuf)
 endif()
 bundle_static_library(paddle_api_light paddle_api_light_bundled bundle_light_api)
 #-----------------------------------------------------------------------------------------------------
diff --git a/lite/api/benchmark.cc b/lite/api/benchmark.cc
index ca7bfe7fe6..e525484c19 100644
--- a/lite/api/benchmark.cc
+++ b/lite/api/benchmark.cc
@@ -18,9 +18,6 @@
 #include <string>
 #include <vector>
 #include "lite/api/paddle_api.h"
-#include "lite/api/paddle_use_kernels.h"
-#include "lite/api/paddle_use_ops.h"
-#include "lite/api/paddle_use_passes.h"
 #include "lite/api/test_helper.h"
 #include "lite/core/device_info.h"
 #include "lite/utils/cp_logging.h"
diff --git a/lite/api/cxx_api_impl.cc b/lite/api/cxx_api_impl.cc
index 459c8f34f1..c5aa0a00a5 100644
--- a/lite/api/cxx_api_impl.cc
+++ b/lite/api/cxx_api_impl.cc
@@ -15,6 +15,7 @@
 #include "lite/api/cxx_api.h"
 #include <string>
 #include "lite/api/paddle_api.h"
+#include "lite/core/device_info.h"
 #include "lite/core/version.h"
 
 namespace paddle {
@@ -49,6 +50,9 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
 CxxPaddleApiImpl::CxxPaddleApiImpl() {}
 
 void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
+#ifdef LITE_WITH_CUDA
+  Env<TARGET(kCUDA)>::Init();
+#endif
   auto places = config.valid_places();
   places.emplace_back(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny));
   raw_predictor_.Build(config, places);
diff --git a/lite/api/model_test.cc b/lite/api/model_test.cc
index 6e0a249a81..825d4f51ed 100644
--- a/lite/api/model_test.cc
+++ b/lite/api/model_test.cc
@@ -16,9 +16,6 @@
 #include <string>
 #include <vector>
 #include "lite/api/paddle_api.h"
-#include "lite/api/paddle_use_kernels.h"
-#include "lite/api/paddle_use_ops.h"
-#include "lite/api/paddle_use_passes.h"
 #include "lite/api/test_helper.h"
 #include "lite/core/device_info.h"
 #include "lite/utils/cp_logging.h"
diff --git a/lite/backends/cuda/math/cudnn_conv.cc b/lite/backends/cuda/math/cudnn_conv.cc
index ec7fac3187..f401500f94 100644
--- a/lite/backends/cuda/math/cudnn_conv.cc
+++ b/lite/backends/cuda/math/cudnn_conv.cc
@@ -286,7 +286,6 @@ bool CudnnConv2DInt8<Ptype_out>::create(const operators::ConvParam& param,
     }
   }
   this->scale_.Resize({oc});
-  auto* scale_data = this->scale_.template mutable_data<float>(TARGET(kCUDA));
   this->scale_.template Assign<float, lite::DDim, TARGET(kCUDA)>(
       weight_scale.data(), this->scale_.dims());
 
diff --git a/lite/backends/cuda/math/cudnn_conv.h b/lite/backends/cuda/math/cudnn_conv.h
index 03612a5e5a..96f1a949b7 100644
--- a/lite/backends/cuda/math/cudnn_conv.h
+++ b/lite/backends/cuda/math/cudnn_conv.h
@@ -32,17 +32,17 @@ class CudnnConv2DBase {
  public:
   CudnnConv2DBase()
       : handle_(NULL),
-        workspace_data_(NULL),
-        workspace_(NULL),
-        conv_desc_(NULL),
+        fwd_algo_((cudnnConvolutionFwdAlgo_t)0),
         input_desc_(NULL),
         output_desc_(NULL),
+        bias_desc_(NULL),
         filter_desc_(NULL),
+        conv_desc_(NULL),
         act_desc_(NULL),
-        bias_desc_(NULL),
+        workspace_data_(NULL),
+        workspace_(NULL),
         workspace_fwd_sizes_(0),
-        workspace_size_inbytes_(0),
-        fwd_algo_((cudnnConvolutionFwdAlgo_t)0) {}
+        workspace_size_inbytes_(0) {}
 
   ~CudnnConv2DBase() {
     if (conv_desc_) {
@@ -85,10 +85,10 @@ class CudnnConv2DBase {
   cudnnActivationDescriptor_t act_desc_;
   bool with_relu_act_{true};
 
+  void* workspace_data_;  // underlying storage
+  void* workspace_;       // aliases into _workspaceData
   size_t workspace_fwd_sizes_;
   size_t workspace_size_inbytes_;  // size of underlying storage
-  void* workspace_data_;           // underlying storage
-  void* workspace_;                // aliases into _workspaceData
 
   const bool use_tensor_core_ = true;
   const size_t workspace_limit_bytes_ = 4 * 1024 * 1024;
@@ -104,6 +104,7 @@ template <PrecisionType Ptype_out>
 class CudnnConv2D : public CudnnConv2DBase<Ptype_out> {
  public:
   CudnnConv2D() : CudnnConv2DBase<Ptype_out>() {}
+  virtual ~CudnnConv2D() = default;
   virtual bool init(const operators::ConvParam& param,
                     Context<TARGET(kCUDA)>* ctx);
 
@@ -117,6 +118,7 @@ template <PrecisionType Ptype_out>
 class CudnnConv2DInt8 : CudnnConv2DBase<Ptype_out> {
  public:
   CudnnConv2DInt8() : CudnnConv2DBase<Ptype_out>() {}
+  virtual ~CudnnConv2DInt8() = default;
   virtual bool init(const operators::ConvParam& param,
                     Context<TARGET(kCUDA)>* ctx);
 
diff --git a/lite/backends/x86/dynamic_loader.cc b/lite/backends/x86/dynamic_loader.cc
index 0f27a19cf5..75bb528f38 100644
--- a/lite/backends/x86/dynamic_loader.cc
+++ b/lite/backends/x86/dynamic_loader.cc
@@ -153,16 +153,18 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
       dso_handle = GetDsoHandleFromDefaultPath(dlPath, dynload_flags);
     }
   }
-  auto error_msg =
-      "Failed to find dynamic library: %s ( %s ) \n Please specify "
-      "its path correctly using following ways: \n Method. set "
-      "environment variable LD_LIBRARY_PATH on Linux or "
-      "DYLD_LIBRARY_PATH on Mac OS. \n For instance, issue command: "
-      "export LD_LIBRARY_PATH=... \n Note: After Mac OS 10.11, "
-      "using the DYLD_LIBRARY_PATH is impossible unless System "
-      "Integrity Protection (SIP) is disabled.";
+/*
+auto error_msg =
+    "Failed to find dynamic library: %s ( %s ) \n Please specify "
+    "its path correctly using following ways: \n Method. set "
+    "environment variable LD_LIBRARY_PATH on Linux or "
+    "DYLD_LIBRARY_PATH on Mac OS. \n For instance, issue command: "
+    "export LD_LIBRARY_PATH=... \n Note: After Mac OS 10.11, "
+    "using the DYLD_LIBRARY_PATH is impossible unless System "
+    "Integrity Protection (SIP) is disabled.";
+*/
 #if !defined(_WIN32)
-  auto errorno = dlerror();
+// auto errorno = dlerror();
 #else
   auto errorno = GetLastError();
 #endif  // !_WIN32
diff --git a/lite/backends/x86/math/beam_search.cc b/lite/backends/x86/math/beam_search.cc
index 93726afcc2..bbe35b4de5 100644
--- a/lite/backends/x86/math/beam_search.cc
+++ b/lite/backends/x86/math/beam_search.cc
@@ -49,6 +49,7 @@ class BeamSearchFunctor<TARGET(kX86), T> {
                                         end_id,
                                         is_accumulated);
     auto selected_items = ToMap(items, high_level.back());
+    /*
     if (FLAGS_v == 3) {
       VLOG(3) << "selected_items:";
       for (size_t i = 0; i < selected_items.size(); ++i) {
@@ -58,6 +59,7 @@ class BeamSearchFunctor<TARGET(kX86), T> {
         }
       }
     }
+    */
 
     PruneEndBeams(pre_ids, abs_lod, &selected_items, level, end_id);
     // calculate the output tensor's height
@@ -69,7 +71,8 @@ class BeamSearchFunctor<TARGET(kX86), T> {
     // the output tensor shape should be [num_instances, 1]
     // auto dims = framework::make_ddim(
     //     std::vector<int64_t>({static_cast<int>(num_instances), 1}));
-    lite::DDim dims(std::vector<int64_t>({num_instances, 1L}));
+    lite::DDim dims(
+        std::vector<int64_t>({static_cast<int>(num_instances), 1L}));
 
     selected_ids->Resize(dims);
     auto *selected_ids_data = selected_ids->mutable_data<int64_t>(TARGET(kX86));
@@ -296,7 +299,7 @@ class BeamSearchFunctor<TARGET(kX86), T> {
 
       result.emplace_back(top_beam);
     }
-
+    /*
     if (FLAGS_v == 3) {
       VLOG(3) << "SelectTopBeamSizeItems result size " << result.size();
       for (auto &items : result) {
@@ -306,7 +309,7 @@ class BeamSearchFunctor<TARGET(kX86), T> {
         }
       }
     }
-
+    */
     return result;
   }
 };
diff --git a/lite/backends/x86/math/detail/activation_functions.h b/lite/backends/x86/math/detail/activation_functions.h
index d12b1594d0..6a13a3d471 100644
--- a/lite/backends/x86/math/detail/activation_functions.h
+++ b/lite/backends/x86/math/detail/activation_functions.h
@@ -48,6 +48,7 @@ inline ActivationType GetActivationType(const std::string &type) {
   LOG(ERROR) << "Not support type " << type;
   // PADDLE_ENFORCE(false, "Not support type %s", type);
   // PADDLE_THROW("Not support type %s.", type);
+  return ActivationType();
 }
 
 namespace forward {
diff --git a/lite/backends/x86/math/tree2col.cc b/lite/backends/x86/math/tree2col.cc
index 8a34bebef0..20b9133313 100644
--- a/lite/backends/x86/math/tree2col.cc
+++ b/lite/backends/x86/math/tree2col.cc
@@ -107,7 +107,8 @@ class Tree2ColFunctor<lite::TargetType::kX86, T> {
     //    patch->mutable_data<T>({static_cast<int64_t>(patch_size),
     //                            static_cast<int64_t>(patch_elem_size)},
     //                           cpu_place);
-    patch->Resize({static_cast<int64_t>(patch_size, patch_elem_size)});
+    patch->Resize({static_cast<int64_t>(patch_size),
+                   static_cast<int64_t>(patch_elem_size)});
     auto *patch_data = patch->mutable_data<T>(lite::TargetType::kX86);
     constant(context, patch, 0);
     const T *features = node_features.data<T>();
diff --git a/lite/core/arena/framework.h b/lite/core/arena/framework.h
index d044260b58..412ac0c167 100644
--- a/lite/core/arena/framework.h
+++ b/lite/core/arena/framework.h
@@ -42,6 +42,7 @@ class TestCase {
       : place_(place), scope_(new Scope), alias_(alias) {
     ctx_ = ContextScheduler::Global().NewContext(place_.target);
   }
+  virtual ~TestCase() {}
 
   void Prepare() {
     PrepareScopes();
@@ -138,20 +139,18 @@ class TestCase {
   }
 
  private:
+  Place place_;
   std::shared_ptr<Scope> scope_;
+  std::string alias_;
   // The workspace for the Instruction.
   Scope* inst_scope_{};
   // The workspace for the baseline implementation.
   Scope* base_scope_{};
   std::unique_ptr<cpp::OpDesc> op_desc_;
   std::unique_ptr<Instruction> instruction_;
-  Place place_;
-  std::string alias_;
 };
 
 class Arena {
-  float abs_error_{};
-
  public:
   Arena(std::unique_ptr<TestCase>&& tester,
         const Place& place,
@@ -203,12 +202,14 @@ class Arena {
 
       default:
         LOG(FATAL) << "not support type " << PrecisionToStr(type->precision());
+        return false;
     }
   }
 
  private:
   std::unique_ptr<TestCase> tester_;
   Place place_;
+  float abs_error_;
 };
 
 template <typename T>
diff --git a/lite/core/context.h b/lite/core/context.h
index bac0e3a627..c00d65229f 100644
--- a/lite/core/context.h
+++ b/lite/core/context.h
@@ -192,10 +192,10 @@ class Context<TargetType::kCUDA> {
     ctx->cublas_fp32_ = cublas_fp32_;
   }
 
-  const cudaStream_t exec_stream() { return exec_stream_; }
+  const cudaStream_t& exec_stream() const { return exec_stream_; }
   void SetExecStream(cudaStream_t stream) { exec_stream_ = stream; }
 
-  const cudaStream_t io_stream() { return io_stream_; }
+  const cudaStream_t& io_stream() const { return io_stream_; }
   void SetIoStream(cudaStream_t stream) { io_stream_ = stream; }
 
   std::shared_ptr<cuda::Blas<float>> cublas_fp32() { return cublas_fp32_; }
diff --git a/lite/core/device_info.h b/lite/core/device_info.h
index 96f4680135..26954341e3 100644
--- a/lite/core/device_info.h
+++ b/lite/core/device_info.h
@@ -167,7 +167,7 @@ class Device<TARGET(kCUDA)> {
 
   int id() { return idx_; }
   int max_stream() { return max_stream_; }
-  int SetId(int idx) { idx_ = idx; }
+  void SetId(int idx) { idx_ = idx; }
   std::string name() { return device_prop_.name; }
   int core_num() { return device_prop_.multiProcessorCount; }
   float max_memory() { return device_prop_.totalGlobalMem / 1048576.; }
@@ -186,8 +186,8 @@ class Device<TARGET(kCUDA)> {
   void GetInfo();
 
  private:
-  int max_stream_;
   int idx_{0};
+  int max_stream_;
   cudaDeviceProp device_prop_;
   std::string device_name_;
   float max_memory_;
diff --git a/lite/core/mir/pass_utils.cc b/lite/core/mir/pass_utils.cc
index b67f5e4bd1..804d4e1b5b 100644
--- a/lite/core/mir/pass_utils.cc
+++ b/lite/core/mir/pass_utils.cc
@@ -46,7 +46,7 @@ std::set<T> Types<T>::ValidSet(const T& element) const {
   return std::set<T>({element});
 }
 
-bool ExpandPlaces(std::set<Place>* places, const Place& place) {
+void ExpandPlaces(std::set<Place>* places, const Place& place) {
   static const Types<TargetType> target_set({TARGET(kHost),
                                              TARGET(kX86),
                                              TARGET(kCUDA),
diff --git a/lite/core/mir/subgraph/subgraph_program_pass.cc b/lite/core/mir/subgraph/subgraph_program_pass.cc
index a3d95163ce..1ff9a1f1a1 100644
--- a/lite/core/mir/subgraph/subgraph_program_pass.cc
+++ b/lite/core/mir/subgraph/subgraph_program_pass.cc
@@ -278,19 +278,21 @@ int SubgraphProgramPass::FuseSubgraphID(
     const std::unique_ptr<SSAGraph>& graph) {
   int sub_id = 1;  // id start from 1 not 0
   for (auto& item : graph->StmtTopologicalOrder()) {
-    bool inputvar = 0;
+    // bool inputvar = false;
     if (!item->IsStmt()) continue;
     auto& stmt = item->AsStmt();
+    /*
     if (stmt.subgraph_id() == -1) {
       for (auto& i : item->outlinks) {
         for (auto& j : i->outlinks) {
           if (j->IsStmt()) {
             auto& jstmt = j->AsStmt();
-            if (jstmt.subgraph_id() == 0) inputvar = 1;
+            if (jstmt.subgraph_id() == 0) inputvar = true;
           }
         }
       }
     }
+    */
     if (stmt.subgraph_id() != 0) continue;
     ChangeAllOutConnectedID(item, sub_id);
     sub_id++;
diff --git a/lite/core/mir/subgraph/subgraph_program_pass_test.cc b/lite/core/mir/subgraph/subgraph_program_pass_test.cc
index de4acec91d..625c9ac924 100644
--- a/lite/core/mir/subgraph/subgraph_program_pass_test.cc
+++ b/lite/core/mir/subgraph/subgraph_program_pass_test.cc
@@ -214,7 +214,6 @@ TEST(SubGraphTest, SimpleNet) {
   auto* pass = new mir::subgraph::SubgraphProgramPass;
   ASSERT_EQ(pass->FuseSubgraph(graph, supported_op_types), 1);
 
-  const int num_nodes = graph->nodes().size();
   ASSERT_EQ(graph->nodes().size(), 9);
   // LOG(INFO) << "After NPU Pass \n" << Visualize(graph.get());
 }
diff --git a/lite/core/mir/type_layout_cast_pass.cc b/lite/core/mir/type_layout_cast_pass.cc
index 57523a0274..3019adb6a6 100644
--- a/lite/core/mir/type_layout_cast_pass.cc
+++ b/lite/core/mir/type_layout_cast_pass.cc
@@ -110,7 +110,6 @@ void TypeLayoutTransformPass::AddLayoutInst(
   bool is_found = false;
   for (auto& kernel : kernels) {
     const Type* in_arg_ty = kernel->GetInputDeclType("Input");
-    const Type* out_arg_ty = kernel->GetOutputDeclType("Out");
     if (TypeCompatible(*in_arg_ty, from)) {
       is_found = true;
       selected_kernels.emplace_back(std::move(kernel));
diff --git a/lite/core/mir/type_target_cast_pass.cc b/lite/core/mir/type_target_cast_pass.cc
index 0141a488e4..c874db7f27 100644
--- a/lite/core/mir/type_target_cast_pass.cc
+++ b/lite/core/mir/type_target_cast_pass.cc
@@ -114,7 +114,6 @@ void TypeTargetTransformPass::AddIoCopyInst(
   std::vector<std::unique_ptr<KernelBase>> selected_kernels;
   for (auto& kernel : kernels) {
     const Type* in_arg_ty = kernel->GetInputDeclType("Input");
-    const Type* out_arg_ty = kernel->GetOutputDeclType("Out");
     if (TypeCompatible(*in_arg_ty, from)) {
       is_found = true;
       selected_kernels.emplace_back(std::move(kernel));
diff --git a/lite/core/op_registry.h b/lite/core/op_registry.h
index d2c0637b0b..948fa3a6d4 100644
--- a/lite/core/op_registry.h
+++ b/lite/core/op_registry.h
@@ -209,7 +209,7 @@ class KernelRegistry final {
     ss << "Count of kernel kinds: ";
     int count = 0;
     for (auto &item : kernel_info_map_) {
-      for (auto &kernel : item.second) ++count;
+      count += item.second.size();
     }
     ss << count << "\n";
 
diff --git a/lite/fluid/data_type.cc b/lite/fluid/data_type.cc
index aa8971499f..d33a77c4bf 100644
--- a/lite/fluid/data_type.cc
+++ b/lite/fluid/data_type.cc
@@ -68,6 +68,7 @@ framework::proto::VarType::Type ToDataType(std::type_index type) {
     return it->second;
   }
   PADDLE_THROW("Not support %s as tensor type", type.name());
+  return static_cast<framework::proto::VarType::Type>(-1);
 }
 
 std::type_index ToTypeIndex(framework::proto::VarType::Type type) {
@@ -77,6 +78,7 @@ std::type_index ToTypeIndex(framework::proto::VarType::Type type) {
   }
   PADDLE_THROW("Not support framework::proto::VarType::Type(%d) as tensor type",
                static_cast<int>(type));
+  return std::type_index(typeid(void));
 }
 
 std::string DataTypeToString(const framework::proto::VarType::Type type) {
@@ -86,6 +88,7 @@ std::string DataTypeToString(const framework::proto::VarType::Type type) {
   }
   PADDLE_THROW("Not support framework::proto::VarType::Type(%d) as tensor type",
                static_cast<int>(type));
+  return std::string();
 }
 
 size_t SizeOfType(framework::proto::VarType::Type type) {
@@ -93,7 +96,8 @@ size_t SizeOfType(framework::proto::VarType::Type type) {
   if (it != gDataTypeMap().proto_to_size_.end()) {
     return it->second;
   }
-  PADDLE_THROW("Not support %s as tensor type", DataTypeToString(type));
+  PADDLE_THROW("Not support %s as tensor type", DataTypeToString(type).c_str());
+  return 0;
 }
 
 }  // namespace fluid
diff --git a/lite/fluid/selected_rows.cc b/lite/fluid/selected_rows.cc
index 18221d498d..98e9325ca2 100644
--- a/lite/fluid/selected_rows.cc
+++ b/lite/fluid/selected_rows.cc
@@ -163,7 +163,7 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key,
   if (iter == id_to_index_.end()) {
     rwlock_->UNLock();
     if (!auto_grown) {
-      PADDLE_THROW("key %d not found", key);
+      PADDLE_THROW("key %ld not found", key);
     }
     rwlock_->WRLock();
     auto map_size = id_to_index_.size();
@@ -171,7 +171,7 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key,
     if (map_size != vector_size) {
       rwlock_->UNLock();
       PADDLE_THROW(
-          "id_to_index_ size %d should have the same size with rows_ %d",
+          "id_to_index_ size %lu should have the same size with rows_ %lu",
           map_size,
           vector_size);
     }
diff --git a/lite/fluid/selected_rows.h b/lite/fluid/selected_rows.h
index 16f7cbc178..0624ec2b8d 100644
--- a/lite/fluid/selected_rows.h
+++ b/lite/fluid/selected_rows.h
@@ -82,7 +82,7 @@ class SelectedRows {
   int64_t Index(int64_t key) const {
     auto it = std::find(rows_.begin(), rows_.end(), key);
     if (it == rows_.end()) {
-      PADDLE_THROW("id %s not in table", key);
+      PADDLE_THROW("id %ld not in table", key);
     }
     return static_cast<int64_t>(std::distance(rows_.begin(), it));
   }
diff --git a/lite/kernels/cuda/calib_compute_cuda_test.cc b/lite/kernels/cuda/calib_compute_cuda_test.cc
index 691b52d257..8fefa34328 100644
--- a/lite/kernels/cuda/calib_compute_cuda_test.cc
+++ b/lite/kernels/cuda/calib_compute_cuda_test.cc
@@ -75,7 +75,6 @@ TEST(calib_cuda, int8_to_fp32) {
   output.Resize({n, c, h, w});
   output_cpu.Resize({n, c, h, w});
   // initialize the data of input tensors
-  auto* x_data = x.mutable_data<int8_t>(TARGET(kCUDA));
   auto* x_cpu_data = x_cpu.mutable_data<int8_t>();
   for (int i = 0; i < x.dims().production(); i++) {
     float sign = i % 3 == 0 ? -1.0f : 1.0f;
@@ -131,7 +130,6 @@ TEST(calib_cuda, fp32_to_int8) {
   output.Resize({n, c, h, w});
   output_cpu.Resize({n, c, h, w});
   // initialize the data of input tensors
-  auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
   auto* x_cpu_data = x_cpu.mutable_data<float>();
   for (int i = 0; i < x.dims().production(); i++) {
     float sign = i % 3 == 0 ? -1.0f : 1.0f;
diff --git a/lite/kernels/cuda/conv_compute_test.cc b/lite/kernels/cuda/conv_compute_test.cc
index 68b4475399..022e94dd0a 100644
--- a/lite/kernels/cuda/conv_compute_test.cc
+++ b/lite/kernels/cuda/conv_compute_test.cc
@@ -53,7 +53,6 @@ TEST(conv_compute, fp32) {
   y_cpu.Resize({n, c_o, h_o, w_o});
   bias_cpu.Resize({c_o});
 
-  auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
   auto* y_data = y.mutable_data<float>(TARGET(kCUDA));
   float* x_cpu_data = x_cpu.mutable_data<float>();
   float* filter_cpu_data = filter_cpu.mutable_data<float>();
@@ -127,7 +126,6 @@ TEST(conv_compute, int8) {
   y_cpu.Resize({1, 1, 1, c});
   bias_cpu.Resize({c});
 
-  auto* x_data = x.mutable_data<int8_t>(TARGET(kCUDA));
   auto* y_data = y.mutable_data<float>(TARGET(kCUDA));
   auto* x_cpu_data = x_cpu.mutable_data<int8_t>();
   auto* filter_cpu_data = filter_cpu.mutable_data<int8_t>();
@@ -194,7 +192,6 @@ TEST(conv_compute, int8_int8_out) {
   y_cpu.Resize({1, 1, 1, c});
   bias_cpu.Resize({c});
 
-  auto* x_data = x.mutable_data<int8_t>(TARGET(kCUDA));
   auto* y_data = y.mutable_data<int8_t>(TARGET(kCUDA));
   auto* x_cpu_data = x_cpu.mutable_data<int8_t>();
   auto* filter_cpu_data = filter_cpu.mutable_data<int8_t>();
diff --git a/lite/kernels/cuda/elementwise_add_compute_test.cc b/lite/kernels/cuda/elementwise_add_compute_test.cc
index 7ba67e8eb7..ea9998c8d3 100644
--- a/lite/kernels/cuda/elementwise_add_compute_test.cc
+++ b/lite/kernels/cuda/elementwise_add_compute_test.cc
@@ -56,8 +56,6 @@ TEST(elementwise_add, normal) {
   y_ref.Resize({n, c, h, w});
   out_ref.Resize({n, c, h, w});
 
-  auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
-  auto* y_data = y.mutable_data<float>(TARGET(kCUDA));
   auto* out_data = out.mutable_data<float>(TARGET(kCUDA));
 
   auto* x_cpu_data = x_cpu.mutable_data<float>();
diff --git a/lite/kernels/cuda/leaky_relu_compute_test.cc b/lite/kernels/cuda/leaky_relu_compute_test.cc
index 9fb5a5eddf..8ced10ce7d 100644
--- a/lite/kernels/cuda/leaky_relu_compute_test.cc
+++ b/lite/kernels/cuda/leaky_relu_compute_test.cc
@@ -35,7 +35,6 @@ TEST(leaky_relu, normal) {
   x_cpu.Resize({h, w});
   y_cpu.Resize({h, w});
 
-  auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
   auto* y_data = y.mutable_data<float>(TARGET(kCUDA));
   float* x_cpu_data = x_cpu.mutable_data<float>();
   float* y_cpu_data = x_cpu.mutable_data<float>();
diff --git a/lite/kernels/cuda/nearest_interp_compute_test.cc b/lite/kernels/cuda/nearest_interp_compute_test.cc
index 6b98bf143b..85032016d6 100644
--- a/lite/kernels/cuda/nearest_interp_compute_test.cc
+++ b/lite/kernels/cuda/nearest_interp_compute_test.cc
@@ -80,7 +80,6 @@ TEST(nearest_interp, normal) {
   Tensor x_ref, osz_ref, out_ref;
 
   int n = 1, c = 3, in_h = 40, in_w = 40;
-  int in_chw = c * in_h * in_w;
   int out_h = 80, out_w = 80;
   float scale = 2.0;
 
@@ -101,8 +100,6 @@ TEST(nearest_interp, normal) {
   osz_ref.Resize({2});
   out_ref.Resize({n, c, out_h, out_w});
 
-  auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
-  auto* osz_data = osz.mutable_data<float>(TARGET(kCUDA));
   auto* out_data = out.mutable_data<float>(TARGET(kCUDA));
 
   float* x_cpu_data = x_cpu.mutable_data<float>();
diff --git a/lite/kernels/cuda/transpose_compute_test.cc b/lite/kernels/cuda/transpose_compute_test.cc
index d0b4844dcf..517f761b61 100644
--- a/lite/kernels/cuda/transpose_compute_test.cc
+++ b/lite/kernels/cuda/transpose_compute_test.cc
@@ -41,7 +41,6 @@ void nchw2nhwc_ref(lite::Tensor* input,
   int input_c = input->dims()[1];
   int input_h = input->dims()[2];
   int input_w = input->dims()[3];
-  int output_n = output->dims()[0];
   int output_c = output->dims()[1];
   int output_h = output->dims()[2];
   int output_w = output->dims()[3];
@@ -75,7 +74,6 @@ void nhwc2nchw_ref(lite::Tensor* input,
   int input_h = input->dims()[1];
   int input_w = input->dims()[2];
   int input_c = input->dims()[3];
-  int output_n = output->dims()[0];
   int output_h = output->dims()[1];
   int output_w = output->dims()[2];
   int output_c = output->dims()[3];
@@ -145,7 +143,6 @@ TEST(transpose_nchw, normal) {
   x_ref.Resize({N, C, H, W});
   out_ref.Resize({N, H, W, C});
 
-  auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
   auto* x_cpu_data = x_cpu.mutable_data<float>();
   auto* out_cpu_data = out_cpu.mutable_data<float>();
   auto* x_ref_data = x_ref.mutable_data<float>();
@@ -200,7 +197,6 @@ TEST(transpose_nhwc, normal) {
   x_ref.Resize({N, H, W, C});
   out_ref.Resize({N, C, H, W});
 
-  auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
   auto* x_cpu_data = x_cpu.mutable_data<float>();
   auto* out_cpu_data = out_cpu.mutable_data<float>();
   auto* x_ref_data = x_ref.mutable_data<float>();
@@ -253,7 +249,6 @@ TEST(transpose, normal) {
   x_ref.Resize({C, H, W});
   out_ref.Resize({W, C, H});
 
-  auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
   auto* x_cpu_data = x_cpu.mutable_data<float>();
   auto* out_cpu_data = out_cpu.mutable_data<float>();
   auto* x_ref_data = x_ref.mutable_data<float>();
diff --git a/lite/kernels/cuda/yolo_box_compute_test.cc b/lite/kernels/cuda/yolo_box_compute_test.cc
index 5cd9579383..26b890c9f1 100644
--- a/lite/kernels/cuda/yolo_box_compute_test.cc
+++ b/lite/kernels/cuda/yolo_box_compute_test.cc
@@ -180,8 +180,6 @@ TEST(yolo_box, normal) {
   boxes_ref.Resize({n, m, 4});
   scores_ref.Resize({n, cls, m});
 
-  auto* x_data = x.mutable_data<float>(TARGET(kCUDA));
-  auto* sz_data = sz.mutable_data<float>(TARGET(kCUDA));
   auto* boxes_data = boxes.mutable_data<float>(TARGET(kCUDA));
   auto* scores_data = scores.mutable_data<float>(TARGET(kCUDA));
 
diff --git a/lite/kernels/x86/activation_compute.h b/lite/kernels/x86/activation_compute.h
index 105bc70e7a..2775240194 100644
--- a/lite/kernels/x86/activation_compute.h
+++ b/lite/kernels/x86/activation_compute.h
@@ -64,6 +64,7 @@ bool Activate(const lite::Tensor* X, lite::Tensor* Out) {
   auto x = lite::fluid::EigenVector<T>::Flatten(*X);
   auto out = lite::fluid::EigenVector<T>::Flatten(*Out);
   Functor()(place, x, out);
+  return true;
 }
 
 // square(x) = x^2
diff --git a/lite/model_parser/naive_buffer/naive_buffer_wrapper_test.cc b/lite/model_parser/naive_buffer/naive_buffer_wrapper_test.cc
index 45224de122..46fbec1b67 100644
--- a/lite/model_parser/naive_buffer/naive_buffer_wrapper_test.cc
+++ b/lite/model_parser/naive_buffer/naive_buffer_wrapper_test.cc
@@ -293,7 +293,7 @@ TEST(NaiveBufferWrapper, ProgramDesc) {
   // Set ProgramDesc
   nb_desc0.SetVersion(1);
   for (int i = 0; i < 3; ++i) {
-    auto* item = nb_desc0.AddBlock<proto::BlockDesc>();
+    nb_desc0.AddBlock<proto::BlockDesc>();
   }
 
   // Save model
diff --git a/lite/model_parser/naive_buffer/op_desc.h b/lite/model_parser/naive_buffer/op_desc.h
index c292293169..907f33a2a7 100644
--- a/lite/model_parser/naive_buffer/op_desc.h
+++ b/lite/model_parser/naive_buffer/op_desc.h
@@ -130,6 +130,7 @@ class OpDesc : public OpDescAPI {
       DEF_ONE(LONGS);
       default:
         LOG(FATAL) << "Unknown attribute type";
+        return static_cast<AttrType>(-1);
     }
 #undef DEF_ONE
   }
diff --git a/lite/model_parser/naive_buffer/param_desc.cc b/lite/model_parser/naive_buffer/param_desc.cc
index 4d38ca4a8d..d7e2b4caec 100644
--- a/lite/model_parser/naive_buffer/param_desc.cc
+++ b/lite/model_parser/naive_buffer/param_desc.cc
@@ -97,6 +97,7 @@ VarDescAPI::VarDataType ParamDesc::GetDataType() const {
     default:
       LOG(FATAL) << "Unknown var data type";
   }
+  return VarDescAPI::VarDataType();
 #undef GET_DATA_TYPE_CASE_ITEM
 }
 
diff --git a/lite/model_parser/naive_buffer/var_desc.cc b/lite/model_parser/naive_buffer/var_desc.cc
index 2e00199929..cccf758291 100644
--- a/lite/model_parser/naive_buffer/var_desc.cc
+++ b/lite/model_parser/naive_buffer/var_desc.cc
@@ -51,6 +51,7 @@ VarDescAPI::Type VarDesc::GetType() const {
     GET_TYPE_CASE_ITEM(READER);
     default:
       LOG(FATAL) << "Unknown var type";
+      return VarDescAPI::Type();
   }
 #undef GET_TYPE_CASE_ITEM
 }
diff --git a/lite/model_parser/pb/op_desc.h b/lite/model_parser/pb/op_desc.h
index 1a0af22f27..5275875c09 100644
--- a/lite/model_parser/pb/op_desc.h
+++ b/lite/model_parser/pb/op_desc.h
@@ -121,6 +121,7 @@ class OpDesc : public OpDescAPI {
       DEF_ONE(LONGS);
       default:
         LOG(FATAL) << "Unknown attribute type";
+        return static_cast<AttrType>(-1);
     }
 #undef DEF_ONE
   }
diff --git a/lite/model_parser/pb/var_desc.cc b/lite/model_parser/pb/var_desc.cc
index 91800c88b5..517f4cc6dc 100644
--- a/lite/model_parser/pb/var_desc.cc
+++ b/lite/model_parser/pb/var_desc.cc
@@ -39,6 +39,7 @@ VarDescAPI::Type VarDesc::GetType() const {
     GET_TYPE_CASE_ITEM(READER);
     default:
       LOG(FATAL) << "Unknown var type";
+      return VarDescAPI::Type();
   }
 #undef GET_TYPE_CASE_ITEM
 }
diff --git a/lite/operators/gru_unit_op.cc b/lite/operators/gru_unit_op.cc
index b1efd8d048..ed33507fc3 100644
--- a/lite/operators/gru_unit_op.cc
+++ b/lite/operators/gru_unit_op.cc
@@ -32,7 +32,6 @@ bool GRUUnitOpLite::CheckShape() const {
   auto hidden_prev_dims = param_.hidden_prev->dims();
   auto weight_dims = param_.weight->dims();
 
-  int batch_size = input_dims[0];
   int input_size = input_dims[1];
   int frame_size = hidden_prev_dims[1];
   int weight_height = weight_dims[0];
diff --git a/lite/operators/im2sequence_op.cc b/lite/operators/im2sequence_op.cc
index 1cd415bcd5..40ab2106af 100644
--- a/lite/operators/im2sequence_op.cc
+++ b/lite/operators/im2sequence_op.cc
@@ -29,7 +29,6 @@ bool Im2SequenceOp::CheckShape() const { return true; }
 bool Im2SequenceOp::InferShape() const {
   CHECK_OR_FALSE(param_.Out);
   // TODO(Superjomn) Enable data sharing.
-  auto inputs = param_.X;
   auto input_dims = param_.X->dims();
   int img_num = input_dims[0];
   int img_channels = input_dims[1];
diff --git a/lite/operators/is_empty_op.cc b/lite/operators/is_empty_op.cc
index e89c72d414..ed4c69e64e 100644
--- a/lite/operators/is_empty_op.cc
+++ b/lite/operators/is_empty_op.cc
@@ -21,7 +21,7 @@ namespace operators {
 
 bool IsEmptyOp::CheckShape() const { return true; }
 
-bool IsEmptyOp::InferShape() const {}
+bool IsEmptyOp::InferShape() const { return true; }
 
 bool IsEmptyOp::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) {
   param_.X =
diff --git a/lite/operators/range_op.cc b/lite/operators/range_op.cc
index ccfbe18719..a179d8ffe7 100644
--- a/lite/operators/range_op.cc
+++ b/lite/operators/range_op.cc
@@ -33,7 +33,7 @@ template <typename T>
 void GetSize(T start, T end, T step, int64_t* size) {
   CHECK(!std::equal_to<T>()(step, 0))
       << "The step of range op should not be 0.";
-  CHECK(((start < end) && (step > 0)) || (start > end) && (step < 0))
+  CHECK(((start < end) && (step > 0)) || ((start > end) && (step < 0)))
       << "The step should be greater than 0 while start < end. And the "
          "step should be less than 0 while start > end.";
   *size = std::is_integral<T>::value
diff --git a/lite/operators/yolo_box_op.cc b/lite/operators/yolo_box_op.cc
index de2ce77dfb..c8186d3f31 100644
--- a/lite/operators/yolo_box_op.cc
+++ b/lite/operators/yolo_box_op.cc
@@ -48,7 +48,6 @@ bool YoloBoxOp::CheckShape() const {
 
 bool YoloBoxOp::InferShape() const {
   auto* X = param_.X;
-  auto* ImgSize = param_.ImgSize;
   auto anchors = param_.anchors;
   int anchor_num = anchors.size() / 2;
   auto class_num = param_.class_num;
diff --git a/lite/tests/kernels/affine_channel_compute_test.cc b/lite/tests/kernels/affine_channel_compute_test.cc
index 0e0c044e56..9fac0d9379 100644
--- a/lite/tests/kernels/affine_channel_compute_test.cc
+++ b/lite/tests/kernels/affine_channel_compute_test.cc
@@ -64,8 +64,6 @@ class AffineChannelComputeTester : public arena::TestCase {
 
     if (data_layout_ == "NCHW") {
       int channel = x_dims_[1];
-      int height = x_dims_[2];
-      int width = x_dims_[3];
       int size = x_dims_[2] * x_dims_[3];
       int in_channel = channel * size;
       for (int n = 0; n < num; n++) {
diff --git a/lite/tests/kernels/box_coder_compute_test.cc b/lite/tests/kernels/box_coder_compute_test.cc
index f3f9b7e0ab..9a833db31d 100644
--- a/lite/tests/kernels/box_coder_compute_test.cc
+++ b/lite/tests/kernels/box_coder_compute_test.cc
@@ -121,16 +121,10 @@ class BoxCoderComputeTester : public arena::TestCase {
     auto* output_box = scope->NewTensor(output_box_);
     CHECK(output_box);
     output_box->Resize(target_box_dims_);
-    auto* output_box_data = output_box->mutable_data<float>();
 
     auto* prior_box = scope->FindTensor(prior_box_);
-    const auto* prior_box_data = prior_box->data<float>();
-
     auto* prior_box_var = scope->FindTensor(prior_box_var_);
-    const auto* prior_box_var_data = prior_box_var->data<float>();
-
     auto* target_box = scope->FindTensor(target_box_);
-    const auto* target_box_data = target_box->data<float>();
 
     box_coder_ref(output_box,
                   prior_box,
diff --git a/lite/tests/kernels/cast_compute_test.cc b/lite/tests/kernels/cast_compute_test.cc
index a5611b71f4..db69d866c9 100644
--- a/lite/tests/kernels/cast_compute_test.cc
+++ b/lite/tests/kernels/cast_compute_test.cc
@@ -45,10 +45,6 @@ class CastComputeTester : public arena::TestCase {
       auto* output_data = out->mutable_data<float>();
       auto* x = scope->FindTensor(input_);
       auto* x_data = x->data<char>();
-      int num = x_dims_[0];
-      int channel = x_dims_[1];
-      int size = x_dims_[2] * x_dims_[3];
-      int in_channel = channel * size;
       auto* output_data_tmp = output_data;
       auto* x_data_tmp = x_data;
       for (int i = 0; i < x_dims_.production(); i++) {
@@ -60,10 +56,6 @@ class CastComputeTester : public arena::TestCase {
       auto* output_data = out->mutable_data<float>();
       auto* x = scope->FindTensor(input_);
       auto* x_data = x->data<int32_t>();
-      int num = x_dims_[0];
-      int channel = x_dims_[1];
-      int size = x_dims_[2] * x_dims_[3];
-      int in_channel = channel * size;
       auto* output_data_tmp = output_data;
       auto* x_data_tmp = x_data;
       for (int i = 0; i < x_dims_.production(); i++) {
diff --git a/lite/tests/kernels/conv2d_transpose_compute_test.cc b/lite/tests/kernels/conv2d_transpose_compute_test.cc
index c44259022d..a287f0bb66 100644
--- a/lite/tests/kernels/conv2d_transpose_compute_test.cc
+++ b/lite/tests/kernels/conv2d_transpose_compute_test.cc
@@ -190,7 +190,6 @@ bool deconv_basic(const Dtype1* din,
   auto* workspace_ptr = workspace_tensor.mutable_data<Dtype2>();
 
   int group_size_in = win * hin * chin / group;
-  int group_size_out = wout * hout * chout / group;
   int group_size_coldata = m * n;
   int group_size_weights = chin * chout * kernel_w * kernel_h / (group * group);
   bool flag_1x1s1p1 = (kernel_w == 1) && (kernel_h == 1) && (stride_h == 1) &&
diff --git a/lite/tests/kernels/elementwise_compute_test.cc b/lite/tests/kernels/elementwise_compute_test.cc
index 90f7d02362..635f6e7c08 100644
--- a/lite/tests/kernels/elementwise_compute_test.cc
+++ b/lite/tests/kernels/elementwise_compute_test.cc
@@ -43,7 +43,6 @@ class ElementwiseComputeTester : public arena::TestCase {
 
     auto* x = scope->FindTensor(inputx_);
     const auto* x_data = x->data<float>();
-    auto* y = scope->FindTensor(inputy_);
     const auto* y_data = x->data<float>();
 
     for (int i = 0; i < dims_.production(); i++) {
@@ -94,7 +93,6 @@ class ElementwiseSubComputeTester : public arena::TestCase {
 
     auto* x = scope->FindTensor(inputx_);
     const auto* x_data = x->data<float>();
-    auto* y = scope->FindTensor(inputy_);
     const auto* y_data = x->data<float>();
 
     for (int i = 0; i < dims_.production(); i++) {
@@ -145,7 +143,6 @@ class ElementwiseMulComputeTester : public arena::TestCase {
 
     auto* x = scope->FindTensor(inputx_);
     const auto* x_data = x->data<float>();
-    auto* y = scope->FindTensor(inputy_);
     const auto* y_data = x->data<float>();
 
     for (int i = 0; i < dims_.production(); i++) {
@@ -196,7 +193,6 @@ class ElementwiseMaxComputeTester : public arena::TestCase {
 
     auto* x = scope->FindTensor(inputx_);
     const auto* x_data = x->data<float>();
-    auto* y = scope->FindTensor(inputy_);
     const auto* y_data = x->data<float>();
 
     for (int i = 0; i < dims_.production(); i++) {
@@ -249,7 +245,6 @@ class FusionElementwiseAddActivationComputeTester : public arena::TestCase {
 
     auto* x = scope->FindTensor(inputx_);
     const auto* x_data = x->data<float>();
-    auto* y = scope->FindTensor(inputy_);
     const auto* y_data = x->data<float>();
 
     for (int i = 0; i < dims_.production(); i++) {
@@ -308,7 +303,6 @@ class FusionElementwiseSubActivationComputeTester : public arena::TestCase {
 
     auto* x = scope->FindTensor(inputx_);
     const auto* x_data = x->data<float>();
-    auto* y = scope->FindTensor(inputy_);
     const auto* y_data = x->data<float>();
 
     for (int i = 0; i < dims_.production(); i++) {
@@ -367,7 +361,6 @@ class FusionElementwiseMulActivationComputeTester : public arena::TestCase {
 
     auto* x = scope->FindTensor(inputx_);
     const auto* x_data = x->data<float>();
-    auto* y = scope->FindTensor(inputy_);
     const auto* y_data = x->data<float>();
 
     for (int i = 0; i < dims_.production(); i++) {
@@ -426,7 +419,6 @@ class FusionElementwiseMaxActivationComputeTester : public arena::TestCase {
 
     auto* x = scope->FindTensor(inputx_);
     const auto* x_data = x->data<float>();
-    auto* y = scope->FindTensor(inputy_);
     const auto* y_data = x->data<float>();
 
     for (int i = 0; i < dims_.production(); i++) {
diff --git a/lite/tests/kernels/fc_compute_test.cc b/lite/tests/kernels/fc_compute_test.cc
index 4611ec812f..ef5baa8185 100644
--- a/lite/tests/kernels/fc_compute_test.cc
+++ b/lite/tests/kernels/fc_compute_test.cc
@@ -51,10 +51,10 @@ class FcOPTest : public arena::TestCase {
   std::string weight_ = "w";
   std::string bias_ = "b";
   std::string out_ = "out";
-  int in_num_col_dims_{1};
   DDim dims_{{1, 128}};
   DDim wdims_{{128, 4}};
   DDim bdims_{{4}};
+  int in_num_col_dims_{1};
 
  public:
   FcOPTest(const Place& place,
diff --git a/lite/tests/kernels/gru_unit_test.cc b/lite/tests/kernels/gru_unit_test.cc
index 2cd2ad14bc..98ce7ebc19 100644
--- a/lite/tests/kernels/gru_unit_test.cc
+++ b/lite/tests/kernels/gru_unit_test.cc
@@ -243,11 +243,11 @@ class GRUUnitTester : public arena::TestCase {
   std::string reset_hidden_prev_ = "reset_hidden_prev";
   std::string hidden_ = "hidden";
 
-  DDim dims_{{16, 256 * 3}};
   // 0: indentity; 1: sigmoid; 2: tanh; 3: relu
   int gate_activation_{1};
   int activation_{2};
   bool origin_mode_{false};
+  DDim dims_{{16, 256 * 3}};
 
  public:
   GRUUnitTester(const Place& place,
diff --git a/lite/tests/kernels/lrn_compute_test.cc b/lite/tests/kernels/lrn_compute_test.cc
index cd0931fcc5..9ee43c5c60 100644
--- a/lite/tests/kernels/lrn_compute_test.cc
+++ b/lite/tests/kernels/lrn_compute_test.cc
@@ -123,7 +123,6 @@ class LrnComputeTester : public arena::TestCase {
     int H = dims_[2];
     int W = dims_[3];
 
-    int pre_pad = (local_size_ - 1) / 2;
     int offset_num = 0;
     int offset_within_channel = 0;
     int dst_id;
diff --git a/lite/tests/kernels/matmul_compute_test.cc b/lite/tests/kernels/matmul_compute_test.cc
index 8b70f59d47..4915614b34 100644
--- a/lite/tests/kernels/matmul_compute_test.cc
+++ b/lite/tests/kernels/matmul_compute_test.cc
@@ -120,12 +120,12 @@ class MatMulComputeTester : public arena::TestCase {
   // common attributes for this op.
   std::string x_ = "X";
   std::string y_ = "Y";
-  std::string out_ = "Out";
-  DDim x_dims_;
-  DDim y_dims_;
   bool x_transpose_;
   bool y_transpose_;
   float alpha_;
+  std::string out_ = "Out";
+  DDim x_dims_;
+  DDim y_dims_;
 
  public:
   MatMulComputeTester(const Place& place,
diff --git a/lite/tests/kernels/pad2d_compute_test.cc b/lite/tests/kernels/pad2d_compute_test.cc
index 78afbd97ae..818e7d2e3b 100644
--- a/lite/tests/kernels/pad2d_compute_test.cc
+++ b/lite/tests/kernels/pad2d_compute_test.cc
@@ -26,8 +26,8 @@ class Pad2dComputeTester : public arena::TestCase {
   std::string input_ = "X";
   std::string output_ = "Out";
   DDim dims_{{1, 1, 14, 14}};
-  std::vector<int> paddings_;
   std::string mode_{"constant"};
+  std::vector<int> paddings_;
   float pad_value_ = 0.f;
   std::string data_format_{"NCHW"};
 
diff --git a/lite/tests/kernels/prior_box_compute_test.cc b/lite/tests/kernels/prior_box_compute_test.cc
index 47f7bc9447..73fd612c3a 100644
--- a/lite/tests/kernels/prior_box_compute_test.cc
+++ b/lite/tests/kernels/prior_box_compute_test.cc
@@ -125,7 +125,6 @@ void prior_box_compute_ref(const lite::Tensor* input,
       if (fixed_size_.size() > 0) {
         for (int s = 0; s < fixed_size_.size(); ++s) {
           int fixed_size = fixed_size_[s];
-          int com_idx = 0;
           box_width = fixed_size;
           box_height = fixed_size;
 
diff --git a/lite/tests/kernels/reduce_max_compute_test.cc b/lite/tests/kernels/reduce_max_compute_test.cc
index 2a1116d65f..a6d66846d5 100644
--- a/lite/tests/kernels/reduce_max_compute_test.cc
+++ b/lite/tests/kernels/reduce_max_compute_test.cc
@@ -28,7 +28,7 @@ void reduce_n(const float* src,
               int width_in) {
   int hw_size = height_in * width_in;
   int chw_size = channel_in * hw_size;
-  int data_index, src_index, src_index0;
+  int data_index, src_index;
   for (int c = 0; c < channel_in; ++c) {
     for (int h = 0; h < height_in; ++h) {
       for (int w = 0; w < width_in; ++w) {
@@ -196,9 +196,9 @@ class ReduceMaxComputeTester : public arena::TestCase {
   std::string input_ = "x";
   std::string output_ = "out";
   std::vector<int> dim_{0};
-  DDim x_dims_{{3, 2, 3, 4}};
   bool keep_dim_ = false;
   bool reduce_all_ = false;
+  DDim x_dims_{{3, 2, 3, 4}};
 
  public:
   ReduceMaxComputeTester(const Place& place,
diff --git a/lite/tests/kernels/reduce_mean_compute_test.cc b/lite/tests/kernels/reduce_mean_compute_test.cc
index cda273239d..23f97fbb77 100644
--- a/lite/tests/kernels/reduce_mean_compute_test.cc
+++ b/lite/tests/kernels/reduce_mean_compute_test.cc
@@ -28,7 +28,7 @@ void reduce_mean_n(const float* src,
                    int width_in) {
   int hw_size = height_in * width_in;
   int chw_size = channel_in * hw_size;
-  int data_index, src_index, src_index0;
+  int data_index, src_index;
   for (int c = 0; c < channel_in; ++c) {
     for (int h = 0; h < height_in; ++h) {
       for (int w = 0; w < width_in; ++w) {
@@ -195,8 +195,8 @@ class ReduceMeanComputeTester : public arena::TestCase {
   std::string input_ = "x";
   std::string output_ = "out";
   std::vector<int> dim_{0};
-  DDim x_dims_{{3, 2, 3, 4}};
   bool keep_dim_ = false;
+  DDim x_dims_{{3, 2, 3, 4}};
   bool reduce_all_ = false;
 
  public:
diff --git a/lite/tests/kernels/sequence_expand_compute_test.cc b/lite/tests/kernels/sequence_expand_compute_test.cc
index c110f52793..05d8149796 100644
--- a/lite/tests/kernels/sequence_expand_compute_test.cc
+++ b/lite/tests/kernels/sequence_expand_compute_test.cc
@@ -25,10 +25,10 @@ class SequenceExpandComputeTester : public arena::TestCase {
   const std::string input_x_ = "x";
   const std::string input_y_ = "y";
   const std::string output_ = "out";
-  int ref_level_ = -1;
-  DDim dims_{{4, 1}};
   LoD lod_x_{{0, 2, 4}};
   LoD lod_y_{{0, 1, 4}};
+  int ref_level_ = -1;
+  DDim dims_{{4, 1}};
 
  public:
   SequenceExpandComputeTester(const Place& place,
@@ -50,7 +50,6 @@ class SequenceExpandComputeTester : public arena::TestCase {
     const auto* x_data = x->data<float>();
     (x->mutable_lod())->clear();
     (x->mutable_lod())->push_back(lod_x_[0]);
-    int x_rank = dims_.size();
     auto width = x->numel() / dims_[0];
     auto lod_x = x->lod();
 
@@ -59,7 +58,6 @@ class SequenceExpandComputeTester : public arena::TestCase {
     for (int i = 0; i < lod_y_.size(); i++) {
       (y->mutable_lod())->push_back(lod_y_[i]);
     }
-    const auto* y_data = y->data<float>();
     if (ref_level_ == -1) {
       ref_level_ = lod_y_.size() - 1;
     }
diff --git a/lite/tests/kernels/sequence_pool_compute_test.cc b/lite/tests/kernels/sequence_pool_compute_test.cc
index 717b468721..f987fb2802 100644
--- a/lite/tests/kernels/sequence_pool_compute_test.cc
+++ b/lite/tests/kernels/sequence_pool_compute_test.cc
@@ -25,9 +25,9 @@ class SequencePoolComputeTester : public arena::TestCase {
   // common attributes for this op.
   std::string input_ = "x";
   std::string output_ = "out";
-  DDim dims_{{5, 1}};
   LoD lod_{{0, 2, 5}};
   std::string pool_type_ = "SUM";
+  DDim dims_{{5, 1}};
 
  public:
   SequencePoolComputeTester(const Place& place,
diff --git a/lite/tests/math/gemm_int8_compute_test.cc b/lite/tests/math/gemm_int8_compute_test.cc
index 1b38d921f6..02bf8dbd43 100644
--- a/lite/tests/math/gemm_int8_compute_test.cc
+++ b/lite/tests/math/gemm_int8_compute_test.cc
@@ -60,10 +60,6 @@ bool test_gemm_int8(bool tra,
   Tensor tc_basic_fp32;
   Tensor tbias;
 
-  int lda = tra ? m : k;
-  int ldb = trb ? k : n;
-  int ldc = n;
-
   ta.Resize({m, k});
   tb.Resize({k, n});
   tc_int8.Resize({m, n});
@@ -94,6 +90,16 @@ bool test_gemm_int8(bool tra,
     scale_merge_int8[j] = scale_merge_fp32[j] / scale_c[0];
   }
 
+  LOG(INFO) << "gemm_int8 M: " << m << ", N: " << n << ", K: " << k
+            << ", transA: " << (tra ? "true" : "false")
+            << ", transB: " << (trb ? "true" : "false")
+            << ", relu: " << (has_relu ? "true" : "false")
+            << ", bias: " << (has_bias ? "true" : "false");
+#ifdef LITE_WITH_ARM
+  int lda = tra ? m : k;
+  int ldb = trb ? k : n;
+  int ldc = n;
+
   auto da = ta.mutable_data<int8_t>();
   auto db = tb.mutable_data<int8_t>();
   auto dc_int8 = tc_int8.mutable_data<int8_t>();
@@ -102,12 +108,6 @@ bool test_gemm_int8(bool tra,
   auto dc_basic_fp32 = tc_basic_fp32.mutable_data<float>();
   auto dbias = tbias.mutable_data<float>();
 
-  LOG(INFO) << "gemm_int8 M: " << m << ", N: " << n << ", K: " << k
-            << ", transA: " << (tra ? "true" : "false")
-            << ", transB: " << (trb ? "true" : "false")
-            << ", relu: " << (has_relu ? "true" : "false")
-            << ", bias: " << (has_bias ? "true" : "false");
-#ifdef LITE_WITH_ARM
   if (FLAGS_check_result) {
     Tensor ta_fp32;
     Tensor tb_fp32;
diff --git a/lite/tools/ci_build.sh b/lite/tools/ci_build.sh
index 7e9c5068d5..17ec0fdccd 100755
--- a/lite/tools/ci_build.sh
+++ b/lite/tools/ci_build.sh
@@ -42,7 +42,7 @@ function prepare_workspace {
     cp ../${DEBUG_TOOL_PATH_PREFIX}/analysis_tool.py ./${DEBUG_TOOL_PATH_PREFIX}/
 
     # clone submodule
-    #git submodule update --init --recursive
+    # git submodule update --init --recursive
     prepare_thirdparty
 }
 
diff --git a/lite/tools/debug/CMakeLists.txt b/lite/tools/debug/CMakeLists.txt
index b26fd1545a..ae098b05a6 100644
--- a/lite/tools/debug/CMakeLists.txt
+++ b/lite/tools/debug/CMakeLists.txt
@@ -1,15 +1,18 @@
 lite_cc_library(debug_utils SRCS debug_utils.cc DEPS op_params model_parser)
 
-lite_cc_binary(lite_model_debug_tool SRCS model_debug_tool.cc
+if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK OR LITE_ON_MODEL_OPTIMIZE_TOOL)
+  lite_cc_binary(lite_model_debug_tool SRCS model_debug_tool.cc
     DEPS
     cxx_api
     debug_utils
     target_wrapper_host
     mir_passes
     gflags
+    logging
     ${ops} ${host_kernels}
     X86_DEPS ${x86_kernels}
     ARM_DEPS ${arm_kernels}
     NPU_DEPS ${npu_kernels}
     FPGA_DEPS ${fpga_kernels}
     CL_DEPS ${opencl_kernels})
+endif()
diff --git a/lite/tools/debug/model_debug_tool.cc b/lite/tools/debug/model_debug_tool.cc
index a2ff37895c..30f35ca7fc 100644
--- a/lite/tools/debug/model_debug_tool.cc
+++ b/lite/tools/debug/model_debug_tool.cc
@@ -16,9 +16,6 @@
 #include <string>
 #include <vector>
 #include "lite/api/cxx_api.h"
-#include "lite/api/paddle_use_kernels.h"
-#include "lite/api/paddle_use_ops.h"
-#include "lite/api/paddle_use_passes.h"
 #include "lite/core/op_registry.h"
 #include "lite/model_parser/model_parser.h"
 #include "lite/model_parser/pb/program_desc.h"
@@ -47,6 +44,9 @@ void Run(DebugConfig* conf) {
 #endif
 #ifdef LITE_WITH_FPGA
       Place{TARGET(kFPGA), PRECISION(kFloat)},
+#endif
+#ifdef LITE_WITH_CUDA
+      Place{TARGET(kCUDA), PRECISION(kFloat)},
 #endif
   });
 
@@ -68,6 +68,12 @@ void Run(DebugConfig* conf) {
 #endif
 #ifdef LITE_WITH_X86
                   Place{TARGET(kX86), PRECISION(kFloat)},
+#endif
+#ifdef LITE_WITH_FPGA
+                  Place{TARGET(kFPGA), PRECISION(kFloat)},
+#endif
+#ifdef LITE_WITH_CUDA
+                  Place{TARGET(kCUDA), PRECISION(kFloat)},
 #endif
                   valid_places,
                   passes);
diff --git a/lite/utils/logging.h b/lite/utils/logging.h
index 85c716d52f..e85753ec30 100644
--- a/lite/utils/logging.h
+++ b/lite/utils/logging.h
@@ -18,6 +18,9 @@
  */
 #pragma once
 
+#ifndef _LOGGING_H_
+#define _LOGGING_H_
+
 #include <assert.h>
 #include <sys/time.h>
 #include <sys/types.h>
@@ -183,3 +186,4 @@ class VoidifyFatal : public Voidify {
 
 }  // namespace lite
 }  // namespace paddle
+#endif
diff --git a/lite/utils/paddle_enforce.h b/lite/utils/paddle_enforce.h
index 8317f45a0c..82534af996 100644
--- a/lite/utils/paddle_enforce.h
+++ b/lite/utils/paddle_enforce.h
@@ -35,5 +35,5 @@
   CHECK_GT((a), (b)) << paddle::lite::string_format("" __VA_ARGS__);
 
 #ifndef PADDLE_THROW
-#define PADDLE_THROW
+#define PADDLE_THROW(...) printf("" __VA_ARGS__)  // caller supplies the ';'
 #endif
-- 
GitLab