diff --git a/lite/api/CMakeLists.txt b/lite/api/CMakeLists.txt
index de1a76c9c391102b8d7a1d113164f45beb913e6e..84f8a09860edea1bd0f29a0e7a726b1011a02ffd 100644
--- a/lite/api/CMakeLists.txt
+++ b/lite/api/CMakeLists.txt
@@ -277,7 +277,7 @@ if (LITE_ON_MODEL_OPTIMIZE_TOOL)
     message(STATUS "Compiling model_optimize_tool")
     lite_cc_binary(model_optimize_tool SRCS model_optimize_tool.cc cxx_api_impl.cc paddle_api.cc cxx_api.cc
         DEPS gflags kernel op optimizer mir_passes utils)
-    add_dependencies(model_optimize_tool op_list_h kernel_list_h all_kernel_faked_cc)
+    add_dependencies(model_optimize_tool op_list_h kernel_list_h all_kernel_faked_cc supported_kernel_op_info_h)
 endif(LITE_ON_MODEL_OPTIMIZE_TOOL)
 
 lite_cc_test(test_paddle_api SRCS paddle_api_test.cc DEPS paddle_api_full paddle_api_light
diff --git a/lite/api/cxx_api.cc b/lite/api/cxx_api.cc
index 990d08f18f541088d797510e9dbd4881d42b164f..c1e9fc422450adf96d62c68d622907bd7e15b405 100644
--- a/lite/api/cxx_api.cc
+++ b/lite/api/cxx_api.cc
@@ -201,7 +201,11 @@ void Predictor::Build(const lite_api::CxxConfig &config,
   const std::string &model_file = config.model_file();
   const std::string &param_file = config.param_file();
   const bool model_from_memory = config.model_from_memory();
-  LOG(INFO) << "load from memory " << model_from_memory;
+  if (model_from_memory) {
+    LOG(INFO) << "Load model from memory.";
+  } else {
+    LOG(INFO) << "Load model from file.";
+  }
 
   Build(model_path,
         model_file,
diff --git a/lite/api/model_optimize_tool.cc b/lite/api/model_optimize_tool.cc
index b678c7ecd24c5ffbf3e9e3531264ac195c6a7325..fc23e0b54be41bff5b7b65b4e58908546b186bb4 100644
--- a/lite/api/model_optimize_tool.cc
+++ b/lite/api/model_optimize_tool.cc
@@ -16,8 +16,9 @@
 #ifdef PADDLE_WITH_TESTING
 #include <gtest/gtest.h>
 #endif
-// "all_kernel_faked.cc" and "kernel_src_map.h" are created automatically during
-// model_optimize_tool's compiling period
+// "supported_kernel_op_info.h", "all_kernel_faked.cc" and "kernel_src_map.h"
+// are created automatically while model_optimize_tool is compiled
+#include <iomanip>
 #include "all_kernel_faked.cc"  // NOLINT
 #include "kernel_src_map.h"     // NOLINT
 #include "lite/api/cxx_api.h"
@@ -25,8 +26,11 @@
 #include "lite/api/paddle_use_ops.h"
 #include "lite/api/paddle_use_passes.h"
 #include "lite/core/op_registry.h"
+#include "lite/model_parser/compatible_pb.h"
+#include "lite/model_parser/pb/program_desc.h"
 #include "lite/utils/cp_logging.h"
 #include "lite/utils/string.h"
+#include "supported_kernel_op_info.h"  // NOLINT
 
 DEFINE_string(model_dir,
               "",
@@ -62,10 +66,16 @@ DEFINE_string(valid_targets,
               "The targets this model optimized for, should be one of (arm, "
               "opencl, x86), splitted by space");
 DEFINE_bool(prefer_int8_kernel, false, "Prefer to run model with int8 kernels");
+DEFINE_bool(print_supported_ops,
+            false,
+            "Print the operators supported on the input target");
+DEFINE_bool(print_all_ops,
+            false,
+            "Print all the valid operators of Paddle-Lite");
+DEFINE_bool(print_model_ops, false, "Print operators in the input model");
 
 namespace paddle {
 namespace lite_api {
-
 //! Display the kernel information.
 void DisplayKernels() {
   LOG(INFO) << ::paddle::lite::KernelRegistry::Global().DebugString();
@@ -130,9 +140,7 @@ void RunOptimize(const std::string& model_dir,
   config.set_model_dir(model_dir);
   config.set_model_file(model_file);
   config.set_param_file(param_file);
-
   config.set_valid_places(valid_places);
-
   auto predictor = lite_api::CreatePaddlePredictor(config);
 
   LiteModelType model_type;
@@ -168,6 +176,202 @@ void CollectModelMetaInfo(const std::string& output_dir,
   lite::WriteLines(std::vector<std::string>(total.begin(), total.end()),
                   output_path);
 }
+void PrintOpsInfo(std::set<std::string> valid_ops = {}) {
+  std::vector<std::string> targets = {"kHost",
+                                      "kX86",
+                                      "kCUDA",
+                                      "kARM",
+                                      "kOpenCL",
+                                      "kFPGA",
+                                      "kNPU",
+                                      "kXPU",
+                                      "kAny",
+                                      "kUnk"};
+  int maximum_optype_length = 0;
+  for (auto it = supported_ops.begin(); it != supported_ops.end(); it++) {
+    maximum_optype_length = it->first.size() > maximum_optype_length
+                                ? it->first.size()
+                                : maximum_optype_length;
+  }
+  std::cout << std::setiosflags(std::ios::internal);
+  std::cout << std::setw(maximum_optype_length) << "OP_name";
+  for (int i = 0; i < targets.size(); i++) {
+    std::cout << std::setw(10) << targets[i].substr(1);
+  }
+  std::cout << std::endl;
+  if (valid_ops.empty()) {
+    for (auto it = supported_ops.begin(); it != supported_ops.end(); it++) {
+      std::cout << std::setw(maximum_optype_length) << it->first;
+      auto ops_valid_places = it->second;
+      for (int i = 0; i < targets.size(); i++) {
+        if (std::find(ops_valid_places.begin(),
+                      ops_valid_places.end(),
+                      targets[i]) != ops_valid_places.end()) {
+          std::cout << std::setw(10) << "Y";
+        } else {
+          std::cout << std::setw(10) << " ";
+        }
+      }
+      std::cout << std::endl;
+    }
+  } else {
+    for (auto op = valid_ops.begin(); op != valid_ops.end(); op++) {
+      std::cout << std::setw(maximum_optype_length) << *op;
+      // Skip ops that no kernel matches.
+      if (supported_ops.find(*op) == supported_ops.end()) {
+        continue;
+      }
+      // Print op info.
+      auto ops_valid_places = supported_ops.at(*op);
+      for (int i = 0; i < targets.size(); i++) {
+        if (std::find(ops_valid_places.begin(),
+                      ops_valid_places.end(),
+                      targets[i]) != ops_valid_places.end()) {
+          std::cout << std::setw(10) << "Y";
+        } else {
+          std::cout << std::setw(10) << " ";
+        }
+      }
+      std::cout << std::endl;
+    }
+  }
+}
+/// Print help information
+void PrintHelpInfo() {
+  // at least one argument should be given
+  const char help_info[] =
+      "At least one argument should be given. Valid arguments are listed "
+      "below:\n"
+      "  Arguments of model optimization:\n"
+      "        `--model_dir=<model_param_dir>`\n"
+      "        `--model_file=<model_path>`\n"
+      "        `--param_file=<param_path>`\n"
+      "        `--optimize_out_type=(protobuf|naive_buffer)`\n"
+      "        `--optimize_out=<output_optimize_model_dir>`\n"
+      "        `--valid_targets=(arm|opencl|x86|npu|xpu)`\n"
+      "        `--prefer_int8_kernel=(true|false)`\n"
+      "        `--record_tailoring_info=(true|false)`\n"
+      "  Arguments of model checking and ops information:\n"
+      "        `--print_all_ops=true`   Display all the valid operators of "
+      "Paddle-Lite\n"
+      "        `--print_supported_ops=true  --valid_targets=(arm|opencl|x86|npu|xpu)`"
+      "  Display valid operators of input targets\n"
+      "        `--print_model_ops=true  --model_dir=<model_param_dir> "
+      "--valid_targets=(arm|opencl|x86|npu|xpu)`"
+      "  Display operators in the input model\n";
+  std::cout << help_info << std::endl;
+  exit(1);
+}
+
+// Parse the input command
+void ParseInputCommand() {
+  if (FLAGS_print_all_ops) {
+    std::cout << "All OPs supported by Paddle-Lite: " << supported_ops.size()
+              << " ops in total." << std::endl;
+    PrintOpsInfo();
+    exit(1);
+  } else if (FLAGS_print_supported_ops) {
+    auto valid_places = paddle::lite_api::ParserValidPlaces();
+    // get valid_targets string
+    std::vector<TargetType> target_types = {};
+    for (int i = 0; i < valid_places.size(); i++) {
+      target_types.push_back(valid_places[i].target);
+    }
+    std::string targets_str = TargetToStr(target_types[0]);
+    for (int i = 1; i < target_types.size(); i++) {
+      targets_str = targets_str + TargetToStr(target_types[i]);
+    }
+
+    std::cout << "Supported OPs on '" << targets_str << "': " << std::endl;
+    target_types.push_back(TARGET(kHost));
+    target_types.push_back(TARGET(kUnk));
+
+    std::set<std::string> valid_ops;
+    for (int i = 0; i < target_types.size(); i++) {
+      auto ops = supported_ops_target[static_cast<int>(target_types[i])];
+      valid_ops.insert(ops.begin(), ops.end());
+    }
+    PrintOpsInfo(valid_ops);
+    exit(1);
+  }
+}
+// test whether this model is supported
+void CheckIfModelSupported() {
+  // 1. parse the valid places and valid targets
+  auto valid_places = paddle::lite_api::ParserValidPlaces();
+  // set valid_ops
+  auto valid_ops = supported_ops_target[static_cast<int>(TARGET(kHost))];
+  auto valid_unktype_ops = supported_ops_target[static_cast<int>(TARGET(kUnk))];
+  valid_ops.insert(
+      valid_ops.end(), valid_unktype_ops.begin(), valid_unktype_ops.end());
+  for (int i = 0; i < valid_places.size(); i++) {
+    auto target = valid_places[i].target;
+    auto ops = supported_ops_target[static_cast<int>(target)];
+    valid_ops.insert(valid_ops.end(), ops.begin(), ops.end());
+  }
+  // get valid ops
+  std::set<std::string> valid_ops_set(valid_ops.begin(), valid_ops.end());
+
+  // 2. load the model into a program to collect its ops
+  std::string prog_path = FLAGS_model_dir + "/__model__";
+  if (!FLAGS_model_file.empty() && !FLAGS_param_file.empty()) {
+    prog_path = FLAGS_model_file;
+  }
+  lite::cpp::ProgramDesc cpp_prog;
+  framework::proto::ProgramDesc pb_proto_prog =
+      *lite::LoadProgram(prog_path, false);
+  lite::pb::ProgramDesc pb_prog(&pb_proto_prog);
+  // Transform to cpp::ProgramDesc
+  lite::TransformProgramDescAnyToCpp(pb_prog, &cpp_prog);
+
+  std::set<std::string> unsupported_ops;
+  std::set<std::string> input_model_ops;
+  for (int index = 0; index < cpp_prog.BlocksSize(); index++) {
+    auto current_block = cpp_prog.GetBlock<lite::cpp::BlockDesc>(index);
+    for (size_t i = 0; i < current_block->OpsSize(); ++i) {
+      auto& op_desc = *current_block->GetOp<lite::cpp::OpDesc>(i);
+      auto op_type = op_desc.Type();
+      input_model_ops.insert(op_type);
+      if (valid_ops_set.count(op_type) == 0) {
+        unsupported_ops.insert(op_type);
+      }
+    }
+  }
+  // 3. print the ops_info of the input model and check whether it is supported
+  if (FLAGS_print_model_ops) {
+    std::cout << "OPs in the input model include:\n";
+    PrintOpsInfo(input_model_ops);
+  }
+  if (!unsupported_ops.empty()) {
+    std::string unsupported_ops_str = *unsupported_ops.begin();
+    for (auto op_str = ++unsupported_ops.begin();
+         op_str != unsupported_ops.end();
+         op_str++) {
+      unsupported_ops_str = unsupported_ops_str + ", " + *op_str;
+    }
+    std::vector<TargetType> targets = {};
+    for (int i = 0; i < valid_places.size(); i++) {
+      targets.push_back(valid_places[i].target);
+    }
+    std::sort(targets.begin(), targets.end());
+    targets.erase(unique(targets.begin(), targets.end()), targets.end());
+    std::string targets_str = TargetToStr(targets[0]);
+    for (int i = 1; i < targets.size(); i++) {
+      targets_str = targets_str + "," + TargetToStr(targets[i]);
+    }
+
+    LOG(ERROR) << "Error: This model is not supported, because "
+               << unsupported_ops.size() << " ops are not supported on '"
+               << targets_str << "'. These unsupported ops are: '"
+               << unsupported_ops_str << "'.";
+    exit(1);
+  }
+  if (FLAGS_print_model_ops) {
+    std::cout << "Paddle-Lite supports this model!" << std::endl;
+    exit(1);
+  }
+}
 
 void Main() {
   if (FLAGS_display_kernels) {
@@ -241,7 +445,13 @@ void Main() {
 }  // namespace paddle
 
 int main(int argc, char** argv) {
+  // If no input argument is given, print help info.
+  if (argc < 2) {
+    paddle::lite_api::PrintHelpInfo();
+  }
   google::ParseCommandLineFlags(&argc, &argv, false);
+  paddle::lite_api::ParseInputCommand();
+  paddle::lite_api::CheckIfModelSupported();
   paddle::lite_api::Main();
   return 0;
 }
diff --git a/lite/core/CMakeLists.txt b/lite/core/CMakeLists.txt
index 34d9deff6a5262c16c2f74301771b73479f3ae30..8fda0a12fd3a66e27acba91af58fa67b3c9cb348 100644
--- a/lite/core/CMakeLists.txt
+++ b/lite/core/CMakeLists.txt
@@ -95,7 +95,15 @@ add_custom_command(
 add_custom_target(op_list_h DEPENDS ops.h)
 add_custom_target(kernel_list_h DEPENDS kernels.h)
 add_custom_target(all_kernel_faked_cc DEPENDS all_kernel_faked.cc)
-
+# create a header file recording which ops are supported on each platform
+add_custom_command(
+  COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/record_supported_kernel_op.py
+                 ${kernels_src_list}
+                 ${ops_src_list}
+                 ${CMAKE_BINARY_DIR}/supported_kernel_op_info.h
+  OUTPUT supported_kernel_op_info.h # not the real output path, so the command runs every time
+  )
+add_custom_target(supported_kernel_op_info_h DEPENDS supported_kernel_op_info.h)
 #----------------------------------------------- NOT CHANGE -----------------------------------------------
 lite_cc_library(kernel SRCS kernel.cc
     DEPS context type_system target_wrapper any op_params tensor
diff --git a/lite/kernels/arm/CMakeLists.txt b/lite/kernels/arm/CMakeLists.txt
index ce8b8365a8c55796772e7fbbe672ead682343a60..74b86c519e44f3aec5f0fbc7f3e2b3aa8d39c554 100644
--- a/lite/kernels/arm/CMakeLists.txt
+++ b/lite/kernels/arm/CMakeLists.txt
@@ -1,6 +1,6 @@
 # NOTE we leave the add_kernel not protected by LITE_WITH_LIGHT_WEIGHT_FRAMEWORK so that all the kernels will be registered
 # to the model_optimize_tool.
-if(NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM))
+if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)))
     return()
 endif()
 
diff --git a/lite/kernels/cuda/CMakeLists.txt b/lite/kernels/cuda/CMakeLists.txt
index bf59d0272611d314dcee41c620bb3f9b3ca08c7e..2df00f00a4eefd8fc6f9bee5e0c9b76656232041 100644
--- a/lite/kernels/cuda/CMakeLists.txt
+++ b/lite/kernels/cuda/CMakeLists.txt
@@ -1,4 +1,4 @@
-if(NOT LITE_WITH_CUDA)
+if((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_CUDA))
     return()
 endif()
 
diff --git a/lite/kernels/fpga/CMakeLists.txt b/lite/kernels/fpga/CMakeLists.txt
index 7c47e72872ecae6216288c20fa1a6ae30fac65bd..f6c3a399490a86e2ac2fcd9cbeb76fca8c8ac479 100755
--- a/lite/kernels/fpga/CMakeLists.txt
+++ b/lite/kernels/fpga/CMakeLists.txt
@@ -1,4 +1,4 @@
-if (NOT LITE_WITH_FPGA)
+if ((NOT LITE_ON_MODEL_OPTIMIZE_TOOL) AND (NOT LITE_WITH_FPGA))
     return()
 endif()
 
diff --git a/lite/kernels/opencl/CMakeLists.txt b/lite/kernels/opencl/CMakeLists.txt
index 3423b1e920e5e7c4aaa34125303b09d943e47b62..f4d3254a7b54cfea96fc2419bd425f8328990ebe 100644
--- a/lite/kernels/opencl/CMakeLists.txt
+++ b/lite/kernels/opencl/CMakeLists.txt
@@ -14,7 +14,7 @@ add_kernel(pool_opencl OPENCL basic SRCS pool_compute.cc DEPS ${cl_kernel_deps})
 add_kernel(io_copy_compute_opencl OPENCL basic SRCS io_copy_compute.cc DEPS ${tensor_lite} ${cl_kernel_deps})
 add_kernel(relu_opencl OPENCL basic SRCS relu_compute.cc DEPS ${cl_kernel_deps})
 add_kernel(depthwise_conv2d_opencl OPENCL basic SRCS depthwise_conv2d_compute.cc DEPS ${cl_kernel_deps})
-add_kernel(conv2d_1x1_opencl OPENCL basic SRCS conv2d_1x1_compute.cc DEPS ${cl_kernel_deps})
+#add_kernel(conv2d_1x1_opencl OPENCL basic SRCS conv2d_1x1_compute.cc DEPS ${cl_kernel_deps})
 add_kernel(reshape_opencl OPENCL basic SRCS reshape_compute.cc DEPS ${cl_kernel_deps})
 add_kernel(conv_opencl OPENCL basic SRCS conv_compute.cc DEPS ${cl_kernel_deps})
 add_kernel(layout_opencl OPENCL basic SRCS layout_compute.cc DEPS ${cl_kernel_deps})
@@ -49,12 +49,14 @@ lite_cc_test(test_depthwise_conv2d_opencl SRCS depthwise_conv2d_compute_test.cc
     DEPS depthwise_conv2d_opencl op_registry program context cl_image_converter
     ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
-lite_cc_test(test_conv2d_1x1_opencl SRCS conv2d_1x1_compute_test.cc
-    DEPS conv2d_1x1_opencl cl_image_converter op_registry program context
-    ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
+#lite_cc_test(test_conv2d_1x1_opencl SRCS conv2d_1x1_compute_test.cc
+#    DEPS conv2d_1x1_opencl cl_image_converter op_registry program context
+#    ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
+
 lite_cc_test(test_reshape_opencl SRCS reshape_compute_test.cc
     DEPS reshape_opencl cl_image_converter op_registry program context
     ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
+
 lite_cc_test(test_conv_opencl SRCS conv_compute_test.cc
     DEPS conv_opencl op_registry program context
     ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
diff --git a/lite/operators/compare_op.cc b/lite/operators/compare_op.cc
index 3210520cd5d71f239da258955df0e917e5e1153e..aa500ba35c37cf8af17091d8d37d8fd8d1a08e0e 100644
--- a/lite/operators/compare_op.cc
+++ b/lite/operators/compare_op.cc
@@ -54,7 +54,7 @@ bool CompareOp::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) {
 }  // namespace paddle
 
 REGISTER_LITE_OP(equal, paddle::lite::operators::CompareOp);
-REGISTER_LITE_OP(notequal, paddle::lite::operators::CompareOp);
+REGISTER_LITE_OP(not_equal, paddle::lite::operators::CompareOp);
 REGISTER_LITE_OP(less_than, paddle::lite::operators::CompareOp);
 REGISTER_LITE_OP(less_equal, paddle::lite::operators::CompareOp);
 REGISTER_LITE_OP(greater_than, paddle::lite::operators::CompareOp);
diff --git a/lite/tools/cmake_tools/create_fake_kernel_registry.py b/lite/tools/cmake_tools/create_fake_kernel_registry.py
index 140d77320704f62dfb2492eec3ad7238fe3868ff..35012d5b163aac2b6998790b4cfcf31e16cb1454 100644
--- a/lite/tools/cmake_tools/create_fake_kernel_registry.py
+++ b/lite/tools/cmake_tools/create_fake_kernel_registry.py
@@ -18,6 +18,9 @@ import logging
 from ast import RegisterLiteKernelParser
 from utils import *
 
+if len(sys.argv) != 4:
+    print("Error: create_fake_kernel_registry.py requires three inputs!")
+    exit(1)
 ops_list_path = sys.argv[1]
 dest_path = sys.argv[2]
 kernelmap_path = sys.argv[3]
diff --git a/lite/tools/cmake_tools/parse_kernel_registry.py b/lite/tools/cmake_tools/parse_kernel_registry.py
index f4f0b95483687d3785168c132d30ac8a4fa87c8e..6c020ec438682b670e4e36a926095fed5452ec37 100644
--- a/lite/tools/cmake_tools/parse_kernel_registry.py
+++ b/lite/tools/cmake_tools/parse_kernel_registry.py
@@ -12,10 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import print_function
 import sys
 import logging
 from ast import RegisterLiteKernelParser
 
+if len(sys.argv) != 5:
+    print("Error: parse_kernel_registry.py requires four inputs!")
+    exit(1)
 ops_list_path = sys.argv[1]
 dest_path = sys.argv[2]
 minkernels_list_path = sys.argv[3]
diff --git a/lite/tools/cmake_tools/parse_op_registry.py b/lite/tools/cmake_tools/parse_op_registry.py
index db58c455a9d5863ec0c66d7783871831c73c120f..7eb3337ed87b708102b2032de9a279fcae2d321c 100644
--- a/lite/tools/cmake_tools/parse_op_registry.py
+++ b/lite/tools/cmake_tools/parse_op_registry.py
@@ -13,10 +13,14 @@
 # limitations under the License.
 ''' Collect op registry information. '''
 
+from __future__ import print_function
 import sys
 import logging
 from ast import RegisterLiteOpParser
 
+if len(sys.argv) != 5:
+    print("Error: parse_op_registry.py requires four inputs!")
+    exit(1)
 ops_list_path = sys.argv[1]
 dest_path = sys.argv[2]
 minops_list_path = sys.argv[3]
diff --git a/lite/tools/cmake_tools/record_supported_kernel_op.py b/lite/tools/cmake_tools/record_supported_kernel_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6a3af6bd3e5a2decfb6b3b65b0357bff8b4a378
--- /dev/null
+++ b/lite/tools/cmake_tools/record_supported_kernel_op.py
@@ -0,0 +1,129 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import sys
+import logging
+from ast import RegisterLiteKernelParser
+from ast import RegisterLiteOpParser
+
+if len(sys.argv) != 4:
+    print("Error: record_supported_kernel_op.py requires three inputs!")
+    exit(1)
+kernels_list_path = sys.argv[1]
+ops_list_path = sys.argv[2]
+kernel_op_map_dest_path = sys.argv[3]
+
+
+out_lines = [
+'''
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <map>
+#include <string>
+#include <vector>
+
+const std::vector<std::vector<std::string>> supported_ops_target = {
+'''
+]
+
+ops_lines = []
+
+# valid targets and valid_ops
+valid_targets = ["kUnk", "kHost", "kX86", "kCUDA", "kARM", "kOpenCL", "kAny", "kFPGA", "kNPU", "kXPU"]
+valid_ops = [[], [], [], [], [], [], [], [], [], []]
+class TargetType:
+    kUnk = 0
+    kHost = 1
+    kX86 = 2
+    kCUDA = 3
+    kARM = 4
+    kOpenCL = 5
+    kFPGA = 7
+    kNPU = 8
+    kXPU = 9
+    kAny = 6  # any target
+
+# record the op_type of valid kernels into `valid_ops`, grouped by target type
+with open(kernels_list_path) as f:
+    paths = set([path for path in f])
+    for path in paths:
+        with open(path.strip()) as g:
+            c = g.read()
+            kernel_parser = RegisterLiteKernelParser(c)
+            kernel_parser.parse()
+            for k in kernel_parser.kernels:
+                if hasattr(TargetType, k.target):
+                    index = getattr(TargetType, k.target)
+                    valid_ops[index].append(k.op_type)
+
+# remove duplicated ops
+for target in valid_targets:
+    index = getattr(TargetType, target)
+    valid_ops[index] = list(set(valid_ops[index]))
+
+paths = set()
+with open(ops_list_path) as f:
+    paths = set([path for path in f])
+    for path in paths:
+        str_info = open(path.strip()).read()
+        op_parser = RegisterLiteOpParser(str_info)
+        ops = op_parser.parse()
+        for op in ops:
+            if "_grad" in op:
+                continue
+            out = '    {"%s", { "' % op
+            op_targets = []
+            for target in valid_targets:
+                if op in valid_ops[getattr(TargetType, target)]:
+                    op_targets.append(target)
+            if len(op_targets) > 0:
+                out = out + '", "'.join(op_targets) + '" }}'
+            else:
+                # op with unknown target type: kUnk = 0
+                valid_ops[0].append(op)
+                out = out + 'kUnk" }}'
+            ops_lines.append(out)
+
+with open(kernel_op_map_dest_path, 'w') as f:
+    logging.info("write kernel list to %s" % kernel_op_map_dest_path)
+    f.write('\n'.join(out_lines))
+    # write the per-target op lists into the header file
+    for target in valid_targets:
+        if len(valid_ops[getattr(TargetType, target)]) == 0:
+            f.write("\n    // %s_OPS: " % target)
+            f.write('\n    {},')
+        else:
+            f.write("\n    // %s_OPS: " % target)
+            f.write('\n    {"')
+            f.write('","'.join(valid_ops[getattr(TargetType, target)]))
+            f.write('"},\n')
+    f.write('};')
+    # write the op->targets map into the header file
+    f.write('\nconst std::map<std::string, std::vector<std::string>> supported_ops = {\n')
+    f.write(',\n'.join(ops_lines))
+    f.write('\n};')
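For orientation: the script above generates supported_kernel_op_info.h with two tables, supported_ops_target (a per-target op list indexed by TargetType value) and supported_ops (an op-to-targets map). A minimal sketch of the emitted header's shape, with hypothetical op names rather than real generator output:

```cpp
// Illustrative shape of the generated supported_kernel_op_info.h; the op
// names below are hypothetical placeholders, not real generator output.
#pragma once
#include <map>
#include <string>
#include <vector>

const std::vector<std::vector<std::string>> supported_ops_target = {
    // kUnk_OPS:
    {},
    // kHost_OPS:
    {"feed", "fetch"},
    // kX86_OPS:
    {"softmax"},
    // ... kCUDA_OPS through kXPU_OPS follow, one list per TargetType value.
};

const std::map<std::string, std::vector<std::string>> supported_ops = {
    {"conv2d", {"kARM", "kOpenCL"}},
    {"softmax", {"kHost", "kX86", "kARM"}},
};
```

ParseInputCommand() and CheckIfModelSupported() in model_optimize_tool.cc index supported_ops_target with static_cast<int>(target) and render the per-op support matrix from supported_ops.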
diff --git a/mobile/src/common/log.h b/mobile/src/common/log.h
index 69654c505d234709d6c3119be346cefaf82c04a9..3b42188b62278c0acde41d52d68cc4b48ee6cda9 100644
--- a/mobile/src/common/log.h
+++ b/mobile/src/common/log.h
@@ -80,7 +80,6 @@ static const char *ANDROID_LOG_TAG =
 #endif
 
 enum LogLevel {
-  kNO_LOG,
   kLOG_ERROR,
   kLOG_WARNING,
   kLOG_INFO,
@@ -89,15 +88,16 @@ enum LogLevel {
   kLOG_DEBUG1,
   kLOG_DEBUG2,
   kLOG_DEBUG3,
-  kLOG_DEBUG4
+  kLOG_DEBUG4,
+  kNO_LOG,
 };
 
 // log level
 static LogLevel log_level = kLOG_DEBUG4;
 
-static std::vector<std::string> logs{"NO     ", "ERROR  ", "WARNING", "INFO   ",
-                                     "VERBOSE", "DEBUG  ", "DEBUG1 ", "DEBUG2 ",
-                                     "DEBUG3 ", "DEBUG4 "};
+static std::vector<std::string> logs{"ERROR  ", "WARNING", "INFO   ", "VERBOSE",
+                                     "DEBUG  ", "DEBUG1 ", "DEBUG2 ", "DEBUG3 ",
+                                     "DEBUG4 ", "NO     "};
 struct ToLog;
 struct Print;
 
@@ -217,7 +217,6 @@ struct ToLog {
 #define ANDROIDLOGV(...)
 
 enum LogLevel {
-  kNO_LOG,
   kLOG_ERROR,
   kLOG_WARNING,
   kLOG_INFO,
@@ -226,7 +225,8 @@ enum LogLevel {
   kLOG_DEBUG1,
   kLOG_DEBUG2,
   kLOG_DEBUG3,
-  kLOG_DEBUG4
+  kLOG_DEBUG4,
+  kNO_LOG
 };
 
 struct ToLog;
diff --git a/mobile/src/framework/cl/cl_engine.h b/mobile/src/framework/cl/cl_engine.h
index 2e21dd9e395354d2bd5e35a648687a6116347caf..cf758f8328338f936e26270c24f2bf73688312c7 100644
--- a/mobile/src/framework/cl/cl_engine.h
+++ b/mobile/src/framework/cl/cl_engine.h
@@ -124,9 +124,9 @@ class CLEngine {
     if (status != CL_SUCCESS || ret_size / sizeof(size_t) < 3) {
       return CLLocalWorkSizeInfo(0, 0, 0, 0);
     }
-    DLOG << max_work_item_sizes[0];
-    DLOG << max_work_item_sizes[1];
-    DLOG << max_work_item_sizes[2];
+    DLOG << " max_work_item_sizes {" << max_work_item_sizes[0] << ", "
+         << max_work_item_sizes[1] << ", " << max_work_item_sizes[2] << "}";
+
     localWorkSizeInfo_ = CLLocalWorkSizeInfo(max_work_group_size,
                                              max_work_item_sizes[0],
                                              max_work_item_sizes[1],
                                              max_work_item_sizes[2]);
@@ -182,8 +182,8 @@ class CLEngine {
     cl_program p =
         clCreateProgramWithSource(context, 1, &source, sourceSize, &status_);
-    DLOG << " cl kernel from source";
-    DLOG << " source size: " << sourceSize[0];
+    LOG(kLOG_DEBUG4) << " cl kernel from source";
+    LOG(kLOG_DEBUG4) << " source size: " << sourceSize[0];
     CL_CHECK_ERRORS(status_);
 
     std::unique_ptr<_cl_program, CLProgramDeleter> program_ptr(p);
diff --git a/mobile/src/framework/cl/cl_helper.h b/mobile/src/framework/cl/cl_helper.h
index 893456211d0429701b49d0f0be654beaad16e0e2..db9aa37ae2b7219131b5950e54ec008828f1fc70 100644
--- a/mobile/src/framework/cl/cl_helper.h
+++ b/mobile/src/framework/cl/cl_helper.h
@@ -36,9 +36,9 @@ class CLHelper {
 
   void AddKernel(const std::string &kernel_name, const std::string &file_name,
                  const std::string &options = "") {
-    DLOG << " begin add kernel ";
+    LOG(kLOG_DEBUG1) << " begin add kernel ";
     auto kernel = scope_->GetKernel(kernel_name, file_name, options);
-    DLOG << " add kernel ing ";
+    LOG(kLOG_DEBUG1) << " end add kernel ";
     kernels.emplace_back(std::move(kernel));
   }
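On the kNO_LOG move in log.h above: LOG(level) only emits when the message level does not exceed the global log_level, so placing kNO_LOG after kLOG_DEBUG4 turns every LOG(kNO_LOG) statement into a no-op under any normal threshold. A minimal sketch of that comparison, with the real macro machinery simplified to a plain function:

```cpp
#include <iostream>

// Same ordering as the patched enum in mobile/src/common/log.h.
enum LogLevel {
  kLOG_ERROR,
  kLOG_WARNING,
  kLOG_INFO,
  kLOG_VERBOSE,
  kLOG_DEBUG,
  kLOG_DEBUG1,
  kLOG_DEBUG2,
  kLOG_DEBUG3,
  kLOG_DEBUG4,
  kNO_LOG,  // now the largest value, so it never passes the filter below
};

static LogLevel log_level = kLOG_DEBUG4;  // default threshold in log.h

// Simplified stand-in for the filter condition inside the LOG(level) macro.
bool ShouldPrint(LogLevel level) { return level <= log_level; }

int main() {
  std::cout << std::boolalpha
            << ShouldPrint(kLOG_INFO) << "\n"   // true: below the threshold
            << ShouldPrint(kNO_LOG) << "\n";    // false: message is silenced
  return 0;
}
```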
diff --git a/mobile/src/framework/cl/cl_image.h b/mobile/src/framework/cl/cl_image.h
index d3d48cda8b86b07e76658ef903863268042ab36f..f891e41a6a715f4e97776f90afcf42945e2449cf 100644
--- a/mobile/src/framework/cl/cl_image.h
+++ b/mobile/src/framework/cl/cl_image.h
@@ -87,14 +87,14 @@ class CLImage {
     PADDLE_MOBILE_ENFORCE(tensor_data_ != nullptr,
                           " need call SetTensorData first");
 
-    DLOG << " begin init cl image ";
+    LOG(kNO_LOG) << " begin init cl image ";
     image_dims_ = converter->InitImageDimInfoWith(tensor_dims_);
 
     half_t *image_data = new half_t[product(image_dims_) * 4];
 
-    DLOG << " convert to image";
+    LOG(kNO_LOG) << " convert to image";
     converter->NCHWToImage(tensor_data_, image_data, tensor_dims_);
-    DLOG << " end convert to image";
+    LOG(kNO_LOG) << " end convert to image";
 
     InitCLImage(context, image_dims_[0], image_dims_[1], image_data);
 
@@ -105,7 +105,7 @@ class CLImage {
     tensor_data_ = nullptr;
     image_converter_ = converter;
     initialized_ = true;
-    DLOG << " end init cl image";
+    LOG(kNO_LOG) << " end init cl image";
   }
 
   void InitNImage(cl_context context, cl_command_queue command_queue) {
@@ -137,9 +137,9 @@ class CLImage {
     //  CLImageConverterFolder();
     CLImageConverterNormal *normal_converter = new CLImageConverterNormal();
     PADDLE_MOBILE_ENFORCE(!shared_mem_, "do not init mem after shared .")
-    DLOG << " to get image dims ";
+    // LOG(kNO_LOG) << " to get image dims ";
     image_dims_ = normal_converter->InitImageDimInfoWith(dim);
-    DLOG << " end get image dims " << image_dims_;
+    // LOG(kNO_LOG) << " end get image dims " << image_dims_;
 
     InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);
 
@@ -148,7 +148,7 @@ class CLImage {
     image_converter_ = normal_converter;
     cl_event_ = CLEngine::Instance()->CreateEvent(context);
     initialized_ = true;
-    DLOG << " end init cl image";
+    // LOG(kNO_LOG) << " end init cl image";
   }
   /**
    * create fake size cl_mem for mem share
   */
@@ -169,9 +169,9 @@ class CLImage {
     InitCLImage(context, real_image_dims_[0], real_image_dims_[1], nullptr);
     // cheat cl_image they got what they wanted
     image_dims_ = normal_converter->InitImageDimInfoWith(need_dims);
-    DLOG << "InitFakeSizeImage ... ";
-    DLOG << "real_image_dims: " << real_image_dims_;
-    DLOG << "image_dims_: " << image_dims_;
+    LOG(kNO_LOG) << "InitFakeSizeImage ... ";
+    LOG(kNO_LOG) << "real_image_dims: " << real_image_dims_;
+    LOG(kNO_LOG) << "image_dims_: " << image_dims_;
     PADDLE_MOBILE_ENFORCE(real_image_dims_[0] >= image_dims_[0] &&
                               real_image_dims_[1] >= image_dims_[1],
                           "real image is not enough");
@@ -182,7 +182,7 @@ class CLImage {
 
     initialized_ = true;
     shared_mem_ = true;
-    DLOG << " end init FakeSizeImage";
+    LOG(kNO_LOG) << " end init FakeSizeImage";
   }
   /**
    * init cl mem with an existing cl mem
   */
@@ -197,15 +197,15 @@ class CLImage {
     real_image_dims_ = src.real_image_dims_;
     image_dims_ = normal_converter->InitImageDimInfoWith(need_dims);
-    DLOG << "InitWithExistMem ... ";
-    DLOG << "real_image_dims: " << real_image_dims_;
-    DLOG << "image_dims_: " << image_dims_;
+    LOG(kNO_LOG) << "InitWithExistMem ... ";
"; + LOG(kNO_LOG) << "real_image_dims: " << real_image_dims_; + LOG(kNO_LOG) << "image_dims_: " << image_dims_; if (real_image_dims_[0] < image_dims_[0] || real_image_dims_[1] < image_dims_[1]) { - DLOG << "real image is not enough!"; - DLOG << "real_image_dims: " << real_image_dims_; - DLOG << "image_dims_: " << image_dims_; + LOG(kNO_LOG) << "real image is not enough!"; + LOG(kNO_LOG) << "real_image_dims: " << real_image_dims_; + LOG(kNO_LOG) << "image_dims_: " << image_dims_; } PADDLE_MOBILE_ENFORCE(real_image_dims_[0] >= image_dims_[0] && real_image_dims_[1] >= image_dims_[1], @@ -221,7 +221,7 @@ class CLImage { initialized_ = true; shared_mem_ = true; - DLOG << " end init WithExistMem"; + LOG(kNO_LOG) << " end init WithExistMem"; } void InitConv2dTransposeFilterCLImage(cl_context context, diff --git a/mobile/src/framework/cl/cl_scope.h b/mobile/src/framework/cl/cl_scope.h index 643ce32b57616305da0c581d6d50dfcbbc4f1b1d..25552165640cca5ef31b53b7fe442214384eeab8 100644 --- a/mobile/src/framework/cl/cl_scope.h +++ b/mobile/src/framework/cl/cl_scope.h @@ -47,14 +47,14 @@ class CLScope { std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel( const std::string &kernel_name, const std::string &file_name, const std::string &options) { - DLOG << " to get program " << file_name; + LOG(kLOG_DEBUG2) << " to get program " << file_name; auto program = Program(file_name, kernel_name, options); - DLOG << " end get program ~ "; - DLOG << " to create kernel: " << kernel_name; + LOG(kLOG_DEBUG2) << " end get program ~ "; + LOG(kLOG_DEBUG2) << " to create kernel: " << kernel_name; std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel( clCreateKernel(program, kernel_name.c_str(), &status_)); CL_CHECK_ERRORS(status_); - DLOG << " end create kernel ~ "; + LOG(kLOG_DEBUG2) << " end create kernel ~ "; return std::move(kernel); } @@ -81,9 +81,11 @@ class CLScope { auto program = CLEngine::Instance()->CreateProgramWithSource( context_, source.c_str()); - DLOG << " --- begin build program -> " << program_key << " --- "; + LOG(kLOG_DEBUG3) << " --- begin build program -> " << program_key + << " --- "; CLEngine::Instance()->BuildProgram(program.get(), options); - DLOG << " --- end build program -> " << program_key << " --- "; + LOG(kLOG_DEBUG3) << " --- end build program -> " << program_key + << " --- "; programs_[program_key] = std::move(program); return programs_[program_key].get(); @@ -100,9 +102,11 @@ class CLScope { context_, CLEngine::Instance()->GetCLPath() + "/cl_kernel/" + file_name); - DLOG << " --- begin build program -> " << program_key << " --- "; + LOG(kLOG_DEBUG3) << " --- begin build program ele-> " << program_key + << " --- "; CLEngine::Instance()->BuildProgram(program.get(), options); - DLOG << " --- end build program -> " << program_key << " --- "; + LOG(kLOG_DEBUG3) << " --- end build program ele-> " << program_key + << " --- "; programs_[program_key] = std::move(program); return programs_[program_key].get(); diff --git a/mobile/src/framework/context.h b/mobile/src/framework/context.h index 944d54cc499f2a3c4fcad5c2fb0dfc4fe9bcac1d..18e40311bc2a5d555bb02cf0eb7af6356cbbf0b0 100644 --- a/mobile/src/framework/context.h +++ b/mobile/src/framework/context.h @@ -44,15 +44,13 @@ namespace framework { struct CPUContext { private: CPUContext(); - virtual ~CPUContext() {} public: + ~CPUContext() {} + static CPUContext* Context() { - static CPUContext* ctx = nullptr; - if (ctx == nullptr) { - ctx = new CPUContext(); - } - return ctx; + static CPUContext ctx; + return &ctx; } void set_thread_num(int 
diff --git a/mobile/src/framework/executor.cpp b/mobile/src/framework/executor.cpp
index d03cefe59a221093d4e5fb4e86273b3007097d9f..cda5c5522c961c70fc15bf76fcd650a17bb76835 100644
--- a/mobile/src/framework/executor.cpp
+++ b/mobile/src/framework/executor.cpp
@@ -80,7 +80,7 @@ Executor<Device, T>::Executor(const Program<Device> &program,
     std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
     for (int j = 0; j < ops.size(); ++j) {
       std::shared_ptr<OpDesc> op_desc = ops[j];
-      DLOG << "create op: " << op_desc->Type();
+      LOG(kLOG_INFO) << "create op[" << j << "]: " << op_desc->Type();
 
       auto op_handler = OpRegistry<Device>::CreateOp(
           op_desc->Type(), op_desc->GetInputs(), op_desc->GetOutputs(),
@@ -111,7 +111,8 @@ Executor<Device, T>::Executor(const Program<Device> &program,
     clock_gettime(CLOCK_MONOTONIC, &ts);
     profile[op_index].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
 #endif
-    DLOG << "Initialize op[" << count++ << "]: " << op_handler->Type();
+    LOG(kLOG_INFO) << "Initialize op[" << count++
+                   << "]: " << op_handler->Type();
     if (op_handler->Type() == "feed" || op_handler->Type() == "fetch") {
       op_handler->setPrePostType(config_.pre_post_type);
     }
@@ -1015,7 +1016,7 @@ void Executor<Device, T>::InitMemory() {
         const TensorDesc &desc = var_desc->Tensor_desc();
         //          DDim ddim = make_ddim(desc.Dims());
         DDim ddim = cl_image->dims();
-        DLOG << var_desc->Name();
+        LOG(kLOG_DEBUG1) << "init image of " << var_desc->Name();
         cl_image->InitEmptyImage(context, command_queue, ddim);
       }
     }
diff --git a/mobile/src/framework/loader.cpp b/mobile/src/framework/loader.cpp
index 34cf6253cb4571c3b52fe61161cba3e140eb0110..31274743f8b1d4b3d8195526e1ae77129c2729bb 100644
--- a/mobile/src/framework/loader.cpp
+++ b/mobile/src/framework/loader.cpp
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
 #include "framework/loader.h"
+#include
 #include "framework/lod_tensor.h"
 #include "framework/program/program-optimize/program_optimize.h"
 
@@ -173,7 +174,7 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
   rewind(fp);
 
   DLOG << "model size: " << size;
-
+  PADDLE_MOBILE_ENFORCE(size > 0, "model size should > 0")
   *out = reinterpret_cast<uint8_t *>(malloc(size));
 
   size_t cur_len = 0;
diff --git a/mobile/src/framework/operator.cpp b/mobile/src/framework/operator.cpp
index 402512c7237be0ca26470361cc16369bd97f7758..a091a49b35203445cda48b2387413193079ecd5e 100644
--- a/mobile/src/framework/operator.cpp
+++ b/mobile/src/framework/operator.cpp
@@ -62,31 +62,39 @@ void OperatorBase<Dtype>::Run() {
   DLOG << "-------------" << type_ << "----------------------------";
   vector<string> input_keys = GetInputKeys();
   for (const auto key : input_keys) {
-    auto var_vec_in = inputs_.at(key);
-    for (int i = 0; i < var_vec_in.size(); ++i) {
-      auto var = this->scope_->FindVar(var_vec_in[i]);
-      if (var->IsInitialized() &&
-          var->template IsType<framework::LoDTensor>()) {
-        const Tensor *tensor = var->template Get<framework::LoDTensor>();
-        if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor;
+    if (inputs_.count(key) > 0) {
+      auto var_vec_in = inputs_.at(key);
+      for (int i = 0; i < var_vec_in.size(); ++i) {
+        auto var = this->scope_->FindVar(var_vec_in[i]);
+        if (var->IsInitialized() &&
+            var->template IsType<framework::LoDTensor>()) {
+          const Tensor *tensor = var->template Get<framework::LoDTensor>();
+          if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor;
 #ifdef PADDLE_MOBILE_FPGA
-        DLOG << var_vec_in[i];
+          DLOG << var_vec_in[i];
 #endif
+        }
       }
+    } else {
+      DLOG << "did not find key (" << key << ") in inputs_";
     }
   }
   for (const auto key : GetOutKeys()) {
-    auto var_vec_out = outputs_.at(key);
-    for (int i = 0; i < var_vec_out.size(); ++i) {
-      auto var = scope_->FindVar(var_vec_out[i]);
-      if (var->IsInitialized() &&
-          var->template IsType<framework::LoDTensor>()) {
-        const Tensor *tensor = var->template Get<framework::LoDTensor>();
-        if (tensor) DLOG << type_ << " output- " << key << "=" << *tensor;
+    if (outputs_.count(key) > 0) {
+      auto var_vec_out = outputs_.at(key);
+      for (int i = 0; i < var_vec_out.size(); ++i) {
+        auto var = scope_->FindVar(var_vec_out[i]);
+        if (var->IsInitialized() &&
+            var->template IsType<framework::LoDTensor>()) {
+          const Tensor *tensor = var->template Get<framework::LoDTensor>();
+          if (tensor) DLOG << type_ << " output- " << key << "=" << *tensor;
 #ifdef PADDLE_MOBILE_FPGA
-        DLOG << var_vec_out[i];
+          DLOG << var_vec_out[i];
 #endif
+        }
       }
+    } else {
+      DLOG << "did not find key (" << key << ") in outputs_";
     }
   }
 #endif
@@ -100,27 +108,37 @@ void OperatorBase<GPU_CL>::Run() {
   DLOG << "-------------" << type_ << "----------------------------";
   vector<string> input_keys = GetInputKeys();
   for (const auto key : input_keys) {
-    auto var_vec_in = inputs_.at(key);
-    for (int i = 0; i < var_vec_in.size(); ++i) {
-      auto var = scope_->FindVar(var_vec_in[i]);
-      if (var->IsInitialized() && var->template IsType<framework::CLImage>()) {
-        const CLImage *cl_image = var->template Get<framework::CLImage>();
-        if (cl_image) {
-          DLOG << type_ << " input- " << key << "=" << *cl_image;
+    if (inputs_.count(key) > 0) {
+      auto var_vec_in = inputs_.at(key);
+      for (int i = 0; i < var_vec_in.size(); ++i) {
+        auto var = scope_->FindVar(var_vec_in[i]);
+        if (var->IsInitialized() &&
+            var->template IsType<framework::CLImage>()) {
+          const CLImage *cl_image = var->template Get<framework::CLImage>();
+          if (cl_image) {
+            DLOG << type_ << " input- " << key << "=" << *cl_image;
+          }
         }
       }
+    } else {
+      DLOG << "did not find key (" << key << ") in inputs_";
     }
   }
   for (const auto key : GetOutKeys()) {
-    auto var_vec_out = outputs_.at(key);
-    for (int i = 0; i < var_vec_out.size(); ++i) {
-      auto var = scope_->FindVar(var_vec_out[i]);
-      if (var->IsInitialized() && var->template IsType<framework::CLImage>()) {
-        const CLImage *cl_image = var->template Get<framework::CLImage>();
-        if (cl_image) {
-          DLOG << type_ << " output- " << key << "=" << *cl_image;
+    if (outputs_.count(key) > 0) {
+      auto var_vec_out = outputs_.at(key);
+      for (int i = 0; i < var_vec_out.size(); ++i) {
+        auto var = scope_->FindVar(var_vec_out[i]);
+        if (var->IsInitialized() &&
+            var->template IsType<framework::CLImage>()) {
+          const CLImage *cl_image = var->template Get<framework::CLImage>();
+          if (cl_image) {
+            DLOG << type_ << " output- " << key << "=" << *cl_image;
+          }
         }
       }
+    } else {
+      DLOG << "did not find key (" << key << ") in outputs_";
     }
   }
 #endif
diff --git a/mobile/src/operators/op_param.h b/mobile/src/operators/op_param.h
index e58159fbb74e7a91a88c3e76f8aa713b679d94b8..85dabe3bcd009c8c00a59ccf74b7651d907b6dc2 100644
--- a/mobile/src/operators/op_param.h
+++ b/mobile/src/operators/op_param.h
@@ -344,10 +344,14 @@ class OpParam {
   template <typename T>
   static const T GetAttr(const string &key, const AttributeMap &map) {
+    PADDLE_MOBILE_ENFORCE(HasAttr(key, map), "%s is not contained in attr map",
+                          key.c_str())
     return ((Attribute)map.at(key)).Get<T>();
   }
   static const std::string GetStringAttr(const string &key,
                                          const AttributeMap &map) {
+    PADDLE_MOBILE_ENFORCE(HasAttr(key, map), "%s is not contained in attr map",
+                          key.c_str())
     return ((Attribute)map.at(key)).GetString();
   }
 
@@ -355,6 +359,10 @@ class OpParam {
     return map.count(key) > 0;
   }
 
+  static const bool HasVar(const string &key, const VariableNameMap &var_map) {
+    return var_map.count(key) > 0;
+  }
+
   template <typename T>
   static T *GetVarValue(const string &key, const VariableNameMap &var_map,
                         const Scope &scope) {
@@ -3100,16 +3108,37 @@ class NearestInterpolationParam : public OpParam {
                             const AttributeMap &attrs, Scope *scope)
       : OpParam(inputs, outputs, attrs, scope) {
     input_x_ = InputXFrom<GType>(inputs, *scope);
-    input_outsize_ = InputOutSizeFrom<GType>(inputs, *scope);
+    const bool has_out_size = HasVar("OutSize", inputs);
+
+    if (has_out_size) {
+      input_outsize_ = InputOutSizeFrom<GType>(inputs, *scope);
+    }
+
     out_ = OutFrom<GType>(outputs, *scope);
-    out_h_ = GetAttr<int>("out_h", attrs);
-    out_w_ = GetAttr<int>("out_w", attrs);
+
+    if (HasAttr("out_h", attrs)) {
+      out_h_ = GetAttr<int>("out_h", attrs);
+    } else if (HasAttr("out_h ", attrs)) {
+      // some models are broken: the attr name carries a trailing space
+      out_h_ = GetAttr<int>("out_h ", attrs);
+    }
+
+    if (HasAttr("out_w", attrs)) {
+      out_w_ = GetAttr<int>("out_w", attrs);
+    } else if (HasAttr("out_w ", attrs)) {
+      // some models are broken: the attr name carries a trailing space
+      out_w_ = GetAttr<int>("out_w ", attrs);
+    }
+
+    LOG(kLOG_DEBUG1) << "out_h_: " << out_h_;
+    LOG(kLOG_DEBUG1) << "out_w_: " << out_w_;
+
     if (HasAttr("scale", attrs)) {
       has_scale_ = true;
       scale_ = GetAttr<float>("scale", attrs);
     }
-    DLOG << "has_scale_: " << has_scale_;
-    DLOG << "scale_: " << scale_;
+    LOG(kLOG_DEBUG1) << "has_scale_: " << has_scale_;
+    LOG(kLOG_DEBUG1) << "scale_: " << scale_;
   }
   const GType *InputX() const { return input_x_; }
   const GType *InputOutPutSize() const { return input_outsize_; }
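The NearestInterpolationParam change above reads out_h and out_w defensively: it checks HasAttr first and falls back to the attribute name with a trailing space, which some exported models carry. A self-contained sketch of that lookup pattern, with AttributeMap simplified to a std::map of ints (an assumption standing in for paddle-mobile's real attribute type):

```cpp
#include <iostream>
#include <map>
#include <string>

// Simplified stand-in for paddle-mobile's AttributeMap (assumption).
using AttributeMap = std::map<std::string, int>;

static bool HasAttr(const std::string &key, const AttributeMap &map) {
  return map.count(key) > 0;
}

// Look up `key`, then retry with a trailing space ("out_h" -> "out_h "),
// mirroring the fallback added in NearestInterpolationParam.
static int GetIntAttrLenient(const std::string &key,
                             const AttributeMap &attrs, int default_value) {
  if (HasAttr(key, attrs)) return attrs.at(key);
  if (HasAttr(key + " ", attrs)) return attrs.at(key + " ");
  return default_value;
}

int main() {
  AttributeMap attrs = {{"out_h ", 224}};  // note the trailing space
  std::cout << GetIntAttrLenient("out_h", attrs, -1) << std::endl;  // 224
  return 0;
}
```

The same HasAttr guard is what lets the new PADDLE_MOBILE_ENFORCE in GetAttr fail with the attribute name instead of an opaque std::out_of_range from map::at.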
diff --git a/mobile/src/pass/memory_optimize_cl.cpp b/mobile/src/pass/memory_optimize_cl.cpp
index 355123349d645075fd2ccc37144144da7d332a8f..53bb675f17b2bae9c3954fa57894b8f73fc611fe 100644
--- a/mobile/src/pass/memory_optimize_cl.cpp
+++ b/mobile/src/pass/memory_optimize_cl.cpp
@@ -14,6 +14,7 @@ limitations under the License.
 */
 #ifdef PADDLE_MOBILE_CL
 #include "pass/memory_optimize_cl.h"
 #include <algorithm>
+#include
 #include "framework/cl/cl_image.h"
 #include "framework/lod_tensor.h"
 
 namespace paddle_mobile {
@@ -79,7 +80,7 @@ void MemoryOptPassCl::operator()(
   std::vector<ClVarNode *> fetch_var_nodes;
 
   for (const auto &op : block->Ops()) {
-    DLOG << "op_desc->Type(): " << op->Type();
+    LOG(kNO_LOG) << "op_desc->Type(): " << op->Type();
     for (const auto &outputs : op->GetOutputs()) {
       for (const auto &output : outputs.second) {
         // if it is neither persistable nor excluded, add it to the
         // analysis nodes
         if (!IsPersistable(output) &&
             std::find(exclude_var_names.begin(), exclude_var_names.end(),
                       output) == exclude_var_names.end()) {
-          DLOG << "output: " << output;
+          LOG(kNO_LOG) << "output: " << output;
           ClVarNode *node = CreateNode(output);
           analysis_nodes_.push(node);
         }
@@ -100,7 +101,7 @@ void MemoryOptPassCl::operator()(
         if (!IsPersistable(input) &&
             std::find(exclude_var_names.begin(), exclude_var_names.end(),
                       input) == exclude_var_names.end()) {
-          DLOG << "input: " << input;
+          LOG(kNO_LOG) << "input: " << input;
           ClVarNode *node = CreateNode(input);
           analysis_nodes_.push(node);
           if (op->Type() == "fetch") {
@@ -114,7 +115,7 @@ void MemoryOptPassCl::operator()(
         if (!IsPersistable(output) &&
             std::find(exclude_var_names.begin(), exclude_var_names.end(),
                       output) == exclude_var_names.end()) {
-          DLOG << "output: " << output;
+          LOG(kNO_LOG) << "output: " << output;
           ClVarNode *node = CreateNode(output);
           analysis_nodes_.push(node);
         }
@@ -164,8 +165,8 @@ void MemoryOptPassCl::ShareData(
   cl_command_queue command_queue = scope->GetCLScpoe()->CommandQueue();
 
   for (const auto &list : reused_nodes_) {
-    DLOG << "\n";
-    DLOG << "gpu . share memory within these variables";
+    LOG(kNO_LOG) << "\n";
+    LOG(kNO_LOG) << "gpu . share memory within these variables";
     int64_t x_based_max_numl = -1;
     int64_t y_based_max_numl = -1;
     int64_t x_based_max_x = -1;