diff --git a/CMakeLists.txt b/CMakeLists.txt
index d76ac16aacd9a3c4aa666960a17cf1cc1988c752..a80c50067e06d4317d89fcfcca96e14837e4d09d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ option(USE_OPENMP "openmp support" OFF)
 project(paddle-mobile)
 
 option(DEBUGING "enable debug mode" ON)
-option(USE_EXCEPTION "use std exception" OFF)
+option(USE_EXCEPTION "use std exception" ON)
 option(LOG_PROFILE "log profile" OFF)
 # select the platform to build
 option(CPU "armv7 with neon" OFF)
diff --git a/src/framework/cl/cl_engine.h b/src/framework/cl/cl_engine.h
index ee671a1ff276b6597535a0f0bf20b02c46bf5eac..930e511957dc1cd1a2ca588e39d89c19d372bf19 100644
--- a/src/framework/cl/cl_engine.h
+++ b/src/framework/cl/cl_engine.h
@@ -52,7 +52,7 @@ class CLEngine {
       cl_context context, std::string file_name) {
     FILE *file = fopen(file_name.c_str(), "rb");
     PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
-                          filename.c_str());
+                          file_name.c_str());
     fseek(file, 0, SEEK_END);
     int64_t size = ftell(file);
     PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
diff --git a/src/framework/cl/cl_helper.h b/src/framework/cl/cl_helper.h
index e611a209238070420d15caa456c426b7b3650b79..8ebbcc911a558e3acbc5f54914300ca8226f0b0a 100644
--- a/src/framework/cl/cl_helper.h
+++ b/src/framework/cl/cl_helper.h
@@ -18,6 +18,7 @@ limitations under the License. */
 #include <string>
 #include <vector>
 
+#include "common/log.h"
 #include "framework/cl/cl_deleter.h"
 #include "framework/cl/cl_image.h"
 #include "framework/cl/cl_scope.h"
@@ -32,11 +33,16 @@ class CLHelper {
   explicit CLHelper(CLScope *scope) : scope_(scope) {}
 
   void AddKernel(const std::string &kernel_name, const std::string &file_name) {
+    DLOG << " begin add kernel ";
     auto kernel = scope_->GetKernel(kernel_name, file_name);
+    DLOG << " add kernel ing ";
     kernels.emplace_back(std::move(kernel));
   }
 
-  cl_kernel KernelAt(const int index) { return kernels[index].get(); }
+  cl_kernel KernelAt(const int index) {
+    DLOG << " kernel count: " << kernels.size();
+    return kernels[index].get();
+  }
 
   cl_command_queue CLCommandQueue() { return scope_->CommandQueue(); }
 
diff --git a/src/framework/cl/cl_image.h b/src/framework/cl/cl_image.h
index e57fad58f4868d8798529b458974274478a527a2..6e7a229ae4b6d42291be7df43eb1573f94a9092c 100644
--- a/src/framework/cl/cl_image.h
+++ b/src/framework/cl/cl_image.h
@@ -17,7 +17,9 @@ limitations under the License. */
 #include <vector>
 
 #include "CL/cl.h"
+
 #include "framework/cl/cl_half.h"
+#include "framework/cl/cl_tool.h"
 #include "framework/ddim.h"
 #include "framework/tensor.h"
 
@@ -205,6 +207,7 @@ class CLImage {
 
     if (err != CL_SUCCESS) {
       // TODO(HaiPeng): error handling
+      CL_CHECK_ERRORS(err);
       PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error ");
     }
   }
diff --git a/src/framework/cl/cl_scope.h b/src/framework/cl/cl_scope.h
index 24757db138a2dc62d82ffe837b939f50bcbfe0f1..f843275c0277052f98a888e5f7074ba7d0800d44 100644
--- a/src/framework/cl/cl_scope.h
+++ b/src/framework/cl/cl_scope.h
@@ -40,8 +40,10 @@ class CLScope {
   std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
       const std::string &kernel_name, const std::string &file_name) {
     auto program = Program(file_name);
+    DLOG << " get program ~ ";
     std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel(
         clCreateKernel(program, kernel_name.c_str(), NULL));
+    DLOG << " create kernel ~ ";
     return std::move(kernel);
   }
 
diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp
index ae41930984caeb76679e69db19d6704c4557291f..3135d11d63d4d4aba8bc9f97c4fa873a2c647a3f 100644
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -37,6 +37,8 @@ limitations under the License. */
 #include "framework/cl/cl_image.h"
 #endif
 
+int debug_to = 2;
+
 namespace paddle_mobile {
 namespace framework {
 
@@ -85,7 +87,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
   for (int i = 0; i < blocks.size(); ++i) {
     std::shared_ptr<framework::BlockDesc> block_desc = blocks[i];
     std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
-    for (int j = 0; j < ops.size(); ++j) {
+    for (int j = 0; j < debug_to; ++j) {
       std::shared_ptr<framework::OpDesc> op = ops[j];
       DLOG << "create op: " << j << " " << op->Type();
       auto op_base = framework::OpRegistry<Dtype>::CreateOp(
@@ -414,7 +416,7 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
     }
   }
 #else
-  for (int i = 0; i < 1; i++) {
+  for (int i = 0; i < debug_to; i++) {
 #ifdef PADDLE_MOBILE_PROFILE
     struct timespec ts;
     clock_gettime(CLOCK_MONOTONIC, &ts);
@@ -430,7 +432,9 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
 #endif
 
   DLOG << " predict return nullptr";
 
+  return nullptr;
+
   auto last_op = ops.rbegin();
   auto output_map = (*last_op)->Outputs();
   std::vector<std::string> out_keys = (*last_op)->GetOutKeys();
diff --git a/src/operators/kernel/cl/conv_kernel.cpp b/src/operators/kernel/cl/conv_kernel.cpp
index d31553b60ef2827e9e818443e49a4be148305cf4..83861446850989d3e5bee043ca5f37008fb973aa 100644
--- a/src/operators/kernel/cl/conv_kernel.cpp
+++ b/src/operators/kernel/cl/conv_kernel.cpp
@@ -30,14 +30,32 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
                static_cast<int>(param->Paddings()[1]);
   param->SetOffset(offset);
 
+  DLOG << " init helper: " << &cl_helper_;
+  DLOG << " conv kernel add kernel ~ ";
+
+  DLOG << " width of one block: " << param->Filter()->WidthOfOneBlock();
+  DLOG << " height of one block: " << param->Filter()->HeightOfOneBlock();
+
+  DLOG << " filter dims: " << param->Filter()->dims();
+
+
   if (param->Filter()->WidthOfOneBlock() == 1 &&
       param->Filter()->HeightOfOneBlock() == 1) {
+
+    DLOG << " here1 ";
     this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl");
+
   } else if (param->Filter()->dims()[1] == 1) {
+
+    DLOG << " here2 ";
     this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl");
+
   } else if (param->Filter()->WidthOfOneBlock() == 3 &&
              param->Filter()->HeightOfOneBlock() == 3) {
+
+    DLOG << " here3 ";
     this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
+
   } else {
     PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
   }
@@ -47,14 +65,27 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
 
 template <>
 void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
+  DLOG << " Compute helper: " << &cl_helper_;
+  DLOG << " begin compute ";
   auto kernel = this->cl_helper_.KernelAt(0);
+  DLOG << " get work size ";
   auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
+  DLOG << " end work size ";
   int c_block = default_work_size[0];
   int w = default_work_size[1];
   int nh = default_work_size[2];
   auto input = param.Input()->GetCLImage();
+
+  DLOG << " get Input ";
+
   auto filter = param.Filter()->GetCLImage();
+
+  DLOG << " get Filter ";
+
   auto output = param.Output();
+
+  DLOG << " get Output ";
+
   int stride = param.Strides()[0];
   int offset = param.Offset();
   int input_c = param.Input()->CBlock();
@@ -64,6 +95,8 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
 
   cl_int status;
 
+  DLOG << " begin set kernel arg ";
+
   status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
   status = clSetKernelArg(kernel, 1, sizeof(int), &w);
   status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
@@ -77,12 +110,18 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
   status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
   status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
 
+  DLOG << " end set kernel arg ";
+
   CL_CHECK_ERRORS(status);
 
+  DLOG << " begin enqueue ";
+
   status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3,
                                   NULL, default_work_size.data(), NULL, 0,
                                   NULL, NULL);
 
+  DLOG << " end enqueue ";
+
   CL_CHECK_ERRORS(status);
 }