add debug code

cc7485bb · liuruilong · 414ec29a · cc7485bb · cc7485bb · cc7485bb
7 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ option(USE_OPENMP "openmp support" OFF)
 project(paddle-mobile)
 option(DEBUGING "enable debug mode" ON)
-option(USE_EXCEPTION "use std exception" OFF)
+option(USE_EXCEPTION "use std exception" ON)
 option(LOG_PROFILE "log profile" OFF)
 # select the platform to build
 option(CPU "armv7 with neon" OFF)

--- a/src/framework/cl/cl_engine.h
+++ b/src/framework/cl/cl_engine.h
@@ -52,7 +52,7 @@ class CLEngine {
      cl_context context, std::string file_name) {
    FILE *file = fopen(file_name.c_str(), "rb");
    PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
-                          filename.c_str());
+                          file_name.c_str());
    fseek(file, 0, SEEK_END);
    int64_t size = ftell(file);
    PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");

--- a/src/framework/cl/cl_helper.h
+++ b/src/framework/cl/cl_helper.h
@@ -18,6 +18,7 @@ limitations under the License. */
 #include <type_traits>
 #include <vector>
+#include "common/log.h"
 #include "framework/cl/cl_deleter.h"
 #include "framework/cl/cl_image.h"
 #include "framework/cl/cl_scope.h"
@@ -32,11 +33,16 @@ class CLHelper {
  explicit CLHelper(CLScope *scope) : scope_(scope) {}
  void AddKernel(const std::string &kernel_name, const std::string &file_name) {
+    DLOG << " begin add kernel ";
    auto kernel = scope_->GetKernel(kernel_name, file_name);
+    DLOG << " add kernel ing ";
    kernels.emplace_back(std::move(kernel));
  }
-  cl_kernel KernelAt(const int index) { return kernels[index].get(); }
+  cl_kernel KernelAt(const int index) {
+    DLOG << " kernel count: " << kernels.size();
+    return kernels[index].get();
+  }
  cl_command_queue CLCommandQueue() { return scope_->CommandQueue(); }

--- a/src/framework/cl/cl_image.h
+++ b/src/framework/cl/cl_image.h
@@ -17,7 +17,9 @@ limitations under the License. */
 #include <vector>
 #include "CL/cl.h"
 #include "framework/cl/cl_half.h"
+#include "framework/cl/cl_tool.h"
 #include "framework/ddim.h"
 #include "framework/tensor.h"
@@ -205,6 +207,7 @@ class CLImage {
    if (err != CL_SUCCESS) {
      // TODO(HaiPeng): error handling
+      CL_CHECK_ERRORS(err);
      PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error ");
    }
  }

--- a/src/framework/cl/cl_scope.h
+++ b/src/framework/cl/cl_scope.h
@@ -40,8 +40,10 @@ class CLScope {
  std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
      const std::string &kernel_name, const std::string &file_name) {
    auto program = Program(file_name);
+    DLOG << " get program ~ ";
    std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel(
        clCreateKernel(program, kernel_name.c_str(), NULL));
+    DLOG << " create kernel ~ ";
    return std::move(kernel);
  }

--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -37,6 +37,8 @@ limitations under the License. */
 #include "framework/cl/cl_image.h"
 #endif
+int debug_to = 2;
 namespace paddle_mobile {
 namespace framework {
@@ -85,7 +87,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
  for (int i = 0; i < blocks.size(); ++i) {
    std::shared_ptr<framework::BlockDesc> block_desc = blocks[i];
    std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
-    for (int j = 0; j < ops.size(); ++j) {
+    for (int j = 0; j < debug_to; ++j) {
      std::shared_ptr<framework::OpDesc> op = ops[j];
      DLOG << "create op: " << j << "  " << op->Type();
      auto op_base = framework::OpRegistry<Dtype>::CreateOp(
@@ -414,7 +416,7 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
    }
  }
 #else
-  for (int i = 0; i < 1; i++) {
+  for (int i = 0; i < debug_to; i++) {
 #ifdef PADDLE_MOBILE_PROFILE
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
@@ -430,7 +432,9 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
 #endif
+  DLOG << " predict return nullptr";
+  return nullptr;
  auto last_op = ops.rbegin();
  auto output_map = (*last_op)->Outputs();
  std::vector<std::string> out_keys = (*last_op)->GetOutKeys();

--- a/src/operators/kernel/cl/conv_kernel.cpp
+++ b/src/operators/kernel/cl/conv_kernel.cpp
@@ -30,14 +30,32 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
               static_cast<int>(param->Paddings()[1]);
  param->SetOffset(offset);
+  DLOG << " init helper: " << &cl_helper_;
+  DLOG << " conv kernel add kernel ~ ";
+  DLOG << " width of one block: " << param->Filter()->WidthOfOneBlock();
+  DLOG << " height of one block: " << param->Filter()->HeightOfOneBlock();
+  DLOG << " filter dims: " << param->Filter()->dims();
  if (param->Filter()->WidthOfOneBlock() == 1 &&
      param->Filter()->HeightOfOneBlock() == 1) {
+    DLOG << " here1 ";
    this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl");
  } else if (param->Filter()->dims()[1] == 1) {
+    DLOG << " here2 ";
    this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl");
  } else if (param->Filter()->WidthOfOneBlock() == 3 &&
             param->Filter()->HeightOfOneBlock() == 3) {
+    DLOG << " here3 ";
    this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
  } else {
    PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
  }
@@ -47,14 +65,27 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
 template <>
 void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
+  DLOG << " Compute helper: " << &cl_helper_;
+  DLOG << " begin compute ";
  auto kernel = this->cl_helper_.KernelAt(0);
+  DLOG << " get work size ";
  auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
+  DLOG << " end work size ";
  int c_block = default_work_size[0];
  int w = default_work_size[1];
  int nh = default_work_size[2];
  auto input = param.Input()->GetCLImage();
+  DLOG << " get Input ";
  auto filter = param.Filter()->GetCLImage();
+  DLOG << " get Filter ";
  auto output = param.Output();
+  DLOG << " get Output ";
  int stride = param.Strides()[0];
  int offset = param.Offset();
  int input_c = param.Input()->CBlock();
@@ -64,6 +95,8 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
  cl_int status;
+  DLOG << " begin set kernel arg ";
  status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
  status = clSetKernelArg(kernel, 1, sizeof(int), &w);
  status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
@@ -77,12 +110,18 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
  status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
  status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
+  DLOG << " end set kernel arg ";
  CL_CHECK_ERRORS(status);
+  DLOG << " begin enqueue ";
  status =
      clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
                             default_work_size.data(), NULL, 0, NULL, NULL);
+  DLOG << " end enqueue ";
  CL_CHECK_ERRORS(status);
 }