diff --git a/CMakeLists.txt b/CMakeLists.txt
index d76ac16aacd9a3c4aa666960a17cf1cc1988c752..a80c50067e06d4317d89fcfcca96e14837e4d09d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ option(USE_OPENMP "openmp support" OFF)
 project(paddle-mobile)
 
 option(DEBUGING "enable debug mode" ON)
-option(USE_EXCEPTION "use std exception" OFF)
+option(USE_EXCEPTION "use std exception" ON)
 option(LOG_PROFILE "log profile" OFF)
 # select the platform to build
 option(CPU "armv7 with neon" OFF)
diff --git a/src/framework/cl/cl_engine.h b/src/framework/cl/cl_engine.h
index ee671a1ff276b6597535a0f0bf20b02c46bf5eac..930e511957dc1cd1a2ca588e39d89c19d372bf19 100644
--- a/src/framework/cl/cl_engine.h
+++ b/src/framework/cl/cl_engine.h
@@ -52,7 +52,7 @@ class CLEngine {
       cl_context context, std::string file_name) {
     FILE *file = fopen(file_name.c_str(), "rb");
     PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
-                          filename.c_str());
+                          file_name.c_str());
     fseek(file, 0, SEEK_END);
     int64_t size = ftell(file);
     PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
diff --git a/src/framework/cl/cl_helper.h b/src/framework/cl/cl_helper.h
index e611a209238070420d15caa456c426b7b3650b79..8ebbcc911a558e3acbc5f54914300ca8226f0b0a 100644
--- a/src/framework/cl/cl_helper.h
+++ b/src/framework/cl/cl_helper.h
@@ -18,6 +18,7 @@ limitations under the License. */
 #include <string>
 #include <vector>
 
+#include "common/log.h"
 #include "framework/cl/cl_deleter.h"
 #include "framework/cl/cl_image.h"
 #include "framework/cl/cl_scope.h"
@@ -32,11 +33,16 @@ class CLHelper {
   explicit CLHelper(CLScope *scope) : scope_(scope) {}
 
   void AddKernel(const std::string &kernel_name, const std::string &file_name) {
+    DLOG << " begin add kernel ";
     auto kernel = scope_->GetKernel(kernel_name, file_name);
+    DLOG << " add kernel ing ";
     kernels.emplace_back(std::move(kernel));
   }
 
-  cl_kernel KernelAt(const int index) { return kernels[index].get(); }
+  cl_kernel KernelAt(const int index) {
+    DLOG << " kernel count: " << kernels.size();
+    return kernels[index].get();
+  }
 
   cl_command_queue CLCommandQueue() { return scope_->CommandQueue(); }
 
diff --git a/src/framework/cl/cl_image.h b/src/framework/cl/cl_image.h
index e57fad58f4868d8798529b458974274478a527a2..6e7a229ae4b6d42291be7df43eb1573f94a9092c 100644
--- a/src/framework/cl/cl_image.h
+++ b/src/framework/cl/cl_image.h
@@ -17,7 +17,9 @@ limitations under the License. */
 #include <vector>
 
 #include "CL/cl.h"
+
 #include "framework/cl/cl_half.h"
+#include "framework/cl/cl_tool.h"
 #include "framework/ddim.h"
 #include "framework/tensor.h"
 
@@ -205,6 +207,7 @@ class CLImage {
 
     if (err != CL_SUCCESS) {
       // TODO(HaiPeng): error handling
+      CL_CHECK_ERRORS(err);
       PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error ");
     }
   }
diff --git a/src/framework/cl/cl_scope.h b/src/framework/cl/cl_scope.h
index 24757db138a2dc62d82ffe837b939f50bcbfe0f1..f843275c0277052f98a888e5f7074ba7d0800d44 100644
--- a/src/framework/cl/cl_scope.h
+++ b/src/framework/cl/cl_scope.h
@@ -40,8 +40,10 @@ class CLScope {
   std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
       const std::string &kernel_name, const std::string &file_name) {
     auto program = Program(file_name);
+    DLOG << " get program ~ ";
     std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel(
         clCreateKernel(program, kernel_name.c_str(), NULL));
+    DLOG << " create kernel ~ ";
     return std::move(kernel);
   }
 
diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp
index ae41930984caeb76679e69db19d6704c4557291f..3135d11d63d4d4aba8bc9f97c4fa873a2c647a3f 100644
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -37,6 +37,8 @@ limitations under the License. */
 #include "framework/cl/cl_image.h"
 #endif
 
+int debug_to = 2;
+
 namespace paddle_mobile {
 namespace framework {
 
@@ -85,7 +87,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
   for (int i = 0; i < blocks.size(); ++i) {
     std::shared_ptr<framework::BlockDesc> block_desc = blocks[i];
     std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
-    for (int j = 0; j < ops.size(); ++j) {
+    for (int j = 0; j < debug_to; ++j) {
       std::shared_ptr<framework::OpDesc> op = ops[j];
       DLOG << "create op: " << j << " " << op->Type();
       auto op_base = framework::OpRegistry<Dtype>::CreateOp(
@@ -414,7 +416,7 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
     }
   }
 #else
-  for (int i = 0; i < 1; i++) {
+  for (int i = 0; i < debug_to; i++) {
 #ifdef PADDLE_MOBILE_PROFILE
     struct timespec ts;
     clock_gettime(CLOCK_MONOTONIC, &ts);
@@ -430,7 +432,9 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
 #endif
 
   DLOG << " predict return nullptr";
 
+  return nullptr;
+
   auto last_op = ops.rbegin();
   auto output_map = (*last_op)->Outputs();
   std::vector<std::string> out_keys = (*last_op)->GetOutKeys();
diff --git a/src/operators/kernel/cl/conv_kernel.cpp b/src/operators/kernel/cl/conv_kernel.cpp
index d31553b60ef2827e9e818443e49a4be148305cf4..83861446850989d3e5bee043ca5f37008fb973aa 100644
--- a/src/operators/kernel/cl/conv_kernel.cpp
+++ b/src/operators/kernel/cl/conv_kernel.cpp
@@ -30,14 +30,32 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
                static_cast<int>(param->Paddings()[1]);
   param->SetOffset(offset);
 
+  DLOG << " init helper: " << &cl_helper_;
+  DLOG << " conv kernel add kernel ~ ";
+
+  DLOG << " width of one block: " << param->Filter()->WidthOfOneBlock();
+  DLOG << " height of one block: " << param->Filter()->HeightOfOneBlock();
+
+  DLOG << " filter dims: " << param->Filter()->dims();
+
+
   if (param->Filter()->WidthOfOneBlock() == 1 &&
       param->Filter()->HeightOfOneBlock() == 1) {
+
+    DLOG << " here1 ";
     this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl");
+
   } else if (param->Filter()->dims()[1] == 1) {
+
+    DLOG << " here2 ";
     this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl");
+
   } else if (param->Filter()->WidthOfOneBlock() == 3 &&
              param->Filter()->HeightOfOneBlock() == 3) {
+
+    DLOG << " here3 ";
     this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
+
   } else {
     PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
   }
@@ -47,14 +65,27 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
 
 template <>
 void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
+  DLOG << " Compute helper: " << &cl_helper_;
+  DLOG << " begin compute ";
   auto kernel = this->cl_helper_.KernelAt(0);
+  DLOG << " get work size ";
   auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
+  DLOG << " end work size ";
   int c_block = default_work_size[0];
   int w = default_work_size[1];
   int nh = default_work_size[2];
   auto input = param.Input()->GetCLImage();
+
+  DLOG << " get Input ";
+
   auto filter = param.Filter()->GetCLImage();
+
+  DLOG << " get Filter ";
+
   auto output = param.Output();
+
+  DLOG << " get Output ";
+
   int stride = param.Strides()[0];
   int offset = param.Offset();
   int input_c = param.Input()->CBlock();
@@ -64,6 +95,8 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
 
   cl_int status;
 
+  DLOG << " begin set kernel arg ";
+
   status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
   status = clSetKernelArg(kernel, 1, sizeof(int), &w);
   status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
@@ -77,12 +110,18 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
   status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
   status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
 
+  DLOG << " end set kernel arg ";
+
   CL_CHECK_ERRORS(status);
 
+  DLOG << " begin enqueue ";
+
   status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3,
                                   NULL, default_work_size.data(), NULL, 0,
                                   NULL, NULL);
 
+  DLOG << " end enqueue ";
+
   CL_CHECK_ERRORS(status);
 }