提交 cc7485bb 编写于 作者: L liuruilong

add debug code

上级 414ec29a
...@@ -4,7 +4,7 @@ option(USE_OPENMP "openmp support" OFF) ...@@ -4,7 +4,7 @@ option(USE_OPENMP "openmp support" OFF)
project(paddle-mobile) project(paddle-mobile)
option(DEBUGING "enable debug mode" ON) option(DEBUGING "enable debug mode" ON)
option(USE_EXCEPTION "use std exception" OFF) option(USE_EXCEPTION "use std exception" ON)
option(LOG_PROFILE "log profile" OFF) option(LOG_PROFILE "log profile" OFF)
# select the platform to build # select the platform to build
option(CPU "armv7 with neon" OFF) option(CPU "armv7 with neon" OFF)
......
...@@ -52,7 +52,7 @@ class CLEngine { ...@@ -52,7 +52,7 @@ class CLEngine {
cl_context context, std::string file_name) { cl_context context, std::string file_name) {
FILE *file = fopen(file_name.c_str(), "rb"); FILE *file = fopen(file_name.c_str(), "rb");
PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ", PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
filename.c_str()); file_name.c_str());
fseek(file, 0, SEEK_END); fseek(file, 0, SEEK_END);
int64_t size = ftell(file); int64_t size = ftell(file);
PADDLE_MOBILE_ENFORCE(size > 0, "size is too small"); PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
......
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include <type_traits> #include <type_traits>
#include <vector> #include <vector>
#include "common/log.h"
#include "framework/cl/cl_deleter.h" #include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_image.h" #include "framework/cl/cl_image.h"
#include "framework/cl/cl_scope.h" #include "framework/cl/cl_scope.h"
...@@ -32,11 +33,16 @@ class CLHelper { ...@@ -32,11 +33,16 @@ class CLHelper {
// Constructor: stores the given CLScope pointer in scope_. Nothing else is
// done here; `explicit` prevents implicit conversion from CLScope*.
// (Diff context line: the old and new column text are identical.)
explicit CLHelper(CLScope *scope) : scope_(scope) {} explicit CLHelper(CLScope *scope) : scope_(scope) {}
// AddKernel: asks scope_ for the kernel identified by (kernel_name, file_name)
// via scope_->GetKernel and moves the returned handle onto the end of
// `kernels`.
// In this side-by-side diff, lines that appear only once (the two DLOG
// statements) are the additions made by this commit; they trace progress
// before and after the GetKernel call.
void AddKernel(const std::string &kernel_name, const std::string &file_name) { void AddKernel(const std::string &kernel_name, const std::string &file_name) {
DLOG << " begin add kernel ";
auto kernel = scope_->GetKernel(kernel_name, file_name); auto kernel = scope_->GetKernel(kernel_name, file_name);
DLOG << " add kernel ing ";
// Ownership of the kernel handle is transferred into the container.
kernels.emplace_back(std::move(kernel)); kernels.emplace_back(std::move(kernel));
} }
// KernelAt: returns the raw cl_kernel stored at position `index` of `kernels`
// (obtained through .get(), so the container keeps ownership).
// The commit expands the former one-liner (left column) into a multi-line body
// (right column) that first logs the number of stored kernels.
// NOTE(review): no range check on `index` is visible in this method.
cl_kernel KernelAt(const int index) { return kernels[index].get(); } cl_kernel KernelAt(const int index) {
DLOG << " kernel count: " << kernels.size();
return kernels[index].get();
}
// CLCommandQueue: forwards to scope_->CommandQueue() and returns its result.
// (Diff context line: the old and new column text are identical.)
cl_command_queue CLCommandQueue() { return scope_->CommandQueue(); } cl_command_queue CLCommandQueue() { return scope_->CommandQueue(); }
......
...@@ -17,7 +17,9 @@ limitations under the License. */ ...@@ -17,7 +17,9 @@ limitations under the License. */
#include <vector> #include <vector>
#include "CL/cl.h" #include "CL/cl.h"
#include "framework/cl/cl_half.h" #include "framework/cl/cl_half.h"
#include "framework/cl/cl_tool.h"
#include "framework/ddim.h" #include "framework/ddim.h"
#include "framework/tensor.h" #include "framework/tensor.h"
...@@ -205,6 +207,7 @@ class CLImage { ...@@ -205,6 +207,7 @@ class CLImage {
if (err != CL_SUCCESS) { if (err != CL_SUCCESS) {
// TODO(HaiPeng): error handling // TODO(HaiPeng): error handling
CL_CHECK_ERRORS(err);
PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error "); PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error ");
} }
} }
......
...@@ -40,8 +40,10 @@ class CLScope { ...@@ -40,8 +40,10 @@ class CLScope {
// GetKernel: looks up the program for `file_name` via Program(file_name),
// creates the kernel named `kernel_name` from it with clCreateKernel, and
// returns the handle wrapped in a unique_ptr that uses CLKernelDeleter.
// Lines that appear only once (the two DLOG statements) are the tracing
// added by this commit.
std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel( std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
const std::string &kernel_name, const std::string &file_name) { const std::string &kernel_name, const std::string &file_name) {
auto program = Program(file_name); auto program = Program(file_name);
DLOG << " get program ~ ";
// NOTE(review): the last argument (errcode_ret) is NULL, so any error code
// from clCreateKernel is discarded and the result is not checked here.
std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel( std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel(
clCreateKernel(program, kernel_name.c_str(), NULL)); clCreateKernel(program, kernel_name.c_str(), NULL));
DLOG << " create kernel ~ ";
// NOTE(review): std::move on a returned local is unnecessary and can inhibit
// copy elision; a plain `return kernel;` would move implicitly.
return std::move(kernel); return std::move(kernel);
} }
......
...@@ -37,6 +37,8 @@ limitations under the License. */ ...@@ -37,6 +37,8 @@ limitations under the License. */
#include "framework/cl/cl_image.h" #include "framework/cl/cl_image.h"
#endif #endif
int debug_to = 2;
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
...@@ -85,7 +87,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size, ...@@ -85,7 +87,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
for (int i = 0; i < blocks.size(); ++i) { for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<framework::BlockDesc> block_desc = blocks[i]; std::shared_ptr<framework::BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops(); std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
for (int j = 0; j < ops.size(); ++j) { for (int j = 0; j < debug_to; ++j) {
std::shared_ptr<framework::OpDesc> op = ops[j]; std::shared_ptr<framework::OpDesc> op = ops[j];
DLOG << "create op: " << j << " " << op->Type(); DLOG << "create op: " << j << " " << op->Type();
auto op_base = framework::OpRegistry<Dtype>::CreateOp( auto op_base = framework::OpRegistry<Dtype>::CreateOp(
...@@ -414,7 +416,7 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict( ...@@ -414,7 +416,7 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
} }
} }
#else #else
for (int i = 0; i < 1; i++) { for (int i = 0; i < debug_to; i++) {
#ifdef PADDLE_MOBILE_PROFILE #ifdef PADDLE_MOBILE_PROFILE
struct timespec ts; struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts); clock_gettime(CLOCK_MONOTONIC, &ts);
...@@ -430,7 +432,9 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict( ...@@ -430,7 +432,9 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
#endif #endif
DLOG << " predict return nullptr"; DLOG << " predict return nullptr";
return nullptr; return nullptr;
auto last_op = ops.rbegin(); auto last_op = ops.rbegin();
auto output_map = (*last_op)->Outputs(); auto output_map = (*last_op)->Outputs();
std::vector<std::string> out_keys = (*last_op)->GetOutKeys(); std::vector<std::string> out_keys = (*last_op)->GetOutKeys();
......
...@@ -30,14 +30,32 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) { ...@@ -30,14 +30,32 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
static_cast<int>(param->Paddings()[1]); static_cast<int>(param->Paddings()[1]);
param->SetOffset(offset); param->SetOffset(offset);
DLOG << " init helper: " << &cl_helper_;
DLOG << " conv kernel add kernel ~ ";
DLOG << " width of one block: " << param->Filter()->WidthOfOneBlock();
DLOG << " height of one block: " << param->Filter()->HeightOfOneBlock();
DLOG << " filter dims: " << param->Filter()->dims();
if (param->Filter()->WidthOfOneBlock() == 1 && if (param->Filter()->WidthOfOneBlock() == 1 &&
param->Filter()->HeightOfOneBlock() == 1) { param->Filter()->HeightOfOneBlock() == 1) {
DLOG << " here1 ";
this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl"); this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl");
} else if (param->Filter()->dims()[1] == 1) { } else if (param->Filter()->dims()[1] == 1) {
DLOG << " here2 ";
this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl"); this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl");
} else if (param->Filter()->WidthOfOneBlock() == 3 && } else if (param->Filter()->WidthOfOneBlock() == 3 &&
param->Filter()->HeightOfOneBlock() == 3) { param->Filter()->HeightOfOneBlock() == 3) {
DLOG << " here3 ";
this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl"); this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
} else { } else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support "); PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
} }
...@@ -47,14 +65,27 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) { ...@@ -47,14 +65,27 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
template <> template <>
void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) { void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
DLOG << " Compute helper: " << &cl_helper_;
DLOG << " begin compute ";
auto kernel = this->cl_helper_.KernelAt(0); auto kernel = this->cl_helper_.KernelAt(0);
DLOG << " get work size ";
auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output()); auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
DLOG << " end work size ";
int c_block = default_work_size[0]; int c_block = default_work_size[0];
int w = default_work_size[1]; int w = default_work_size[1];
int nh = default_work_size[2]; int nh = default_work_size[2];
auto input = param.Input()->GetCLImage(); auto input = param.Input()->GetCLImage();
DLOG << " get Input ";
auto filter = param.Filter()->GetCLImage(); auto filter = param.Filter()->GetCLImage();
DLOG << " get Filter ";
auto output = param.Output(); auto output = param.Output();
DLOG << " get Output ";
int stride = param.Strides()[0]; int stride = param.Strides()[0];
int offset = param.Offset(); int offset = param.Offset();
int input_c = param.Input()->CBlock(); int input_c = param.Input()->CBlock();
...@@ -64,6 +95,8 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) { ...@@ -64,6 +95,8 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
cl_int status; cl_int status;
DLOG << " begin set kernel arg ";
status = clSetKernelArg(kernel, 0, sizeof(int), &c_block); status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
status = clSetKernelArg(kernel, 1, sizeof(int), &w); status = clSetKernelArg(kernel, 1, sizeof(int), &w);
status = clSetKernelArg(kernel, 2, sizeof(int), &nh); status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
...@@ -77,12 +110,18 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) { ...@@ -77,12 +110,18 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
status = clSetKernelArg(kernel, 10, sizeof(int), &input_width); status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
status = clSetKernelArg(kernel, 11, sizeof(int), &input_height); status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
DLOG << " end set kernel arg ";
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
DLOG << " begin enqueue ";
status = status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
default_work_size.data(), NULL, 0, NULL, NULL); default_work_size.data(), NULL, 0, NULL, NULL);
DLOG << " end enqueue ";
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册