提交 6382d7a3 编写于 作者: R Ray Liu 提交者: GitHub

Merge pull request #1077 from codeWorm2015/opencl

 add debug code
......@@ -4,7 +4,7 @@ option(USE_OPENMP "openmp support" OFF)
project(paddle-mobile)
option(DEBUGING "enable debug mode" ON)
option(USE_EXCEPTION "use std exception" OFF)
option(USE_EXCEPTION "use std exception" ON)
option(LOG_PROFILE "log profile" OFF)
# select the platform to build
option(CPU "armv7 with neon" OFF)
......
......@@ -52,7 +52,7 @@ class CLEngine {
cl_context context, std::string file_name) {
FILE *file = fopen(file_name.c_str(), "rb");
PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
filename.c_str());
file_name.c_str());
fseek(file, 0, SEEK_END);
int64_t size = ftell(file);
PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
......
......@@ -18,6 +18,7 @@ limitations under the License. */
#include <type_traits>
#include <vector>
#include "common/log.h"
#include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_image.h"
#include "framework/cl/cl_scope.h"
......@@ -32,11 +33,16 @@ class CLHelper {
explicit CLHelper(CLScope *scope) : scope_(scope) {}
void AddKernel(const std::string &kernel_name, const std::string &file_name) {
DLOG << " begin add kernel ";
auto kernel = scope_->GetKernel(kernel_name, file_name);
DLOG << " add kernel ing ";
kernels.emplace_back(std::move(kernel));
}
cl_kernel KernelAt(const int index) { return kernels[index].get(); }
cl_kernel KernelAt(const int index) {
DLOG << " kernel count: " << kernels.size();
return kernels[index].get();
}
cl_command_queue CLCommandQueue() { return scope_->CommandQueue(); }
......
......@@ -17,7 +17,9 @@ limitations under the License. */
#include <vector>
#include "CL/cl.h"
#include "framework/cl/cl_half.h"
#include "framework/cl/cl_tool.h"
#include "framework/ddim.h"
#include "framework/tensor.h"
......@@ -205,6 +207,7 @@ class CLImage {
if (err != CL_SUCCESS) {
// TODO(HaiPeng): error handling
CL_CHECK_ERRORS(err);
PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error ");
}
}
......
......@@ -40,8 +40,10 @@ class CLScope {
std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
const std::string &kernel_name, const std::string &file_name) {
auto program = Program(file_name);
DLOG << " get program ~ ";
std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel(
clCreateKernel(program, kernel_name.c_str(), NULL));
DLOG << " create kernel ~ ";
return std::move(kernel);
}
......
......@@ -37,6 +37,8 @@ limitations under the License. */
#include "framework/cl/cl_image.h"
#endif
int debug_to = 2;
namespace paddle_mobile {
namespace framework {
......@@ -85,7 +87,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<framework::BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
for (int j = 0; j < ops.size(); ++j) {
for (int j = 0; j < debug_to; ++j) {
std::shared_ptr<framework::OpDesc> op = ops[j];
DLOG << "create op: " << j << " " << op->Type();
auto op_base = framework::OpRegistry<Dtype>::CreateOp(
......@@ -414,7 +416,7 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
}
}
#else
for (int i = 0; i < 1; i++) {
for (int i = 0; i < debug_to; i++) {
#ifdef PADDLE_MOBILE_PROFILE
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
......@@ -430,7 +432,9 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
#endif
DLOG << " predict return nullptr";
return nullptr;
auto last_op = ops.rbegin();
auto output_map = (*last_op)->Outputs();
std::vector<std::string> out_keys = (*last_op)->GetOutKeys();
......
......@@ -30,14 +30,32 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
static_cast<int>(param->Paddings()[1]);
param->SetOffset(offset);
DLOG << " init helper: " << &cl_helper_;
DLOG << " conv kernel add kernel ~ ";
DLOG << " width of one block: " << param->Filter()->WidthOfOneBlock();
DLOG << " height of one block: " << param->Filter()->HeightOfOneBlock();
DLOG << " filter dims: " << param->Filter()->dims();
if (param->Filter()->WidthOfOneBlock() == 1 &&
param->Filter()->HeightOfOneBlock() == 1) {
DLOG << " here1 ";
this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl");
} else if (param->Filter()->dims()[1] == 1) {
DLOG << " here2 ";
this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl");
} else if (param->Filter()->WidthOfOneBlock() == 3 &&
param->Filter()->HeightOfOneBlock() == 3) {
DLOG << " here3 ";
this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
} else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
}
......@@ -47,14 +65,27 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
template <>
void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
DLOG << " Compute helper: " << &cl_helper_;
DLOG << " begin compute ";
auto kernel = this->cl_helper_.KernelAt(0);
DLOG << " get work size ";
auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
DLOG << " end work size ";
int c_block = default_work_size[0];
int w = default_work_size[1];
int nh = default_work_size[2];
auto input = param.Input()->GetCLImage();
DLOG << " get Input ";
auto filter = param.Filter()->GetCLImage();
DLOG << " get Filter ";
auto output = param.Output();
DLOG << " get Output ";
int stride = param.Strides()[0];
int offset = param.Offset();
int input_c = param.Input()->CBlock();
......@@ -64,6 +95,8 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
cl_int status;
DLOG << " begin set kernel arg ";
status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
status = clSetKernelArg(kernel, 1, sizeof(int), &w);
status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
......@@ -77,12 +110,18 @@ void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
DLOG << " end set kernel arg ";
CL_CHECK_ERRORS(status);
DLOG << " begin enqueue ";
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
default_work_size.data(), NULL, 0, NULL, NULL);
DLOG << " end enqueue ";
CL_CHECK_ERRORS(status);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册