Commit 404bfb04 authored by yangfei

Implement CLImage printer function

Parent commit: 8013c608
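This commit adds a debug printer (operator<<) for framework::CLImage, guarded by PADDLE_MOBILE_DEBUG, and threads a cl_command_queue through CLImage so the printer can read the image contents back from the device. With it, a CLImage can be streamed to the debug log the same way a Tensor already is. A minimal usage sketch follows (not part of this commit; the DumpImage helper is illustrative only and assumes a debug build with the paddle_mobile framework headers available):

#ifdef PADDLE_MOBILE_DEBUG
// Illustrative only: DLOG can now stream a CLImage directly; the new printer
// logs the tensor dims plus roughly 20 values sampled from the image data.
void DumpImage(const paddle_mobile::framework::CLImage &img) {
  DLOG << " cl image: " << img;
}
#endif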
@@ -37,7 +37,7 @@ void CLImageToTensor(CLImage *cl_image, Tensor *tensor,
   size_t width = W * ((C + 3) / 4);
   size_t height = H * N;
-  float *p = tensor->data<float>();
+  float *p = tensor->mutable_data<float>();
   half imageData[width * height * 4];
   cl_int err;
   cl_mem image = cl_image->GetCLImage();
@@ -116,5 +116,65 @@ void TensorToCLImage(const Tensor *tensor, CLImage *cl_image,
     i0 += width * H;
   }
 }
+#ifdef PADDLE_MOBILE_DEBUG
+// Debug printer: read the half-precision image back to the host, convert it
+// to NCHW floats and print the dims plus a strided sample of the values.
+Print &operator<<(Print &printer, const CLImage &cl_image) {
+  printer << " dims: " << cl_image.dims() << "\n";
+  int stride = cl_image.numel() / 20;
+  stride = stride > 0 ? stride : 1;
+  float *data = new float[cl_image.numel()];
+  DDim ddim = cl_image.dims();
+  size_t N = 1, C = 1, H = 1, W = 1;
+  if (ddim.size() == 4) {
+    N = ddim[0];
+    C = ddim[1];
+    H = ddim[2];
+    W = ddim[3];
+  } else if (ddim.size() == 1) {
+    C = ddim[0];
+  }
+  // image layout: NCHW -> [W * (C+3)/4, H * N], four channels per RGBA pixel
+  size_t width = W * ((C + 3) / 4);
+  size_t height = H * N;
+  float *p = data;
+  half imageData[width * height * 4];
+  cl_mem image = cl_image.GetCLImage();
+  size_t origin[3] = {0, 0, 0};
+  size_t region[3] = {width, height, 1};
+  cl_int err = clEnqueueReadImage(cl_image.CommandQueue(), image, CL_TRUE,
+                                  origin, region, 0, 0, imageData, 0, NULL, NULL);
+  if (err != CL_SUCCESS) {
+    printer << " read image error: " << err << "\n";
+    delete[] data;
+    return printer;
+  }
+  size_t i0 = 0;
+  for (size_t n = 0; n < N; n++) {
+    for (size_t c = 0; c < C; c++) {
+      // each block of four channels occupies its own group of W columns
+      size_t i1 = i0 + (c / 4) * W;
+      for (size_t h = 0; h < H; h++) {
+        size_t i2 = (i1 << 2) + c % 4;
+        for (size_t w = 0; w < W; w++) {
+          *p = Half2Float(imageData[i2]);
+          i2 += 4;
+          p++;
+        }
+        i1 += width;
+      }
+    }
+    i0 += width * H;
+  }
+  for (int i = 0; i < cl_image.numel(); i += stride) {
+    printer << data[i] << " ";
+  }
+  delete[] data;
+  return printer;
+}
+#endif
}  // namespace framework
}  // namespace paddle_mobile
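The printer depends on the layout convention noted in cl_image.h: an NCHW tensor is stored as a CL_RGBA / CL_HALF_FLOAT image of size [W * (C+3)/4, H * N], with four consecutive channels packed into one pixel. The self-contained sketch below (not part of the commit; MapNCHWToImage is a made-up name) shows how a tensor element maps to image coordinates under that layout, which is the same index arithmetic the read-back loop above performs:

#include <cstddef>
#include <iostream>

// Map an NCHW element (n, c, h, w) to its position in the
// [W * ceil(C/4), H * N] RGBA image used by CLImage.
struct ImagePos {
  size_t x;        // image column
  size_t y;        // image row
  size_t channel;  // which of the four RGBA slots
  size_t flat;     // flat index into a width*height*4 half buffer
};

ImagePos MapNCHWToImage(size_t n, size_t c, size_t h, size_t w,
                        size_t C, size_t H, size_t W) {
  size_t image_width = W * ((C + 3) / 4);  // channel blocks laid out along x
  ImagePos p;
  p.x = (c / 4) * W + w;  // block of four channels, then column
  p.y = n * H + h;        // batch items stacked along y
  p.channel = c % 4;
  p.flat = (p.y * image_width + p.x) * 4 + p.channel;
  return p;
}

int main() {
  // A 1x3x2x2 tensor becomes a 2x2 RGBA image: width = 2 * ((3 + 3) / 4) = 2.
  ImagePos p = MapNCHWToImage(0, 2, 1, 1, /*C=*/3, /*H=*/2, /*W=*/2);
  std::cout << "x=" << p.x << " y=" << p.y << " rgba=" << p.channel
            << " flat=" << p.flat << "\n";
  return 0;
}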
@@ -46,27 +46,27 @@ class CLImage {
   /*
    * need call SetTensorData first
    * */
-  void InitCLImage(cl_context context) {
+  void InitCLImage(cl_context context, cl_command_queue command_queue) {
     if (tensor_data_ == nullptr) {
      PADDLE_MOBILE_THROW_EXCEPTION(" need call SetTensorData first");
     }
     if (tensor_dims_.size() <= 2) {
-      InitCLImage2C(context, tensor_data_, tensor_dims_);
+      InitCLImage2C(context, command_queue, tensor_data_, tensor_dims_);
     } else {
-      InitCLImage(context, tensor_data_, tensor_dims_);
+      InitCLImage(context, command_queue, tensor_data_, tensor_dims_);
     }
     delete[](tensor_data_);
     tensor_data_ = nullptr;
     initialized_ = true;
   }
-  void InitEmptyImage(cl_context context, const DDim &dim) {
+  void InitEmptyImage(cl_context context, cl_command_queue command_queue, const DDim &dim) {
     if (tensor_data_ != nullptr) {
       PADDLE_MOBILE_THROW_EXCEPTION(
           " empty image tensor data shouldn't have value");
     }
     DLOG << " init empty image ";
-    InitCLImage(context, nullptr, dim);
+    InitCLImage(context, command_queue, nullptr, dim);
     initialized_ = true;
   }
@@ -93,6 +93,8 @@ class CLImage {
    * */
   inline size_t HeightOfOneBlock() const { return height_of_one_block_; }
+  inline cl_command_queue CommandQueue() const { return command_queue_; }
+
   /*
    * resize original tensor dim
    * */
@@ -122,7 +124,8 @@ class CLImage {
   const DDim &dims() const { return tensor_dims_; }
  private:
-  void InitCLImage2C(cl_context context, float *tensor_data, const DDim &dim) {
+  void InitCLImage2C(cl_context context, cl_command_queue command_queue, float *tensor_data, const DDim &dim) {
+    command_queue_ = command_queue;
     assert(dim.size() <= 2);
     int tdim[2] = {1, 1};
     if (dim.size() == 1) {
@@ -145,7 +148,7 @@ class CLImage {
     InitCLImage(context, width, height, imageData.get());
   }
-  void InitCLImage(cl_context context, int width, int height, void *data) {
+  void InitCLImage(cl_context context,int width, int height, void *data) {
     cl_image_format cf = {.image_channel_order = CL_RGBA,
                           .image_channel_data_type = CL_HALF_FLOAT};
     cl_image_desc cid = {
@@ -174,10 +177,11 @@ class CLImage {
       PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error ");
     }
   }
-  void InitCLImage(cl_context context, float *tensor_data, const DDim &dim) {
+  void InitCLImage(cl_context context, cl_command_queue command_queue, float *tensor_data, const DDim &dim) {
     DLOG << " tensor dim: " << dim;
     // NCHW -> [W * (C+3)/4, H * N]
     tensor_dims_ = dim;
+    command_queue_ = command_queue;
     if (tensor_data) {
       tensor_data_ = tensor_data;
     }
@@ -240,6 +244,7 @@ class CLImage {
   DDim image_dims_;
   float *tensor_data_;
   cl_context context_;
+  cl_command_queue command_queue_;
 };
 void TensorToCLImage(Tensor *tensor, CLImage *image,
......
@@ -37,7 +37,7 @@ limitations under the License. */
 #include "framework/cl/cl_image.h"
 #endif
-int debug_to = 115;
+int debug_to = 4;
 namespace paddle_mobile {
 namespace framework {
@@ -953,12 +953,13 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() {
       if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
         auto cl_image = var->template GetMutable<framework::CLImage>();
         cl_context context = program_.scope->GetCLScpoe()->Context();
+        cl_command_queue command_queue = program_.scope->GetCLScpoe()->CommandQueue();
         const framework::TensorDesc &desc = var_desc->Tensor_desc();
         // framework::DDim ddim = framework::make_ddim(desc.Dims());
         framework::DDim ddim = cl_image->dims();
         DLOG << var_desc->Name();
-        cl_image->InitEmptyImage(context, ddim);
+        cl_image->InitEmptyImage(context, command_queue, ddim);
       }
     }
   }
@@ -1010,11 +1011,11 @@ void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
     } else {
       auto cl_image = var->template GetMutable<framework::CLImage>();
       cl_context context = program_.scope->GetCLScpoe()->Context();
+      cl_command_queue command_queue = program_.scope->GetCLScpoe()->CommandQueue();
       const framework::TensorDesc &desc = var_desc->Tensor_desc();
       framework::DDim ddim = cl_image->dims();
       // framework::DDim ddim = framework::make_ddim(desc.Dims());
-      cl_image->InitEmptyImage(context, ddim);
+      cl_image->InitEmptyImage(context, command_queue, ddim);
     }
   }
 }
......
@@ -57,10 +57,7 @@ void OperatorBase<Dtype>::CheckAllInputOutputSet() const {}
 template <typename Dtype>
 void OperatorBase<Dtype>::Run() {
-  DLOG << " begin run " << type_;
   RunImpl();
-  DLOG << " end run " << type_;
-  return;
 #ifdef PADDLE_MOBILE_DEBUG
   DLOG << "-------------" << type_ << "----------------------------";
   vector<string> input_keys = GetInputKeys();
@@ -75,16 +72,8 @@ void OperatorBase<Dtype>::Run() {
       if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor;
     } else {
       CLImage *cl_image = vari->template GetMutable<framework::CLImage>();
-      //   cl_command_queue commandQueue =
-      //   scope_->GetCLScpoe()->CommandQueue(); Tensor
-      //   *tmp ;
-      //   CLImageToTensor(cl_image,tmp,commandQueue);
-      //   tmp->Resize(cl_image->dims());
-      const float *input = cl_image->data<float>();
       if (cl_image) {
-        DLOG << type_ << " input- " << key << "=" << cl_image->dims();
-        //   if(input)
-        //   DLOG<<type_<<" input- "<<key<<"="<<*input;
+        DLOG << type_ << " input- " << key << "=" << *cl_image;
       }
     }
@@ -108,15 +97,8 @@ void OperatorBase<Dtype>::Run() {
       }
     } else {
       CLImage *cl_image = vari->template GetMutable<framework::CLImage>();
-      //   cl_command_queue commandQueue =
-      //   scope_->GetCLScpoe()->CommandQueue(); Tensor *tmp ;
-      //   CLImageToTensor(cl_image,tmp,commandQueue);
-      //   tmp->Resize(cl_image->dims());
       if (cl_image) {
-        const float *output = cl_image->data<float>();
-        DLOG << type_ << " output- " << key << "=" << cl_image->dims();
-        //   if(output)
-        //   DLOG<<type_<<" output- "<<key<<"="<<*output;
+        DLOG << type_ << " output- " << key << "=" << *cl_image;
       }
     }
......
@@ -49,11 +49,11 @@ bool BatchNormKernel<GPU_CL, float>::Init(BatchNormParam<GPU_CL> *param) {
   framework::CLImage *new_scale = new framework::CLImage();
   new_scale->SetTensorData(new_scale_ptr, variance->dims());
-  new_scale->InitCLImage(this->cl_helper_.CLContext());
+  new_scale->InitCLImage(this->cl_helper_.CLContext(), this->cl_helper_.CLCommandQueue());
   framework::CLImage *new_bias = new framework::CLImage();
   new_bias->SetTensorData(new_bias_ptr, variance->dims());
-  new_bias->InitCLImage(this->cl_helper_.CLContext());
+  new_bias->InitCLImage(this->cl_helper_.CLContext(), this->cl_helper_.CLCommandQueue());
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);
......
@@ -29,8 +29,8 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
                         param->Paddings()[0] == param->Paddings()[1],
                         "need equal");
-  param->Filter()->InitCLImage(cl_helper_.CLContext());
-  param->Bias()->InitCLImage(cl_helper_.CLContext());
+  param->Filter()->InitCLImage(cl_helper_.CLContext(), cl_helper_.CLCommandQueue());
+  param->Bias()->InitCLImage(cl_helper_.CLContext(), cl_helper_.CLCommandQueue());
   // const CL *mean = param->InputMean();
   const framework::CLImage *mean = param->InputMean();
@@ -62,12 +62,12 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
   framework::CLImage *new_scale = new framework::CLImage();
   new_scale->SetTensorData(new_scale_ptr, variance->dims());
-  new_scale->InitCLImage(this->cl_helper_.CLContext());
+  new_scale->InitCLImage(this->cl_helper_.CLContext(), cl_helper_.CLCommandQueue());
   framework::CLImage *new_bias = new framework::CLImage();
   new_bias->SetTensorData(new_bias_ptr, variance->dims());
-  new_bias->InitCLImage(this->cl_helper_.CLContext());
+  new_bias->InitCLImage(this->cl_helper_.CLContext(), cl_helper_.CLCommandQueue());
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);
......
@@ -25,8 +25,8 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
                         param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
                         param->Paddings()[0] == param->Paddings()[1],
                         "need equal");
-  param->Filter()->InitCLImage(cl_helper_.CLContext());
-  param->Bias()->InitCLImage(cl_helper_.CLContext());
+  param->Filter()->InitCLImage(cl_helper_.CLContext(), this->cl_helper_.CLCommandQueue());
+  param->Bias()->InitCLImage(cl_helper_.CLContext(), this->cl_helper_.CLCommandQueue());
   int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
                static_cast<int>(param->Paddings()[1]);
......
@@ -26,7 +26,7 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
                         param->Paddings()[0] == param->Paddings()[1],
                         "need equal");
-  param->Filter()->InitCLImage(cl_helper_.CLContext());
+  param->Filter()->InitCLImage(cl_helper_.CLContext(), this->cl_helper_.CLCommandQueue());
   int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
                static_cast<int>(param->Paddings()[1]);
......
@@ -27,7 +27,7 @@ bool DepthwiseConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
                         param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
                         param->Paddings()[0] == param->Paddings()[1],
                         "need equal");
-  param->Filter()->InitCLImage(cl_helper_.CLContext());
+  param->Filter()->InitCLImage(cl_helper_.CLContext(), this->cl_helper_.CLCommandQueue());
   int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
                static_cast<int>(param->Paddings()[1]);
   param->SetOffset(offset);
......
@@ -23,7 +23,7 @@ template <>
 bool ElementwiseAddKernel<GPU_CL, float>::Init(
     ElementwiseAddParam<GPU_CL> *param) {
   CLImage *bias = (CLImage*)param->InputY();
-  bias->InitCLImage(cl_helper_.CLContext());
+  bias->InitCLImage(cl_helper_.CLContext(), this->cl_helper_.CLCommandQueue());
   if(bias->dims().size()==4){
     this->cl_helper_.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
   }else if(param->InputY()->dims().size()==1){
......
@@ -30,6 +30,7 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
   cl_int status;
   auto output = param.Out();
   const Tensor *input = param.InputX();
+  DLOG << *input;
   const float *input_data = input->data<float>();
   int numel = input->numel();
   cl_mem cl_image = output->GetCLImage();
@@ -53,14 +54,6 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
   status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
                                   NULL, global_work_size, NULL, 0, NULL, NULL);
   CL_CHECK_ERRORS(status);
-  int len = 4 * 224 * 224;
-  half *out = new half[len];
-  cl_command_queue commandQueue = this->cl_helper_.CLCommandQueue();
-  size_t origin[3] = {0, 0, 0};
-  size_t region[3] = {height, width, 1};
-  clEnqueueReadImage(commandQueue, cl_image, CL_TRUE, origin, region, 0, 0, out,
-                     0, NULL, NULL);
 }
 template class FeedKernel<GPU_CL, float>;
......
@@ -19,44 +19,44 @@ namespace operators {
 template <>
 bool FetchKernel<GPU_CL, float>::Init(FetchParam<GPU_CL> *param) {
-  this->cl_helper_.AddKernel("fetch", "fetch_kernel.cl");
+  // this->cl_helper_.AddKernel("fetch", "fetch_kernel.cl");
   return true;
 }
 template <>
 void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
-  auto kernel = this->cl_helper_.KernelAt(0);
-  auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.InputX());
-
-  auto input = param.InputX()->GetCLImage();
-  auto *out = param.Out();
-
-  const auto &dims = param.InputX()->dims();
-  const int N = dims[0];
-  const int C = dims[1];
-  const int in_height = dims[2];
-  const int in_width = dims[3];
-
-  int size_ch = in_height * in_width;
-  int size_block = size_ch * 4;
-  int size_batch = size_ch * C;
-
-  // need create outputBuffer
-  cl_image_format imageFormat;
-  imageFormat.image_channel_order = CL_RGBA;
-  imageFormat.image_channel_data_type = CL_FLOAT;
-  cl_mem outputBuffer;
-
-  clSetKernelArg(kernel, 0, sizeof(int), &in_height);
-  clSetKernelArg(kernel, 1, sizeof(int), &in_width);
-  clSetKernelArg(kernel, 2, sizeof(int), &size_ch);
-  clSetKernelArg(kernel, 3, sizeof(int), &size_block);
-  clSetKernelArg(kernel, 4, sizeof(int), &size_batch);
-  clSetKernelArg(kernel, 5, sizeof(cl_mem), &input);
-  clSetKernelArg(kernel, 6, sizeof(cl_mem), &outputBuffer);
-
-  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
-                         default_work_size.data(), NULL, 0, NULL, NULL);
+  // auto kernel = this->cl_helper_.KernelAt(0);
+  // auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.InputX());
+  //
+  // auto input = param.InputX()->GetCLImage();
+  // auto *out = param.Out();
+  //
+  // const auto &dims = param.InputX()->dims();
+  // const int N = dims[0];
+  // const int C = dims[1];
+  // const int in_height = dims[2];
+  // const int in_width = dims[3];
+  //
+  // int size_ch = in_height * in_width;
+  // int size_block = size_ch * 4;
+  // int size_batch = size_ch * C;
+  //
+  // // need create outputBuffer
+  // cl_image_format imageFormat;
+  // imageFormat.image_channel_order = CL_RGBA;
+  // imageFormat.image_channel_data_type = CL_FLOAT;
+  // cl_mem outputBuffer;
+  //
+  // clSetKernelArg(kernel, 0, sizeof(int), &in_height);
+  // clSetKernelArg(kernel, 1, sizeof(int), &in_width);
+  // clSetKernelArg(kernel, 2, sizeof(int), &size_ch);
+  // clSetKernelArg(kernel, 3, sizeof(int), &size_block);
+  // clSetKernelArg(kernel, 4, sizeof(int), &size_batch);
+  // clSetKernelArg(kernel, 5, sizeof(cl_mem), &input);
+  // clSetKernelArg(kernel, 6, sizeof(cl_mem), &outputBuffer);
+  //
+  // clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
+  //                        default_work_size.data(), NULL, 0, NULL, NULL);
 }
 template class FetchKernel<GPU_CL, float>;
......