提交 462f4649 · 作者: Yanzhan Yang · 提交者: zp7

Add OpenCL (cl) status checks for clSetKernelArg and clEnqueueNDRangeKernel calls, test=develop (#1956)

上级 31ee212a
...@@ -38,21 +38,31 @@ void CLImageToTensor(CLImage *cl_image, Tensor *tensor, cl_context context, ...@@ -38,21 +38,31 @@ void CLImageToTensor(CLImage *cl_image, Tensor *tensor, cl_context context,
auto input_image = cl_image->GetCLImage(); auto input_image = cl_image->GetCLImage();
clSetKernelArg(kernel, 0, sizeof(int), &in_height); cl_int status;
clSetKernelArg(kernel, 1, sizeof(int), &in_width); status = clSetKernelArg(kernel, 0, sizeof(int), &in_height);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &input_image); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &outBuffer); status = clSetKernelArg(kernel, 1, sizeof(int), &in_width);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &input_image);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &outBuffer);
CL_CHECK_ERRORS(status);
int size_ch = in_height * in_width; int size_ch = in_height * in_width;
int size_block = size_ch * 4; int size_block = size_ch * 4;
int size_batch = size_ch * C; int size_batch = size_ch * C;
clSetKernelArg(kernel, 4, sizeof(int), &size_ch); status = clSetKernelArg(kernel, 4, sizeof(int), &size_ch);
clSetKernelArg(kernel, 5, sizeof(int), &size_block); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 6, sizeof(int), &size_batch); status = clSetKernelArg(kernel, 5, sizeof(int), &size_block);
clSetKernelArg(kernel, 7, sizeof(int), &C); CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 6, sizeof(int), &size_batch);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 7, sizeof(int), &C);
CL_CHECK_ERRORS(status);
size_t global_work_size[3] = {(new_dims[1] + 3) / 4, new_dims[3], size_t global_work_size[3] = {(new_dims[1] + 3) / 4, new_dims[3],
new_dims[0] * new_dims[2]}; new_dims[0] * new_dims[2]};
clEnqueueNDRangeKernel(commandQueue, kernel, 3, NULL, global_work_size, NULL, status = clEnqueueNDRangeKernel(commandQueue, kernel, 3, NULL,
0, NULL, NULL); global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
memcpy(tensor->data<float>(), out_cl_tensor.Data<float>(), memcpy(tensor->data<float>(), out_cl_tensor.Data<float>(),
tensor->memory_size()); tensor->memory_size());
} }
......
...@@ -87,18 +87,20 @@ void BatchNormKernel<GPU_CL, float>::Compute( ...@@ -87,18 +87,20 @@ void BatchNormKernel<GPU_CL, float>::Compute(
DLOG << out_width; DLOG << out_width;
DLOG << *param.OutputY(); DLOG << *param.OutputY();
cl_int status; cl_int status;
clSetKernelArg(kernel, 0, sizeof(cl_int), &out_width); status = clSetKernelArg(kernel, 0, sizeof(cl_int), &out_width);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &input); status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &input);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &new_scale); status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &new_scale);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &new_bias); status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &new_bias);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 4, sizeof(cl_mem), &out); status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &out);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
default_work_size.data(), NULL, 0, NULL, NULL); default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
} }
template class BatchNormKernel<GPU_CL, float>; template class BatchNormKernel<GPU_CL, float>;
......
...@@ -59,23 +59,31 @@ void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) { ...@@ -59,23 +59,31 @@ void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
out_cl_tensor.Resize(out->dims()); out_cl_tensor.Resize(out->dims());
cl_mem outBuffer = out_cl_tensor.mutable_data<float>(); cl_mem outBuffer = out_cl_tensor.mutable_data<float>();
clSetKernelArg(kernel, 0, sizeof(int), &in_height); cl_int status;
clSetKernelArg(kernel, 1, sizeof(int), &in_width); status = clSetKernelArg(kernel, 0, sizeof(int), &in_height);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &input); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &outBuffer); status = clSetKernelArg(kernel, 1, sizeof(int), &in_width);
clSetKernelArg(kernel, 4, sizeof(int), &size_ch); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 5, sizeof(int), &size_block); status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &input);
clSetKernelArg(kernel, 6, sizeof(int), &size_batch); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 7, sizeof(int), &in_ch); status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &outBuffer);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 4, sizeof(int), &size_ch);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 5, sizeof(int), &size_block);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 6, sizeof(int), &size_batch);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 7, sizeof(int), &in_ch);
CL_CHECK_ERRORS(status);
// cl_event wait_event = param.InpdutX()->GetClEvent(); // cl_event wait_event = param.InpdutX()->GetClEvent();
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
default_work_size.data(), NULL, 0, NULL, NULL); default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
// printf(" before finish \n");
// clFlsh(this->cl_helper_.CLCommandQueue());
clFinish(this->cl_helper_.CLCommandQueue()); clFinish(this->cl_helper_.CLCommandQueue());
// printf(" after finish \n");
DLOG << "fetch kernel out dims = " << out->dims(); DLOG << "fetch kernel out dims = " << out->dims();
DLOG << "fetch kernel out memory size = " << out->memory_size(); DLOG << "fetch kernel out memory size = " << out->memory_size();
......
...@@ -76,24 +76,26 @@ void InstanceNormKernel<GPU_CL, float>::Compute( ...@@ -76,24 +76,26 @@ void InstanceNormKernel<GPU_CL, float>::Compute(
<< " " << local_work_size[2]; << " " << local_work_size[2];
cl_int status; cl_int status;
clSetKernelArg(kernel, 0, sizeof(cl_int), &w); status = clSetKernelArg(kernel, 0, sizeof(cl_int), &w);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 1, sizeof(cl_int), &h); status = clSetKernelArg(kernel, 1, sizeof(cl_int), &h);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 2, sizeof(cl_int), &c_group); status = clSetKernelArg(kernel, 2, sizeof(cl_int), &c_group);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 3, sizeof(cl_int), &local_work_size1); status = clSetKernelArg(kernel, 3, sizeof(cl_int), &local_work_size1);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 4, sizeof(cl_int), &local_work_size2); status = clSetKernelArg(kernel, 4, sizeof(cl_int), &local_work_size2);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 5, sizeof(cl_float), &epsilon); status = clSetKernelArg(kernel, 5, sizeof(cl_float), &epsilon);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 6, sizeof(cl_mem), &input); status = clSetKernelArg(kernel, 6, sizeof(cl_mem), &input);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 7, sizeof(cl_mem), &out); status = clSetKernelArg(kernel, 7, sizeof(cl_mem), &out);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
work_size, local_work_size, 0, NULL, NULL); work_size, local_work_size, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
} }
template class InstanceNormKernel<GPU_CL, float>; template class InstanceNormKernel<GPU_CL, float>;
......
...@@ -57,23 +57,38 @@ void PoolKernel<GPU_CL, float>::Compute(const PoolParam<GPU_CL> &param) { ...@@ -57,23 +57,38 @@ void PoolKernel<GPU_CL, float>::Compute(const PoolParam<GPU_CL> &param) {
const int ksize_h = ksize[0]; const int ksize_h = ksize[0];
const int ksize_w = ksize[1]; const int ksize_w = ksize[1];
clSetKernelArg(kernel, 0, sizeof(cl_int), &in_height); cl_int status;
clSetKernelArg(kernel, 1, sizeof(cl_int), &in_width); status = clSetKernelArg(kernel, 0, sizeof(cl_int), &in_height);
clSetKernelArg(kernel, 2, sizeof(cl_int), &out_height); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 3, sizeof(cl_int), &out_width); status = clSetKernelArg(kernel, 1, sizeof(cl_int), &in_width);
clSetKernelArg(kernel, 4, sizeof(cl_int), &pad_top); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 5, sizeof(cl_int), &pad_left); status = clSetKernelArg(kernel, 2, sizeof(cl_int), &out_height);
clSetKernelArg(kernel, 6, sizeof(cl_int), &stride_h); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 7, sizeof(cl_int), &stride_w); status = clSetKernelArg(kernel, 3, sizeof(cl_int), &out_width);
clSetKernelArg(kernel, 8, sizeof(cl_int), &ksize_h); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 9, sizeof(cl_int), &ksize_w); status = clSetKernelArg(kernel, 4, sizeof(cl_int), &pad_top);
clSetKernelArg(kernel, 10, sizeof(cl_mem), &input); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 11, sizeof(cl_mem), &out); status = clSetKernelArg(kernel, 5, sizeof(cl_int), &pad_left);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 6, sizeof(cl_int), &stride_h);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 7, sizeof(cl_int), &stride_w);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 8, sizeof(cl_int), &ksize_h);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 9, sizeof(cl_int), &ksize_w);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 10, sizeof(cl_mem), &input);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 11, sizeof(cl_mem), &out);
CL_CHECK_ERRORS(status);
// cl_event out_event = param.Output()->GetClEvent(); // cl_event out_event = param.Output()->GetClEvent();
// cl_event wait_event = param.Input()->GetClEvent(); // cl_event wait_event = param.Input()->GetClEvent();
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
default_work_size.data(), NULL, 0, NULL, NULL); default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
} }
template class PoolKernel<GPU_CL, float>; template class PoolKernel<GPU_CL, float>;
......
...@@ -43,8 +43,11 @@ void ReluKernel<GPU_CL, float>::Compute(const ReluParam<GPU_CL>& param) { ...@@ -43,8 +43,11 @@ void ReluKernel<GPU_CL, float>::Compute(const ReluParam<GPU_CL>& param) {
auto outputImage = output->GetCLImage(); auto outputImage = output->GetCLImage();
// auto tImage = // auto tImage =
// const_cast<ReluParam<GPU_CL>&>(param).getMidImage().GetCLImage(); // const_cast<ReluParam<GPU_CL>&>(param).getMidImage().GetCLImage();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage); cl_int status;
clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage); status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
CL_CHECK_ERRORS(status);
// clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &inputImage); // clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &inputImage);
// clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &tImage); // clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &tImage);
// clSetKernelArg(kernel_p1, 0, sizeof(cl_mem), &tImage); // clSetKernelArg(kernel_p1, 0, sizeof(cl_mem), &tImage);
...@@ -54,8 +57,9 @@ void ReluKernel<GPU_CL, float>::Compute(const ReluParam<GPU_CL>& param) { ...@@ -54,8 +57,9 @@ void ReluKernel<GPU_CL, float>::Compute(const ReluParam<GPU_CL>& param) {
// cl_event out_event = param.Out()->GetClEvent(); // cl_event out_event = param.Out()->GetClEvent();
// cl_event wait_event = param.InputX()->GetClEvent(); // cl_event wait_event = param.InputX()->GetClEvent();
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL, status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
work_size, NULL, 0, NULL, NULL); NULL, work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3, // clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3,
// NULL, // NULL,
// work_size, NULL, 0, NULL, NULL); // work_size, NULL, 0, NULL, NULL);
......
...@@ -36,14 +36,22 @@ void ScaleKernel<GPU_CL, float>::Compute(const ScaleParam<GPU_CL>& param) { ...@@ -36,14 +36,22 @@ void ScaleKernel<GPU_CL, float>::Compute(const ScaleParam<GPU_CL>& param) {
auto inputImage = input->GetCLImage(); auto inputImage = input->GetCLImage();
auto outputImage = output->GetCLImage(); auto outputImage = output->GetCLImage();
int out_width = (output->dims().size() == 4) ? output->dims()[3] : 1; int out_width = (output->dims().size() == 4) ? output->dims()[3] : 1;
clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage); cl_int status;
clSetKernelArg(kernel, 2, sizeof(float), &scale); status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
clSetKernelArg(kernel, 3, sizeof(float), &bias); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 4, sizeof(int), &out_width); status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, CL_CHECK_ERRORS(status);
default_work_size.size(), NULL, status = clSetKernelArg(kernel, 2, sizeof(float), &scale);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(float), &bias);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 4, sizeof(int), &out_width);
CL_CHECK_ERRORS(status);
status = clEnqueueNDRangeKernel(
this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
default_work_size.data(), NULL, 0, NULL, NULL); default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
} }
template class ScaleKernel<GPU_CL, float>; template class ScaleKernel<GPU_CL, float>;
......
...@@ -32,12 +32,16 @@ void TanhKernel<GPU_CL, float>::Compute(const TanhParam<GPU_CL>& param) { ...@@ -32,12 +32,16 @@ void TanhKernel<GPU_CL, float>::Compute(const TanhParam<GPU_CL>& param) {
auto default_work_size = this->cl_helper_.DefaultWorkSize(*output); auto default_work_size = this->cl_helper_.DefaultWorkSize(*output);
auto inputImage = input->GetCLImage(); auto inputImage = input->GetCLImage();
auto outputImage = output->GetCLImage(); auto outputImage = output->GetCLImage();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage); cl_int status;
clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage); status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
CL_CHECK_ERRORS(status);
const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()}; const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()};
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL, status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
work_size, NULL, 0, NULL, NULL); NULL, work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
} }
template class TanhKernel<GPU_CL, float>; template class TanhKernel<GPU_CL, float>;
......
...@@ -483,6 +483,7 @@ def check_mobile_results(args, fuse, mem_opt): ...@@ -483,6 +483,7 @@ def check_mobile_results(args, fuse, mem_opt):
pp_yellow("paddle mobile results are : ", 1) pp_yellow("paddle mobile results are : ", 1)
pp_red(str(error_values2).replace("\n", "\n" + "\t" * 1), 1) pp_red(str(error_values2).replace("\n", "\n" + "\t" * 1), 1)
if not fuse and not mem_opt: if not fuse and not mem_opt:
pp_yellow("checking individual ops : ", 1)
error_index = None error_index = None
error_values1 = None error_values1 = None
error_values2 = None error_values2 = None
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请注册