diff --git a/mobile/src/framework/cl/cl_image.cpp b/mobile/src/framework/cl/cl_image.cpp index 7c716f370bfb1cc26d36146a41aefab53a99da2e..4f4b0d8883586e221b9178a104a7f295fab06f83 100644 --- a/mobile/src/framework/cl/cl_image.cpp +++ b/mobile/src/framework/cl/cl_image.cpp @@ -38,21 +38,31 @@ void CLImageToTensor(CLImage *cl_image, Tensor *tensor, cl_context context, auto input_image = cl_image->GetCLImage(); - clSetKernelArg(kernel, 0, sizeof(int), &in_height); - clSetKernelArg(kernel, 1, sizeof(int), &in_width); - clSetKernelArg(kernel, 2, sizeof(cl_mem), &input_image); - clSetKernelArg(kernel, 3, sizeof(cl_mem), &outBuffer); + cl_int status; + status = clSetKernelArg(kernel, 0, sizeof(int), &in_height); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 1, sizeof(int), &in_width); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &input_image); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &outBuffer); + CL_CHECK_ERRORS(status); int size_ch = in_height * in_width; int size_block = size_ch * 4; int size_batch = size_ch * C; - clSetKernelArg(kernel, 4, sizeof(int), &size_ch); - clSetKernelArg(kernel, 5, sizeof(int), &size_block); - clSetKernelArg(kernel, 6, sizeof(int), &size_batch); - clSetKernelArg(kernel, 7, sizeof(int), &C); + status = clSetKernelArg(kernel, 4, sizeof(int), &size_ch); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 5, sizeof(int), &size_block); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 6, sizeof(int), &size_batch); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 7, sizeof(int), &C); + CL_CHECK_ERRORS(status); size_t global_work_size[3] = {(new_dims[1] + 3) / 4, new_dims[3], new_dims[0] * new_dims[2]}; - clEnqueueNDRangeKernel(commandQueue, kernel, 3, NULL, global_work_size, NULL, - 0, NULL, NULL); + status = clEnqueueNDRangeKernel(commandQueue, kernel, 3, NULL, + global_work_size, NULL, 0, NULL, NULL); + CL_CHECK_ERRORS(status); memcpy(tensor->data(), out_cl_tensor.Data(), tensor->memory_size()); } diff --git a/mobile/src/operators/kernel/cl/batchnorm_kernel.cpp b/mobile/src/operators/kernel/cl/batchnorm_kernel.cpp index ce57e8dc2e1504e0e3a1fe957bded6de5312d5fa..6e5039cf050b04c681f738f06f8cd3baf2ccbb75 100644 --- a/mobile/src/operators/kernel/cl/batchnorm_kernel.cpp +++ b/mobile/src/operators/kernel/cl/batchnorm_kernel.cpp @@ -87,18 +87,20 @@ void BatchNormKernel::Compute( DLOG << out_width; DLOG << *param.OutputY(); cl_int status; - clSetKernelArg(kernel, 0, sizeof(cl_int), &out_width); + status = clSetKernelArg(kernel, 0, sizeof(cl_int), &out_width); CL_CHECK_ERRORS(status); - clSetKernelArg(kernel, 1, sizeof(cl_mem), &input); + status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &input); CL_CHECK_ERRORS(status); - clSetKernelArg(kernel, 2, sizeof(cl_mem), &new_scale); + status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &new_scale); CL_CHECK_ERRORS(status); - clSetKernelArg(kernel, 3, sizeof(cl_mem), &new_bias); + status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &new_bias); CL_CHECK_ERRORS(status); - clSetKernelArg(kernel, 4, sizeof(cl_mem), &out); + status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &out); + CL_CHECK_ERRORS(status); + status = + clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, + default_work_size.data(), NULL, 0, NULL, NULL); CL_CHECK_ERRORS(status); - clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, - default_work_size.data(), NULL, 0, NULL, NULL); } template class BatchNormKernel; diff --git a/mobile/src/operators/kernel/cl/fetch_kernel.cpp b/mobile/src/operators/kernel/cl/fetch_kernel.cpp index 2ce3e928a12ce752236709211ce06a52e3fcd9c3..e1e1522a449685902dd64369bcc15798d1376a72 100644 --- a/mobile/src/operators/kernel/cl/fetch_kernel.cpp +++ b/mobile/src/operators/kernel/cl/fetch_kernel.cpp @@ -59,23 +59,31 @@ void FetchKernel::Compute(const FetchParam ¶m) { out_cl_tensor.Resize(out->dims()); cl_mem outBuffer = out_cl_tensor.mutable_data(); - clSetKernelArg(kernel, 0, sizeof(int), &in_height); - clSetKernelArg(kernel, 1, sizeof(int), &in_width); - clSetKernelArg(kernel, 2, sizeof(cl_mem), &input); - clSetKernelArg(kernel, 3, sizeof(cl_mem), &outBuffer); - clSetKernelArg(kernel, 4, sizeof(int), &size_ch); - clSetKernelArg(kernel, 5, sizeof(int), &size_block); - clSetKernelArg(kernel, 6, sizeof(int), &size_batch); - clSetKernelArg(kernel, 7, sizeof(int), &in_ch); + cl_int status; + status = clSetKernelArg(kernel, 0, sizeof(int), &in_height); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 1, sizeof(int), &in_width); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &input); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &outBuffer); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 4, sizeof(int), &size_ch); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 5, sizeof(int), &size_block); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 6, sizeof(int), &size_batch); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 7, sizeof(int), &in_ch); + CL_CHECK_ERRORS(status); // cl_event wait_event = param.InpdutX()->GetClEvent(); - clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, - default_work_size.data(), NULL, 0, NULL, NULL); + status = + clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, + default_work_size.data(), NULL, 0, NULL, NULL); + CL_CHECK_ERRORS(status); - // printf(" before finish \n"); - // clFlsh(this->cl_helper_.CLCommandQueue()); clFinish(this->cl_helper_.CLCommandQueue()); - // printf(" after finish \n"); DLOG << "fetch kernel out dims = " << out->dims(); DLOG << "fetch kernel out memory size = " << out->memory_size(); diff --git a/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp b/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp index 0486852d5e6ff2d9104d3ab7bfeb032097e6cda2..a8307d05d5b493a983e33cebdb331bdc09c27fd9 100644 --- a/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp +++ b/mobile/src/operators/kernel/cl/instancenorm_kernel.cpp @@ -76,24 +76,26 @@ void InstanceNormKernel::Compute( << " " << local_work_size[2]; cl_int status; - clSetKernelArg(kernel, 0, sizeof(cl_int), &w); + status = clSetKernelArg(kernel, 0, sizeof(cl_int), &w); CL_CHECK_ERRORS(status); - clSetKernelArg(kernel, 1, sizeof(cl_int), &h); + status = clSetKernelArg(kernel, 1, sizeof(cl_int), &h); CL_CHECK_ERRORS(status); - clSetKernelArg(kernel, 2, sizeof(cl_int), &c_group); + status = clSetKernelArg(kernel, 2, sizeof(cl_int), &c_group); CL_CHECK_ERRORS(status); - clSetKernelArg(kernel, 3, sizeof(cl_int), &local_work_size1); + status = clSetKernelArg(kernel, 3, sizeof(cl_int), &local_work_size1); CL_CHECK_ERRORS(status); - clSetKernelArg(kernel, 4, sizeof(cl_int), &local_work_size2); + status = clSetKernelArg(kernel, 4, sizeof(cl_int), &local_work_size2); CL_CHECK_ERRORS(status); - clSetKernelArg(kernel, 5, sizeof(cl_float), &epsilon); + status = clSetKernelArg(kernel, 5, sizeof(cl_float), &epsilon); CL_CHECK_ERRORS(status); - clSetKernelArg(kernel, 6, sizeof(cl_mem), &input); + status = clSetKernelArg(kernel, 6, sizeof(cl_mem), &input); CL_CHECK_ERRORS(status); - clSetKernelArg(kernel, 7, sizeof(cl_mem), &out); + status = clSetKernelArg(kernel, 7, sizeof(cl_mem), &out); + CL_CHECK_ERRORS(status); + status = + clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, + work_size, local_work_size, 0, NULL, NULL); CL_CHECK_ERRORS(status); - clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, - work_size, local_work_size, 0, NULL, NULL); } template class InstanceNormKernel; diff --git a/mobile/src/operators/kernel/cl/pool_kernel.cpp b/mobile/src/operators/kernel/cl/pool_kernel.cpp index df79ababadd4c1b959a1eb0fe237a45ab97a6bd8..ed0731c31b01728592336abcd1e282fc74f5ca11 100644 --- a/mobile/src/operators/kernel/cl/pool_kernel.cpp +++ b/mobile/src/operators/kernel/cl/pool_kernel.cpp @@ -57,23 +57,38 @@ void PoolKernel::Compute(const PoolParam ¶m) { const int ksize_h = ksize[0]; const int ksize_w = ksize[1]; - clSetKernelArg(kernel, 0, sizeof(cl_int), &in_height); - clSetKernelArg(kernel, 1, sizeof(cl_int), &in_width); - clSetKernelArg(kernel, 2, sizeof(cl_int), &out_height); - clSetKernelArg(kernel, 3, sizeof(cl_int), &out_width); - clSetKernelArg(kernel, 4, sizeof(cl_int), &pad_top); - clSetKernelArg(kernel, 5, sizeof(cl_int), &pad_left); - clSetKernelArg(kernel, 6, sizeof(cl_int), &stride_h); - clSetKernelArg(kernel, 7, sizeof(cl_int), &stride_w); - clSetKernelArg(kernel, 8, sizeof(cl_int), &ksize_h); - clSetKernelArg(kernel, 9, sizeof(cl_int), &ksize_w); - clSetKernelArg(kernel, 10, sizeof(cl_mem), &input); - clSetKernelArg(kernel, 11, sizeof(cl_mem), &out); + cl_int status; + status = clSetKernelArg(kernel, 0, sizeof(cl_int), &in_height); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 1, sizeof(cl_int), &in_width); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 2, sizeof(cl_int), &out_height); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 3, sizeof(cl_int), &out_width); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 4, sizeof(cl_int), &pad_top); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 5, sizeof(cl_int), &pad_left); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 6, sizeof(cl_int), &stride_h); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 7, sizeof(cl_int), &stride_w); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 8, sizeof(cl_int), &ksize_h); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 9, sizeof(cl_int), &ksize_w); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 10, sizeof(cl_mem), &input); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 11, sizeof(cl_mem), &out); + CL_CHECK_ERRORS(status); // cl_event out_event = param.Output()->GetClEvent(); // cl_event wait_event = param.Input()->GetClEvent(); - clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, - default_work_size.data(), NULL, 0, NULL, NULL); + status = + clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, + default_work_size.data(), NULL, 0, NULL, NULL); + CL_CHECK_ERRORS(status); } template class PoolKernel; diff --git a/mobile/src/operators/kernel/cl/relu_kernel.cpp b/mobile/src/operators/kernel/cl/relu_kernel.cpp index dcd1275e3cf2fc7a226f32371e81b7b1ba91b9d2..f166963d946e85d5923b54980cfbd265c1b6560d 100644 --- a/mobile/src/operators/kernel/cl/relu_kernel.cpp +++ b/mobile/src/operators/kernel/cl/relu_kernel.cpp @@ -43,8 +43,11 @@ void ReluKernel::Compute(const ReluParam& param) { auto outputImage = output->GetCLImage(); // auto tImage = // const_cast&>(param).getMidImage().GetCLImage(); - clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage); - clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage); + cl_int status; + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage); + CL_CHECK_ERRORS(status); // clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &inputImage); // clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &tImage); // clSetKernelArg(kernel_p1, 0, sizeof(cl_mem), &tImage); @@ -54,8 +57,9 @@ void ReluKernel::Compute(const ReluParam& param) { // cl_event out_event = param.Out()->GetClEvent(); // cl_event wait_event = param.InputX()->GetClEvent(); - clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL, - work_size, NULL, 0, NULL, NULL); + status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, + NULL, work_size, NULL, 0, NULL, NULL); + CL_CHECK_ERRORS(status); // clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3, // NULL, // work_size, NULL, 0, NULL, NULL); diff --git a/mobile/src/operators/kernel/cl/scale_kernel.cpp b/mobile/src/operators/kernel/cl/scale_kernel.cpp index ac0ac05beadd95a7f0a0e50dae0e51b1fbc78547..4ab2be7c3fdd304a6c082b92180e931888855a82 100644 --- a/mobile/src/operators/kernel/cl/scale_kernel.cpp +++ b/mobile/src/operators/kernel/cl/scale_kernel.cpp @@ -36,14 +36,22 @@ void ScaleKernel::Compute(const ScaleParam& param) { auto inputImage = input->GetCLImage(); auto outputImage = output->GetCLImage(); int out_width = (output->dims().size() == 4) ? output->dims()[3] : 1; - clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage); - clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage); - clSetKernelArg(kernel, 2, sizeof(float), &scale); - clSetKernelArg(kernel, 3, sizeof(float), &bias); - clSetKernelArg(kernel, 4, sizeof(int), &out_width); - clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, - default_work_size.size(), NULL, - default_work_size.data(), NULL, 0, NULL, NULL); + + cl_int status; + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 2, sizeof(float), &scale); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 3, sizeof(float), &bias); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 4, sizeof(int), &out_width); + CL_CHECK_ERRORS(status); + status = clEnqueueNDRangeKernel( + this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL, + default_work_size.data(), NULL, 0, NULL, NULL); + CL_CHECK_ERRORS(status); } template class ScaleKernel; diff --git a/mobile/src/operators/kernel/cl/tanh_kernel.cpp b/mobile/src/operators/kernel/cl/tanh_kernel.cpp index d3b921ae58bfff459157db343b3394eab1100c41..5c63a3606dab53a6f3c85ab61302db357a4cecb1 100644 --- a/mobile/src/operators/kernel/cl/tanh_kernel.cpp +++ b/mobile/src/operators/kernel/cl/tanh_kernel.cpp @@ -32,12 +32,16 @@ void TanhKernel::Compute(const TanhParam& param) { auto default_work_size = this->cl_helper_.DefaultWorkSize(*output); auto inputImage = input->GetCLImage(); auto outputImage = output->GetCLImage(); - clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage); - clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage); + cl_int status; + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage); + CL_CHECK_ERRORS(status); const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()}; - clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL, - work_size, NULL, 0, NULL, NULL); + status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, + NULL, work_size, NULL, 0, NULL, NULL); + CL_CHECK_ERRORS(status); } template class TanhKernel; diff --git a/mobile/tools/python/fluidtools/run.py b/mobile/tools/python/fluidtools/run.py index 5da371f8d95d5928520347045b492b8653a861be..a77943e2af40361876e950c316eda67cb6457191 100644 --- a/mobile/tools/python/fluidtools/run.py +++ b/mobile/tools/python/fluidtools/run.py @@ -482,73 +482,74 @@ def check_mobile_results(args, fuse, mem_opt): pp_red(str(error_values1).replace("\n", "\n" + "\t" * 1), 1) pp_yellow("paddle mobile results are : ", 1) pp_red(str(error_values2).replace("\n", "\n" + "\t" * 1), 1) - if not fuse and not mem_opt: - error_index = None - error_values1 = None - error_values2 = None - checked_names = [] - fetch_names = [] - for fetch in fetches: - fetch_names.append(fetch.name) - for index in op_cache: - op_output_var_name, op = op_cache[index] - if mem_opt: - found_in_fetch = False - for fetch in fetches: - if op_output_var_name == fetch.name: - found_in_fetch = True - break - if not found_in_fetch: + if not fuse and not mem_opt: + pp_yellow("checking individual ops : ", 1) + error_index = None + error_values1 = None + error_values2 = None + checked_names = [] + fetch_names = [] + for fetch in fetches: + fetch_names.append(fetch.name) + for index in op_cache: + op_output_var_name, op = op_cache[index] + if mem_opt: + found_in_fetch = False + for fetch in fetches: + if op_output_var_name == fetch.name: + found_in_fetch = True + break + if not found_in_fetch: + continue + if not op_output_var_name in output_var_cache: continue - if not op_output_var_name in output_var_cache: - continue - if not op_output_var_name in mobile_var_cache: - continue - if fuse or mem_opt: - if op_output_var_name not in fetch_names: + if not op_output_var_name in mobile_var_cache: continue - values1 = output_var_cache[op_output_var_name] - values2 = mobile_var_cache[op_output_var_name] - shape = get_var_shape(op_output_var_name) if check_shape else [] - if len(values1) + len(shape) != len(values2): - error_index = index - for i in range(len(shape)): - v1 = shape[i] - v2 = values2[i] - if v1 != v2: + if fuse or mem_opt: + if op_output_var_name not in fetch_names: + continue + values1 = output_var_cache[op_output_var_name] + values2 = mobile_var_cache[op_output_var_name] + shape = get_var_shape(op_output_var_name) if check_shape else [] + if len(values1) + len(shape) != len(values2): error_index = index - break - if error_index == None: - for i in range(len(values1)): - v1 = values1[i] - v2 = values2[len(shape) + i] - if abs(v1 - v2) > diff_threshold: + for i in range(len(shape)): + v1 = shape[i] + v2 = values2[i] + if v1 != v2: error_index = index break - checked_names.append(op_output_var_name) - if error_index != None: - error_values1 = values1 - error_values2 = values2 - break - if error_index == None: - for name in fetch_names: - if name not in checked_names: - error_index = -1 + if error_index == None: + for i in range(len(values1)): + v1 = values1[i] + v2 = values2[len(shape) + i] + if abs(v1 - v2) > diff_threshold: + error_index = index + break + checked_names.append(op_output_var_name) + if error_index != None: + error_values1 = values1 + error_values2 = values2 break - if error_index == None: - pp_green("outputs are all correct", 1) - elif error_index == -1: - pp_red("outputs are missing") - else: - error_values1 = np.array(error_values1) - error_values2 = np.array(error_values2) - # pp_red("mobile op is not correct, error occurs at {}th op, op's type is {}") - pp_red("corresponding fluid op is {}th op, op's type is {}, wrong var name is {}".format( - error_index,op_cache[error_index][1].type,op_output_var_name), 1) - pp_red("fluid results are : ", 1) - pp_red(str(error_values1).replace("\n", "\n" + "\t" * 1), 1) - pp_yellow("paddle mobile results are : ", 1) - pp_red(str(error_values2).replace("\n", "\n" + "\t" * 1), 1) + if error_index == None: + for name in fetch_names: + if name not in checked_names: + error_index = -1 + break + if error_index == None: + pp_green("outputs are all correct", 1) + elif error_index == -1: + pp_red("outputs are missing") + else: + error_values1 = np.array(error_values1) + error_values2 = np.array(error_values2) + # pp_red("mobile op is not correct, error occurs at {}th op, op's type is {}") + pp_red("corresponding fluid op is {}th op, op's type is {}, wrong var name is {}".format( + error_index,op_cache[error_index][1].type,op_output_var_name), 1) + pp_red("fluid results are : ", 1) + pp_red(str(error_values1).replace("\n", "\n" + "\t" * 1), 1) + pp_yellow("paddle mobile results are : ", 1) + pp_red(str(error_values2).replace("\n", "\n" + "\t" * 1), 1) # print(output_var_cache) # print(mobile_var_cache)