diff --git a/src/framework/cl/cl_half.cpp b/src/framework/cl/cl_half.cpp index d511b950dc787e83439094ae8d9be76af817b4b0..40f94c9d4d267ebb1c0a320da716bbf731d52244 100644 --- a/src/framework/cl/cl_half.cpp +++ b/src/framework/cl/cl_half.cpp @@ -487,13 +487,13 @@ static const uint8_t shifttable[512] = { 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d}; -half_t float2half(float f) { +half_t Float2Half(float f) { uint32_t v = *reinterpret_cast<uint32_t *>(&f); return basetable[(v >> 23) & 0x1ff] + ((v & 0x007fffff) >> shifttable[(v >> 23) & 0x1ff]); } -float half2float(half_t h) { +float Half2Float(half_t h) { uint32_t v = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] + exponenttable[h >> 10]; return *reinterpret_cast<float *>(&v); @@ -501,12 +501,12 @@ float half2float(half_t h) { void FloatArray2HalfArray(float *f_array, half_t *h_array, int count) { for (int i = 0; i < count; ++i) { - h_array[i] = float2half(f_array[i]); + h_array[i] = Float2Half(f_array[i]); } } void HalfArray2FloatArray(half_t *h_array, float *f_array, int count) { for (int i = 0; i < count; ++i) { - f_array[i] = float2half(h_array[i]); + f_array[i] = Half2Float(h_array[i]); } } diff --git a/src/framework/cl/cl_half.h b/src/framework/cl/cl_half.h index ee6e2c7621af2749c520e5a3b242a099c0c28f0e..fc864912b090adb1b673e4a2e1b35d832cada326 100644 --- a/src/framework/cl/cl_half.h +++ b/src/framework/cl/cl_half.h @@ -17,9 +17,9 @@ limitations under the License. 
*/ typedef uint16_t half_t; -half_t float2half(float f); +half_t Float2Half(float f); -float half2float(half_t h); +float Half2Float(half_t h); void FloatArray2HalfArray(float *f_array, half_t *h_array, int count); diff --git a/src/framework/cl/cl_image.cpp b/src/framework/cl/cl_image.cpp index e59166df4c39d549ba62787175df1b2bead58907..447e08a7012018eae1d09b056a77a233e74f2bee 100644 --- a/src/framework/cl/cl_image.cpp +++ b/src/framework/cl/cl_image.cpp @@ -52,7 +52,7 @@ void CLImageToTensor(CLImage *cl_image, Tensor *tensor, for (int h = 0; h < H; h++) { size_t i2 = (i1 << 2) + c % 4; for (int w = 0; w < W; w++) { - *p = half2float(imageData[i2]); + *p = Half2Float(imageData[i2]); i2 += 4; p++; } @@ -106,7 +106,7 @@ void TensorToCLImage(const Tensor *tensor, CLImage *cl_image, for (int h = 0; h < H; h++) { size_t i2 = (i1 << 2) + c % 4; for (int w = 0; w < W; w++) { - imageData[i2] = float2half(*p); + imageData[i2] = Float2Half(*p); i2 += 4; p++; } diff --git a/src/framework/cl/cl_image.h b/src/framework/cl/cl_image.h index d971ad2c9bd847190fcea76b92b5ae9a38fe75ef..b2b1a1071ab3e0267ece35936a5a5d39c5b1aff6 100644 --- a/src/framework/cl/cl_image.h +++ b/src/framework/cl/cl_image.h @@ -179,7 +179,7 @@ class CLImage { } assert(i2 < width * height * 4); - imageData[i2] = float2half(*p); + imageData[i2] = Float2Half(*p); i2 += 4; p++; // count++; @@ -206,7 +206,6 @@ class CLImage { &err); if (err != CL_SUCCESS) { - // TODO(HaiPeng): error handling CL_CHECK_ERRORS(err); PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error "); } diff --git a/src/operators/kernel/cl/feed_kernel.cpp b/src/operators/kernel/cl/feed_kernel.cpp index 5c1f5ea33df06173796ce4b2ec692be19a9a3e4a..33ba7d2e17f874da15af4406dea78534d6a34cb0 100644 --- a/src/operators/kernel/cl/feed_kernel.cpp +++ b/src/operators/kernel/cl/feed_kernel.cpp @@ -29,23 +29,40 @@ template <> void FeedKernel::Compute(const FeedParam &param) { auto kernel = this->cl_helper_.KernelAt(0); cl_int status; + DLOG << " feed 0"; auto 
output = param.Out(); + DLOG << " feed 1"; const Tensor *input = param.InputX(); + DLOG << " feed 2"; const float *input_data = nullptr; + DLOG << " feed 3"; input_data = input->data(); + DLOG << " feed 4"; cl_mem cl_image = output->GetCLImage(); + DLOG << " feed 5"; + int height = output->dims()[2]; int width = output->dims()[3]; + DLOG << output->dims(); status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_data); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cl_image); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &width); + CL_CHECK_ERRORS(status); + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &height); + CL_CHECK_ERRORS(status); size_t global_work_size[2] = {height, width}; - clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL, + status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL); + CL_CHECK_ERRORS(status); + } template class FeedKernel;