提交 50f92c8f 编写于 作者: Z zdenop 提交者: GitHub

Merge pull request #843 from stweil/opencl

opencl: Remove unused code
......@@ -1175,38 +1175,6 @@ void kernel_ThresholdRectToPix_OneChan(
pix[w] = word;
}
}
)
KERNEL(
\n#define RED_SHIFT 24\n
\n#define GREEN_SHIFT 16\n
\n#define BLUE_SHIFT 8\n
\n#define SET_DATA_BYTE( pdata, n, val ) (*(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val))\n
\n
\n__attribute__((reqd_work_group_size(256, 1, 1)))\n
\n__kernel\n
\nvoid kernel_RGBToGray(
__global const unsigned int *srcData,
__global unsigned char *dstData,
int srcWPL,
int dstWPL,
int height,
int width,
float rwt,
float gwt,
float bwt ) {
// pixel index
int pixelIdx = get_global_id(0);
if (pixelIdx >= height*width) return;
unsigned int word = srcData[pixelIdx];
int output = (rwt * ((word >> RED_SHIFT) & 0xff) +
gwt * ((word >> GREEN_SHIFT) & 0xff) +
bwt * ((word >> BLUE_SHIFT) & 0xff) + 0.5f);
// SET_DATA_BYTE
dstData[pixelIdx] = output;
}
)
; // close char*
......
......@@ -3611,170 +3611,4 @@ bool OpenclDevice::selectedDeviceIsOpenCL() {
return (device.type == DS_DEVICE_OPENCL_DEVICE);
}
bool OpenclDevice::selectedDeviceIsNativeCPU() {
ds_device device = getDeviceSelection();
return (device.type == DS_DEVICE_NATIVE_CPU);
}
/*!
* pixConvertRGBToGray() from leptonica, converted to opencl kernel
*
* Input: pix (32 bpp RGB)
* rwt, gwt, bwt (non-negative; these should add to 1.0,
* or use 0.0 for default)
* Return: 8 bpp pix, or null on error
*
* Notes:
* (1) Use a weighted average of the RGB values.
*/
#define SET_DATA_BYTE(pdata, n, val) \
(*(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val))
Pix *OpenclDevice::pixConvertRGBToGrayOCL(Pix *srcPix, // 32-bit source
float rwt, float gwt, float bwt) {
PERF_COUNT_START("pixConvertRGBToGrayOCL")
Pix *dstPix; // 8-bit destination
if (rwt < 0.0 || gwt < 0.0 || bwt < 0.0) return nullptr;
if (rwt == 0.0 && gwt == 0.0 && bwt == 0.0) {
// magic numbers from leptonica
rwt = 0.3;
gwt = 0.5;
bwt = 0.2;
}
// normalize
float sum = rwt + gwt + bwt;
rwt /= sum;
gwt /= sum;
bwt /= sum;
// source pix
int w, h;
pixGetDimensions(srcPix, &w, &h, nullptr);
// printf("Image is %i x %i\n", w, h);
unsigned int *srcData = pixGetData(srcPix);
int srcWPL = pixGetWpl(srcPix);
int srcSize = srcWPL * h * sizeof(unsigned int);
// destination pix
if ((dstPix = pixCreate(w, h, 8)) == nullptr) return nullptr;
pixCopyResolution(dstPix, srcPix);
unsigned int *dstData = pixGetData(dstPix);
int dstWPL = pixGetWpl(dstPix);
int dstWords = dstWPL * h;
int dstSize = dstWords * sizeof(unsigned int);
// printf("dstSize = %i\n", dstSize);
PERF_COUNT_SUB("pix setup")
// opencl objects
cl_int clStatus;
KernelEnv kEnv;
SetKernelEnv(&kEnv);
// source buffer
cl_mem srcBuffer =
clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
srcSize, srcData, &clStatus);
CHECK_OPENCL(clStatus, "clCreateBuffer srcBuffer");
// destination buffer
cl_mem dstBuffer =
clCreateBuffer(kEnv.mpkContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
dstSize, dstData, &clStatus);
CHECK_OPENCL(clStatus, "clCreateBuffer dstBuffer");
// setup work group size parameters
int block_size = 256;
int numWorkGroups = ((h * w + block_size - 1) / block_size);
int numThreads = block_size * numWorkGroups;
size_t local_work_size[] = {static_cast<size_t>(block_size)};
size_t global_work_size[] = {static_cast<size_t>(numThreads)};
// printf("Enqueueing %i threads for %i output pixels\n", numThreads, w*h);
/* compile kernel */
kEnv.mpkKernel =
clCreateKernel(kEnv.mpkProgram, "kernel_RGBToGray", &clStatus);
CHECK_OPENCL(clStatus, "clCreateKernel kernel_RGBToGray");
/* set kernel arguments */
clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), &srcBuffer);
CHECK_OPENCL(clStatus, "clSetKernelArg srcBuffer");
clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), &dstBuffer);
CHECK_OPENCL(clStatus, "clSetKernelArg dstBuffer");
clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(int), &srcWPL);
CHECK_OPENCL(clStatus, "clSetKernelArg srcWPL");
clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(int), &dstWPL);
CHECK_OPENCL(clStatus, "clSetKernelArg dstWPL");
clStatus = clSetKernelArg(kEnv.mpkKernel, 4, sizeof(int), &h);
CHECK_OPENCL(clStatus, "clSetKernelArg height");
clStatus = clSetKernelArg(kEnv.mpkKernel, 5, sizeof(int), &w);
CHECK_OPENCL(clStatus, "clSetKernelArg width");
clStatus = clSetKernelArg(kEnv.mpkKernel, 6, sizeof(float), &rwt);
CHECK_OPENCL(clStatus, "clSetKernelArg rwt");
clStatus = clSetKernelArg(kEnv.mpkKernel, 7, sizeof(float), &gwt);
CHECK_OPENCL(clStatus, "clSetKernelArg gwt");
clStatus = clSetKernelArg(kEnv.mpkKernel, 8, sizeof(float), &bwt);
CHECK_OPENCL(clStatus, "clSetKernelArg bwt");
/* launch kernel & wait */
PERF_COUNT_SUB("before")
clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
nullptr, global_work_size, local_work_size,
0, nullptr, nullptr);
CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_RGBToGray");
clFinish(kEnv.mpkCmdQueue);
PERF_COUNT_SUB("kernel")
/* map results back from gpu */
void *ptr =
clEnqueueMapBuffer(kEnv.mpkCmdQueue, dstBuffer, CL_TRUE, CL_MAP_READ, 0,
dstSize, 0, nullptr, nullptr, &clStatus);
CHECK_OPENCL(clStatus, "clEnqueueMapBuffer dstBuffer");
clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, dstBuffer, ptr, 0, nullptr,
nullptr);
#if 0
// validate: compute on cpu
Pix *cpuPix = pixCreate(w, h, 8);
pixCopyResolution(cpuPix, srcPix);
unsigned int *cpuData = pixGetData(cpuPix);
int cpuWPL = pixGetWpl(cpuPix);
unsigned int *cpuLine, *srcLine;
int i, j;
for (i = 0, srcLine = srcData, cpuLine = cpuData; i < h; i++) {
for (j = 0; j < w; j++) {
unsigned int word = *(srcLine + j);
int val = (l_int32)(rwt * ((word >> L_RED_SHIFT) & 0xff) +
gwt * ((word >> L_GREEN_SHIFT) & 0xff) +
bwt * ((word >> L_BLUE_SHIFT) & 0xff) + 0.5);
SET_DATA_BYTE(cpuLine, j, val);
}
srcLine += srcWPL;
cpuLine += cpuWPL;
}
// validate: compare
printf("converted 32-bit -> 8-bit image\n");
for (int row = 0; row < h; row++) {
for (int col = 0; col < w; col++) {
int idx = row*w + col;
unsigned int srcVal = srcData[idx];
unsigned char cpuVal = ((unsigned char *)cpuData)[idx];
unsigned char oclVal = ((unsigned char *)dstData)[idx];
if (srcVal > 0) {
printf("%4i,%4i: %u, %u, %u\n", row, col, srcVal, cpuVal, oclVal);
}
}
//printf("\n");
}
#endif
// release opencl objects
clReleaseMemObject(srcBuffer);
clReleaseMemObject(dstBuffer);
PERF_COUNT_END
// success
return dstPix;
}
#endif
......@@ -313,16 +313,10 @@ public:
int rect_height, int rect_width,
int rect_top, int rect_left);
static Pix *pixConvertRGBToGrayOCL(Pix *pix, float weightRed = 0.3,
float weightGreen = 0.5,
float weightBlue = 0.2);
static ds_device getDeviceSelection();
static ds_device selectedDevice;
static bool deviceIsSelected;
static bool selectedDeviceIsOpenCL();
static bool selectedDeviceIsNativeCPU();
};
#endif // USE_OPENCL
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册