提交 b629d3a5 编写于 作者: D dolphin8

Merge branch 'opencl' of https://github.com/PaddlePaddle/paddle-mobile into opencl

......@@ -487,13 +487,13 @@ static const uint8_t shifttable[512] = {
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d};
// Converts a single-precision float to a half-precision bit pattern (half_t)
// by table lookup.
//
// The top nine bits of the float's 32-bit representation ((v >> 23) & 0x1ff,
// i.e. sign plus exponent field when float is IEEE-754 binary32) select an
// entry in basetable and shifttable; the low 23 bits are shifted right by the
// shifttable entry and added to the basetable entry.
//
// @param f  value to convert
// @return   the resulting half_t bit pattern
half_t Float2Half(float f) {
  // NOTE(review): reading a float object through a uint32_t lvalue is type
  // punning; std::memcpy into a uint32_t would be the strictly conforming
  // form. Left as-is because <cstring> is not known to be included here.
  const uint32_t bits = *reinterpret_cast<uint32_t *>(&f);
  const uint32_t table_index = (bits >> 23) & 0x1ff;
  const uint32_t mantissa = bits & 0x007fffff;
  return basetable[table_index] + (mantissa >> shifttable[table_index]);
}
float half2float(half_t h) {
float Half2Float(half_t h) {
uint32_t v = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] +
exponenttable[h >> 10];
return *reinterpret_cast<float *>(&v);
......@@ -501,12 +501,12 @@ float half2float(half_t h) {
// Converts the first `count` floats of f_array to half_t values, writing the
// i-th result to h_array[i]. Each element is converted with Float2Half.
// Nothing is read or written when count <= 0.
//
// @param f_array  source floats; must hold at least `count` elements
// @param h_array  destination; must have room for at least `count` elements
// @param count    number of elements to convert
void FloatArray2HalfArray(float *f_array, half_t *h_array, int count) {
  for (int i = 0; i < count; ++i) {
    h_array[i] = Float2Half(f_array[i]);
  }
}
// Converts the first `count` half_t values of h_array to floats, writing the
// i-th result to f_array[i]. Each element is converted with Half2Float
// (half -> float); using the float -> half routine here would be the wrong
// direction. Nothing is read or written when count <= 0.
//
// @param h_array  source half values; must hold at least `count` elements
// @param f_array  destination; must have room for at least `count` elements
// @param count    number of elements to convert
void HalfArray2FloatArray(half_t *h_array, float *f_array, int count) {
  for (int i = 0; i < count; ++i) {
    f_array[i] = Half2Float(h_array[i]);
  }
}
......@@ -17,9 +17,9 @@ limitations under the License. */
// 16-bit unsigned storage type used to carry a half-precision value as a raw
// bit pattern.
typedef uint16_t half_t;
// NOTE(review): lower-case spelling of Float2Half; the definition shown in
// this change uses the Float2Half name — confirm this declaration can go.
half_t float2half(float f);
// Converts one float to its half_t bit pattern.
half_t Float2Half(float f);
// NOTE(review): lower-case spelling of Half2Float; the definition shown in
// this change uses the Half2Float name — confirm this declaration can go.
float half2float(half_t h);
// Converts one half_t bit pattern back to a float.
float Half2Float(half_t h);
// Converts `count` floats from f_array into half_t values stored in h_array.
void FloatArray2HalfArray(float *f_array, half_t *h_array, int count);
......
......@@ -52,7 +52,7 @@ void CLImageToTensor(CLImage *cl_image, Tensor *tensor,
for (int h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (int w = 0; w < W; w++) {
*p = half2float(imageData[i2]);
*p = Half2Float(imageData[i2]);
i2 += 4;
p++;
}
......@@ -106,7 +106,7 @@ void TensorToCLImage(const Tensor *tensor, CLImage *cl_image,
for (int h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (int w = 0; w < W; w++) {
imageData[i2] = float2half(*p);
imageData[i2] = Float2Half(*p);
i2 += 4;
p++;
}
......
......@@ -177,7 +177,7 @@ class CLImage {
}
assert(i2 < width * height * 4);
imageData[i2] = float2half(*p);
imageData[i2] = Float2Half(*p);
i2 += 4;
p++;
// count++;
......@@ -219,7 +219,6 @@ class CLImage {
&err);
if (err != CL_SUCCESS) {
// TODO(HaiPeng): error handling
CL_CHECK_ERRORS(err);
PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error ");
}
......
......@@ -29,23 +29,39 @@ template <>
// Runs the feed kernel: binds the input data pointer, the output CL image and
// the output width/height as kernel arguments, then enqueues the kernel over a
// 2-D range of {height, width} work-items.
//
// Height and width are taken from dims()[2] and dims()[3] of the output.
// Every OpenCL status code is passed to CL_CHECK_ERRORS.
//
// @param param  feed parameters supplying the input tensor (InputX) and the
//               output image (Out)
void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
  auto kernel = this->cl_helper_.KernelAt(0);
  cl_int status;
  DLOG << " feed 0";
  auto output = param.Out();
  DLOG << " feed 1";
  const Tensor *input = param.InputX();
  DLOG << " feed 2";
  const float *input_data = nullptr;
  DLOG << " feed 3";
  input_data = input->data<float>();
  DLOG << " feed 4";
  cl_mem cl_image = output->GetCLImage();
  DLOG << " feed 5";
  int height = output->dims()[2];
  int width = output->dims()[3];
  DLOG << output->dims();

  // Each kernel argument needs its own index; binding everything to index 0
  // leaves arguments 1..3 unset and only the last value bound.
  // Assumes the kernel signature is (input, output image, width, height), in
  // the order these calls were written — TODO confirm against the .cl source.
  //
  // NOTE(review): input_data is a host float pointer, not a cl_mem. If the
  // kernel's first parameter is a __global buffer, the data has to be copied
  // into a CL buffer first and that cl_mem passed here instead.
  status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_data);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &cl_image);
  CL_CHECK_ERRORS(status);
  // Scalar arguments must be passed with the size of the scalar itself,
  // not sizeof(cl_mem).
  status = clSetKernelArg(kernel, 2, sizeof(width), &width);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 3, sizeof(height), &height);
  CL_CHECK_ERRORS(status);

  // Explicit casts: int -> size_t inside a braced initializer is a narrowing
  // conversion.
  size_t global_work_size[2] = {static_cast<size_t>(height),
                                static_cast<size_t>(width)};
  // Enqueue exactly once and check the result.
  status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
                                  NULL, global_work_size, NULL, 0, NULL, NULL);
  CL_CHECK_ERRORS(status);
}
template class FeedKernel<GPU_CL, float>;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册