From cec396b34cab3b2402959e3cbefdfb4c50dcb9c3 Mon Sep 17 00:00:00 2001 From: yangfei Date: Wed, 17 Oct 2018 19:27:28 +0800 Subject: [PATCH] imp CLImage printer function --- src/framework/cl/cl_image.cpp | 113 ++++++++++++------ src/framework/cl/cl_image.h | 25 +++- .../kernel/cl/cl_kernel/channel_add_kernel.cl | 2 +- src/operators/op_param.h | 13 +- test/net/test_mobilenet_GPU.cpp | 2 +- 5 files changed, 106 insertions(+), 49 deletions(-) diff --git a/src/framework/cl/cl_image.cpp b/src/framework/cl/cl_image.cpp index a999971192..fd2cf760cd 100644 --- a/src/framework/cl/cl_image.cpp +++ b/src/framework/cl/cl_image.cpp @@ -124,51 +124,88 @@ Print &operator<<(Print &printer, const CLImage &cl_image) { stride = stride > 0 ? stride : 1; float *data = new float[cl_image.numel()]; DDim ddim = cl_image.dims(); - size_t N, C, H, W; - if (ddim.size() == 4) { - N = ddim[0]; - if (N < 0) { + size_t N, C, H, W, width, height; + if (cl_image.GetImageType() == Normal) { + if (ddim.size() == 4) { + N = ddim[0]; + if (N < 0) { + N = 1; + } + C = ddim[1]; + H = ddim[2]; + W = ddim[3]; + width = W * ((C + 3) / 4); + height = N * H; + } else if (ddim.size() == 2) { + width = ddim[1]; + height = ddim[0]; + N = 1; + C = 1; + H = ddim[0]; + W = ddim[1]; + } else if (ddim.size() == 1) { + width = ddim[0]; + height = 1; N = 1; + C = 1; + H = 1; + W = ddim[0]; + } + float *p = data; + half imageData[width * height * 4]; + cl_int err; + cl_mem image = cl_image.GetCLImage(); + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {width, height, 1}; + err = clEnqueueReadImage(cl_image.CommandQueue(), image, CL_TRUE, origin, + region, 0, 0, imageData, 0, NULL, NULL); + size_t i0 = 0; + for (int n = 0; n < N; n++) { + for (int c = 0; c < C; c++) { + size_t i1 = i0 + (c / 4) * W; + for (int h = 0; h < H; h++) { + size_t i2 = (i1 << 2) + c % 4; + for (int w = 0; w < W; w++) { + *p = Half2Float(imageData[i2]); + i2 += 4; + p++; + } + i1 += width; + } + } + i0 += width * H; } - C = ddim[1]; - H = ddim[2]; - W = ddim[3]; - } else if (ddim.size() == 1) { - N = 1; - C = ddim[0]; - H = 1; - W = 1; - } - size_t width = W * ((C + 3) / 4); - size_t height = H * N; + CL_CHECK_ERRORS(err); + } else { + if (ddim.size() == 2) { + width = (ddim[1] + 3) / 4; + height = ddim[0]; + H = ddim[0]; + W = ddim[1]; - float *p = data; - half imageData[width * height * 4]; - cl_int err; - cl_mem image = cl_image.GetCLImage(); - size_t origin[3] = {0, 0, 0}; - size_t region[3] = {width, height, 1}; - err = clEnqueueReadImage(cl_image.CommandQueue(), image, CL_TRUE, origin, - region, 0, 0, imageData, 0, NULL, NULL); - size_t i0 = 0; - for (int n = 0; n < N; n++) { - for (int c = 0; c < C; c++) { - size_t i1 = i0; - for (int h = 0; h < H; h++) { - size_t i2 = (i1 << 2) + c % 4; - for (int w = 0; w < W; w++) { - *p = Half2Float(imageData[i2]); - i2 += 4; - p++; - } - i1 += width; + } else if (ddim.size() == 1) { + width = (ddim[0] + 3) / 4; + height = 1; + H = 1; + W = ddim[0]; + } + float *p = data; + half imageData[width * height * 4]; + cl_int err; + cl_mem image = cl_image.GetCLImage(); + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {width, height, 1}; + err = clEnqueueReadImage(cl_image.CommandQueue(), image, CL_TRUE, origin, + region, 0, 0, imageData, 0, NULL, NULL); + for (int h = 0; h < H; h++) { + for (int w = 0; w < W; w++) { + p[h * W + w] = Half2Float(imageData[(h * width + w / 4) * 4 + (w % 4)]); } } - i0 += width * H; - } - CL_CHECK_ERRORS(err); + CL_CHECK_ERRORS(err); + } for (int i = 0; i < cl_image.numel(); i += stride) { printer << data[i] << " "; diff --git a/src/framework/cl/cl_image.h b/src/framework/cl/cl_image.h index 4ee64d77c1..de2578e7cb 100644 --- a/src/framework/cl/cl_image.h +++ b/src/framework/cl/cl_image.h @@ -26,6 +26,8 @@ limitations under the License. */ namespace paddle_mobile { namespace framework { +enum ImageType { Normal, Folder }; + class CLImage { public: CLImage() = default; @@ -60,6 +62,19 @@ class CLImage { initialized_ = true; } + /* + * need call SetTensorData first + * */ + void InitCLImageNormal(cl_context context, cl_command_queue command_queue) { + if (tensor_data_ == nullptr) { + PADDLE_MOBILE_THROW_EXCEPTION(" need call SetTensorData first"); + } + InitCLImage(context, command_queue, tensor_data_, tensor_dims_); + delete[](tensor_data_); + tensor_data_ = nullptr; + initialized_ = true; + } + void InitEmptyImage(cl_context context, cl_command_queue command_queue, const DDim &dim) { if (tensor_data_ != nullptr) { @@ -124,9 +139,13 @@ class CLImage { * */ const DDim &dims() const { return tensor_dims_; } + const ImageType GetImageType() const { type; } + private: + ImageType type; void InitCLImage2C(cl_context context, cl_command_queue command_queue, float *tensor_data, const DDim &dim) { + type = Folder; command_queue_ = command_queue; assert(dim.size() <= 2); int tdim[2] = {1, 1}; @@ -136,7 +155,7 @@ class CLImage { tdim[0] = dim[0]; tdim[1] = dim[1]; } - int width = tdim[1] + 3 / 4; + int width = (tdim[1] + 3) / 4; int height = tdim[0]; std::unique_ptr imageData{}; if (tensor_data) { @@ -181,6 +200,7 @@ class CLImage { } void InitCLImage(cl_context context, cl_command_queue command_queue, float *tensor_data, const DDim &dim) { + type = Normal; DLOG << " tensor dim: " << dim; // NCHW -> [W * (C+3)/4, H * N] tensor_dims_ = dim; @@ -224,7 +244,8 @@ class CLImage { for (int h = 0; h < H; h++) { size_t i2 = (i1 << 2) + c % 4; for (int w = 0; w < W; w++) { - // int x = (n * width * H + h * width + (c / 4) * W + w) * 4 + (c % 4); + // int x = (n * width * H + h * width + (c / 4) * W + w) * 4 + (c + // % 4); imageData[i2] = Float2Half(*p); i2 += 4; p++; diff --git a/src/operators/kernel/cl/cl_kernel/channel_add_kernel.cl b/src/operators/kernel/cl/cl_kernel/channel_add_kernel.cl index f3065844f8..54835ab897 100644 --- a/src/operators/kernel/cl/cl_kernel/channel_add_kernel.cl +++ b/src/operators/kernel/cl/cl_kernel/channel_add_kernel.cl @@ -21,7 +21,7 @@ __kernel void channel_add(__global image2d_t input, __global image2d_t bias,__wr coords.y = y; int2 coords_bias; coords_bias.x = x/w; - coords_bias.y = 1; + coords_bias.y = 0; half4 in = read_imageh(input, sampler, coords); half4 biase = read_imageh(bias, sampler, coords_bias); half4 output = in + biase; diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 04c4499f0a..9873721230 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -1235,7 +1235,7 @@ class ReluParamBase : public OpParam { public: ReluParamBase(const VariableNameMap &inputs, const VariableNameMap &outputs, - const AttributeMap &attrs, const Scope &scope) { + const AttributeMap &attrs, const Scope &scope) { input_x_ = InputXFrom(inputs, scope); out_ = OutFrom(outputs, scope); } @@ -1251,18 +1251,17 @@ class ReluParamBase : public OpParam { template class ReluParam : public ReluParamBase { -public: + public: using ReluParamBase::ReluParamBase; }; template <> class ReluParam : public ReluParamBase { -public: + public: using ReluParamBase::ReluParamBase; - framework::CLImage& getMidImage() { - return midImage; - } -private: + framework::CLImage &getMidImage() { return midImage; } + + private: framework::CLImage midImage; }; diff --git a/test/net/test_mobilenet_GPU.cpp b/test/net/test_mobilenet_GPU.cpp index a5a78f7f8f..f69334daf2 100644 --- a/test/net/test_mobilenet_GPU.cpp +++ b/test/net/test_mobilenet_GPU.cpp @@ -23,7 +23,7 @@ int main() { // auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model", // std::string(g_mobilenet_detect) + "/params", true); - auto isok = paddle_mobile.Load(g_mobilenet, true); + auto isok = paddle_mobile.Load(g_mobilenet, false); if (isok) { auto time2 = paddle_mobile::time(); std::cout << "load cost :" << paddle_mobile::time_diff(time1, time1) << "ms" -- GitLab