提交 cec396b3 编写于 作者: Y yangfei

imp CLImage printer function

上级 e9c54cbb
......@@ -124,51 +124,88 @@ Print &operator<<(Print &printer, const CLImage &cl_image) {
stride = stride > 0 ? stride : 1;
float *data = new float[cl_image.numel()];
DDim ddim = cl_image.dims();
size_t N, C, H, W;
if (ddim.size() == 4) {
N = ddim[0];
if (N < 0) {
size_t N, C, H, W, width, height;
if (cl_image.GetImageType() == Normal) {
if (ddim.size() == 4) {
N = ddim[0];
if (N < 0) {
N = 1;
}
C = ddim[1];
H = ddim[2];
W = ddim[3];
width = W * ((C + 3) / 4);
height = N * H;
} else if (ddim.size() == 2) {
width = ddim[1];
height = ddim[0];
N = 1;
C = 1;
H = ddim[0];
W = ddim[1];
} else if (ddim.size() == 1) {
width = ddim[0];
height = 1;
N = 1;
C = 1;
H = 1;
W = ddim[0];
}
float *p = data;
half imageData[width * height * 4];
cl_int err;
cl_mem image = cl_image.GetCLImage();
size_t origin[3] = {0, 0, 0};
size_t region[3] = {width, height, 1};
err = clEnqueueReadImage(cl_image.CommandQueue(), image, CL_TRUE, origin,
region, 0, 0, imageData, 0, NULL, NULL);
size_t i0 = 0;
for (int n = 0; n < N; n++) {
for (int c = 0; c < C; c++) {
size_t i1 = i0 + (c / 4) * W;
for (int h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (int w = 0; w < W; w++) {
*p = Half2Float(imageData[i2]);
i2 += 4;
p++;
}
i1 += width;
}
}
i0 += width * H;
}
C = ddim[1];
H = ddim[2];
W = ddim[3];
} else if (ddim.size() == 1) {
N = 1;
C = ddim[0];
H = 1;
W = 1;
}
size_t width = W * ((C + 3) / 4);
size_t height = H * N;
CL_CHECK_ERRORS(err);
} else {
if (ddim.size() == 2) {
width = (ddim[1] + 3) / 4;
height = ddim[0];
H = ddim[0];
W = ddim[1];
float *p = data;
half imageData[width * height * 4];
cl_int err;
cl_mem image = cl_image.GetCLImage();
size_t origin[3] = {0, 0, 0};
size_t region[3] = {width, height, 1};
err = clEnqueueReadImage(cl_image.CommandQueue(), image, CL_TRUE, origin,
region, 0, 0, imageData, 0, NULL, NULL);
size_t i0 = 0;
for (int n = 0; n < N; n++) {
for (int c = 0; c < C; c++) {
size_t i1 = i0;
for (int h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (int w = 0; w < W; w++) {
*p = Half2Float(imageData[i2]);
i2 += 4;
p++;
}
i1 += width;
} else if (ddim.size() == 1) {
width = (ddim[0] + 3) / 4;
height = 1;
H = 1;
W = ddim[0];
}
float *p = data;
half imageData[width * height * 4];
cl_int err;
cl_mem image = cl_image.GetCLImage();
size_t origin[3] = {0, 0, 0};
size_t region[3] = {width, height, 1};
err = clEnqueueReadImage(cl_image.CommandQueue(), image, CL_TRUE, origin,
region, 0, 0, imageData, 0, NULL, NULL);
for (int h = 0; h < H; h++) {
for (int w = 0; w < W; w++) {
p[h * W + w] = Half2Float(imageData[(h * width + w / 4) * 4 + (w % 4)]);
}
}
i0 += width * H;
}
CL_CHECK_ERRORS(err);
CL_CHECK_ERRORS(err);
}
for (int i = 0; i < cl_image.numel(); i += stride) {
printer << data[i] << " ";
......
......@@ -26,6 +26,8 @@ limitations under the License. */
namespace paddle_mobile {
namespace framework {
enum ImageType { Normal, Folder };
class CLImage {
public:
CLImage() = default;
......@@ -60,6 +62,19 @@ class CLImage {
initialized_ = true;
}
/*
* need call SetTensorData first
* */
void InitCLImageNormal(cl_context context, cl_command_queue command_queue) {
if (tensor_data_ == nullptr) {
PADDLE_MOBILE_THROW_EXCEPTION(" need call SetTensorData first");
}
InitCLImage(context, command_queue, tensor_data_, tensor_dims_);
delete[](tensor_data_);
tensor_data_ = nullptr;
initialized_ = true;
}
void InitEmptyImage(cl_context context, cl_command_queue command_queue,
const DDim &dim) {
if (tensor_data_ != nullptr) {
......@@ -124,9 +139,13 @@ class CLImage {
* */
const DDim &dims() const { return tensor_dims_; }
const ImageType GetImageType() const { type; }
private:
ImageType type;
void InitCLImage2C(cl_context context, cl_command_queue command_queue,
float *tensor_data, const DDim &dim) {
type = Folder;
command_queue_ = command_queue;
assert(dim.size() <= 2);
int tdim[2] = {1, 1};
......@@ -136,7 +155,7 @@ class CLImage {
tdim[0] = dim[0];
tdim[1] = dim[1];
}
int width = tdim[1] + 3 / 4;
int width = (tdim[1] + 3) / 4;
int height = tdim[0];
std::unique_ptr<half_t[]> imageData{};
if (tensor_data) {
......@@ -181,6 +200,7 @@ class CLImage {
}
void InitCLImage(cl_context context, cl_command_queue command_queue,
float *tensor_data, const DDim &dim) {
type = Normal;
DLOG << " tensor dim: " << dim;
// NCHW -> [W * (C+3)/4, H * N]
tensor_dims_ = dim;
......@@ -224,7 +244,8 @@ class CLImage {
for (int h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (int w = 0; w < W; w++) {
// int x = (n * width * H + h * width + (c / 4) * W + w) * 4 + (c % 4);
// int x = (n * width * H + h * width + (c / 4) * W + w) * 4 + (c
// % 4);
imageData[i2] = Float2Half(*p);
i2 += 4;
p++;
......
......@@ -21,7 +21,7 @@ __kernel void channel_add(__global image2d_t input, __global image2d_t bias,__wr
coords.y = y;
int2 coords_bias;
coords_bias.x = x/w;
coords_bias.y = 1;
coords_bias.y = 0;
half4 in = read_imageh(input, sampler, coords);
half4 biase = read_imageh(bias, sampler, coords_bias);
half4 output = in + biase;
......
......@@ -1235,7 +1235,7 @@ class ReluParamBase : public OpParam {
public:
ReluParamBase(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope) {
const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
}
......@@ -1251,18 +1251,17 @@ class ReluParamBase : public OpParam {
template <typename Dtype>
class ReluParam : public ReluParamBase<Dtype> {
public:
public:
using ReluParamBase<Dtype>::ReluParamBase;
};
template <>
class ReluParam<GPU_CL> : public ReluParamBase<GPU_CL> {
public:
public:
using ReluParamBase<GPU_CL>::ReluParamBase;
framework::CLImage& getMidImage() {
return midImage;
}
private:
framework::CLImage &getMidImage() { return midImage; }
private:
framework::CLImage midImage;
};
......
......@@ -23,7 +23,7 @@ int main() {
// auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
// std::string(g_mobilenet_detect) + "/params", true);
auto isok = paddle_mobile.Load(g_mobilenet, true);
auto isok = paddle_mobile.Load(g_mobilenet, false);
if (isok) {
auto time2 = paddle_mobile::time();
std::cout << "load cost :" << paddle_mobile::time_diff(time1, time1) << "ms"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册