提交 88c0ed3b 编写于 作者: L liuruilong

fix cl image error

上级 ac1c2581
...@@ -101,7 +101,7 @@ class CLImage { ...@@ -101,7 +101,7 @@ class CLImage {
T *data() const { T *data() const {
if (initialized_) { if (initialized_) {
PADDLE_MOBILE_THROW_EXCEPTION( PADDLE_MOBILE_THROW_EXCEPTION(
" cl image has initialized, tensor data has been deleted "); " cl image has initialized, tensor data has been deleted, can't use tensor data");
} }
return reinterpret_cast<T *>(tensor_data_); return reinterpret_cast<T *>(tensor_data_);
} }
...@@ -118,6 +118,7 @@ class CLImage { ...@@ -118,6 +118,7 @@ class CLImage {
private: private:
void InitCLImage(cl_context context, float *tensor_data, const DDim &dim) { void InitCLImage(cl_context context, float *tensor_data, const DDim &dim) {
DLOG << " tensor dim: " << dim;
cl_image_format cf = {.image_channel_order = CL_RGBA, cl_image_format cf = {.image_channel_order = CL_RGBA,
.image_channel_data_type = CL_HALF_FLOAT}; .image_channel_data_type = CL_HALF_FLOAT};
// NCHW -> [W * (C+3)/4, H * N] // NCHW -> [W * (C+3)/4, H * N]
...@@ -135,29 +136,23 @@ class CLImage { ...@@ -135,29 +136,23 @@ class CLImage {
tensor_data_[i] = 0; tensor_data_[i] = 0;
} }
} }
size_t N, C, H, W;
if (tensor_dims_.size() == 4) {
N = tensor_dims_[0];
if (N < 0) {
N = 1;
}
C = tensor_dims_[1];
H = tensor_dims_[2];
W = tensor_dims_[3];
width_of_one_block_ = W; size_t new_dims[] = {1, 1, 1, 1};
height_of_one_block_ = H;
} else if (tensor_dims_.size() == 1) { for (int j = 0; j < dim.size(); ++j) {
N = 1; new_dims[4 - dim.size() + j] = dim[j];
C = tensor_dims_[0];
H = 1;
W = 1;
width_of_one_block_ = W;
height_of_one_block_ = H;
} }
size_t N, C, H, W;
N = new_dims[0];
C = new_dims[1];
H = new_dims[2];
W = new_dims[3];
width_of_one_block_ = W;
height_of_one_block_ = H;
size_t width = W * ((C + 3) / 4); size_t width = W * ((C + 3) / 4);
size_t height = H * N; size_t height = H * N;
...@@ -196,6 +191,8 @@ class CLImage { ...@@ -196,6 +191,8 @@ class CLImage {
} }
} }
cl_int err; cl_int err;
DLOG << " image width: " << width;
DLOG << " image height: " << height;
cl_image_ = clCreateImage2D( cl_image_ = clCreateImage2D(
context, // cl_context context context, // cl_context context
CL_MEM_READ_WRITE | (imageData ? CL_MEM_COPY_HOST_PTR : 0), // cl_mem_flags flags CL_MEM_READ_WRITE | (imageData ? CL_MEM_COPY_HOST_PTR : 0), // cl_mem_flags flags
......
...@@ -60,6 +60,7 @@ void OperatorBase<Dtype>::Run() { ...@@ -60,6 +60,7 @@ void OperatorBase<Dtype>::Run() {
DLOG << " begin run " << type_; DLOG << " begin run " << type_;
RunImpl(); RunImpl();
DLOG << " end run " << type_; DLOG << " end run " << type_;
#ifdef PADDLE_MOBILE_DEBUG #ifdef PADDLE_MOBILE_DEBUG
DLOG << "-------------" << type_ << "----------------------------"; DLOG << "-------------" << type_ << "----------------------------";
vector<string> input_keys = GetInputKeys(); vector<string> input_keys = GetInputKeys();
......
...@@ -12,4 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,4 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "conv_kernel.inc.cl" //#include "conv_kernel.inc.cl"
__kernel void conv_3x3() {}
\ No newline at end of file
...@@ -42,18 +42,18 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) { ...@@ -42,18 +42,18 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
param->Filter()->HeightOfOneBlock() == 1) { param->Filter()->HeightOfOneBlock() == 1) {
DLOG << " here1 "; DLOG << " here1 ";
this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl"); this->cl_helper_.AddKernel("conv_1x1", "conv_kernel.cl");
} else if (param->Filter()->dims()[1] == 1) { } else if (param->Filter()->dims()[1] == 1) {
DLOG << " here2 "; DLOG << " here2 ";
this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl"); this->cl_helper_.AddKernel("depth_conv_3x3", "conv_kernel.cl");
} else if (param->Filter()->WidthOfOneBlock() == 3 && } else if (param->Filter()->WidthOfOneBlock() == 3 &&
param->Filter()->HeightOfOneBlock() == 3) { param->Filter()->HeightOfOneBlock() == 3) {
DLOG << " here3 "; DLOG << " here3 ";
this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl"); this->cl_helper_.AddKernel("conv_3x3", "conv_kernel.cl");
} else { } else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support "); PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
...@@ -64,64 +64,64 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) { ...@@ -64,64 +64,64 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
template <> template <>
void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) { void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
DLOG << " Compute helper: " << &cl_helper_; // DLOG << " Compute helper: " << &cl_helper_;
DLOG << " begin compute "; // DLOG << " begin compute ";
auto kernel = this->cl_helper_.KernelAt(0); // auto kernel = this->cl_helper_.KernelAt(0);
DLOG << " get work size "; // DLOG << " get work size ";
auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output()); // auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
DLOG << " end work size "; // DLOG << " end work size ";
int c_block = default_work_size[0]; // int c_block = default_work_size[0];
int w = default_work_size[1]; // int w = default_work_size[1];
int nh = default_work_size[2]; // int nh = default_work_size[2];
auto input = param.Input()->GetCLImage(); // auto input = param.Input()->GetCLImage();
//
DLOG << " get Input "; // DLOG << " get Input ";
//
auto filter = param.Filter()->GetCLImage(); // auto filter = param.Filter()->GetCLImage();
//
DLOG << " get Filter "; // DLOG << " get Filter ";
//
auto output = param.Output(); // auto output = param.Output();
//
DLOG << " get Output "; // DLOG << " get Output ";
//
int stride = param.Strides()[0]; // int stride = param.Strides()[0];
int offset = param.Offset(); // int offset = param.Offset();
int input_c = param.Input()->CBlock(); // int input_c = param.Input()->CBlock();
int dilation = param.Dilations()[0]; // int dilation = param.Dilations()[0];
int input_width = param.Input()->WidthOfOneBlock(); // int input_width = param.Input()->WidthOfOneBlock();
int input_height = param.Input()->HeightOfOneBlock(); // int input_height = param.Input()->HeightOfOneBlock();
//
cl_int status; // cl_int status;
//
DLOG << " begin set kernel arg "; // DLOG << " begin set kernel arg ";
//
status = clSetKernelArg(kernel, 0, sizeof(int), &c_block); // status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
status = clSetKernelArg(kernel, 1, sizeof(int), &w); // status = clSetKernelArg(kernel, 1, sizeof(int), &w);
status = clSetKernelArg(kernel, 2, sizeof(int), &nh); // status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input); // status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter); // status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &output); // status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
status = clSetKernelArg(kernel, 6, sizeof(int), &stride); // status = clSetKernelArg(kernel, 6, sizeof(int), &stride);
status = clSetKernelArg(kernel, 7, sizeof(int), &offset); // status = clSetKernelArg(kernel, 7, sizeof(int), &offset);
status = clSetKernelArg(kernel, 8, sizeof(int), &input_c); // status = clSetKernelArg(kernel, 8, sizeof(int), &input_c);
status = clSetKernelArg(kernel, 9, sizeof(int), &dilation); // status = clSetKernelArg(kernel, 9, sizeof(int), &dilation);
status = clSetKernelArg(kernel, 10, sizeof(int), &input_width); // status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
status = clSetKernelArg(kernel, 11, sizeof(int), &input_height); // status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
//
DLOG << " end set kernel arg "; // DLOG << " end set kernel arg ";
//
CL_CHECK_ERRORS(status); // CL_CHECK_ERRORS(status);
//
DLOG << " begin enqueue "; // DLOG << " begin enqueue ";
//
status = // status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, // clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
default_work_size.data(), NULL, 0, NULL, NULL); // default_work_size.data(), NULL, 0, NULL, NULL);
//
DLOG << " end enqueue "; // DLOG << " end enqueue ";
//
CL_CHECK_ERRORS(status); // CL_CHECK_ERRORS(status);
} }
template class ConvKernel<GPU_CL, float>; template class ConvKernel<GPU_CL, float>;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册