提交 15437eda 编写于 作者: J Jiaying Zhao 提交者: GitHub

Merge pull request #1126 from smilejames/opencl

update fetch_kernel with CLTensor
@@ -13,51 +13,52 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/fetch_kernel.h"
#include "framework/cl/cl_tensor.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
template <> template <>
bool FetchKernel<GPU_CL, float>::Init(FetchParam<GPU_CL> *param) { bool FetchKernel<GPU_CL, float>::Init(FetchParam<GPU_CL> *param) {
// this->cl_helper_.AddKernel("fetch", "fetch_kernel.cl"); this->cl_helper_.AddKernel("fetch", "fetch_kernel.cl");
return true; return true;
} }
template <> template <>
void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) { void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
// auto kernel = this->cl_helper_.KernelAt(0); auto kernel = this->cl_helper_.KernelAt(0);
// auto default_work_size = auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.InputX());
// this->cl_helper_.DefaultWorkSize(*param.InputX());
// auto input = param.InputX()->GetCLImage();
// auto input = param.InputX()->GetCLImage(); auto *out = param.Out();
// auto *out = param.Out();
// const auto &dims = param.InputX()->dims();
// const auto &dims = param.InputX()->dims(); const int N = dims[0];
// const int N = dims[0]; const int C = dims[1];
// const int C = dims[1]; const int in_height = dims[2];
// const int in_height = dims[2]; const int in_width = dims[3];
// const int in_width = dims[3];
// int size_ch = in_height * in_width;
// int size_ch = in_height * in_width; int size_block = size_ch * 4;
// int size_block = size_ch * 4; int size_batch = size_ch * C;
// int size_batch = size_ch * C;
// CLTensor out_cl_tensor(this->cl_helper_.CLContext(),
// // need create outputBuffer this->cl_helper_.CLCommandQueue());
// cl_image_format imageFormat; out_cl_tensor.Resize(out->dims());
// imageFormat.image_channel_order = CL_RGBA; cl_mem outBuffer = out_cl_tensor.mutable_data<float>();
// imageFormat.image_channel_data_type = CL_FLOAT;
// cl_mem outputBuffer; clSetKernelArg(kernel, 0, sizeof(int), &in_height);
// clSetKernelArg(kernel, 1, sizeof(int), &in_width);
// clSetKernelArg(kernel, 0, sizeof(int), &in_height); clSetKernelArg(kernel, 2, sizeof(int), &size_ch);
// clSetKernelArg(kernel, 1, sizeof(int), &in_width); clSetKernelArg(kernel, 3, sizeof(int), &size_block);
// clSetKernelArg(kernel, 2, sizeof(int), &size_ch); clSetKernelArg(kernel, 4, sizeof(int), &size_batch);
// clSetKernelArg(kernel, 3, sizeof(int), &size_block); clSetKernelArg(kernel, 5, sizeof(cl_mem), &input);
// clSetKernelArg(kernel, 4, sizeof(int), &size_batch); clSetKernelArg(kernel, 6, sizeof(cl_mem), &outBuffer);
// clSetKernelArg(kernel, 5, sizeof(cl_mem), &input);
// clSetKernelArg(kernel, 6, sizeof(cl_mem), &outputBuffer); clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
// default_work_size.data(), NULL, 0, NULL, NULL);
// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
// default_work_size.data(), NULL, 0, NULL, NULL); memcpy(out->data(), out_cl_tensor.Data(), out->memory_size());
} }
template class FetchKernel<GPU_CL, float>; template class FetchKernel<GPU_CL, float>;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册