提交 e5f6ba21,作者:yangfei

Implement feed op

上级 50a35735
...@@ -37,10 +37,10 @@ class CLTensor : TensorBase { ...@@ -37,10 +37,10 @@ class CLTensor : TensorBase {
} }
template <typename T> template <typename T>
inline T *mutable_with_data(void *data) { inline T mutable_with_data(void *data) {
int64_t size = numel() * sizeof(float); int64_t size = numel() * sizeof(float);
holder_.reset(new PlaceholderImpl(size, data, typeid(T), context_)); holder_.reset(new PlaceholderImpl(size, data, typeid(T), context_));
return reinterpret_cast<T *>( return reinterpret_cast<T>(
reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(holder_->ptr()))); reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(holder_->ptr())));
} }
......
...@@ -32,16 +32,13 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) { ...@@ -32,16 +32,13 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
const Tensor *input = param.InputX(); const Tensor *input = param.InputX();
const float *input_data = input->data<float>(); const float *input_data = input->data<float>();
int numel = input->numel(); int numel = input->numel();
DLOG << "numel = " << numel;
cl_mem cl_image = output->GetCLImage(); cl_mem cl_image = output->GetCLImage();
int height = output->dims()[2]; int height = output->dims()[2];
int width = output->dims()[3]; int width = output->dims()[3];
DLOG << output->dims();
CLTensor input_cl_tensor(this->cl_helper_.CLContext()); CLTensor input_cl_tensor(this->cl_helper_.CLContext());
input_cl_tensor.Resize(input->dims()); input_cl_tensor.Resize(input->dims());
cl_mem *inputBuffer = cl_mem inputBuffer =
input_cl_tensor.mutable_with_data<cl_mem>((void *)input_data); input_cl_tensor.mutable_with_data<cl_mem>((void *)input_data);
DLOG << "yangfei";
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer); status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
...@@ -53,21 +50,18 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) { ...@@ -53,21 +50,18 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
size_t global_work_size[2] = {height, width}; size_t global_work_size[2] = {height, width};
DLOG << "yangfei";
status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
NULL, global_work_size, NULL, 0, NULL, NULL); NULL, global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
int len = 4 * 224 * 224; int len = 4 * 224 * 224;
half *out = new half[len]; half *out = new half[len];
DLOG << "yangfei";
cl_command_queue commandQueue = this->cl_helper_.CLCommandQueue(); cl_command_queue commandQueue = this->cl_helper_.CLCommandQueue();
size_t origin[3] = {0, 0, 0}; size_t origin[3] = {0, 0, 0};
size_t region[3] = {height, width, 1}; size_t region[3] = {height, width, 1};
clEnqueueReadImage(commandQueue, cl_image, CL_TRUE, origin, region, 0, 0, out, clEnqueueReadImage(commandQueue, cl_image, CL_TRUE, origin, region, 0, 0, out,
0, NULL, NULL); 0, NULL, NULL);
DLOG << "yangfei"; for (int i = 0; i < numel; i++) DLOG << Half2Float(out[i]);
for (int i = 0; i < 100; i++) DLOG << out[i];
} }
template class FeedKernel<GPU_CL, float>; template class FeedKernel<GPU_CL, float>;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册