提交 7fb2e727 编写于 作者: Y yangfei

imp some function

上级 af0be6e0
......@@ -36,13 +36,13 @@ class CLTensor : TensorBase {
return *this;
}
// template<typename T>
// inline T *mutable_with_data(void *data) {
// int64_t size = numel() * sizeof(float);
// holder_.reset(new PlaceholderImpl(size, data, typeid(T)));
// return reinterpret_cast<T *>(reinterpret_cast<void *>(
// reinterpret_cast<uintptr_t>(holder_->ptr())));
// }
// Replaces this tensor's holder with a new PlaceholderImpl constructed from
// the caller-supplied `data` pointer, typeid(T) and context_, then returns the
// new holder's pointer reinterpreted as T*.
//
// NOTE(review): the byte count is always numel() * sizeof(float), independent
// of T -- confirm this is intended for callers that instantiate T with a
// non-float type.
template <typename T>
inline T *mutable_with_data(void *data) {
  int64_t byte_count = numel() * sizeof(float);
  holder_.reset(new PlaceholderImpl(byte_count, data, typeid(T), context_));
  // Round-trip through uintptr_t and void* exactly as before, in named steps.
  auto address = reinterpret_cast<uintptr_t>(holder_->ptr());
  void *untyped = reinterpret_cast<void *>(address);
  return reinterpret_cast<T *>(untyped);
}
inline void *mutable_data(std::type_index type) {
if (holder_ != nullptr) {
......@@ -51,7 +51,7 @@ class CLTensor : TensorBase {
PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor's numel must >=0.")
int64_t size = numel() * SizeOfType(type);
if (holder_ == nullptr || holder_->size() < size + offset_) {
holder_.reset(new PlaceholderImpl(size, type));
holder_.reset(new PlaceholderImpl(size, type, context_));
offset_ = 0;
}
return reinterpret_cast<void *>(
......
// feed: each work-item reads three values from the float buffer `in` at
// offsets 0, h*w and 2*h*w (plus i*w + j), packs them into the x/y/z lanes of
// a 4-component pixel with the w lane set to zero, and writes that pixel to
// `outputImage` at (x = j, y = i).
//
// NOTE(review): this span looks like a rendered diff in which the removed and
// the added lines are shown together: there are two signatures and two bodies
// (a float4 / write_imagef variant and a half4 / write_imageh variant).
// Confirm which variant is live before editing the logic.
__kernel void feed(__global float* in, __write_only image2d_t outputImage,int h,int w)
// Enables the half-precision extension used by the half4 / write_imageh lines.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void feed(__global float *in, __write_only image2d_t outputImage,int h,int w)
{
// float4 variant: j comes from global dimension 0, i from global dimension 1.
int j = get_global_id(0);
int i = get_global_id(1);
float4 pixel;
pixel.x = in[(i * w + j)];
pixel.y = in[h * w + (i * w + j)];
pixel.z = in[2 * h * w + (i * w + j)];
pixel.w = 0;
int2 coords;
coords.x = j;
coords.y = i;
// half4 variant: same indexing, each float converted with convert_half.
int j = get_global_id(0);
int i = get_global_id(1);
half4 pixel;
pixel.x = convert_half(in[(i * w + j)]);
pixel.y = convert_half(in[h * w + (i * w + j)]);
pixel.z = convert_half(in[2 * h * w + (i * w + j)]);
pixel.w = 0.0;
int2 coords;
coords.x = j;
coords.y = i;
// Float write paired with the float4 pixel; half write paired with half4.
write_imagef(outputImage,coords,pixel);
write_imageh(outputImage,coords,pixel);
}
......@@ -13,8 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/feed_kernel.h"
#include "common/log.h"
#include "framework/cl/cl_tensor.h"
namespace paddle_mobile {
namespace operators {
......@@ -29,39 +28,46 @@ template <>
// GPU_CL/float specialization of FeedKernel::Compute.
// Takes kernel 0 from cl_helper_, binds the input buffer, the output image
// and the two output dimensions as kernel arguments, enqueues a 2-D NDRange,
// then reads the output image back into a host array and logs its first 100
// values.
//
// NOTE(review): this span looks like a rendered diff in which removed and
// added lines are shown together (input_data is declared twice, and several
// clSetKernelArg calls on index 0 are immediately followed by calls on
// indices 0..3). Confirm which lines are live before changing the logic.
void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
cl_int status;
DLOG << " feed 0";
auto output = param.Out();
DLOG << " feed 1";
const Tensor *input = param.InputX();
DLOG << " feed 2";
const float *input_data = nullptr;
DLOG << " feed 3";
input_data = input->data<float>();
DLOG << " feed 4";
const float *input_data = input->data<float>();
int numel = input->numel();
DLOG << "numel = " << numel;
cl_mem cl_image = output->GetCLImage();
DLOG << " feed 5";
// Output extent is taken from dims()[2] (height) and dims()[3] (width).
int height = output->dims()[2];
int width = output->dims()[3];
DLOG << output->dims();
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_data);
CL_CHECK_ERRORS(status);
// Wrap the host float data in a temporary CLTensor sized like the input.
CLTensor input_cl_tensor(this->cl_helper_.CLContext());
input_cl_tensor.Resize(input->dims());
cl_mem *inputBuffer =
input_cl_tensor.mutable_with_data<cl_mem>((void *)input_data);
DLOG << "yangfei";
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cl_image);
// NOTE(review): the address of the cl_mem* variable is passed here, not the
// pointer's value -- presumably holder_->ptr() already is the cl_mem handle;
// confirm against PlaceholderImpl.
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &width);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&cl_image);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &height);
// NOTE(review): the feed kernel shown in this document declares its last two
// parameters as (h, w); here index 2 receives width and index 3 receives
// height -- confirm the intended order.
status = clSetKernelArg(kernel, 2, sizeof(cl_int), (void *)&width);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&height);
CL_CHECK_ERRORS(status);
// NOTE(review): dimension 0 is sized by height, while the feed kernel shown in
// this document uses get_global_id(0) as the x coordinate -- confirm.
size_t global_work_size[2] = {height, width};
DLOG << "yangfei";
status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
NULL, global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
// Debug read-back of the output image.
// NOTE(review): len is a fixed 4 * 224 * 224 rather than derived from
// height/width, and `out` is allocated with new[] with no delete[] visible in
// this function.
int len = 4 * 224 * 224;
half *out = new half[len];
DLOG << "yangfei";
cl_command_queue commandQueue = this->cl_helper_.CLCommandQueue();
size_t origin[3] = {0, 0, 0};
// NOTE(review): region is {height, width, 1}; the OpenCL API documents region
// as (width, height, depth) -- confirm the ordering.
size_t region[3] = {height, width, 1};
// The read is issued with CL_TRUE; its return value is not stored in `status`
// or passed to CL_CHECK_ERRORS.
clEnqueueReadImage(commandQueue, cl_image, CL_TRUE, origin, region, 0, 0, out,
0, NULL, NULL);
DLOG << "yangfei";
for (int i = 0; i < 100; i++) DLOG << out[i];
}
// Explicit instantiation of FeedKernel for the GPU_CL device with float data.
template class FeedKernel<GPU_CL, float>;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册