未验证 提交 c7802c25 编写于 作者: D dolphin8 提交者: GitHub

Merge pull request #1061 from dolphin8/opencl

fix reshape & relu & softmax
......@@ -28,11 +28,13 @@ template <>
// Runs the ReLU OpenCL kernel: binds the input and output CL images as kernel
// arguments 0 and 1, then enqueues a 2-D NDRange sized by the input image's
// width and height.
//
// param: supplies the input tensor (InputX) and the output tensor (Out).
// NOTE(review): the return codes of clSetKernelArg / clEnqueueNDRangeKernel
// are not checked here — confirm whether the project has a preferred
// error-checking helper.
void ReluKernel<GPU_CL, float>::Compute(const ReluParam<GPU_CL> &param) {
  auto kernel = this->cl_helper_.KernelAt(0);
  const auto *input = param.InputX();
  auto *output = param.Out();
  // NOTE(review): default_work_size is computed but not used below; the
  // launch is sized from the input image instead — confirm which is intended.
  auto default_work_size = this->cl_helper_.DefaultWorkSize(*output);

  // clSetKernelArg needs the address of an lvalue holding the cl_mem handle,
  // so the handles are copied into locals first.
  auto inputImage = input->GetCLImage();
  auto outputImage = output->GetCLImage();
  clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
  clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);

  const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()};
  // work_dim must equal the number of entries in work_size (2); passing 3
  // would make the runtime read past the end of the array.
  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL,
                         work_size, NULL, 0, NULL, NULL);
}
......
......@@ -19,11 +19,37 @@ namespace operators {
// Registers the OpenCL kernel used by the reshape operator.
// The `param` argument is not consulted; the function always reports success.
template <>
bool ReshapeKernel<GPU_CL, float>::Init(ReshapeParam<GPU_CL> *param) {
  // Kernel name "reshape", loaded from the source file "reshape.cl".
  auto &helper = this->cl_helper_;
  helper.AddKernel("reshape", "reshape.cl");
  return true;
}
// Runs the reshape OpenCL kernel.
//
// Kernel argument layout:
//   0      input CL image
//   1      output CL image
//   2..5   input dimensions  [0..3]
//   6..9   output dimensions [0..3]
// The NDRange is 2-D, sized by the output image's width and height.
//
// param: supplies the input tensor (InputX) and the output tensor (Out).
// NOTE(review): indexes dimensions 0..3 of both tensors, so this assumes both
// are 4-D — confirm against callers.
template <>
void ReshapeKernel<GPU_CL, float>::Compute(const ReshapeParam<GPU_CL> &param) {
  auto kernel = this->cl_helper_.KernelAt(0);
  const auto *input = param.InputX();
  auto *output = param.Out();

  // clSetKernelArg needs the address of an lvalue holding the cl_mem handle.
  auto inputImage = input->GetCLImage();
  auto outputImage = output->GetCLImage();
  clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
  clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);

  const auto &inputDim = input->dims();
  const auto &outputDim = output->dims();
  // Explicit casts: the kernel receives plain ints, and brace-initialisation
  // must not rely on implicit narrowing should the dimension type be wider.
  const int dims[4] = {static_cast<int>(inputDim[0]),
                       static_cast<int>(inputDim[1]),
                       static_cast<int>(inputDim[2]),
                       static_cast<int>(inputDim[3])};
  const int odims[4] = {static_cast<int>(outputDim[0]),
                        static_cast<int>(outputDim[1]),
                        static_cast<int>(outputDim[2]),
                        static_cast<int>(outputDim[3])};
  for (unsigned int i = 0; i < 4; ++i) {
    clSetKernelArg(kernel, 2 + i, sizeof(int), &dims[i]);
    clSetKernelArg(kernel, 6 + i, sizeof(int), &odims[i]);
  }

  const size_t work_size[2] = {output->ImageWidth(), output->ImageHeight()};
  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL,
                         work_size, NULL, 0, NULL, NULL);
}
// Explicit instantiation of the reshape kernel for the GPU_CL / float combination.
template class ReshapeKernel<GPU_CL, float>;
......
......@@ -29,11 +29,13 @@ template <>
void SoftmaxKernel<GPU_CL, float>::Compute(const SoftmaxParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
auto default_work_size = this->cl_helper_.DefaultWorkSize(*(param.Out()));
auto & input = param.InputX();
auto & output = param.Out();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &input.getCLImage());
clSetKernelArg(kernel, 1, sizeof(cl_mem), &output.getCLImage());
const auto & inputDim = input.dims();
const auto * input = param.InputX();
auto * output = param.Out();
auto inputImage = input->GetCLImage();
auto outputImage = output->GetCLImage();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
const auto & inputDim = input->dims();
int dims[4] = {inputDim[0], inputDim[1], inputDim[2], inputDim[3]};
clSetKernelArg(kernel, 2, sizeof(int), dims);
clSetKernelArg(kernel, 3, sizeof(int), dims+1);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册