提交 6b3a0ebe 编写于 作者: D dolphin8 提交者: GitHub

Merge pull request #1061 from dolphin8/opencl

fix reshape & relu & softmax
...@@ -28,11 +28,13 @@ template <> ...@@ -28,11 +28,13 @@ template <>
// Runs the ReLU OpenCL kernel.
//
// Binds the input and output CL images to kernel arguments 0 and 1, then
// enqueues the kernel over a 2-D range of
// input->ImageWidth() x input->ImageHeight() work-items.
void ReluKernel<GPU_CL, float>::Compute(const ReluParam<GPU_CL> &param) {
  auto kernel = this->cl_helper_.KernelAt(0);
  const auto *input = param.InputX();
  auto *output = param.Out();
  // NOTE(review): this value is not used below; the call is kept in case
  // DefaultWorkSize has side effects -- confirm and remove if it does not.
  auto default_work_size = this->cl_helper_.DefaultWorkSize(*output);

  // clSetKernelArg takes a pointer to the argument value, so the image
  // handles are stored in named locals before their address is taken.
  auto inputImage = input->GetCLImage();
  auto outputImage = output->GetCLImage();
  clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
  clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);

  // The number of dimensions passed to clEnqueueNDRangeKernel must match the
  // number of entries in work_size. The previous code passed 3 with a
  // two-element array, which makes the runtime read past the end of it.
  const cl_uint work_dim = 2;
  const size_t work_size[work_dim] = {input->ImageWidth(),
                                      input->ImageHeight()};
  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, work_dim,
                         NULL, work_size, NULL, 0, NULL, NULL);
}
......
...@@ -19,11 +19,37 @@ namespace operators { ...@@ -19,11 +19,37 @@ namespace operators {
// Init for the GPU_CL/float reshape kernel: registers the "reshape" kernel
// from "reshape.cl" with cl_helper_ and unconditionally returns true.
// `param` is not read here.
template <> template <>
bool ReshapeKernel<GPU_CL, float>::Init(ReshapeParam<GPU_CL> *param) { bool ReshapeKernel<GPU_CL, float>::Init(ReshapeParam<GPU_CL> *param) {
this->cl_helper_.AddKernel("reshape", "reshape.cl");
return true; return true;
} }
// Runs the reshape OpenCL kernel.
//
// Kernel arguments:
//   0      input CL image
//   1      output CL image
//   2..5   input dims [0..3]
//   6..9   output dims [0..3]
// The kernel is enqueued over a 2-D range of
// output->ImageWidth() x output->ImageHeight() work-items.
//
// NOTE(review): indexes dims()[0..3] on both tensors, so this assumes both
// have at least four dimensions -- confirm against callers.
template <>
void ReshapeKernel<GPU_CL, float>::Compute(const ReshapeParam<GPU_CL> &param) {
  auto kernel = this->cl_helper_.KernelAt(0);
  const auto *input = param.InputX();
  auto *output = param.Out();

  // clSetKernelArg takes a pointer to the argument value, so the image
  // handles are stored in named locals before their address is taken.
  auto inputImage = input->GetCLImage();
  auto outputImage = output->GetCLImage();
  clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
  clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);

  const auto &inputDim = input->dims();
  const auto &outputDim = output->dims();

  // Explicit casts: the kernel arguments are passed as sizeof(int), and a
  // brace-initialiser must not narrow implicitly if dims() yields a wider
  // integer type.
  const cl_uint kRank = 4;
  const int dims[kRank] = {
      static_cast<int>(inputDim[0]), static_cast<int>(inputDim[1]),
      static_cast<int>(inputDim[2]), static_cast<int>(inputDim[3])};
  const int odims[kRank] = {
      static_cast<int>(outputDim[0]), static_cast<int>(outputDim[1]),
      static_cast<int>(outputDim[2]), static_cast<int>(outputDim[3])};

  // Arguments 2..5 carry the input dims, 6..9 the output dims.
  const cl_uint kInputDimArgBase = 2;
  const cl_uint kOutputDimArgBase = kInputDimArgBase + kRank;
  for (cl_uint i = 0; i < kRank; ++i) {
    clSetKernelArg(kernel, kInputDimArgBase + i, sizeof(int), &dims[i]);
  }
  for (cl_uint i = 0; i < kRank; ++i) {
    clSetKernelArg(kernel, kOutputDimArgBase + i, sizeof(int), &odims[i]);
  }

  const cl_uint work_dim = 2;
  const size_t work_size[work_dim] = {output->ImageWidth(),
                                      output->ImageHeight()};
  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, work_dim,
                         NULL, work_size, NULL, 0, NULL, NULL);
}
template class ReshapeKernel<GPU_CL, float>; template class ReshapeKernel<GPU_CL, float>;
......
...@@ -29,11 +29,13 @@ template <> ...@@ -29,11 +29,13 @@ template <>
void SoftmaxKernel<GPU_CL, float>::Compute(const SoftmaxParam<GPU_CL> &param) { void SoftmaxKernel<GPU_CL, float>::Compute(const SoftmaxParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0); auto kernel = this->cl_helper_.KernelAt(0);
auto default_work_size = this->cl_helper_.DefaultWorkSize(*(param.Out())); auto default_work_size = this->cl_helper_.DefaultWorkSize(*(param.Out()));
auto & input = param.InputX(); const auto * input = param.InputX();
auto & output = param.Out(); auto * output = param.Out();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &input.getCLImage()); auto inputImage = input->GetCLImage();
clSetKernelArg(kernel, 1, sizeof(cl_mem), &output.getCLImage()); auto outputImage = output->GetCLImage();
const auto & inputDim = input.dims(); clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
const auto & inputDim = input->dims();
int dims[4] = {inputDim[0], inputDim[1], inputDim[2], inputDim[3]}; int dims[4] = {inputDim[0], inputDim[1], inputDim[2], inputDim[3]};
clSetKernelArg(kernel, 2, sizeof(int), dims); clSetKernelArg(kernel, 2, sizeof(int), dims);
clSetKernelArg(kernel, 3, sizeof(int), dims+1); clSetKernelArg(kernel, 3, sizeof(int), dims+1);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册