elementwise_add_kernel.cpp 3.9 KB
Newer Older
Y
yangfei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef ELEMENTWISEADD_OP

#include "operators/kernel/elementwise_add_kernel.h"

namespace paddle_mobile {
20
namespace operators {
Y
yangfei 已提交
21

22 23 24
/**
 * Prepares the OpenCL elementwise-add kernel for the given parameters.
 *
 * Initializes the bias image (InputY) on the device if it has not been
 * initialized yet, then registers the OpenCL kernel that matches the rank of
 * the bias:
 *   - rank 4: "elementwise_add" from elementwise_add_kernel.cl
 *   - rank 1: "channel_add" from channel_add_kernel.cl
 *
 * @param param operator parameters; InputY() is used as the bias.
 * @return true when a kernel was registered, false when the bias rank is
 *         unsupported (no kernel is registered in that case, so Compute must
 *         not be run).
 */
template <>
bool ElementwiseAddKernel<GPU_CL, float>::Init(
    ElementwiseAddParam<GPU_CL> *param) {
  DLOG << "-----init add-----";
  // NOTE(review): the C-style cast is kept from the original because the
  // declared return type of InputY() is not visible here; it presumably drops
  // a const qualifier so the image can be initialized — confirm and replace
  // with a named cast.
  CLImage *bias = (CLImage *)(param->InputY());
  if (!bias->isInit()) {
    bias->InitCLImage(this->cl_helper_.CLContext(),
                      this->cl_helper_.CLCommandQueue());
  }

  DLOG << " bias: " << *bias;
  const auto bias_rank = bias->dims().size();
  if (bias_rank == 4) {
    this->cl_helper_.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
  } else if (bias_rank == 1) {
    this->cl_helper_.AddKernel("channel_add", "channel_add_kernel.cl");
  } else {
    // Report failure instead of returning true with no kernel registered:
    // Compute fetches the kernel at index 0 and would otherwise operate on a
    // kernel that was never added.
    DLOG << "error:bias dims is error";
    return false;
  }

  return true;
}
Y
yangfei 已提交
43

44 45
/**
 * Enqueues the elementwise-add OpenCL kernel for one invocation.
 *
 * Binds the input (InputX), bias (InputY) and output (Out) images as kernel
 * arguments 0..2. When the bias has rank 1, argument 3 is additionally set to
 * input->dims()[3]. The kernel is then enqueued over a 2-D range of
 * ImageWidth() x ImageHeight() of the input image.
 *
 * If the bias rank is neither 4 nor 1, an error is logged and nothing is
 * enqueued.
 *
 * @param param operator parameters providing InputX(), InputY() and Out().
 */
template <>
void ElementwiseAddKernel<GPU_CL, float>::Compute(
    const ElementwiseAddParam<GPU_CL> &param) {
  auto input = param.InputX();
  auto bias = param.InputY();
  auto output = param.Out();

  // Validate the bias rank before touching the kernel list: for any other
  // rank Init registers no kernel, so there is nothing valid at index 0.
  const auto bias_rank = bias->dims().size();
  if (bias_rank != 4 && bias_rank != 1) {
    DLOG << "error:bias dims is error";
    return;
  }

  // Presumably the single kernel registered by Init — confirm.
  auto kernel = this->cl_helper_.KernelAt(0);

  cl_mem input_image = input->GetCLImage();
  cl_mem bias_image = bias->GetCLImage();
  cl_mem output_image = output->GetCLImage();

  // Arguments 0..2 are common to both the rank-4 and the rank-1 kernels.
  cl_int status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_image);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bias_image);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &output_image);
  CL_CHECK_ERRORS(status);

  if (bias_rank == 1) {
    // The rank-1 kernel takes one extra integer argument, read from the
    // fourth dimension of the input (presumably the tensor width — confirm).
    cl_int tensor_w = static_cast<cl_int>(input->dims()[3]);
    status = clSetKernelArg(kernel, 3, sizeof(cl_int), &tensor_w);
    CL_CHECK_ERRORS(status);
  }

  // Explicit casts: brace-initializing size_t from a signed int is a
  // narrowing conversion and is rejected by conforming compilers.
  const size_t global_work_size[2] = {
      static_cast<size_t>(input->ImageWidth()),
      static_cast<size_t>(input->ImageHeight())};

  status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
                                  nullptr, global_work_size, nullptr, 0,
                                  nullptr, nullptr);
  CL_CHECK_ERRORS(status);
}
Y
yangfei 已提交
102

103
// Explicit instantiation of the kernel class template for the GPU_CL device
// type with float data.
template class ElementwiseAddKernel<GPU_CL, float>;
Y
yangfei 已提交
104

105
}  // namespace operators
Y
yangfei 已提交
106 107 108
}  // namespace paddle_mobile

#endif