prior_box_kernel.cpp 7.7 KB
Newer Older
Y
yangfei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef PRIORBOX_OP

#include "operators/kernel/prior_box_kernel.h"
18
#include "framework/cl/cl_tensor.h"
Y
yangfei 已提交
19
namespace paddle_mobile {
Y
yangfei 已提交
20
namespace operators {
Y
yangfei 已提交
21

Y
yangfei 已提交
22 23
/**
 * Prepares the OpenCL prior-box kernel.
 *
 * Registers the "prior_box" kernel from "prior_box_kernel.cl" with this
 * kernel's CL helper so that Compute() can fetch it with KernelAt(0).
 *
 * @param param Operator parameters; not read during initialization.
 * @return Always true.
 */
template <>
bool PriorBoxKernel<GPU_CL, float>::Init(PriorBoxParam<GPU_CL> *param) {
  this->cl_helper_.AddKernel("prior_box", "prior_box_kernel.cl");
  return true;
}
Y
yangfei 已提交
27

Y
yangfei 已提交
28 29
/**
 * Generates prior boxes with the OpenCL "prior_box" kernel.
 *
 * Host-side steps:
 *   1. Expand the configured aspect ratios (optionally flipped) with
 *      ExpandAspectRatios().
 *   2. Build one half-width / half-height pair per prior from the min sizes,
 *      max sizes and aspect ratios.
 *   3. Upload those two tables and the variances into CL buffers.
 *   4. Bind the 16 kernel arguments and enqueue the kernel over a 2-D range
 *      of (c_block, nh), taken from the default work size of the output
 *      boxes image.
 *
 * @param param Operator parameters. Read here: Input, InputImage, MinSizes,
 *              MaxSizes, Variances, AspectRatios, Flip, Clip, StepW, StepH,
 *              Offset, MinMaxAspectRatiosOrder, OutputBoxes, OutputVariances.
 */
template <>
void PriorBoxKernel<GPU_CL, float>::Compute(
    const PriorBoxParam<GPU_CL> &param) {
  const auto &input_dims = param.Input()->dims();
  const auto &input_image_dims = param.InputImage()->dims();

  const auto &min_sizes = param.MinSizes();
  const auto &max_sizes = param.MaxSizes();
  const auto &variances = param.Variances();
  const auto &input_aspect_ratio = param.AspectRatios();
  const bool flip = param.Flip();
  const bool clip = param.Clip();
  // The kernel takes the clip flag as an int argument.
  const int isclip = clip ? 1 : 0;

  const float step_w = param.StepW();
  const float step_h = param.StepH();
  const float offset = param.Offset();
  const int C = static_cast<int>(param.OutputBoxes()->dims()[1]);

  auto output_boxes = param.OutputBoxes()->GetCLImage();
  auto output_variances = param.OutputVariances()->GetCLImage();

  std::vector<float> aspect_ratios;
  ExpandAspectRatios(input_aspect_ratio, flip, &aspect_ratios);

  // These two are bound below with sizeof(int), so hold them as real ints
  // instead of whatever type indexing the dims yields.
  // NOTE(review): the dims element type is presumably 64-bit - confirm.
  const int img_width = static_cast<int>(input_image_dims[3]);
  const int img_height = static_cast<int>(input_image_dims[2]);

  const auto feature_width = input_dims[3];
  const auto feature_height = input_dims[2];

  // When either step is unset (0), derive both from the image / feature-map
  // ratio, e.g. 300 / 19.
  float step_width = step_w;
  float step_height = step_h;
  if (step_w == 0 || step_h == 0) {
    step_width = static_cast<float>(img_width) / feature_width;
    step_height = static_cast<float>(img_height) / feature_height;
  }

  const int num_priors = static_cast<int>(
      aspect_ratios.size() * min_sizes.size() + max_sizes.size());

  // Host-side tables of half box extents, one entry per prior. They are
  // zero-initialized so that any slot left unfilled is well defined.
  std::vector<float> box_width(static_cast<size_t>(num_priors), 0.0f);
  std::vector<float> box_height(static_cast<size_t>(num_priors), 0.0f);

  int idx = 0;
  // Bounds-checked append so inconsistent attributes can never write past
  // the end of the tables.
  const auto append_box = [&](float half_width, float half_height) {
    if (idx < num_priors) {
      box_width[idx] = half_width;
      box_height[idx] = half_height;
      ++idx;
    }
  };

  const bool min_max_first = param.MinMaxAspectRatiosOrder();
  for (size_t s = 0; s < min_sizes.size(); ++s) {
    const auto min_size = min_sizes[s];
    // Only pair a max size with this min size when one actually exists at
    // the same index (avoids reading past the end of max_sizes).
    const bool has_max_size = s < max_sizes.size();

    if (min_max_first) {
      // Order: square min box, square sqrt(min * max) box, then every
      // aspect ratio except ~1, which the first box already covers.
      append_box(min_size / 2., min_size / 2.);
      if (has_max_size) {
        const auto max_size = max_sizes[s];
        append_box(sqrt(min_size * max_size) / 2.,
                   sqrt(min_size * max_size) / 2.);
      }
      for (float ar : aspect_ratios) {
        if (fabs(ar - 1.) < 1e-6) {
          continue;
        }
        append_box(min_size * sqrt(ar) / 2., min_size / sqrt(ar) / 2.);
      }
    } else {
      // Order: every aspect ratio first, then the sqrt(min * max) box.
      for (float ar : aspect_ratios) {
        append_box(min_size * sqrt(ar) / 2., min_size / sqrt(ar) / 2.);
      }
      if (has_max_size) {
        const auto max_size = max_sizes[s];
        append_box(sqrt(min_size * max_size) / 2.,
                   sqrt(min_size * max_size) / 2.);
      }
    }
  }

  // The variances buffer always holds exactly four floats. Copy at most four
  // values and leave any missing ones at zero.
  const size_t kNumVariances = 4;
  std::vector<float> variance_values(kNumVariances, 0.0f);
  for (size_t i = 0; i < variances.size() && i < kNumVariances; ++i) {
    variance_values[i] = variances[i];
  }

  auto kernel = this->cl_helper_.KernelAt(0);
  auto default_work_size =
      this->cl_helper_.DefaultWorkSize(*param.OutputBoxes());
  const int c_block = static_cast<int>(default_work_size[0]);
  const int w = static_cast<int>(default_work_size[1]);
  const int nh = static_cast<int>(default_work_size[2]);

  // Upload the per-prior half extents as 1-D CL tensors of num_priors floats.
  std::vector<int64_t> box_shape({num_priors});
  framework::DDim ddim = framework::make_ddim(box_shape);

  framework::CLTensor box_width_cl_tensor(this->cl_helper_.CLContext(),
                                          this->cl_helper_.CLCommandQueue());
  box_width_cl_tensor.Resize(ddim);
  cl_mem box_width_Buffer =
      box_width_cl_tensor.mutable_with_data<float>(box_width.data());

  framework::CLTensor box_height_cl_tensor(this->cl_helper_.CLContext(),
                                           this->cl_helper_.CLCommandQueue());
  box_height_cl_tensor.Resize(ddim);
  cl_mem box_height_Buffer =
      box_height_cl_tensor.mutable_with_data<float>(box_height.data());

  // Upload the four variances.
  framework::CLTensor variances_cl_tensor(this->cl_helper_.CLContext(),
                                          this->cl_helper_.CLCommandQueue());
  std::vector<int64_t> variances_shape({static_cast<int64_t>(kNumVariances)});
  framework::DDim vddim = framework::make_ddim(variances_shape);
  variances_cl_tensor.Resize(vddim);
  cl_mem variances_Buffer =
      variances_cl_tensor.mutable_with_data<float>(variance_values.data());

  // Bind the kernel arguments; the index order must match the parameter
  // list of the "prior_box" kernel in prior_box_kernel.cl.
  cl_int status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 1, sizeof(int), &w);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &box_width_Buffer);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &box_height_Buffer);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &variances_Buffer);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 6, sizeof(cl_mem), &output_boxes);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 7, sizeof(cl_mem), &output_variances);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 8, sizeof(float), &step_width);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 9, sizeof(float), &step_height);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 10, sizeof(float), &offset);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 11, sizeof(int), &img_width);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 12, sizeof(int), &img_height);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 13, sizeof(int), &num_priors);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 14, sizeof(int), &C);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 15, sizeof(int), &isclip);
  CL_CHECK_ERRORS(status);

  // Launch over (c_block, nh). The explicit casts avoid a narrowing
  // int -> size_t conversion inside the braced initializer.
  const size_t global_work_size[2] = {static_cast<size_t>(c_block),
                                      static_cast<size_t>(nh)};
  status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
                                  nullptr, global_work_size, nullptr, 0,
                                  nullptr, nullptr);
  CL_CHECK_ERRORS(status);
}
Y
yangfei 已提交
211
// Explicit instantiation of the GPU_CL / float kernel class.
template class PriorBoxKernel<GPU_CL, float>;
Y
yangfei 已提交
212

Y
yangfei 已提交
213
}  // namespace operators
Y
yangfei 已提交
214 215 216
}  // namespace paddle_mobile

#endif