/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
S
Siddharth Goyal 已提交
13
#include <cmath>
#include <cstdint>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
G
gaoyuan 已提交
17 18 19 20 21 22

namespace paddle {
namespace operators {

/**
 * Selects which box transformation BoxCoderKernel::Compute performs.
 */
enum class BoxCodeType {
  // Dispatches to BoxCoderKernel::EncodeCenterSize.
  kEncodeCenterSize = 0,
  // Dispatches to BoxCoderKernel::DecodeCenterSize.
  kDecodeCenterSize = 1
};

G
guomingz 已提交
23
/**
 * Converts a box code type name into the matching BoxCodeType value.
 *
 * @param type  Either "encode_center_size" or "decode_center_size".
 * @return      The enumerator that corresponds to `type`.
 *
 * Any other string is reported through PADDLE_THROW.
 */
inline BoxCodeType GetBoxCodeType(const std::string &type) {
  if (type == "encode_center_size") {
    return BoxCodeType::kEncodeCenterSize;
  }
  if (type == "decode_center_size") {
    return BoxCodeType::kDecodeCenterSize;
  }
  PADDLE_THROW("Not support type %s.", type);
}

32
template <typename DeviceContext, typename T>
G
gaoyuan 已提交
33 34
class BoxCoderKernel : public framework::OpKernel<T> {
 public:
G
guomingz 已提交
35 36 37
  void EncodeCenterSize(const framework::Tensor *target_box,
                        const framework::Tensor *prior_box,
                        const framework::Tensor *prior_box_var,
38
                        const bool normalized,
G
guomingz 已提交
39
                        const std::vector<float> variance, T *output) const {
40 41 42
    int64_t row = target_box->dims()[0];
    int64_t col = prior_box->dims()[0];
    int64_t len = prior_box->dims()[1];
G
gaoyuan 已提交
43

L
luotao1 已提交
44 45 46
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for collapse(2)
#endif
G
gaoyuan 已提交
47 48
    for (int64_t i = 0; i < row; ++i) {
      for (int64_t j = 0; j < col; ++j) {
G
guomingz 已提交
49 50 51
        auto *target_box_data = target_box->data<T>();
        auto *prior_box_data = prior_box->data<T>();
        size_t offset = i * col * len + j * len;
52 53 54 55 56
        T prior_box_width = prior_box_data[j * len + 2] -
                            prior_box_data[j * len] + (normalized == false);
        T prior_box_height = prior_box_data[j * len + 3] -
                             prior_box_data[j * len + 1] +
                             (normalized == false);
J
jerrywgz 已提交
57
        T prior_box_center_x = prior_box_data[j * len] + prior_box_width / 2;
G
gaoyuan 已提交
58
        T prior_box_center_y =
J
jerrywgz 已提交
59
            prior_box_data[j * len + 1] + prior_box_height / 2;
G
gaoyuan 已提交
60 61

        T target_box_center_x =
G
gaoyuan 已提交
62
            (target_box_data[i * len + 2] + target_box_data[i * len]) / 2;
G
gaoyuan 已提交
63
        T target_box_center_y =
G
gaoyuan 已提交
64
            (target_box_data[i * len + 3] + target_box_data[i * len + 1]) / 2;
65 66 67 68 69
        T target_box_width = target_box_data[i * len + 2] -
                             target_box_data[i * len] + (normalized == false);
        T target_box_height = target_box_data[i * len + 3] -
                              target_box_data[i * len + 1] +
                              (normalized == false);
G
gaoyuan 已提交
70

71 72 73 74
        output[offset] =
            (target_box_center_x - prior_box_center_x) / prior_box_width;
        output[offset + 1] =
            (target_box_center_y - prior_box_center_y) / prior_box_height;
G
gaoyuan 已提交
75
        output[offset + 2] =
76
            std::log(std::fabs(target_box_width / prior_box_width));
G
gaoyuan 已提交
77
        output[offset + 3] =
78
            std::log(std::fabs(target_box_height / prior_box_height));
G
guomingz 已提交
79 80 81 82 83 84 85 86 87 88
      }
    }

    if (prior_box_var) {
      const T *prior_box_var_data = prior_box_var->data<T>();
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for collapse(3)
#endif
      for (int64_t i = 0; i < row; ++i) {
        for (int64_t j = 0; j < col; ++j) {
89
          for (int k = 0; k < 4; ++k) {
G
guomingz 已提交
90 91 92 93 94 95 96 97 98 99 100 101 102 103
            size_t offset = i * col * len + j * len;
            int prior_var_offset = j * len;
            output[offset + k] /= prior_box_var_data[prior_var_offset + k];
          }
        }
      }
    } else if (!(variance.empty())) {
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for collapse(3)
#endif
      for (int64_t i = 0; i < row; ++i) {
        for (int64_t j = 0; j < col; ++j) {
          for (int k = 0; k < 4; ++k) {
            size_t offset = i * col * len + j * len;
104 105
            output[offset + k] /= static_cast<T>(variance[k]);
          }
106
        }
G
gaoyuan 已提交
107 108 109
      }
    }
  }
G
guomingz 已提交
110

111
  template <int axis, int var_size>
G
guomingz 已提交
112 113 114
  void DecodeCenterSize(const framework::Tensor *target_box,
                        const framework::Tensor *prior_box,
                        const framework::Tensor *prior_box_var,
115
                        const bool normalized, std::vector<float> variance,
G
guomingz 已提交
116
                        T *output) const {
117
    int64_t row = target_box->dims()[0];
J
jerrywgz 已提交
118 119
    int64_t col = target_box->dims()[1];
    int64_t len = target_box->dims()[2];
G
gaoyuan 已提交
120

L
luotao1 已提交
121 122 123
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for collapse(2)
#endif
G
gaoyuan 已提交
124 125
    for (int64_t i = 0; i < row; ++i) {
      for (int64_t j = 0; j < col; ++j) {
G
guomingz 已提交
126 127 128 129 130
        auto *target_box_data = target_box->data<T>();
        auto *prior_box_data = prior_box->data<T>();

        T var_data[4] = {1., 1., 1., 1.};
        T *var_ptr = var_data;
Y
Yuan Gao 已提交
131
        size_t offset = i * col * len + j * len;
G
guomingz 已提交
132 133
        int prior_box_offset = axis == 0 ? j * len : i * len;

J
jerrywgz 已提交
134 135 136 137 138
        T prior_box_width = prior_box_data[prior_box_offset + 2] -
                            prior_box_data[prior_box_offset] +
                            (normalized == false);
        T prior_box_height = prior_box_data[prior_box_offset + 3] -
                             prior_box_data[prior_box_offset + 1] +
139
                             (normalized == false);
G
gaoyuan 已提交
140
        T prior_box_center_x =
J
jerrywgz 已提交
141
            prior_box_data[prior_box_offset] + prior_box_width / 2;
G
gaoyuan 已提交
142
        T prior_box_center_y =
J
jerrywgz 已提交
143
            prior_box_data[prior_box_offset + 1] + prior_box_height / 2;
G
gaoyuan 已提交
144

145 146
        T target_box_center_x = 0, target_box_center_y = 0;
        T target_box_width = 0, target_box_height = 0;
147 148
        int prior_var_offset = axis == 0 ? j * len : i * len;
        if (var_size == 2) {
G
guomingz 已提交
149
          std::memcpy(var_ptr, prior_box_var->data<T>() + prior_var_offset,
150 151
                      4 * sizeof(T));
        } else if (var_size == 1) {
G
guomingz 已提交
152
          var_ptr = reinterpret_cast<T *>(variance.data());
153
        }
154 155 156 157 158
        T box_var_x = *var_ptr;
        T box_var_y = *(var_ptr + 1);
        T box_var_w = *(var_ptr + 2);
        T box_var_h = *(var_ptr + 3);

J
jerrywgz 已提交
159 160 161 162 163 164 165 166 167 168
        target_box_center_x =
            box_var_x * target_box_data[offset] * prior_box_width +
            prior_box_center_x;
        target_box_center_y =
            box_var_y * target_box_data[offset + 1] * prior_box_height +
            prior_box_center_y;
        target_box_width =
            std::exp(box_var_w * target_box_data[offset + 2]) * prior_box_width;
        target_box_height = std::exp(box_var_h * target_box_data[offset + 3]) *
                            prior_box_height;
G
gaoyuan 已提交
169 170 171

        output[offset] = target_box_center_x - target_box_width / 2;
        output[offset + 1] = target_box_center_y - target_box_height / 2;
172 173 174 175
        output[offset + 2] =
            target_box_center_x + target_box_width / 2 - (normalized == false);
        output[offset + 3] =
            target_box_center_y + target_box_height / 2 - (normalized == false);
G
gaoyuan 已提交
176 177 178 179
      }
    }
  }

G
guomingz 已提交
180 181 182 183 184
  void Compute(const framework::ExecutionContext &context) const override {
    auto *prior_box = context.Input<framework::Tensor>("PriorBox");
    auto *prior_box_var = context.Input<framework::Tensor>("PriorBoxVar");
    auto *target_box = context.Input<framework::LoDTensor>("TargetBox");
    auto *output_box = context.Output<framework::Tensor>("OutputBox");
185
    std::vector<float> variance = context.Attr<std::vector<float>>("variance");
J
jerrywgz 已提交
186
    const int axis = context.Attr<int>("axis");
G
gaoyuan 已提交
187 188 189 190
    if (target_box->lod().size()) {
      PADDLE_ENFORCE_EQ(target_box->lod().size(), 1UL,
                        "Only support 1 level of LoD.");
    }
191 192 193 194 195 196 197 198 199
    if (prior_box_var) {
      PADDLE_ENFORCE(variance.empty(),
                     "Input 'PriorBoxVar' and attribute 'variance' should not"
                     "be used at the same time.");
    }
    if (!(variance.empty())) {
      PADDLE_ENFORCE(static_cast<int>(variance.size()) == 4,
                     "Size of attribute 'variance' should be 4");
    }
J
jerrywgz 已提交
200 201 202
    auto code_type = GetBoxCodeType(context.Attr<std::string>("code_type"));
    bool normalized = context.Attr<bool>("box_normalized");

G
gaoyuan 已提交
203 204
    auto row = target_box->dims()[0];
    auto col = prior_box->dims()[0];
J
jerrywgz 已提交
205 206 207
    if (code_type == BoxCodeType::kDecodeCenterSize) {
      col = target_box->dims()[1];
    }
G
gaoyuan 已提交
208
    auto len = prior_box->dims()[1];
G
gaoyuan 已提交
209

G
gaoyuan 已提交
210
    output_box->mutable_data<T>({row, col, len}, context.GetPlace());
G
gaoyuan 已提交
211

G
guomingz 已提交
212
    T *output = output_box->data<T>();
G
gaoyuan 已提交
213
    if (code_type == BoxCodeType::kEncodeCenterSize) {
214
      EncodeCenterSize(target_box, prior_box, prior_box_var, normalized,
215
                       variance, output);
G
gaoyuan 已提交
216
    } else if (code_type == BoxCodeType::kDecodeCenterSize) {
217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
      if (prior_box_var) {
        if (axis == 0) {
          DecodeCenterSize<0, 2>(target_box, prior_box, prior_box_var,
                                 normalized, variance, output);
        } else {
          DecodeCenterSize<1, 2>(target_box, prior_box, prior_box_var,
                                 normalized, variance, output);
        }
      } else if (!(variance.empty())) {
        if (axis == 0) {
          DecodeCenterSize<0, 1>(target_box, prior_box, prior_box_var,
                                 normalized, variance, output);
        } else {
          DecodeCenterSize<1, 1>(target_box, prior_box, prior_box_var,
                                 normalized, variance, output);
        }
      } else {
        if (axis == 0) {
          DecodeCenterSize<0, 0>(target_box, prior_box, prior_box_var,
                                 normalized, variance, output);
        } else {
          DecodeCenterSize<1, 0>(target_box, prior_box, prior_box_var,
                                 normalized, variance, output);
        }
      }
G
gaoyuan 已提交
242 243 244 245 246 247
    }
  }
};

}  // namespace operators
}  // namespace paddle