/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
S
Siddharth Goyal 已提交
13
#include <string>
Y
Yi Wang 已提交
14 15
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
G
gaoyuan 已提交
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30

namespace paddle {
namespace operators {

// Direction of the box transform; chosen from the "code_type" attribute
// string by GetBoxCodeType().
enum class BoxCodeType {
  kEncodeCenterSize = 0,  // selected by "encode_center_size"
  kDecodeCenterSize = 1   // selected by "decode_center_size"
};

// Translates the textual code type into its BoxCodeType enumerator.
//
// Recognized values are "encode_center_size" and "decode_center_size"; any
// other string is reported through PADDLE_THROW.
inline BoxCodeType GetBoxCodeType(const std::string& type) {
  if (type == "encode_center_size") return BoxCodeType::kEncodeCenterSize;
  if (type == "decode_center_size") return BoxCodeType::kDecodeCenterSize;
  PADDLE_THROW("Not support type %s.", type);
}

31
template <typename DeviceContext, typename T>
G
gaoyuan 已提交
32 33
class BoxCoderKernel : public framework::OpKernel<T> {
 public:
34 35 36
  void EncodeCenterSize(const framework::Tensor* target_box,
                        const framework::Tensor* prior_box,
                        const framework::Tensor* prior_box_var,
37
                        const bool normalized, T* output) const {
38 39 40 41 42 43 44
    int64_t row = target_box->dims()[0];
    int64_t col = prior_box->dims()[0];
    int64_t len = prior_box->dims()[1];
    auto* target_box_data = target_box->data<T>();
    auto* prior_box_data = prior_box->data<T>();
    const T* prior_box_var_data = nullptr;
    if (prior_box_var) prior_box_var_data = prior_box_var->data<T>();
G
gaoyuan 已提交
45

L
luotao1 已提交
46 47 48
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for collapse(2)
#endif
G
gaoyuan 已提交
49 50
    for (int64_t i = 0; i < row; ++i) {
      for (int64_t j = 0; j < col; ++j) {
51 52 53 54 55
        T prior_box_width = prior_box_data[j * len + 2] -
                            prior_box_data[j * len] + (normalized == false);
        T prior_box_height = prior_box_data[j * len + 3] -
                             prior_box_data[j * len + 1] +
                             (normalized == false);
G
gaoyuan 已提交
56
        T prior_box_center_x =
G
gaoyuan 已提交
57
            (prior_box_data[j * len + 2] + prior_box_data[j * len]) / 2;
G
gaoyuan 已提交
58
        T prior_box_center_y =
G
gaoyuan 已提交
59
            (prior_box_data[j * len + 3] + prior_box_data[j * len + 1]) / 2;
G
gaoyuan 已提交
60 61

        T target_box_center_x =
G
gaoyuan 已提交
62
            (target_box_data[i * len + 2] + target_box_data[i * len]) / 2;
G
gaoyuan 已提交
63
        T target_box_center_y =
G
gaoyuan 已提交
64
            (target_box_data[i * len + 3] + target_box_data[i * len + 1]) / 2;
65 66 67 68 69
        T target_box_width = target_box_data[i * len + 2] -
                             target_box_data[i * len] + (normalized == false);
        T target_box_height = target_box_data[i * len + 3] -
                              target_box_data[i * len + 1] +
                              (normalized == false);
G
gaoyuan 已提交
70

G
gaoyuan 已提交
71
        size_t offset = i * col * len + j * len;
72 73 74 75
        output[offset] =
            (target_box_center_x - prior_box_center_x) / prior_box_width;
        output[offset + 1] =
            (target_box_center_y - prior_box_center_y) / prior_box_height;
G
gaoyuan 已提交
76
        output[offset + 2] =
77
            std::log(std::fabs(target_box_width / prior_box_width));
G
gaoyuan 已提交
78
        output[offset + 3] =
79 80 81 82 83 84 85
            std::log(std::fabs(target_box_height / prior_box_height));
        if (prior_box_var) {
          output[offset] /= prior_box_var_data[j * len];
          output[offset + 1] /= prior_box_var_data[j * len + 1];
          output[offset + 2] /= prior_box_var_data[j * len + 2];
          output[offset + 3] /= prior_box_var_data[j * len + 3];
        }
G
gaoyuan 已提交
86 87 88
      }
    }
  }
89 90 91
  void DecodeCenterSize(const framework::Tensor* target_box,
                        const framework::Tensor* prior_box,
                        const framework::Tensor* prior_box_var,
92
                        const bool normalized, T* output) const {
93 94 95
    int64_t row = target_box->dims()[0];
    int64_t col = prior_box->dims()[0];
    int64_t len = prior_box->dims()[1];
G
gaoyuan 已提交
96

97 98 99 100
    auto* target_box_data = target_box->data<T>();
    auto* prior_box_data = prior_box->data<T>();
    const T* prior_box_var_data = nullptr;
    if (prior_box_var) prior_box_var_data = prior_box_var->data<T>();
G
gaoyuan 已提交
101

L
luotao1 已提交
102 103 104
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for collapse(2)
#endif
G
gaoyuan 已提交
105 106
    for (int64_t i = 0; i < row; ++i) {
      for (int64_t j = 0; j < col; ++j) {
Y
Yuan Gao 已提交
107
        size_t offset = i * col * len + j * len;
108 109 110 111 112
        T prior_box_width = prior_box_data[j * len + 2] -
                            prior_box_data[j * len] + (normalized == false);
        T prior_box_height = prior_box_data[j * len + 3] -
                             prior_box_data[j * len + 1] +
                             (normalized == false);
G
gaoyuan 已提交
113
        T prior_box_center_x =
G
gaoyuan 已提交
114
            (prior_box_data[j * len + 2] + prior_box_data[j * len]) / 2;
G
gaoyuan 已提交
115
        T prior_box_center_y =
G
gaoyuan 已提交
116
            (prior_box_data[j * len + 3] + prior_box_data[j * len + 1]) / 2;
G
gaoyuan 已提交
117

118 119 120 121
        T target_box_center_x = 0, target_box_center_y = 0;
        T target_box_width = 0, target_box_height = 0;
        if (prior_box_var) {
          target_box_center_x = prior_box_var_data[j * len] *
Y
Yuan Gao 已提交
122
                                    target_box_data[offset] * prior_box_width +
G
gaoyuan 已提交
123
                                prior_box_center_x;
124
          target_box_center_y = prior_box_var_data[j * len + 1] *
Y
Yuan Gao 已提交
125
                                    target_box_data[offset + 1] *
G
gaoyuan 已提交
126 127
                                    prior_box_height +
                                prior_box_center_y;
128
          target_box_width = std::exp(prior_box_var_data[j * len + 2] *
Y
Yuan Gao 已提交
129
                                      target_box_data[offset + 2]) *
G
gaoyuan 已提交
130
                             prior_box_width;
131
          target_box_height = std::exp(prior_box_var_data[j * len + 3] *
Y
Yuan Gao 已提交
132
                                       target_box_data[offset + 3]) *
G
gaoyuan 已提交
133
                              prior_box_height;
134 135 136 137 138 139 140 141 142 143
        } else {
          target_box_center_x =
              target_box_data[offset] * prior_box_width + prior_box_center_x;
          target_box_center_y = target_box_data[offset + 1] * prior_box_height +
                                prior_box_center_y;
          target_box_width =
              std::exp(target_box_data[offset + 2]) * prior_box_width;
          target_box_height =
              std::exp(target_box_data[offset + 3]) * prior_box_height;
        }
G
gaoyuan 已提交
144 145 146

        output[offset] = target_box_center_x - target_box_width / 2;
        output[offset + 1] = target_box_center_y - target_box_height / 2;
147 148 149 150
        output[offset + 2] =
            target_box_center_x + target_box_width / 2 - (normalized == false);
        output[offset + 3] =
            target_box_center_y + target_box_height / 2 - (normalized == false);
G
gaoyuan 已提交
151 152 153 154 155 156 157 158
      }
    }
  }

  void Compute(const framework::ExecutionContext& context) const override {
    auto* prior_box = context.Input<framework::Tensor>("PriorBox");
    auto* prior_box_var = context.Input<framework::Tensor>("PriorBoxVar");
    auto* target_box = context.Input<framework::LoDTensor>("TargetBox");
G
gaoyuan 已提交
159
    auto* output_box = context.Output<framework::Tensor>("OutputBox");
G
gaoyuan 已提交
160 161 162 163 164 165 166

    if (target_box->lod().size()) {
      PADDLE_ENFORCE_EQ(target_box->lod().size(), 1UL,
                        "Only support 1 level of LoD.");
    }
    auto row = target_box->dims()[0];
    auto col = prior_box->dims()[0];
G
gaoyuan 已提交
167
    auto len = prior_box->dims()[1];
G
gaoyuan 已提交
168

G
gaoyuan 已提交
169
    output_box->mutable_data<T>({row, col, len}, context.GetPlace());
G
gaoyuan 已提交
170 171

    auto code_type = GetBoxCodeType(context.Attr<std::string>("code_type"));
172
    bool normalized = context.Attr<bool>("box_normalized");
G
gaoyuan 已提交
173 174
    T* output = output_box->data<T>();
    if (code_type == BoxCodeType::kEncodeCenterSize) {
175
      EncodeCenterSize(target_box, prior_box, prior_box_var, normalized,
176
                       output);
G
gaoyuan 已提交
177
    } else if (code_type == BoxCodeType::kDecodeCenterSize) {
178
      DecodeCenterSize(target_box, prior_box, prior_box_var, normalized,
179
                       output);
G
gaoyuan 已提交
180 181 182 183 184 185
    }
  }
};

}  // namespace operators
}  // namespace paddle