// box_coder_image_compute_test.cc
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <memory>
#include <random>
#include <string>
#include <vector>
#include "lite/backends/opencl/target_wrapper.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/kernels/opencl/test_helper.h"

// Tolerance used when comparing the FP16 kernel output with the float
// reference: an element passes if either its absolute or its relative
// difference is at most this value.
#define FP16_MAX_DIFF (5e-1)
namespace paddle {
namespace lite {
// Float reference for the box_coder operator, used to validate the kernel.
//
// Only code_type == "decode_center_size" is implemented; any other value
// ends in LOG(FATAL).
//
//   proposals_data    output, row * col boxes of 4 floats
//                     (xmin, ymin, xmax, ymax)
//   anchors_data      prior boxes, 4 floats each (xmin, ymin, xmax, ymax)
//   bbox_deltas_data  encoded deltas, row * col entries of 4 floats
//   variances_data    per-prior variances, 4 floats each
//   axis              0: prior/variance is picked by the column index,
//                     otherwise by the row index
//   box_normalized    when false, 1 is added to the anchor width/height and
//                     subtracted from the decoded max corner
//   row, col          extent of the two outer dimensions of the deltas/output
void box_coder_ref(float* proposals_data,
                   const float* anchors_data,
                   const float* bbox_deltas_data,
                   const float* variances_data,
                   int axis,
                   bool box_normalized,
                   std::string code_type,
                   int row,
                   int col) {
  if (code_type != "decode_center_size") {
    if (code_type == "encode_center_size") {
      LOG(FATAL) << "not implemented type: " << code_type;
    } else {
      LOG(FATAL) << "not supported type: " << code_type;
    }
    return;
  }

  // Every box, prior, variance and delta entry is 4 floats wide.
  const int kBoxLen = 4;
  const float pixel_shift = box_normalized ? 0.f : 1.f;

  for (int64_t r = 0; r < row; ++r) {
    for (int64_t c = 0; c < col; ++c) {
      // Deltas and outputs share the same [row, col, 4] flat layout.
      const size_t flat_offset = r * col * kBoxLen + c * kBoxLen;
      // Priors and variances are indexed along the axis-selected dimension.
      const int prior_offset = axis == 0 ? c * kBoxLen : r * kBoxLen;

      const float* prior = anchors_data + prior_offset;
      const float* variance = variances_data + prior_offset;
      const float* delta = bbox_deltas_data + flat_offset;
      float* decoded = proposals_data + flat_offset;

      const float prior_w = prior[2] - prior[0] + pixel_shift;
      const float prior_h = prior[3] - prior[1] + pixel_shift;
      // The 0.5 literal keeps the centre computation in double precision.
      const double prior_cx = prior[0] + 0.5 * prior_w;
      const double prior_cy = prior[1] + 0.5 * prior_h;

      const float center_x = variance[0] * delta[0] * prior_w + prior_cx;
      const float center_y = variance[1] * delta[1] * prior_h + prior_cy;
      const float box_w = std::exp(variance[2] * delta[2]) * prior_w;
      const float box_h = std::exp(variance[3] * delta[3]) * prior_h;

      decoded[0] = center_x - box_w / 2;
      decoded[1] = center_y - box_h / 2;
      decoded[2] = center_x + box_w / 2 - pixel_shift;
      decoded[3] = center_y + box_h / 2 - pixel_shift;
    }
  }
}
// #define BOXCODER_FP16_LOOP_TEST
// #define BOXCODER_FP16_PRINT_RESULT
// Runs the OpenCL FP16 image-layout "box_coder" kernel and compares its
// output with the float reference box_coder_ref().
//
// By default one configuration is exercised (n = 1, m = 1, normalized boxes,
// "decode_center_size", axis = 0). Defining BOXCODER_FP16_LOOP_TEST sweeps
// several n/m combinations instead; defining BOXCODER_FP16_PRINT_RESULT dumps
// every input/output pair.
TEST(box_coder_image2d, compute) {
#ifdef BOXCODER_FP16_LOOP_TEST
  for (auto n : {1, 2, 3, 4}) {
    for (auto m : {1, 3, 4, 8}) {
      for (auto norm : {true}) {
        for (auto code_type : {"decode_center_size"}) {
          for (auto axis : {0}) {
#else
  const int n = 1;
  const int m = 1;
  const bool norm = true;
  const std::string code_type = "decode_center_size";
  const int axis = 0;
#endif  // BOXCODER_FP16_LOOP_TEST

            LOG(INFO) << "======== input shape[n,c,h,w]:" << n << " " << m
                      << " ========";
            LOG(INFO) << "======== parameters: norm = " << norm
                      << ", axis = " << axis << ", code_type: " << code_type;

            // Look up the kernel under test.
            auto kernels =
                KernelRegistry::Global().Create("box_coder",
                                                TARGET(kOpenCL),
                                                PRECISION(kFP16),
                                                DATALAYOUT(kImageDefault));
            ASSERT_FALSE(kernels.empty());
            auto kernel = std::move(kernels.front());
            LOG(INFO) << "get kernel:" << kernel->doc();

            lite::Tensor prior_box, prior_box_var, target_box, output_box;
            operators::BoxCoderParam param;
            param.prior_box = &prior_box;
            param.prior_box_var = &prior_box_var;
            param.target_box = &target_box;
            param.proposals = &output_box;
            param.axis = axis;
            param.box_normalized = norm;
            param.code_type = code_type;

            std::unique_ptr<KernelContext> context(new KernelContext);
            context->As<OpenCLContext>().InitOnce();

            kernel->SetParam(param);
            std::unique_ptr<KernelContext> boxcoder_context(new KernelContext);
            context->As<OpenCLContext>().CopySharedTo(
                &(boxcoder_context->As<OpenCLContext>()));
            kernel->SetContext(std::move(boxcoder_context));

            // Priors and variances hold m boxes; targets/outputs hold n * m.
            const DDim prior_box_dims =
                DDim(std::vector<DDim::value_type>{1, 1, m, 4});
            const DDim prior_box_var_dims =
                DDim(std::vector<DDim::value_type>{1, 1, m, 4});
            const DDim target_box_dims =
                DDim(std::vector<DDim::value_type>{1, n, m, 4});
            const DDim out_dim =
                DDim(std::vector<DDim::value_type>{1, n, m, 4});
            prior_box.Resize(prior_box_dims);
            prior_box_var.Resize(prior_box_var_dims);
            target_box.Resize(target_box_dims);
            output_box.Resize(out_dim);

            // Deterministic host-side input data.
            std::vector<float> prior_box_data(prior_box_dims.production());
            std::vector<float> prior_box_var_data(
                prior_box_var_dims.production());
            std::vector<float> target_box_data(target_box_dims.production());
            for (int i = 0; i < prior_box_dims.production(); i++) {
              prior_box_data[i] = i * 1.1 / prior_box_dims.production();
            }
            for (int i = 0; i < prior_box_var_dims.production(); i++) {
              prior_box_var_data[i] = i * 1.2 / prior_box_var_dims.production();
            }
            for (int i = 0; i < target_box_dims.production(); i++) {
              target_box_data[i] = i * 1.3 / target_box_dims.production();
            }

            LOG(INFO) << "prepare input";
            // Stack-allocated so the converter is released on every exit
            // path, including an early ASSERT_* return.
            CLImageConverterDefault default_converter;

            // Each host image buffer stays alive until the end of the scope,
            // i.e. past kernel->Launch(), exactly as in the original test.
            DDim prior_box_image_shape =
                default_converter.InitImageDimInfoWith(prior_box_dims);
            LOG(INFO) << "prior_box_image_shape = " << prior_box_image_shape[0]
                      << " " << prior_box_image_shape[1];
            std::vector<half_t> prior_box_image_data(
                prior_box_image_shape.production() * 4);  // 4 : RGBA
            default_converter.NCHWToImage(prior_box_data.data(),
                                          prior_box_image_data.data(),
                                          prior_box_dims);
            prior_box.mutable_data<half_t, cl::Image2D>(
                prior_box_image_shape[0],
                prior_box_image_shape[1],
                prior_box_image_data.data());

            DDim prior_box_var_image_shape =
                default_converter.InitImageDimInfoWith(prior_box_var_dims);
            LOG(INFO) << "prior_box_var_image_shape = "
                      << prior_box_var_image_shape[0] << " "
                      << prior_box_var_image_shape[1];
            std::vector<half_t> prior_box_var_image_data(
                prior_box_var_image_shape.production() * 4);  // 4 : RGBA
            default_converter.NCHWToImage(prior_box_var_data.data(),
                                          prior_box_var_image_data.data(),
                                          prior_box_var_dims);
            prior_box_var.mutable_data<half_t, cl::Image2D>(
                prior_box_var_image_shape[0],
                prior_box_var_image_shape[1],
                prior_box_var_image_data.data());

            DDim target_box_image_shape =
                default_converter.InitImageDimInfoWith(target_box_dims);
            LOG(INFO) << "target_box_image_shape = "
                      << target_box_image_shape[0] << " "
                      << target_box_image_shape[1];
            std::vector<half_t> target_box_image_data(
                target_box_image_shape.production() * 4);  // 4 : RGBA
            default_converter.NCHWToImage(target_box_data.data(),
                                          target_box_image_data.data(),
                                          target_box_dims);
            target_box.mutable_data<half_t, cl::Image2D>(
                target_box_image_shape[0],
                target_box_image_shape[1],
                target_box_image_data.data());

            DDim out_image_shape =
                default_converter.InitImageDimInfoWith(out_dim);
            LOG(INFO) << "out_image_shape = " << out_image_shape[0] << " "
                      << out_image_shape[1];
            auto* out_image = output_box.mutable_data<half_t, cl::Image2D>(
                out_image_shape[0], out_image_shape[1]);
            kernel->Launch();

            // Block until the event registered for the output image fires.
            auto* wait_list = context->As<OpenCLContext>().cl_wait_list();
            auto* out_ptr = param.proposals->data<half_t, cl::Image2D>();
            auto it = wait_list->find(out_ptr);
            if (it != wait_list->end()) {
              VLOG(4) << "--- Find the sync event for the target cl "
                         "tensor. ---";
              auto& event = *(it->second);
              event.wait();
            } else {
              LOG(FATAL) << "Could not find the sync event for the "
                            "target cl tensor.";
            }

            // Reference result. The target tensor is [1, n, m, 4], so the
            // reference iterates n rows (dims[1]) by m columns (dims[2]);
            // with axis == 0 the column index selects one of the m priors.
            lite::Tensor out_ref_tensor;
            out_ref_tensor.Resize(out_dim);
            box_coder_ref(out_ref_tensor.mutable_data<float>(),
                          prior_box_data.data(),
                          target_box_data.data(),
                          prior_box_var_data.data(),
                          axis,
                          norm,
                          code_type,
                          target_box_dims[1],
                          target_box_dims[2]);

            // Read the output image back and convert it to NCHW floats.
            // Buffers are sized from the image shape (4 halfs per RGBA
            // pixel) instead of a fixed element count.
            const size_t cl_image2d_row_pitch{0};
            const size_t cl_image2d_slice_pitch{0};
            std::vector<half_t> out_image_data(
                out_image_shape.production() * 4);  // 4 : RGBA
            TargetWrapperCL::ImgcpySync(out_image_data.data(),
                                        out_image,
                                        out_image_shape[0],
                                        out_image_shape[1],
                                        cl_image2d_row_pitch,
                                        cl_image2d_slice_pitch,
                                        IoDirection::DtoH);
            std::vector<float> out_data(out_image_shape.production() * 4);
            default_converter.ImageToNCHW(out_image_data.data(),
                                          out_data.data(),
                                          out_image_shape,
                                          out_dim);
// result
#ifdef BOXCODER_FP16_PRINT_RESULT
            LOG(INFO) << "---- print kernel result (input -> output) ----";
            for (int eidx = 0; eidx < out_dim.production(); ++eidx) {
              std::cout << target_box_data[eidx] << " -> " << out_data[eidx]
                        << std::endl;
            }
#endif  // BOXCODER_FP16_PRINT_RESULT
            const float* out_ref = out_ref_tensor.data<float>();
            for (int i = 0; i < out_dim.production(); i++) {
              // std::fabs: an unqualified abs() may bind to the integer
              // overload and truncate sub-1 differences to 0.
              const float abs_diff = std::fabs(out_data[i] - out_ref[i]);
              const auto relative_diff =
                  COMPUTE_RELATIVE_DIFF(out_data[i], out_ref[i]);
              const bool within_tolerance =
                  (relative_diff <= FP16_MAX_DIFF) ||
                  (abs_diff <= FP16_MAX_DIFF);
              EXPECT_TRUE(within_tolerance);
              if (!within_tolerance) {
                LOG(ERROR) << "error idx:" << i << ", in_data[" << i
                           << "]: " << target_box_data[i] << ", out_data[" << i
                           << "]: " << out_data[i] << ", out_ref[" << i
                           << "]: " << out_ref[i] << ", abs_diff: " << abs_diff
                           << ", relative_diff: " << relative_diff
                           << ", FP16_MAX_DIFF: " << FP16_MAX_DIFF;
              }
            }
#ifdef BOXCODER_FP16_LOOP_TEST
          }        // axis
        }          // code_type
      }            // norm
    }              // m
  }                // n
#else
// nothing to do.
#endif
}

}  // namespace lite
}  // namespace paddle

// NOTE(review): presumably references the OpenCL/FP16/ImageDefault box_coder
// kernel registration so that the KernelRegistry lookup in the test above can
// find it — confirm against lite/core/op_registry.h.
USE_LITE_KERNEL(box_coder, kOpenCL, kFP16, kImageDefault, ImageDefault);