// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "detection_predictor.h"
#include <cstring>
#include <cmath>
#include <fstream>
#include "utils/detection_result.pb.h"
20
#undef min
21 22 23 24 25

namespace PaddleSolution {
    /* lod_buffer: each item is one preprocessed image stored as a flattened
     * CHW float matrix.
     * input_buffer: the same data flattened into a single 1-D vector with
     * zero padding; it must be empty before this function is called.
     */
    void padding_minibatch(const std::vector<std::vector<float>> &lod_buffer,
                           std::vector<float> &input_buffer,
                           std::vector<int> &resize_heights,
                           std::vector<int> &resize_widths,
                           int channels, int coarsest_stride = 1) {
        int batch_size = lod_buffer.size();
        int max_h = -1;
        int max_w = -1;
        for (int i = 0; i < batch_size; ++i) {
            max_h = (max_h > resize_heights[i]) ? max_h : resize_heights[i];
            max_w = (max_w > resize_widths[i]) ? max_w : resize_widths[i];
        }

        max_h = static_cast<int>(ceil(static_cast<float>(max_h)
            / static_cast<float>(coarsest_stride)) * coarsest_stride);
        max_w = static_cast<int>(ceil(static_cast<float>(max_w)
            / static_cast<float>(coarsest_stride)) * coarsest_stride);
        input_buffer.insert(input_buffer.end(),
                            batch_size * channels * max_h * max_w, 0);
        // flatten each image and copy it into the padded batch buffer
        #pragma omp parallel for
        for (int i = 0; i < batch_size; ++i) {
            float *input_buffer_ptr = input_buffer.data()
                                    + i * channels * max_h * max_w;
            const float *lod_ptr = lod_buffer[i].data();
            for (int c = 0; c < channels; ++c) {
                for (int h = 0; h < resize_heights[i]; ++h) {
                    memcpy(input_buffer_ptr, lod_ptr,
                           resize_widths[i] * sizeof(float));
                    lod_ptr += resize_widths[i];
                    input_buffer_ptr += max_w;
                }
                input_buffer_ptr += (max_h - resize_heights[i]) * max_w;
            }
        }
        // report the padded width/height back to the caller
        for (int i = 0; i < batch_size; ++i) {
            resize_widths[i] = max_w;
            resize_heights[i] = max_h;
        }
    }
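    // Worked example for padding_minibatch (illustrative numbers only): with
    // coarsest_stride = 32 and two 3-channel images resized to 300x500 and
    // 320x480, max_h/max_w start as 320 and 500 and round up to 320 and 512.
    // Each image is copied row by row onto its own 3x320x512 zero-filled
    // canvas, so input_buffer ends up holding 2 * 3 * 320 * 512 floats.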

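    /* out_addr points at the raw detection output: one row of 6 floats per
     * detected box, laid out as [class_id, score, xmin, ymin, xmax, ymax].
     * lod_vector[0] maps images to rows: image i owns the half-open range
     * [lod_vector[0][i], lod_vector[0][i+1]).
     */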
    void output_detection_result(const float* out_addr,
                             const std::vector<std::vector<size_t>> &lod_vector,
                             const std::vector<std::string> &imgs_batch) {
        for (int i = 0; i < lod_vector[0].size() - 1; ++i) {
            DetectionResult detection_result;
            detection_result.set_filename(imgs_batch[i]);
            std::cout << imgs_batch[i] << ":" << std::endl;
            for (int j = lod_vector[0][i]; j < lod_vector[0][i+1]; ++j) {
                DetectionBox *box_ptr = detection_result.add_detection_boxes();
                box_ptr->set_class_(
                         static_cast<int>(round(out_addr[0 + j * 6])));
                box_ptr->set_score(out_addr[1 + j * 6]);
                box_ptr->set_left_top_x(out_addr[2 + j * 6]);
                box_ptr->set_left_top_y(out_addr[3 + j * 6]);
                box_ptr->set_right_bottom_x(out_addr[4 + j * 6]);
                box_ptr->set_right_bottom_y(out_addr[5 + j * 6]);
                printf("Class %d, score = %f, left top = [%f, %f], right bottom = [%f, %f]\n",
                        static_cast<int>(round(out_addr[0 + j * 6])),
                        out_addr[1 + j * 6],
                        out_addr[2 + j * 6],
                        out_addr[3 + j * 6],
                        out_addr[4 + j * 6],
                        out_addr[5 + j * 6]);
            }
            printf("\n");
            std::ofstream output(imgs_batch[i] + ".pb",
                    std::ios::out | std::ios::trunc | std::ios::binary);
            detection_result.SerializeToOstream(&output);
            output.close();
        }
    }

    int DetectionPredictor::init(const std::string& conf) {
        if (!_model_config.load_config(conf)) {
        #ifdef _WIN32
            std::cerr << "Failed to load config file: [" << conf << "], "
                      << "please check whether the config file path is correct"
                      << std::endl;
        #else
            LOG(FATAL) << "Failed to load config file: [" << conf << "], "
                      << "please check whether the config file path is correct";
        #endif
            return -1;
        }
        _preprocessor = PaddleSolution::create_processor(conf);
        if (_preprocessor == nullptr) {
        #ifdef _WIN32
            std::cerr << "Failed to create_processor, please check whether you"
                      << " wrote a correct config file." << std::endl;
        #else
            LOG(FATAL) << "Failed to create_processor, please check whether"
                      << " you wrote a correct config file.";
        #endif
            return -1;
        }

        bool use_gpu = _model_config._use_gpu;
        bool enable_trt = _model_config._enable_trt && use_gpu;
        auto trt_precision = _model_config._trt_precision;
        const auto& model_dir = _model_config._model_path;
        const auto& model_filename = _model_config._model_file_name;
        const auto& params_filename = _model_config._param_file_name;

        // load paddle model file
        if (_model_config._predictor_mode == "NATIVE") {
            paddle::NativeConfig config;
            auto prog_file = utils::path_join(model_dir, model_filename);
            auto param_file = utils::path_join(model_dir, params_filename);
            config.prog_file = prog_file;
            config.param_file = param_file;
            config.fraction_of_gpu_memory = 0;
            config.use_gpu = use_gpu;
            config.device = 0;
            _main_predictor = paddle::CreatePaddlePredictor(config);

        } else if (_model_config._predictor_mode == "ANALYSIS") {
            paddle::AnalysisConfig config;
            if (use_gpu) {
                config.EnableUseGpu(100, 0);
            }
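            // EnableTensorRtEngine arguments, in order: workspace size in
            // bytes (1 << 20 = 1 MB), max batch size, minimum subgraph size,
            // inference precision, use_static, and use_calib_mode (INT8
            // calibration, enabled below only for kInt8 precision).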
            if (enable_trt) {
                auto use_calib_mode =
                    (trt_precision == paddle::AnalysisConfig::Precision::kInt8);
                config.EnableTensorRtEngine(1 << 20, _model_config._batch_size,
                    40, trt_precision, false, use_calib_mode);
            }
            auto prog_file = utils::path_join(model_dir, model_filename);
            auto param_file = utils::path_join(model_dir, params_filename);
            config.SetModel(prog_file, param_file);
            config.SwitchUseFeedFetchOps(false);
            config.SwitchSpecifyInputNames(true);
            config.EnableMemoryOptim();
            // config.SwitchIrOptim(true);
            // config.EnableTensorRtEngine(1<<4, 30, 3);
            _main_predictor = paddle::CreatePaddlePredictor(config);
        } else {
            return -1;
        }
        return 0;
    }

    int DetectionPredictor::predict(const std::vector<std::string>& imgs) {
        if (imgs.empty()) {
        #ifdef _WIN32
            std::cerr << "No image found! Please check whether the image path"
                      << " and the image format are correct.\n"
                      << "Supported formats: [.jpeg|.jpg|.JPEG|.JPG|.bmp|.BMP|.png|.PNG]" << std::endl;
        #else
            LOG(ERROR) << "No image found! Please check whether the image path"
                       << " and the image format are correct.\n"
                       << "Supported formats: [.jpeg|.jpg|.JPEG|.JPG|.bmp|.BMP|.png|.PNG]";
        #endif
            return -1;
        }
        if (_model_config._predictor_mode == "NATIVE") {
            return native_predict(imgs);
        } else if (_model_config._predictor_mode == "ANALYSIS") {
            return analysis_predict(imgs);
        }
        return -1;
    }
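    // Usage sketch (file paths are placeholders, not part of this repo):
    //   PaddleSolution::DetectionPredictor predictor;
    //   if (predictor.init("conf/detection.yaml") == 0) {
    //       predictor.predict({"images/demo.jpg"});  // writes images/demo.jpg.pb
    //   }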

    int DetectionPredictor::native_predict(const std::vector<std::string>& imgs) {
        int config_batch_size = _model_config._batch_size;

        int channels = _model_config._channels;
        int eval_width = _model_config._resize[0];
        int eval_height = _model_config._resize[1];
        std::size_t total_size = imgs.size();
        int default_batch_size = std::min(config_batch_size,
                                          static_cast<int>(total_size));
        int batch = total_size / default_batch_size +
                    ((total_size % default_batch_size) != 0);
        int batch_buffer_size = default_batch_size * channels
                              * eval_width * eval_height;

        auto& input_buffer = _buffer;
        auto& imgs_batch = _imgs_batch;
        for (int u = 0; u < batch; ++u) {
            int batch_size = default_batch_size;
            if (u == (batch - 1) && (total_size % default_batch_size)) {
                batch_size = total_size % default_batch_size;
            }

            int real_buffer_size = batch_size * channels
                                 * eval_width * eval_height;
            std::vector<paddle::PaddleTensor> feeds;
            input_buffer.clear();
            imgs_batch.clear();
            for (int i = 0; i < batch_size; ++i) {
                int idx = u * default_batch_size + i;
                imgs_batch.push_back(imgs[idx]);
            }
            std::vector<int> ori_widths;
            std::vector<int> ori_heights;
            std::vector<int> resize_widths;
            std::vector<int> resize_heights;
            std::vector<float> scale_ratios;
            ori_widths.resize(batch_size);
            ori_heights.resize(batch_size);
            resize_widths.resize(batch_size);
            resize_heights.resize(batch_size);
            scale_ratios.resize(batch_size);
            std::vector<std::vector<float>> lod_buffer(batch_size);
            if (!_preprocessor->batch_process(imgs_batch, lod_buffer,
                                              ori_widths.data(),
                                              ori_heights.data(),
                                              resize_widths.data(),
                                              resize_heights.data(),
                                              scale_ratios.data())) {
                return -1;
            }
            // flatten the batch and pad to a common size
            padding_minibatch(lod_buffer, input_buffer, resize_heights,
                              resize_widths, channels,
                              _model_config._coarsest_stride);
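            // Up to three feeds are assembled below: "image" (NCHW float
            // pixels), "info" (per-image [resized_h, resized_w, scale_ratio]
            // floats, only pushed when _feeds_size > 2), and "im_size"
            // (per-image original size: [h, w] int32 when _feeds_size == 2,
            // or [h, w, 1.0] float when _feeds_size == 3).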
            paddle::PaddleTensor im_tensor, im_size_tensor, im_info_tensor;

            im_tensor.name = "image";
            im_tensor.shape = std::vector<int>({ batch_size,
                                                 channels,
                                                 resize_heights[0],
                                                 resize_widths[0] });
            im_tensor.data.Reset(input_buffer.data(),
                                 input_buffer.size() * sizeof(float));
            im_tensor.dtype = paddle::PaddleDType::FLOAT32;

            std::vector<float> image_infos;
            for (int i = 0; i < batch_size; ++i) {
257 258 259 260 261 262
                image_infos.push_back(resize_heights[i]);
                image_infos.push_back(resize_widths[i]);
                image_infos.push_back(scale_ratios[i]);
            }
            im_info_tensor.name = "info";
            im_info_tensor.shape = std::vector<int>({batch_size, 3});
            im_info_tensor.data.Reset(image_infos.data(),
                                      batch_size * 3 * sizeof(float));
            im_info_tensor.dtype = paddle::PaddleDType::FLOAT32;

            std::vector<int> image_size;
            for (int i = 0; i < batch_size; ++i) {
                image_size.push_back(ori_heights[i]);
                image_size.push_back(ori_widths[i]);
            }

            std::vector<float> image_size_f;
            for (int i = 0; i < batch_size; ++i) {
                image_size_f.push_back(static_cast<float>(ori_heights[i]));
                image_size_f.push_back(static_cast<float>(ori_widths[i]));
                image_size_f.push_back(1.0f);
            }

            int feeds_size = _model_config._feeds_size;
            im_size_tensor.name = "im_size";
            if (feeds_size == 2) {
                im_size_tensor.shape = std::vector<int>({ batch_size, 2 });
                im_size_tensor.data.Reset(image_size.data(),
                                          batch_size * 2 * sizeof(int));
                im_size_tensor.dtype = paddle::PaddleDType::INT32;
            } else if (feeds_size == 3) {
                im_size_tensor.shape = std::vector<int>({ batch_size, 3 });
                im_size_tensor.data.Reset(image_size_f.data(),
                                          batch_size * 3 * sizeof(float));
                im_size_tensor.dtype = paddle::PaddleDType::FLOAT32;
            }
            std::cout << "Feed size = " << feeds_size << std::endl;
            feeds.push_back(im_tensor);
            if (feeds_size > 2) {
                feeds.push_back(im_info_tensor);
            }
            feeds.push_back(im_size_tensor);
            _outputs.clear();
            auto t1 = std::chrono::high_resolution_clock::now();
            if (!_main_predictor->Run(feeds, &_outputs, batch_size)) {
            #ifdef _WIN32
                std::cerr << "Failed: NativePredictor->Run() returned false at batch: " << u;
            #else
                LOG(ERROR) << "Failed: NativePredictor->Run() returned false at batch: " << u;
            #endif
                continue;
            }
            auto t2 = std::chrono::high_resolution_clock::now();
            auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
            std::cout << "runtime = " << duration << std::endl;
            std::cout << "Number of outputs:"  << _outputs.size() << std::endl;
            int out_num = 1;
            // print shape of first output tensor for debugging
            std::cout << "size of outputs[" << 0 << "]: (";
            for (int j = 0; j < _outputs[0].shape.size(); ++j) {
                out_num *= _outputs[0].shape[j];
                std::cout << _outputs[0].shape[j] << ",";
            }
            std::cout << ")" << std::endl;

        //    const size_t nums = _outputs.front().data.length() / sizeof(float);
        //    if (out_num % batch_size != 0 || out_num != nums) {
        //        LOG(ERROR) << "outputs data size mismatch with shape size.";
        //        return -1;
        //    }
            float* out_addr = reinterpret_cast<float *>(_outputs[0].data.data());
            output_detection_result(out_addr, _outputs[0].lod, imgs_batch);
        }
        return 0;
    }

    int DetectionPredictor::analysis_predict(
                const std::vector<std::string>& imgs) {
        int config_batch_size = _model_config._batch_size;
        int channels = _model_config._channels;
        int eval_width = _model_config._resize[0];
        int eval_height = _model_config._resize[1];
        auto total_size = imgs.size();
        int default_batch_size = std::min(config_batch_size,
                                          static_cast<int>(total_size));
        int batch = total_size / default_batch_size
                + ((total_size % default_batch_size) != 0);
        int batch_buffer_size = default_batch_size * channels
                              * eval_width * eval_height;

        auto& input_buffer = _buffer;
        auto& imgs_batch = _imgs_batch;
        for (int u = 0; u < batch; ++u) {
            int batch_size = default_batch_size;
            if (u == (batch - 1) && (total_size % default_batch_size)) {
                batch_size = total_size % default_batch_size;
            }

            int real_buffer_size = batch_size * channels *
                                   eval_width * eval_height;
            std::vector<paddle::PaddleTensor> feeds;
            // input_buffer.resize(real_buffer_size);
            input_buffer.clear();
            imgs_batch.clear();
            for (int i = 0; i < batch_size; ++i) {
                int idx = u * default_batch_size + i;
                imgs_batch.push_back(imgs[idx]);
            }

            std::vector<int> ori_widths;
            std::vector<int> ori_heights;
            std::vector<int> resize_widths;
            std::vector<int> resize_heights;
            std::vector<float> scale_ratios;
            ori_widths.resize(batch_size);
            ori_heights.resize(batch_size);
            resize_widths.resize(batch_size);
            resize_heights.resize(batch_size);
            scale_ratios.resize(batch_size);

            std::vector<std::vector<float>> lod_buffer(batch_size);
            if (!_preprocessor->batch_process(imgs_batch, lod_buffer,
                                              ori_widths.data(),
                                              ori_heights.data(),
                                              resize_widths.data(),
                                              resize_heights.data(),
                                              scale_ratios.data())) {
                std::cout << "Failed to preprocess!" << std::endl;
                return -1;
            }
            // flatten the batch and pad to a common size
            padding_minibatch(lod_buffer, input_buffer, resize_heights,
                              resize_widths, channels,
                              _model_config._coarsest_stride);
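            // ANALYSIS mode feeds inputs through the zero-copy tensor API
            // (GetInputTensor / Reshape / copy_from_cpu), which requires the
            // SwitchUseFeedFetchOps(false) setting applied in init().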

            std::vector<std::string> input_names = _main_predictor->GetInputNames();
            auto im_tensor = _main_predictor->GetInputTensor(
                                              input_names.front());
            im_tensor->Reshape({ batch_size, channels,
                                resize_heights[0], resize_widths[0] });
            im_tensor->copy_from_cpu(input_buffer.data());

            if (input_names.size() > 2) {
                std::vector<float> image_infos;
                for (int i = 0; i < batch_size; ++i) {
                    image_infos.push_back(resize_heights[i]);
                    image_infos.push_back(resize_widths[i]);
                    image_infos.push_back(scale_ratios[i]);
                }
                auto im_info_tensor = _main_predictor->GetInputTensor(
                                      input_names[1]);
                im_info_tensor->Reshape({batch_size, 3});
                im_info_tensor->copy_from_cpu(image_infos.data());
            }

            std::vector<int> image_size;
            for (int i = 0; i < batch_size; ++i) {
                image_size.push_back(ori_heights[i]);
                image_size.push_back(ori_widths[i]);
            }
            std::vector<float> image_size_f;
            for (int i = 0; i < batch_size; ++i) {
                image_size_f.push_back(static_cast<float>(ori_heights[i]));
                image_size_f.push_back(static_cast<float>(ori_widths[i]));
                image_size_f.push_back(1.0);
            }

            auto im_size_tensor = _main_predictor->GetInputTensor(
                                                    input_names.back());
            if (input_names.size() > 2) {
                im_size_tensor->Reshape({batch_size, 3});
                im_size_tensor->copy_from_cpu(image_size_f.data());
            } else {
                im_size_tensor->Reshape({batch_size, 2});
                im_size_tensor->copy_from_cpu(image_size.data());
            }
            auto t1 = std::chrono::high_resolution_clock::now();
            _main_predictor->ZeroCopyRun();
            auto t2 = std::chrono::high_resolution_clock::now();
            auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
            std::cout << "runtime = " << duration << std::endl;

            auto output_names = _main_predictor->GetOutputNames();
            auto output_t = _main_predictor->GetOutputTensor(output_names[0]);
            std::vector<float> out_data;
            std::vector<int> output_shape = output_t->shape();

            int out_num = 1;
            std::cout << "size of outputs[" << 0 << "]: (";
            for (int j = 0; j < output_shape.size(); ++j) {
                out_num *= output_shape[j];
                std::cout << output_shape[j] << ",";
            }
            std::cout << ")" << std::endl;

            out_data.resize(out_num);
            output_t->copy_to_cpu(out_data.data());

            float* out_addr = out_data.data();
            auto lod_vector = output_t->lod();
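            // lod()[0] holds batch_size + 1 offsets that delimit each image's
            // rows in out_data, letting output_detection_result split the
            // flat output per image.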
            output_detection_result(out_addr, lod_vector, imgs_batch);
        }
        return 0;
    }
}  // namespace PaddleSolution