// main.cc
//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <glog/logging.h>

#include <sys/stat.h>
#include <sys/types.h>
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cstring>
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#ifdef _WIN32
#include <direct.h>
#include <io.h>
#elif LINUX
#include <stdarg.h>
#include <sys/stat.h>
#endif

#include <gflags/gflags.h>
#include "include/object_detector.h"

// Command-line flags. Each DEFINE_* below creates a FLAGS_<name> global that
// the rest of this file reads.
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_string(image_file, "", "Path of input image");
DEFINE_string(video_path, "", "Path of input video");
// Retained only so main() can reject it with a deprecation message.
DEFINE_bool(
    use_gpu,
    false,
    "Deprecated, please use `--device` to set the device you want to run.");
DEFINE_string(device,
              "CPU",
              "Choose the device you want to run, it can be: CPU/GPU/XPU, "
              "default is CPU.");
DEFINE_bool(use_camera, false, "Use camera or not");
// main() also accepts trt_int8, so the help text has to list it.
DEFINE_string(run_mode,
              "fluid",
              "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)");
DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
// -1 means "no camera": PredictVideo reads from a camera only when this is
// set to another value.
DEFINE_int32(camera_id, -1, "Device id of camera to predict");
DEFINE_bool(run_benchmark,
            false,
            "Whether to predict a image_file repeatedly for benchmark");
DEFINE_double(threshold, 0.5, "Threshold of score.");
DEFINE_string(output_dir, "output", "Directory of output visualization files.");
DEFINE_bool(trt_calib_mode,
            false,
            "If the model is produced by TRT offline quantitative calibration, "
            "trt_calib_mode need to set True");
static std::string DirName(const std::string& filepath) {
61 62 63 64 65 66 67
  auto pos = filepath.rfind(OS_PATH_SEP);
  if (pos == std::string::npos) {
    return "";
  }
  return filepath.substr(0, pos);
}

// Reports whether `path` can be stat'ed, i.e. whether stat() (or _stat() on
// Windows) succeeds for it. Any kind of filesystem entry counts.
static bool PathExists(const std::string& path) {
#ifdef _WIN32
  struct _stat info;
  const int rc = _stat(path.c_str(), &info);
#else
  struct stat info;
  const int rc = stat(path.c_str(), &info);
#endif  // !_WIN32
  return rc == 0;
}

static void MkDir(const std::string& path) {
79
  if (PathExists(path)) return;
80 81 82 83 84 85 86
  int ret = 0;
#ifdef _WIN32
  ret = _mkdir(path.c_str());
#else
  ret = mkdir(path.c_str(), 0755);
#endif  // !_WIN32
  if (ret != 0) {
87 88
    std::string path_error(path);
    path_error += " mkdir failed!";
89 90 91 92 93 94 95 96 97 98 99 100
    throw std::runtime_error(path_error);
  }
}

// Creates `path` together with any ancestors that do not exist yet.
// Does nothing for an empty path or one that already exists. Errors raised by
// MkDir propagate to the caller.
static void MkDirs(const std::string& path) {
  // Walk upwards from `path`, remembering every level that is still missing.
  std::vector<std::string> missing;
  std::string current = path;
  while (!current.empty() && !PathExists(current)) {
    missing.push_back(current);
    current = DirName(current);
  }

  // Create the outermost missing directory first, the requested one last.
  for (auto it = missing.rbegin(); it != missing.rend(); ++it) {
    MkDir(*it);
  }
}

void PredictVideo(const std::string& video_path,
                  PaddleDetection::ObjectDetector* det) {
  // Open video
  cv::VideoCapture capture;
W
wangguanzhong 已提交
105
  if (FLAGS_camera_id != -1) {
C
channings 已提交
106
    capture.open(FLAGS_camera_id);
W
wangguanzhong 已提交
107
  } else {
C
channings 已提交
108 109
    capture.open(video_path.c_str());
  }
110 111 112 113 114 115 116 117 118 119 120 121
  if (!capture.isOpened()) {
    printf("can not open video : %s\n", video_path.c_str());
    return;
  }

  // Get Video info : resolution, fps
  int video_width = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_WIDTH));
  int video_height = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_HEIGHT));
  int video_fps = static_cast<int>(capture.get(CV_CAP_PROP_FPS));

  // Create VideoWriter for output
  cv::VideoWriter video_out;
C
channings 已提交
122
  std::string video_out_path = "output.mp4";
123
  video_out.open(video_out_path.c_str(),
C
channings 已提交
124
                 0x00000021,
125 126 127 128 129 130 131 132 133 134 135 136 137
                 video_fps,
                 cv::Size(video_width, video_height),
                 true);
  if (!video_out.isOpened()) {
    printf("create video writer failed!\n");
    return;
  }

  std::vector<PaddleDetection::ObjectResult> result;
  auto labels = det->GetLabelList();
  auto colormap = PaddleDetection::GenerateColorMap(labels.size());
  // Capture all frames and do inference
  cv::Mat frame;
C
channings 已提交
138
  int frame_id = 0;
139 140 141 142
  while (capture.read(frame)) {
    if (frame.empty()) {
      break;
    }
143
    det->Predict(frame, 0.5, 0, 1, false, &result);
W
wangguanzhong 已提交
144 145
    cv::Mat out_im =
        PaddleDetection::VisualizeResult(frame, result, labels, colormap);
C
channings 已提交
146
    for (const auto& item : result) {
W
wangguanzhong 已提交
147 148 149 150 151 152 153 154 155 156 157
      printf(
          "In frame id %d, we detect: class=%d confidence=%.2f rect=[%d %d %d "
          "%d]\n",
          frame_id,
          item.class_id,
          item.confidence,
          item.rect[0],
          item.rect[1],
          item.rect[2],
          item.rect[3]);
    }
158
    video_out.write(out_im);
C
channings 已提交
159
    frame_id += 1;
160 161 162 163 164 165
  }
  capture.release();
  video_out.release();
}

void PredictImage(const std::string& image_path,
166 167 168 169
                  const double threshold,
                  const bool run_benchmark,
                  PaddleDetection::ObjectDetector* det,
                  const std::string& output_dir = "output") {
170 171 172 173
  // Open input image as an opencv cv::Mat object
  cv::Mat im = cv::imread(image_path, 1);
  // Store all detected result
  std::vector<PaddleDetection::ObjectResult> result;
W
wangguanzhong 已提交
174
  if (run_benchmark) {
175
    det->Predict(im, threshold, 100, 100, run_benchmark, &result);
W
wangguanzhong 已提交
176
  } else {
177 178 179
    det->Predict(im, 0.5, 0, 1, run_benchmark, &result);
    for (const auto& item : result) {
      printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
W
wangguanzhong 已提交
180 181 182 183 184 185
             item.class_id,
             item.confidence,
             item.rect[0],
             item.rect[1],
             item.rect[2],
             item.rect[3]);
186 187 188 189
    }
    // Visualization result
    auto labels = det->GetLabelList();
    auto colormap = PaddleDetection::GenerateColorMap(labels.size());
W
wangguanzhong 已提交
190 191
    cv::Mat vis_img =
        PaddleDetection::VisualizeResult(im, result, labels, colormap);
192 193 194
    std::vector<int> compression_params;
    compression_params.push_back(CV_IMWRITE_JPEG_QUALITY);
    compression_params.push_back(95);
195 196 197 198 199 200 201
    std::string output_path(output_dir);
    if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
      output_path += OS_PATH_SEP;
    }
    output_path += "output.jpg";
    cv::imwrite(output_path, vis_img, compression_params);
    printf("Visualized output saved as %s\n", output_path.c_str());
202 203 204 205 206 207
  }
}

int main(int argc, char** argv) {
  // Parsing command-line
  google::ParseCommandLineFlags(&argc, &argv, true);
W
wangguanzhong 已提交
208 209
  if (FLAGS_model_dir.empty() ||
      (FLAGS_image_file.empty() && FLAGS_video_path.empty())) {
210
    std::cout << "Usage: ./main --model_dir=/PATH/TO/INFERENCE_MODEL/ "
W
wangguanzhong 已提交
211
              << "--image_file=/PATH/TO/INPUT/IMAGE/" << std::endl;
212 213
    return -1;
  }
W
wangguanzhong 已提交
214 215 216 217
  if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32" ||
        FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) {
    std::cout
        << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
218
    return -1;
219
  }
W
wangguanzhong 已提交
220 221 222 223 224 225
  transform(FLAGS_device.begin(),
            FLAGS_device.end(),
            FLAGS_device.begin(),
            ::toupper);
  if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" ||
        FLAGS_device == "XPU")) {
G
Guanghua Yu 已提交
226 227 228 229
    std::cout << "device should be 'CPU', 'GPU' or 'XPU'.";
    return -1;
  }
  if (FLAGS_use_gpu) {
W
wangguanzhong 已提交
230 231
    std::cout << "Deprecated, please use `--device` to set the device you want "
                 "to run.";
G
Guanghua Yu 已提交
232 233
    return -1;
  }
234 235

  // Load model and create a object detector
W
wangguanzhong 已提交
236 237 238 239 240
  PaddleDetection::ObjectDetector det(FLAGS_model_dir,
                                      FLAGS_device,
                                      FLAGS_run_mode,
                                      FLAGS_gpu_id,
                                      FLAGS_trt_calib_mode);
241
  // Do inference on input video or image
242
  if (!FLAGS_video_path.empty() || FLAGS_use_camera) {
243
    PredictVideo(FLAGS_video_path, &det);
244
  } else if (!FLAGS_image_file.empty()) {
245 246 247
    if (!PathExists(FLAGS_output_dir)) {
      MkDirs(FLAGS_output_dir);
    }
W
wangguanzhong 已提交
248 249 250 251 252
    PredictImage(FLAGS_image_file,
                 FLAGS_threshold,
                 FLAGS_run_benchmark,
                 &det,
                 FLAGS_output_dir);
253 254 255
  }
  return 0;
}