[MOT] Add stream input (#4489)

* add stream input * add stream input & add save_result

[MOT] Add stream input (#4489)
* add stream input * add stream input & add save_result
9378909c · wangguanzhong · GitHub · b2f3ad7c · 9378909c · 9378909c
6 changed files
--- a/deploy/pptracking/CMakeLists.txt
+++ b/deploy/pptracking/CMakeLists.txt
@@ -19,7 +19,7 @@ include_directories("${CMAKE_SOURCE_DIR}/")
 include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include")
 link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib")
-set(SRCS src/main.cc src/preprocess_op.cc src/pipeline.cc src/predictor.cc src/jde_predictor.cc src/sde_predictor.cc src/tracker.cc src/trajectory.cc src/lapjv.cpp src/base_predictor.cc src/postprocess.cc)
+set(SRCS src/main.cc src/preprocess_op.cc src/pipeline.cc src/jde_predictor.cc src/sde_predictor.cc src/tracker.cc src/trajectory.cc src/lapjv.cpp src/base_predictor.cc src/postprocess.cc)
 macro(safe_set_static_flag)
    foreach(flag_var

--- a/deploy/pptracking/include/pipeline.h
+++ b/deploy/pptracking/include/pipeline.h
@@ -31,7 +31,8 @@
 #include <sys/stat.h>
 #endif
-#include "include/predictor.h"
+#include "include/jde_predictor.h"
+#include "include/sde_predictor.h"
 namespace PaddleDetection {
@@ -46,7 +47,10 @@ class Pipeline {
                  const int cpu_threads=1,
                  const bool trt_calib_mode=false,
                  const bool count=false,
-                  const bool save_result=false) {
+                  const bool save_result=false,
+                  const std::string& scene="pedestrian",
+                  const bool tiny_obj=false,
+                  const bool is_mtmct=false) {
    std::vector<std::string> input;
    this->input_ = input;
    this->device_ = device;
@@ -59,34 +63,45 @@ class Pipeline {
    this->trt_calib_mode_ = trt_calib_mode;
    this->count_ = count;
    this->save_result_ = save_result;
+    SelectModel(scene, tiny_obj, is_mtmct);
+    InitPredictor();
  }
-  // Select model according to scenes, it must execute before Run()
-  void SelectModel(const std::string& scene="pedestrian",
-                   const bool tiny_obj=false,
-                   const bool is_mct=false);
  // Set input, it must execute before Run()
  void SetInput(std::string& input_video);
+  void ClearInput();
-  // Run pipeline
+  // Run pipeline in video
  void Run();
+  void PredictMOT(const std::string& video_path);
+  void PredictMTMCT(const std::vector<std::string> video_inputs);
-  void PredictSCT(const std::string& video_path);
+  // Run pipeline in stream
-  void PredictMCT(const std::vector<std::string> video_inputs);
+  void RunMOTStream(const cv::Mat img, const int frame_id, cv::Mat out_img, std::vector<std::string>& records, std::vector<int>& count_list, std::vector<int>& in_count_list, std::vector<int>& out_count_list);
+  void RunMTMCTStream(const std::vector<cv::Mat> imgs, std::vector<std::string>& records);
  void PrintBenchmarkLog(std::vector<double> det_time, int img_num);
 private:
+  // Select model according to scenes, it must execute before Run()
+  void SelectModel(const std::string& scene="pedestrian",
+                   const bool tiny_obj=false,
+                   const bool is_mtmct=false);
+  void InitPredictor();
+  std::shared_ptr<PaddleDetection::JDEPredictor> jde_sct_;
+  std::shared_ptr<PaddleDetection::SDEPredictor> sde_sct_;
  std::vector<std::string> input_;
+  std::vector<cv::Mat> stream_;
  std::string device_;
  double threshold_;
  std::string output_dir_;
  std::string track_model_dir_;
  std::string det_model_dir_;
  std::string reid_model_dir_;
-  std::string mct_model_dir_;
  std::string run_mode_ = "fluid";
  int gpu_id_ = 0;
  bool use_mkldnn_ = false;

--- a/deploy/pptracking/include/postprocess.h
+++ b/deploy/pptracking/include/postprocess.h
@@ -43,6 +43,6 @@ void FlowStatistic(const MOTResult& results, const int frame_id,
                   std::vector<int>* out_count_list);
 // Save Tracking Results
-void SaveResult(const MOTResult& results, const std::string& output_dir);
+void SaveMOTResult(const MOTResult& results, const int frame_id, std::vector<std::string>& records);
 } // namespace PaddleDetection
--- a/deploy/pptracking/src/main.cc
+++ b/deploy/pptracking/src/main.cc
@@ -36,7 +36,7 @@
 DEFINE_string(video_file, "", "Path of input video.");
-DEFINE_string(video_other_file, "", "Path of other input video used for MCT.");
+DEFINE_string(video_other_file, "", "Path of other input video used for MTMCT.");
 DEFINE_string(device, "CPU", "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.");
 DEFINE_double(threshold, 0.5, "Threshold of score.");
 DEFINE_string(output_dir, "output", "Directory of output visualization files.");
@@ -49,7 +49,7 @@ DEFINE_bool(tiny_obj, false, "Whether tracking tiny object");
 DEFINE_bool(count, false, "Whether counting after tracking");
 DEFINE_bool(save_result, false, "Whether saving result after tracking");
 DEFINE_string(scene, "", "scene of tracking system, it can be : pedestrian/vehicle/multiclass");
-DEFINE_bool(is_mct, false, "Whether use multi-camera tracking");
+DEFINE_bool(is_mtmct, false, "Whether use multi-target multi-camera tracking");
 static std::string DirName(const std::string &filepath) {
  auto pos = filepath.rfind(OS_PATH_SEP);
@@ -112,13 +112,17 @@ int main(int argc, char** argv) {
    return -1;
  }
+  if (!PathExists(FLAGS_output_dir)) {
+    MkDirs(FLAGS_output_dir);
+  }
  PaddleDetection::Pipeline pipeline(
                    FLAGS_device, FLAGS_threshold, 
                    FLAGS_output_dir, FLAGS_run_mode, FLAGS_gpu_id, 
-                    FLAGS_use_mkldnn, FLAGS_cpu_threads,
+                    FLAGS_use_mkldnn, FLAGS_cpu_threads, FLAGS_trt_calib_mode,
-                    FLAGS_count, FLAGS_save_result);
+                    FLAGS_count, FLAGS_save_result, FLAGS_scene, FLAGS_tiny_obj, 
+                    FLAGS_is_mtmct);
-  pipeline.SelectModel(FLAGS_scene, FLAGS_tiny_obj, FLAGS_is_mct);
  pipeline.SetInput(FLAGS_video_file);
  if (!FLAGS_video_other_file.empty()) {
    pipeline.SetInput(FLAGS_video_other_file);

--- a/deploy/pptracking/src/pipeline.cc
+++ b/deploy/pptracking/src/pipeline.cc
@@ -29,34 +29,64 @@ void Pipeline::SetInput(std::string& input_video) {
  input_.push_back(input_video);
 }
+void Pipeline::ClearInput() {
+  input_.clear();
+  stream_.clear();
+}
 void Pipeline::SelectModel(const std::string& scene,
                            const bool tiny_obj,
-                           const bool is_mct) {
+                           const bool is_mtmct) {
+  // Fills track_model_dir_ from scene/tiny_obj and, when is_mtmct is set,
+  // det_model_dir_/reid_model_dir_ as well. Throws a C string for the
+  // unsupported multiclass + multi-camera combination.
-  // Single camera model
+  // Single camera model, based on FairMot
-  // use deepsort for multiclass tracking
-  // use fairmot for single class tracking
   if (scene == "pedestrian") {
+    if (tiny_obj) {
+      track_model_dir_ = "../pedestrian_track_tiny";
+    } else {
       track_model_dir_ = "../pedestrian_track";
+    }
-  } else if (scene != "vehicle") {
+  } else if (scene == "vehicle") {
+    // Must be an equality test: with "!=" every non-pedestrian scene landed
+    // here and the multiclass branch below could never be reached.
+    if (tiny_obj) {
+      track_model_dir_ = "../vehicle_track_tiny";
+    } else {
       track_model_dir_ = "../vehicle_track";
+    }
   } else if (scene == "multiclass") {
-      det_model_dir_ = "../multiclass_det";
+    if (tiny_obj) {
-      reid_model_dir_ = "../multiclass_reid";
+      track_model_dir_ = "../multiclass_track_tiny";
+    } else {
+      track_model_dir_ = "../multiclass_track";
+    }
   }
-  // Multi-camera model
+  // Multi-camera model, based on PicoDet & LCNet
-  if (is_mct && scene == "pedestrian") {
+  if (is_mtmct && scene == "pedestrian") {
-      mct_model_dir_ = "../pedestrian_mct";
+    det_model_dir_ = "../pedestrian_det";
-  } else if (is_mct && scene == "vehicle") {
+    reid_model_dir_ = "../pedestrian_reid";
-      mct_model_dir_ = "../vehicle_mct";
+  } else if (is_mtmct && scene == "vehicle") {
-  } else if (is_mct && scene == "multiclass") {
+    det_model_dir_ = "../vehicle_det";
+    reid_model_dir_ = "../vehicle_reid";
+  } else if (is_mtmct && scene == "multiclass") {
       throw "Multi-camera tracking is not supported in multiclass scene now.";
-  } 
+  }  
 }
+void Pipeline::InitPredictor() {
+  if (track_model_dir_.empty() && det_model_dir_.empty()) {
+    throw "Predictor must receive track_model or det_model!";
+  }
+  if (!track_model_dir_.empty()) {
+    jde_sct_ = std::make_shared<PaddleDetection::JDEPredictor>(device_, track_model_dir_, threshold_, run_mode_, gpu_id_, use_mkldnn_, cpu_threads_, trt_calib_mode_);
+  }
+  if (!det_model_dir_.empty()) {
+    sde_sct_ = std::make_shared<PaddleDetection::SDEPredictor>(device_, det_model_dir_, reid_model_dir_, threshold_, run_mode_, gpu_id_, use_mkldnn_, cpu_threads_, trt_calib_mode_);
+  }
+}
 void Pipeline::Run() {
-  if (track_model_dir_.empty()) {
+  if (track_model_dir_.empty() && det_model_dir_.empty()) {
    std::cout << "Pipeline must use SelectModel before Run";
    return;
  }
@@ -65,24 +95,24 @@ void Pipeline::Run() {
    return;
  }
-  if (mct_model_dir_.empty()) {
+  if (!track_model_dir_.empty()) {
    // single camera
    if (input_.size() > 1) {
      throw "Single camera tracking except single video, but received %d", input_.size();
    }
-    PredictSCT(input_[0]);
+    PredictMOT(input_[0]);
  } else {
    // multi cameras
    if (input_.size() != 2) {
      throw "Multi camera tracking except two videos, but received %d", input_.size();
    }
-    PredictMCT(input_);
+    PredictMTMCT(input_);
  }
 }
-void Pipeline::PredictSCT(const std::string& video_path) {
-  PaddleDetection::Predictor sct(device_, track_model_dir_, det_model_dir_, reid_model_dir_, threshold_, run_mode_, gpu_id_, use_mkldnn_, cpu_threads_, trt_calib_mode_);
+void Pipeline::PredictMOT(const std::string& video_path) {
  // Open video
  cv::VideoCapture capture;
  capture.open(video_path.c_str());
@@ -96,9 +126,14 @@ void Pipeline::PredictSCT(const std::string& video_path) {
  int video_height = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_HEIGHT));
  int video_fps = static_cast<int>(capture.get(CV_CAP_PROP_FPS));
+  LOG(INFO) << "----------------------- Input info -----------------------";
+  LOG(INFO) << "video_width: " << video_width;
+  LOG(INFO) << "video_height: " << video_height;
+  LOG(INFO) << "input fps: " << video_fps;
  // Create VideoWriter for output
  cv::VideoWriter video_out;
-  std::string video_out_path = "mot_output.mp4";
+  std::string video_out_path = output_dir_ + OS_PATH_SEP + "mot_output.mp4";
  int fcc = cv::VideoWriter::fourcc('m','p','4','v');
  video_out.open(video_out_path.c_str(),
                 fcc, //0x00000021,
@@ -116,21 +151,30 @@ void Pipeline::PredictSCT(const std::string& video_path) {
  std::vector<int> in_count_list;
  std::vector<int> out_count_list;
  double times;
+  double total_time;
  // Capture all frames and do inference
  cv::Mat frame;
  int frame_id = 0;
+  std::vector<std::string> records;
+  records.push_back("result format: frame_id, track_id, x1, y1, w, h\n");
+  LOG(INFO) << "------------------- Predict info ------------------------";
  while (capture.read(frame)) {
    if (frame.empty()) {
      break;
    }
    std::vector<cv::Mat> imgs;
    imgs.push_back(frame);
-    printf("frame_id: %d\n", frame_id);
+    jde_sct_->Predict(imgs, threshold_, &result, &det_times);
-    sct.Predict(imgs, threshold_, &result, &det_times);
    frame_id += 1;
-    times = std::accumulate(det_times.begin(), det_times.end(), 0) / frame_id;
+    total_time = std::accumulate(det_times.begin(), det_times.end(), 0.);
+    times = total_time / frame_id;
+    LOG(INFO) << "frame_id: " << frame_id
+              << " predict time(s): "<< total_time / 1000;
-    cv::Mat out_im = PaddleDetection::VisualizeTrackResult(
+    cv::Mat out_img = PaddleDetection::VisualizeTrackResult(
        frame, result, 1000./times, frame_id);
    if (count_) {
@@ -139,17 +183,69 @@ void Pipeline::PredictSCT(const std::string& video_path) {
      PaddleDetection::FlowStatistic(result, frame_id, &count_list, &in_count_list, &out_count_list);
    }
    if (save_result_) {
-      PaddleDetection::SaveResult(result, output_dir_);
+      PaddleDetection::SaveMOTResult(result, frame_id, records);
    }
-    video_out.write(out_im);
+    video_out.write(out_img);
  }
  capture.release();
  video_out.release();
  PrintBenchmarkLog(det_times, frame_id);
-  printf("Visualized output saved as %s\n", video_out_path.c_str());
+  LOG(INFO) << "-------------------- Final Output info -------------------";
+  LOG(INFO) << "Total frame: " << frame_id;
+  LOG(INFO) << "Visualized output saved as " << video_out_path.c_str();
+  if (save_result_) {
+    // Dump the collected tracking records to <output_dir_>/mot_output.txt.
+    FILE * fp;
+    std::string result_output_path = output_dir_ + OS_PATH_SEP + "mot_output.txt";
+    if((fp = fopen(result_output_path.c_str(), "w+")) == NULL) {
+      printf("Open %s error.\n", result_output_path.c_str());
+      return;
+    }
+    // The index must start at 0; an uninitialized counter is undefined
+    // behaviour and may skip records or read out of bounds.
+    // fputs writes each record verbatim, so a '%' inside a record can never
+    // be interpreted as a format directive.
+    for (size_t l = 0; l < records.size(); ++l) {
+      fputs(records[l].c_str(), fp);
+    }
+    fclose(fp);
+    LOG(INFO) << "txt result output saved as " << result_output_path.c_str();
+  }
+}
+void Pipeline::PredictMTMCT(const std::vector<std::string> video_path) {
+  throw "Not Implement!";
+}
+// Track the objects of a single stream frame.
+//   img            - input frame
+//   frame_id       - index of the frame, forwarded to visualization,
+//                    counting and record saving
+//   out_img        - receives the visualized frame (see note in the body)
+//   records        - tracking records are appended when save_result_ is set
+//   *_count_list   - updated by FlowStatistic when count_ is set
+void Pipeline::RunMOTStream(const cv::Mat img, const int frame_id, cv::Mat out_img, std::vector<std::string>& records, std::vector<int>& count_list, std::vector<int>& in_count_list, std::vector<int>& out_count_list) {
+  PaddleDetection::MOTResult result;
+  // det_times is created fresh on every call, so it only ever describes the
+  // single frame processed here.
+  std::vector<double> det_times(3);
+  LOG(INFO) << "------------------- Predict info ------------------------";
+  std::vector<cv::Mat> imgs;
+  imgs.push_back(img);
+  jde_sct_->Predict(imgs, threshold_, &result, &det_times);
+  const double total_time = std::accumulate(det_times.begin(), det_times.end(), 0.);
+  LOG(INFO) << "frame_id: " << frame_id
+            << " predict time(s): "<< total_time / 1000;
+  // total_time already is the per-frame time; dividing it by frame_id would
+  // inflate the fps as the stream advances and divide by zero on frame 0.
+  const double fps = total_time > 0. ? 1000. / total_time : 0.;
+  // out_img is received by value, so assigning to it would only rebind the
+  // local header. copyTo writes into the caller's buffer when the caller
+  // pre-allocated out_img with the frame's size and type.
+  // NOTE(review): a cv::Mat& parameter would make this unconditional, but
+  // that requires changing the declaration in pipeline.h as well.
+  PaddleDetection::VisualizeTrackResult(
+    img, result, fps, frame_id).copyTo(out_img);
+  if (count_) {
+    // Count total number 
+    // Count in & out number
+    PaddleDetection::FlowStatistic(result, frame_id, &count_list, &in_count_list, &out_count_list);
+  }
+  // Exactly one frame contributed to det_times.
+  PrintBenchmarkLog(det_times, 1);
+  if (save_result_) {
+    PaddleDetection::SaveMOTResult(result, frame_id, records);
+  }
 }
-void Pipeline::PredictMCT(const std::vector<std::string> video_path) {
+void Pipeline::RunMTMCTStream(const std::vector<cv::Mat> imgs, std::vector<std::string>& records) {
  throw "Not Implement!";
 }
@@ -171,8 +267,8 @@ void Pipeline::PrintBenchmarkLog(std::vector<double> det_time, int img_num){
  LOG(INFO) << "cpu_math_library_num_threads: " << cpu_threads_;
  LOG(INFO) << "----------------------- Perf info ------------------------";
  LOG(INFO) << "Total number of predicted data: " << img_num
-            << " and total time spent(ms): "
+            << " and total time spent(s): "
-            << std::accumulate(det_time.begin(), det_time.end(), 0.);
+            << std::accumulate(det_time.begin(), det_time.end(), 0.) / 1000;
  img_num = std::max(1, img_num);
  LOG(INFO) << "preproce_time(ms): " << det_time[0] / img_num
            << ", inference_time(ms): " << det_time[1] / img_num

--- a/deploy/pptracking/src/postprocess.cc
+++ b/deploy/pptracking/src/postprocess.cc
@@ -104,9 +104,30 @@ void FlowStatistic(const MOTResult& results, const int frame_id,
  throw "Not Implement";
 }
-void SaveResult(const MOTResult& results, const std::string& output_dir) {
+// Append one text record per tracked box of frame `frame_id` to `records`.
+// Each record is "frame_id track_id x1 y1 w h\n", separated by single spaces.
+// Boxes whose width or height is zero are skipped.
+void SaveMOTResult(const MOTResult& results, const int frame_id, std::vector<std::string>& records) {
-  throw "Not Implement";
+  // result format: frame_id, track_id, x1, y1, w, h
+  for (size_t i = 0; i < results.size(); ++i) {
+    const MOTTrack& mot_track = results[i];
+    const int ids = mot_track.ids;
+    const Rect& rects = mot_track.rects;
+    const float x1 = rects.left;
+    const float y1 = rects.top;
+    const float w = rects.right - x1;
+    const float h = rects.bottom - y1;
+    if (w == 0 || h == 0) {
+      continue;
+    }
+    std::ostringstream os;
+    // Keep a separator after the track id; without it the id and x1 are
+    // fused into one token and the record cannot be parsed back.
+    os << frame_id << " " << ids << " "
+       << x1 << " " << y1 << " "
+       << w << " " << h <<"\n";
+    records.push_back(os.str());
+  }
 }
 } // namespace PaddleDetection