From 9737390102600d2e9fd440553d857ebdc6d1698f Mon Sep 17 00:00:00 2001
From: shangliang Xu <ghostxsl@users.noreply.github.com>
Date: Thu, 28 Oct 2021 20:23:26 +0800
Subject: [PATCH] [depoly] fix inconsistency between cpp and python (#4351)

---
 deploy/cpp/src/main.cc                    | 35 +++++++++++++------
 deploy/cpp/src/main_jde.cc                | 26 ++++++++++----
 deploy/cpp/src/main_keypoint.cc           | 41 ++++++++++++++++-------
 deploy/python/det_keypoint_unite_infer.py | 14 ++++----
 deploy/python/infer.py                    | 20 +++++------
 deploy/python/keypoint_infer.py           | 14 ++++----
 deploy/python/mot_jde_infer.py            | 16 ++++-----
 deploy/python/mot_keypoint_unite_infer.py | 14 ++++----
 deploy/python/mot_sde_infer.py            | 14 ++++----
 9 files changed, 120 insertions(+), 74 deletions(-)
diff --git a/deploy/cpp/src/main.cc b/deploy/cpp/src/main.cc
index f9a267667..058d7556d 100644
--- a/deploy/cpp/src/main.cc
+++ b/deploy/cpp/src/main.cc
@@ -128,27 +128,36 @@ static void MkDirs(const std::string& path) {
 }
 
 void PredictVideo(const std::string& video_path,
-                  PaddleDetection::ObjectDetector* det) {
+                  PaddleDetection::ObjectDetector* det,
+                  const std::string& output_dir = "output") {
   // Open video
   cv::VideoCapture capture;
+  std::string video_out_name = "output.mp4";
   if (FLAGS_camera_id != -1){
     capture.open(FLAGS_camera_id);
   }else{
     capture.open(video_path.c_str());
+    video_out_name = video_path.substr(video_path.find_last_of(OS_PATH_SEP) + 1);
   }
   if (!capture.isOpened()) {
     printf("can not open video : %s\n", video_path.c_str());
     return;
   }
 
-  // Get Video info : resolution, fps
+  // Get Video info : resolution, fps, frame count
   int video_width = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_WIDTH));
   int video_height = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_HEIGHT));
   int video_fps = static_cast<int>(capture.get(CV_CAP_PROP_FPS));
+  int video_frame_count = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_COUNT));
+  printf("fps: %d, frame_count: %d\n", video_fps, video_frame_count);
 
   // Create VideoWriter for output
   cv::VideoWriter video_out;
-  std::string video_out_path = "output.mp4";
+  std::string video_out_path(output_dir);
+  if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
+    video_out_path += OS_PATH_SEP;
+  }
+  video_out_path += video_out_name;
   video_out.open(video_out_path.c_str(),
                  0x00000021,
                  video_fps,
@@ -166,7 +175,7 @@ void PredictVideo(const std::string& video_path,
   auto colormap = PaddleDetection::GenerateColorMap(labels.size());
   // Capture all frames and do inference
   cv::Mat frame;
-  int frame_id = 0;
+  int frame_id = 1;
   bool is_rbox = false;
   while (capture.read(frame)) {
     if (frame.empty()) {
@@ -174,8 +183,14 @@ void PredictVideo(const std::string& video_path,
     }
     std::vector<cv::Mat> imgs;
     imgs.push_back(frame);
-    det->Predict(imgs, 0.5, 0, 1, &result, &bbox_num, &det_times);
+    printf("detect frame: %d\n", frame_id);
+    det->Predict(imgs, FLAGS_threshold, 0, 1, &result, &bbox_num, &det_times);
+    std::vector<PaddleDetection::ObjectResult> out_result;
     for (const auto& item : result) {
+      if (item.confidence < FLAGS_threshold || item.class_id == -1) {
+            continue;
+      }
+      out_result.push_back(item);
       if (item.rect.size() > 6){
       is_rbox = true;
       printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
@@ -202,7 +217,7 @@ void PredictVideo(const std::string& video_path,
    }
 
    cv::Mat out_im = PaddleDetection::VisualizeResult(
-        frame, result, labels, colormap, is_rbox);
+        frame, out_result, labels, colormap, is_rbox);
 
     video_out.write(out_im);
     frame_id += 1;
@@ -337,12 +352,12 @@ int main(int argc, char** argv) {
                         FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape,
 			FLAGS_trt_calib_mode);
   // Do inference on input video or image
+  if (!PathExists(FLAGS_output_dir)) {
+      MkDirs(FLAGS_output_dir);
+  }
   if (!FLAGS_video_file.empty() || FLAGS_camera_id != -1) {
-    PredictVideo(FLAGS_video_file, &det);
+    PredictVideo(FLAGS_video_file, &det, FLAGS_output_dir);
   } else if (!FLAGS_image_file.empty() || !FLAGS_image_dir.empty()) {
-    if (!PathExists(FLAGS_output_dir)) {
-      MkDirs(FLAGS_output_dir);
-    }
     std::vector<std::string> all_img_paths;
     std::vector<cv::String> cv_all_img_paths;
     if (!FLAGS_image_file.empty()) {
diff --git a/deploy/cpp/src/main_jde.cc b/deploy/cpp/src/main_jde.cc
index 213e2043d..8010f8086 100644
--- a/deploy/cpp/src/main_jde.cc
+++ b/deploy/cpp/src/main_jde.cc
@@ -128,27 +128,36 @@ static void MkDirs(const std::string& path) {
 }
 
 void PredictVideo(const std::string& video_path,
-                  PaddleDetection::JDEDetector* mot) {
+                  PaddleDetection::JDEDetector* mot,
+                  const std::string& output_dir = "output") {
   // Open video
   cv::VideoCapture capture;
+  std::string video_out_name = "output.mp4";
   if (FLAGS_camera_id != -1){
     capture.open(FLAGS_camera_id);
   }else{
     capture.open(video_path.c_str());
+    video_out_name = video_path.substr(video_path.find_last_of(OS_PATH_SEP) + 1);
   }
   if (!capture.isOpened()) {
     printf("can not open video : %s\n", video_path.c_str());
     return;
   }
 
-  // Get Video info : resolution, fps
+  // Get Video info : resolution, fps, frame count
   int video_width = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_WIDTH));
   int video_height = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_HEIGHT));
   int video_fps = static_cast<int>(capture.get(CV_CAP_PROP_FPS));
+  int video_frame_count = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_COUNT));
+  printf("fps: %d, frame_count: %d\n", video_fps, video_frame_count);
 
   // Create VideoWriter for output
   cv::VideoWriter video_out;
-  std::string video_out_path = "mot_output.mp4";
+  std::string video_out_path(output_dir);
+  if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
+    video_out_path += OS_PATH_SEP;
+  }
+  video_out_path += video_out_name;
   video_out.open(video_out_path.c_str(),
                  0x00000021,
                  video_fps,
@@ -164,14 +173,15 @@ void PredictVideo(const std::string& video_path,
   double times;
   // Capture all frames and do inference
   cv::Mat frame;
-  int frame_id = 0;
+  int frame_id = 1;
   while (capture.read(frame)) {
     if (frame.empty()) {
       break;
     }
     std::vector<cv::Mat> imgs;
     imgs.push_back(frame);
-    mot->Predict(imgs, 0.5, 0, 1, &result, &det_times);
+    printf("detect frame: %d\n", frame_id);
+    mot->Predict(imgs, FLAGS_threshold, 0, 1, &result, &det_times);
     frame_id += 1;
     times = std::accumulate(det_times.begin(), det_times.end(), 0) / frame_id;
 
@@ -215,7 +225,9 @@ int main(int argc, char** argv) {
                         FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size,FLAGS_gpu_id,
                         FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape,
 			FLAGS_trt_calib_mode);
-     
-  PredictVideo(FLAGS_video_file, &mot);
+  if (!PathExists(FLAGS_output_dir)) {
+      MkDirs(FLAGS_output_dir);
+  }
+  PredictVideo(FLAGS_video_file, &mot, FLAGS_output_dir);
   return 0;
 }
diff --git a/deploy/cpp/src/main_keypoint.cc b/deploy/cpp/src/main_keypoint.cc
index facd787dd..a404813f4 100644
--- a/deploy/cpp/src/main_keypoint.cc
+++ b/deploy/cpp/src/main_keypoint.cc
@@ -138,27 +138,36 @@ static void MkDirs(const std::string& path) {
 
 void PredictVideo(const std::string& video_path,
                   PaddleDetection::ObjectDetector* det,
-                  PaddleDetection::KeyPointDetector* keypoint) {
+                  PaddleDetection::KeyPointDetector* keypoint,
+                  const std::string& output_dir = "output") {
   // Open video
   cv::VideoCapture capture;
+  std::string video_out_name = "output.mp4";
   if (FLAGS_camera_id != -1){
     capture.open(FLAGS_camera_id);
   }else{
     capture.open(video_path.c_str());
+    video_out_name = video_path.substr(video_path.find_last_of(OS_PATH_SEP) + 1);
   }
   if (!capture.isOpened()) {
     printf("can not open video : %s\n", video_path.c_str());
     return;
   }
 
-  // Get Video info : resolution, fps
+  // Get Video info : resolution, fps, frame count
   int video_width = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_WIDTH));
   int video_height = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_HEIGHT));
   int video_fps = static_cast<int>(capture.get(CV_CAP_PROP_FPS));
+  int video_frame_count = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_COUNT));
+  printf("fps: %d, frame_count: %d\n", video_fps, video_frame_count);
 
   // Create VideoWriter for output
   cv::VideoWriter video_out;
-  std::string video_out_path = "output.mp4";
+  std::string video_out_path(output_dir);
+  if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
+    video_out_path += OS_PATH_SEP;
+  }
+  video_out_path += video_out_name;
   video_out.open(video_out_path.c_str(),
                  0x00000021,
                  video_fps,
@@ -184,7 +193,7 @@ void PredictVideo(const std::string& video_path,
   std::vector<int> colormap_kpts = PaddleDetection::GenerateColorMap(20);
   // Capture all frames and do inference
   cv::Mat frame;
-  int frame_id = 0;
+  int frame_id = 1;
   bool is_rbox = false;
   while (capture.read(frame)) {
     if (frame.empty()) {
@@ -192,8 +201,14 @@ void PredictVideo(const std::string& video_path,
     }
     std::vector<cv::Mat> imgs;
     imgs.push_back(frame);
-    det->Predict(imgs, 0.5, 0, 1, &result, &bbox_num, &det_times);
+    printf("detect frame: %d\n", frame_id);
+    det->Predict(imgs, FLAGS_threshold, 0, 1, &result, &bbox_num, &det_times);
+    std::vector<PaddleDetection::ObjectResult> out_result;
     for (const auto& item : result) {
+      if (item.confidence < FLAGS_threshold || item.class_id == -1) {
+          continue;
+      }
+      out_result.push_back(item);
       if (item.rect.size() > 6){
       is_rbox = true;
       printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
@@ -221,9 +236,9 @@ void PredictVideo(const std::string& video_path,
 
     if(keypoint)
     {
-      int imsize = result.size();
+      int imsize = out_result.size();
       for (int i=0; i<imsize; i++){
-        auto item = result[i];
+        auto item = out_result[i];
         cv::Mat crop_img;
         std::vector<double> keypoint_times;
         std::vector<int> rect = {item.rect[0], item.rect[1], item.rect[2], item.rect[3]};
@@ -239,7 +254,7 @@ void PredictVideo(const std::string& video_path,
 
         if (imgs_kpts.size()==FLAGS_batch_size_keypoint || ((i==imsize-1)&&!imgs_kpts.empty()))
         {
-          keypoint->Predict(imgs_kpts, center_bs, scale_bs, 0.5, 0, 1, &result_kpts, &keypoint_times);
+          keypoint->Predict(imgs_kpts, center_bs, scale_bs, FLAGS_threshold, 0, 1, &result_kpts, &keypoint_times);
           imgs_kpts.clear();
           center_bs.clear();
           scale_bs.clear();
@@ -251,7 +266,7 @@ void PredictVideo(const std::string& video_path,
     else{
       // Visualization result
       cv::Mat out_im = PaddleDetection::VisualizeResult(
-          frame, result, labels, colormap, is_rbox);
+          frame, out_result, labels, colormap, is_rbox);
       video_out.write(out_im);
     }
 
@@ -450,12 +465,12 @@ int main(int argc, char** argv) {
 			FLAGS_trt_calib_mode, FLAGS_use_dark);
   }
   // Do inference on input video or image
+  if (!PathExists(FLAGS_output_dir)) {
+      MkDirs(FLAGS_output_dir);
+  }
   if (!FLAGS_video_file.empty() || FLAGS_camera_id != -1) {
-    PredictVideo(FLAGS_video_file, &det, keypoint);
+    PredictVideo(FLAGS_video_file, &det, keypoint, FLAGS_output_dir);
   } else if (!FLAGS_image_file.empty() || !FLAGS_image_dir.empty()) {
-    if (!PathExists(FLAGS_output_dir)) {
-      MkDirs(FLAGS_output_dir);
-    }
     std::vector<std::string> all_img_paths;
     std::vector<cv::String> cv_all_img_paths;
     if (!FLAGS_image_file.empty()) {
diff --git a/deploy/python/det_keypoint_unite_infer.py b/deploy/python/det_keypoint_unite_infer.py
index c5d319d4a..6b81a6f78 100644
--- a/deploy/python/det_keypoint_unite_infer.py
+++ b/deploy/python/det_keypoint_unite_infer.py
@@ -133,22 +133,24 @@ def topdown_unite_predict_video(detector,
                                 topdown_keypoint_detector,
                                 camera_id,
                                 keypoint_batch_size=1):
+    video_name = 'output.mp4'
     if camera_id != -1:
         capture = cv2.VideoCapture(camera_id)
-        video_name = 'output.mp4'
     else:
         capture = cv2.VideoCapture(FLAGS.video_file)
         video_name = os.path.splitext(os.path.basename(FLAGS.video_file))[
             0] + '.mp4'
-    fps = 30
+    # Get Video info : resolution, fps, frame count
     width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    # yapf: disable
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    # yapf: enable
+    fps = int(capture.get(cv2.CAP_PROP_FPS))
+    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
+    print("fps: %d, frame_count: %d" % (fps, frame_count))
+
     if not os.path.exists(FLAGS.output_dir):
         os.makedirs(FLAGS.output_dir)
     out_path = os.path.join(FLAGS.output_dir, video_name)
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
     index = 0
     while (1):
@@ -156,7 +158,7 @@ def topdown_unite_predict_video(detector,
         if not ret:
             break
         index += 1
-        print('detect frame:%d' % (index))
+        print('detect frame: %d' % (index))
 
         frame2 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         results = detector.predict([frame2], FLAGS.det_threshold)
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
index ed7eb66f8..bc268c565 100644
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -664,30 +664,30 @@ def predict_image(detector, image_list, batch_size=1):
 
 
 def predict_video(detector, camera_id):
+    video_out_name = 'output.mp4'
     if camera_id != -1:
         capture = cv2.VideoCapture(camera_id)
-        video_name = 'output.mp4'
     else:
         capture = cv2.VideoCapture(FLAGS.video_file)
-        video_name = os.path.split(FLAGS.video_file)[-1]
-    fps = 30
-    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
-    print('frame_count', frame_count)
+        video_out_name = os.path.split(FLAGS.video_file)[-1]
+    # Get Video info : resolution, fps, frame count
     width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    # yapf: disable
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    # yapf: enable
+    fps = int(capture.get(cv2.CAP_PROP_FPS))
+    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
+    print("fps: %d, frame_count: %d" % (fps, frame_count))
+
     if not os.path.exists(FLAGS.output_dir):
         os.makedirs(FLAGS.output_dir)
-    out_path = os.path.join(FLAGS.output_dir, video_name)
+    out_path = os.path.join(FLAGS.output_dir, video_out_name)
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
     index = 1
     while (1):
         ret, frame = capture.read()
         if not ret:
             break
-        print('detect frame:%d' % (index))
+        print('detect frame: %d' % (index))
         index += 1
         results = detector.predict([frame], FLAGS.threshold)
         im = visualize_box_mask(
diff --git a/deploy/python/keypoint_infer.py b/deploy/python/keypoint_infer.py
index 6d325cc6f..6594bdbfd 100644
--- a/deploy/python/keypoint_infer.py
+++ b/deploy/python/keypoint_infer.py
@@ -284,28 +284,30 @@ def predict_image(detector, image_list):
 
 
 def predict_video(detector, camera_id):
+    video_name = 'output.mp4'
     if camera_id != -1:
         capture = cv2.VideoCapture(camera_id)
-        video_name = 'output.mp4'
     else:
         capture = cv2.VideoCapture(FLAGS.video_file)
         video_name = os.path.split(FLAGS.video_file)[-1]
-    fps = 30
+    # Get Video info : resolution, fps, frame count
     width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    # yapf: disable
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    # yapf: enable
+    fps = int(capture.get(cv2.CAP_PROP_FPS))
+    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
+    print("fps: %d, frame_count: %d" % (fps, frame_count))
+
     if not os.path.exists(FLAGS.output_dir):
         os.makedirs(FLAGS.output_dir)
     out_path = os.path.join(FLAGS.output_dir, video_name + '.mp4')
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
     index = 1
     while (1):
         ret, frame = capture.read()
         if not ret:
             break
-        print('detect frame:%d' % (index))
+        print('detect frame: %d' % (index))
         index += 1
         results = detector.predict([frame], FLAGS.threshold)
         im = draw_pose(
diff --git a/deploy/python/mot_jde_infer.py b/deploy/python/mot_jde_infer.py
index f4369fbcf..80de133b0 100644
--- a/deploy/python/mot_jde_infer.py
+++ b/deploy/python/mot_jde_infer.py
@@ -212,24 +212,24 @@ def predict_image(detector, image_list):
 
 
 def predict_video(detector, camera_id):
+    video_name = 'mot_output.mp4'
     if camera_id != -1:
         capture = cv2.VideoCapture(camera_id)
-        video_name = 'mot_output.mp4'
     else:
         capture = cv2.VideoCapture(FLAGS.video_file)
         video_name = os.path.split(FLAGS.video_file)[-1]
-    fps = 30
-    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
-    print('frame_count', frame_count)
+    # Get Video info : resolution, fps, frame count
     width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    # yapf: disable
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    # yapf: enable
+    fps = int(capture.get(cv2.CAP_PROP_FPS))
+    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
+    print("fps: %d, frame_count: %d" % (fps, frame_count))
+
     if not os.path.exists(FLAGS.output_dir):
         os.makedirs(FLAGS.output_dir)
     out_path = os.path.join(FLAGS.output_dir, video_name)
     if not FLAGS.save_images:
+        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
         writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
     frame_id = 0
     timer = MOTTimer()
@@ -270,7 +270,7 @@ def predict_video(detector, camera_id):
             write_mot_results(result_filename, [results[-1]])
 
         frame_id += 1
-        print('detect frame:%d' % (frame_id))
+        print('detect frame: %d' % (frame_id))
         if camera_id != -1:
             cv2.imshow('Tracking Detection', im)
             if cv2.waitKey(1) & 0xFF == ord('q'):
diff --git a/deploy/python/mot_keypoint_unite_infer.py b/deploy/python/mot_keypoint_unite_infer.py
index f8cb363fa..58d28237a 100644
--- a/deploy/python/mot_keypoint_unite_infer.py
+++ b/deploy/python/mot_keypoint_unite_infer.py
@@ -126,18 +126,18 @@ def mot_keypoint_unite_predict_video(mot_model,
     else:
         capture = cv2.VideoCapture(FLAGS.video_file)
         video_name = os.path.split(FLAGS.video_file)[-1]
-    fps = 30
-    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
-    print('frame_count', frame_count)
+    # Get Video info : resolution, fps, frame count
     width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    # yapf: disable
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    # yapf: enable
+    fps = int(capture.get(cv2.CAP_PROP_FPS))
+    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
+    print("fps: %d, frame_count: %d" % (fps, frame_count))
+
     if not os.path.exists(FLAGS.output_dir):
         os.makedirs(FLAGS.output_dir)
     out_path = os.path.join(FLAGS.output_dir, video_name)
     if not FLAGS.save_images:
+        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
         writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
     frame_id = 0
     timer_mot = FPSTimer()
@@ -195,7 +195,7 @@ def mot_keypoint_unite_predict_video(mot_model,
         im = np.array(online_im)
 
         frame_id += 1
-        print('detect frame:%d' % (frame_id))
+        print('detect frame: %d' % (frame_id))
 
         if FLAGS.save_images:
             save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2])
diff --git a/deploy/python/mot_sde_infer.py b/deploy/python/mot_sde_infer.py
index dd1dbe928..ca08569bd 100644
--- a/deploy/python/mot_sde_infer.py
+++ b/deploy/python/mot_sde_infer.py
@@ -355,18 +355,18 @@ def predict_video(detector, reid_model, camera_id):
     else:
         capture = cv2.VideoCapture(FLAGS.video_file)
         video_name = os.path.split(FLAGS.video_file)[-1]
-    fps = 30
-    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
-    print('frame_count', frame_count)
+    # Get Video info : resolution, fps, frame count
     width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    # yapf: disable
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    # yapf: enable
+    fps = int(capture.get(cv2.CAP_PROP_FPS))
+    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
+    print("fps: %d, frame_count: %d" % (fps, frame_count))
+
     if not os.path.exists(FLAGS.output_dir):
         os.makedirs(FLAGS.output_dir)
     out_path = os.path.join(FLAGS.output_dir, video_name)
     if not FLAGS.save_images:
+        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
         writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
     frame_id = 0
     timer = MOTTimer()
@@ -425,7 +425,7 @@ def predict_video(detector, reid_model, camera_id):
             write_mot_results(result_filename, [result])
 
         frame_id += 1
-        print('detect frame:%d' % (frame_id))
+        print('detect frame: %d' % (frame_id))
         if camera_id != -1:
             cv2.imshow('Tracking Detection', im)
             if cv2.waitKey(1) & 0xFF == ord('q'):
-- 
GitLab