Add files via upload

2b0dff94 · Bubbliiiing · GitHub · 0860d85d · 2b0dff94 · 2b0dff94
隐藏空白更改
内联并排

Showing with 128 addition and 27 deletion

kmeans_for_anchors.py kmeans_for_anchors.py +5 -3

predict.py predict.py +93 -24

yolo.py yolo.py +30 -0

未找到文件。
--- a/kmeans_for_anchors.py
+++ b/kmeans_for_anchors.py
+#-------------------------------------------------------------------------------------------------#
+#   kmeans虽然会对数据集中的框进行聚类，但是很多数据集由于框的大小相近，聚类出来的9个框相差不大，
+#   这样的框反而不利于模型的训练。因为不同的特征层适合不同大小的先验框，越浅的特征层适合越大的先验框
+#   原始网络的先验框已经按大中小比例分配好了，不进行聚类也会有非常好的效果。
+#-------------------------------------------------------------------------------------------------#
 import glob
-import random
 import xml.etree.ElementTree as ET

 import numpy as np

-
 def cas_iou(box,cluster):
    x = np.minimum(cluster[:,0],box[0])
    y = np.minimum(cluster[:,1],box[1])
@@ -20,7 +23,6 @@ def cas_iou(box,cluster):
 def avg_iou(box,cluster):
    return np.mean([np.max(cas_iou(box[i],cluster)) for i in range(box.shape[0])])

-
 def kmeans(box,k):
    # 取出一共有多少框
    row = box.shape[0]

--- a/predict.py
+++ b/predict.py
-'''
-predict.py有几个注意点
-1、该代码无法直接进行批量预测，如果想要批量预测，可以利用os.listdir()遍历文件夹，利用Image.open打开图片文件进行预测。
-具体流程可以参考get_dr_txt.py，在get_dr_txt.py即实现了遍历还实现了目标信息的保存。
-2、如果想要进行检测完的图片的保存，利用r_image.save("img.jpg")即可保存，直接在predict.py里进行修改即可。 
-3、如果想要获得预测框的坐标，可以进入yolo.detect_image函数，在绘图部分读取top，left，bottom，right这四个值。
-4、如果想要利用预测框截取下目标，可以进入yolo.detect_image函数，在绘图部分利用获取到的top，left，bottom，right这四个值
-在原图上利用矩阵的方式进行截取。
-5、如果想要在预测图上写额外的字，比如检测到的特定目标的数量，可以进入yolo.detect_image函数，在绘图部分对predicted_class进行判断，
-比如判断if predicted_class == 'car': 即可判断当前目标是否为车，然后记录数量即可。利用draw.text即可写字。
-'''
-from keras.layers import Input
+#----------------------------------------------------#
+#   对视频中的predict.py进行了修改，
+#   将单张图片预测、摄像头检测和FPS测试功能
+#   整合到了一个py文件中，通过指定mode进行模式的修改。
+#----------------------------------------------------#
+import time
+
+import cv2
+import numpy as np
 from PIL import Image

-from nets.yolo4 import yolo_body
 from yolo import YOLO

-yolo = YOLO()
+if __name__ == "__main__":
+    yolo = YOLO()
+    #-------------------------------------------------------------------------#
+    #   mode用于指定测试的模式：
+    #   'predict'表示单张图片预测
+    #   'video'表示视频检测
+    #   'fps'表示测试fps
+    #-------------------------------------------------------------------------#
+    mode = "predict"
+    #-------------------------------------------------------------------------#
+    #   video_path用于指定视频的路径，当video_path=0时表示检测摄像头
+    #   video_save_path表示视频保存的路径，当video_save_path=""时表示不保存
+    #   video_fps用于保存的视频的fps
+    #   video_path、video_save_path和video_fps仅在mode='video'时有效
+    #   保存视频时需要ctrl+c退出才会完成完整的保存步骤，不可直接结束程序。
+    #-------------------------------------------------------------------------#
+    video_path      = 0
+    video_save_path = ""
+    video_fps       = 25.0
+
+    if mode == "predict":
+        '''
+        1、该代码无法直接进行批量预测，如果想要批量预测，可以利用os.listdir()遍历文件夹，利用Image.open打开图片文件进行预测。
+        具体流程可以参考get_dr_txt.py，在get_dr_txt.py即实现了遍历还实现了目标信息的保存。
+        2、如果想要进行检测完的图片的保存，利用r_image.save("img.jpg")即可保存，直接在predict.py里进行修改即可。 
+        3、如果想要获得预测框的坐标，可以进入yolo.detect_image函数，在绘图部分读取top，left，bottom，right这四个值。
+        4、如果想要利用预测框截取下目标，可以进入yolo.detect_image函数，在绘图部分利用获取到的top，left，bottom，right这四个值
+        在原图上利用矩阵的方式进行截取。
+        5、如果想要在预测图上写额外的字，比如检测到的特定目标的数量，可以进入yolo.detect_image函数，在绘图部分对predicted_class进行判断，
+        比如判断if predicted_class == 'car': 即可判断当前目标是否为车，然后记录数量即可。利用draw.text即可写字。
+        '''
+        while True:
+            img = input('Input image filename:')
+            try:
+                image = Image.open(img)
+            except:
+                print('Open Error! Try again!')
+                continue
+            else:
+                r_image = yolo.detect_image(image)
+                r_image.show()
+
+    elif mode == "video":
+        capture=cv2.VideoCapture(video_path)
+        if video_save_path!="":
+            fourcc = cv2.VideoWriter_fourcc(*'XVID')
+            size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+            out = cv2.VideoWriter(video_save_path, fourcc, video_fps, size)
+
+        fps = 0.0
+        while(True):
+            t1 = time.time()
+            # 读取某一帧
+            ref,frame=capture.read()
+            # 格式转变，BGRtoRGB
+            frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
+            # 转变成Image
+            frame = Image.fromarray(np.uint8(frame))
+            # 进行检测
+            frame = np.array(yolo.detect_image(frame))
+            # RGBtoBGR满足opencv显示格式
+            frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)
+            
+            fps  = ( fps + (1./(time.time()-t1)) ) / 2
+            print("fps= %.2f"%(fps))
+            frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+            
+            cv2.imshow("video",frame)
+            c= cv2.waitKey(1) & 0xff 
+            if video_save_path!="":
+                out.write(frame)
+
+            if c==27:
+                capture.release()
+                break
+        capture.release()
+        out.release()
+        cv2.destroyAllWindows()

-while True:
-    img = input('Input image filename:')
-    try:
-        image = Image.open(img)
-    except:
-        print('Open Error! Try again!')
-        continue
+    elif mode == "fps":
+        test_interval = 100
+        img = Image.open('img/street.jpg')
+        tact_time = yolo.get_FPS(img, test_interval)
+        print(str(tact_time) + ' seconds, ' + str(1/tact_time) + 'FPS, @batch_size 1')
    else:
-        r_image = yolo.detect_image(image)
-        r_image.show()
-yolo.close_session()
+        raise AssertionError("Please specify the correct mode: 'predict', 'video' or 'fps'.")
--- a/yolo.py
+++ b/yolo.py
 import colorsys
 import os
+import time

 import numpy as np
 from keras import backend as K
@@ -209,5 +210,34 @@ class YOLO(object):

        return image

+    def get_FPS(self, image, test_interval):
+        if self.letterbox_image:
+            boxed_image = letterbox_image(image, (self.model_image_size[1],self.model_image_size[0]))
+        else:
+            boxed_image = image.convert('RGB')
+            boxed_image = boxed_image.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC)
+        image_data = np.array(boxed_image, dtype='float32')
+        image_data /= 255.
+        image_data = np.expand_dims(image_data, 0)
+
+        out_boxes, out_scores, out_classes = self.sess.run(
+            [self.boxes, self.scores, self.classes],
+            feed_dict={
+                self.yolo_model.input: image_data,
+                self.input_image_shape: [image.size[1], image.size[0]],
+                K.learning_phase(): 0})
+
+        t1 = time.time()
+        for _ in range(test_interval):
+            out_boxes, out_scores, out_classes = self.sess.run(
+                [self.boxes, self.scores, self.classes],
+                feed_dict={
+                    self.yolo_model.input: image_data,
+                    self.input_image_shape: [image.size[1], image.size[0]],
+                    K.learning_phase(): 0})
+        t2 = time.time()
+        tact_time = (t2 - t1) / test_interval
+        return tact_time
+
    def close_session(self):
        self.sess.close()