CSDN 技术社区 / skill_tree_opencv
Commit c9967a0f
Authored December 15, 2021 by 幻灰龙

Merge branch 'zhzhi-master' into 'master'

Upload the vehicle detection exercise. See merge request !30

Parents: cbe28620, 699801ef

8 changed files, with 547 additions and 0 deletions (+547 −0)
- data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/MY_TEST/8.h264 (+0 −0)
- data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/config.json (+6 −0)
- data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/obj.names (+5 −0)
- data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/opencv-yolo-inference-vehicle.md (+250 −0)
- data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/opencv-yolo-inference-vehicle.py (+104 −0)
- data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/vehicle-detection.gif (+0 −0)
- data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/yolov3-tiny.cfg (+182 −0)
- data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/yolov3-tiny.weights (+0 −0)
data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/MY_TEST/8.h264 (new file, mode 100755)

File added.
data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/config.json (new file, mode 100644)

```json
{
  "keywords": [],
  "children": [],
  "export": []
}
```

(No newline at end of file.)
data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/obj.names (new file, mode 100755)

```
person
car
bus
truck
2wheel
```
data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/opencv-yolo-inference-vehicle.md (new file, mode 100755)

# opencv-yolo-tiny vehicle detection

The `opencv.dnn` module supports inference for most deep-learning model formats: it can directly load models trained with common frameworks such as `tensorflow`, `darknet`, and `pytorch`, run inference, and return the model outputs. The `opencv.dnn` module is already used as a deployment option in real-world industrial applications.

![](./vehicle-detection.gif)

The loading and inference workflow is:

1. Load the network: read the model weights and the network configuration files.
2. Create the input; the `opencv.dnn` module requires image input in a specific format.
3. Run inference.
4. Parse the output.
5. Apply and display the output.

Below, the `opencv.dnn` module loads the `yolov3-tiny` vehicle detection model and runs inference. Please fill in the TO-DO code (this exercise tests parsing the output of `yolo`-family detection models):
```python
import numpy as np
import cv2
import os
import time
from numpy import array

# some variables
weightsPath = './yolov3-tiny.weights'
configPath = './yolov3-tiny.cfg'
labelsPath = './obj.names'
LABELS = open(labelsPath).read().strip().split("\n")
colors = [(255, 255, 0), (255, 0, 255), (0, 255, 255), (0, 255, 0), (255, 0, 255)]
min_score = 0.3

# read darknet weights using opencv.dnn module
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
# read video using opencv
cap = cv2.VideoCapture('./MY_TEST/8.h264')

# loop for inference
while True:
    boxes = []
    confidences = []
    classIDs = []
    start = time.time()
    ret, frame = cap.read()
    frame = cv2.resize(frame, (744, 416), interpolation=cv2.INTER_CUBIC)
    image = frame
    (H, W) = image.shape[0:2]
    # get output layer names
    ln = net.getLayerNames()
    out = net.getUnconnectedOutLayers()
    x = []
    for i in out:
        x.append(ln[i[0] - 1])
    ln = x
    # create input data package with current frame
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    # set as input
    net.setInput(blob)
    # run!
    layerOutputs = net.forward(ln)
    # post-process
    # parsing the output and run nms
    # TO-DO your code...
    cv2.namedWindow('Image', cv2.WINDOW_NORMAL)
    cv2.imshow("Image", image)
    # print fps
    stop = time.time()
    fps = 1 / (stop - start)
    print('fps>>> :', fps)
    # normal codes when displaying video
    c = cv2.waitKey(1) & 0xff
    if c == 27:
        cap.release()
        break

cv2.destroyAllWindows()
```
## Answer
```python
for output in layerOutputs:
    for detection in output:
        scores = detection[5:]
        # class id
        classID = np.argmax(scores)
        # get score by classid
        score = scores[classID]
        # ignore if score is too low
        if score >= min_score:
            box = detection[0:4] * np.array([W, H, W, H])
            (centerX, centerY, width, height) = box.astype("int")
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))
            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(score))
            classIDs.append(classID)

# run nms using opencv.dnn module
idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3)
# render on image
idxs = array(idxs)
box_seq = idxs.flatten()
if len(idxs) > 0:
    for seq in box_seq:
        (x, y) = (boxes[seq][0], boxes[seq][1])
        (w, h) = (boxes[seq][2], boxes[seq][3])
        # draw what you want
        color = colors[classIDs[seq]]
        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
        text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq])
        cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1)
```
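Each detection vector in a yolo-family output layer packs box and scores into one row: indices 0–3 hold the box center-x, center-y, width, and height normalized to the input size, index 4 is the objectness score, and indices 5 onward are the per-class scores. A minimal numpy-only sketch (with made-up values, no model required) illustrates the parsing done above:

```python
import numpy as np

# one synthetic detection vector: cx, cy, w, h (normalized), objectness, class scores
detection = np.array([0.5, 0.5, 0.2, 0.1,               # box, normalized to [0, 1]
                      0.9,                              # objectness score
                      0.1, 0.8, 0.05, 0.03, 0.02])      # per-class scores

W, H = 744, 416                     # frame size the boxes are scaled back to

scores = detection[5:]              # class scores start at index 5
classID = int(np.argmax(scores))    # best class
score = scores[classID]             # score of the best class

# scale the normalized box back to pixel coordinates
cx, cy, w, h = detection[0:4] * np.array([W, H, W, H])
x = int(cx - w / 2)                 # convert box center to top-left corner
y = int(cy - h / 2)

print(classID, float(score), x, y, int(w), int(h))  # → 1 0.8 297 187 148 41
```

This also shows why the distractor options below fail: reading `detection[4]` confuses objectness with the class score, skipping the `* np.array([W, H, W, H])` step leaves boxes in normalized units, and treating the first two values as a top-left corner shifts every box by half its size.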
## Wrong score parsing
```python
for output in layerOutputs:
    for detection in output:
        scores = detection[5:]
        # class id
        classID = np.argmax(scores)
        # get score
        score = detection[4]
        # ignore if score is too low
        if score >= min_score:
            box = detection[0:4] * np.array([W, H, W, H])
            (centerX, centerY, width, height) = box.astype("int")
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))
            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(score))
            classIDs.append(classID)

# run nms using opencv.dnn module
idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3)
# render on image
idxs = array(idxs)
box_seq = idxs.flatten()
if len(idxs) > 0:
    for seq in box_seq:
        (x, y) = (boxes[seq][0], boxes[seq][1])
        (w, h) = (boxes[seq][2], boxes[seq][3])
        # draw what you want
        color = colors[classIDs[seq]]
        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
        text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq])
        cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1)
```
## Box coordinates not scaled back to the original input size
```python
for output in layerOutputs:
    for detection in output:
        scores = detection[5:]
        # class id
        classID = np.argmax(scores)
        # get score by classid
        score = scores[classID]
        # ignore if score is too low
        if score >= min_score:
            box = detection[0:4]
            (centerX, centerY, width, height) = box.astype("int")
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))
            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(score))
            classIDs.append(classID)

# run nms using opencv.dnn module
idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3)
# render on image
idxs = array(idxs)
box_seq = idxs.flatten()
if len(idxs) > 0:
    for seq in box_seq:
        (x, y) = (boxes[seq][0], boxes[seq][1])
        (w, h) = (boxes[seq][2], boxes[seq][3])
        # draw what you want
        color = colors[classIDs[seq]]
        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
        text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq])
        cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1)
```
## Wrong parsing of the box top-left corner
```python
for output in layerOutputs:
    for detection in output:
        scores = detection[5:]
        # class id
        classID = np.argmax(scores)
        # get score by classid
        score = scores[classID]
        # ignore if score is too low
        if score >= min_score:
            box = detection[0:4] * np.array([W, H, W, H])
            (x, y, width, height) = box.astype("int")
            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(score))
            classIDs.append(classID)

# run nms using opencv.dnn module
idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3)
# render on image
idxs = array(idxs)
box_seq = idxs.flatten()
if len(idxs) > 0:
    for seq in box_seq:
        (x, y) = (boxes[seq][0], boxes[seq][1])
        (w, h) = (boxes[seq][2], boxes[seq][3])
        # draw what you want
        color = colors[classIDs[seq]]
        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
        text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq])
        cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1)
```
data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/opencv-yolo-inference-vehicle.py (new file, mode 100755)
```python
import numpy as np
import cv2
import os
import time
from numpy import array

# some variables
weightsPath = './yolov3-tiny.weights'
configPath = './yolov3-tiny.cfg'
labelsPath = './obj.names'
LABELS = open(labelsPath).read().strip().split("\n")
colors = [(255, 255, 0), (255, 0, 255), (0, 255, 255), (0, 255, 0), (255, 0, 255)]
min_score = 0.3

# read darknet weights using opencv.dnn module
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
# read video using opencv
cap = cv2.VideoCapture('./MY_TEST/8.h264')

# loop for inference
while True:
    boxes = []
    confidences = []
    classIDs = []
    start = time.time()
    ret, frame = cap.read()
    frame = cv2.resize(frame, (744, 416), interpolation=cv2.INTER_CUBIC)
    image = frame
    (H, W) = image.shape[0:2]
    # get output layer names
    ln = net.getLayerNames()
    out = net.getUnconnectedOutLayers()
    x = []
    for i in out:
        x.append(ln[i[0] - 1])
    ln = x
    # create input data package with current frame
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    # set as input
    net.setInput(blob)
    # run!
    layerOutputs = net.forward(ln)
    # post-process
    # parsing the output and run nms
    for output in layerOutputs:
        for detection in output:
            scores = detection[5:]
            # class id
            classID = np.argmax(scores)
            # get score by classid
            score = scores[classID]
            # ignore if score is too low
            if score >= min_score:
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))
                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(score))
                classIDs.append(classID)
    # run nms using opencv.dnn module
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3)
    # render on image
    idxs = array(idxs)
    box_seq = idxs.flatten()
    if len(idxs) > 0:
        for seq in box_seq:
            (x, y) = (boxes[seq][0], boxes[seq][1])
            (w, h) = (boxes[seq][2], boxes[seq][3])
            # draw what you want
            color = colors[classIDs[seq]]
            cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
            text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq])
            cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1)
    cv2.namedWindow('Image', cv2.WINDOW_NORMAL)
    cv2.imshow("Image", image)
    # print fps
    stop = time.time()
    fps = 1 / (stop - start)
    print('fps>>> :', fps)
    # normal codes when displaying video
    c = cv2.waitKey(1) & 0xff
    if c == 27:
        cap.release()
        break

cv2.destroyAllWindows()
```

(No newline at end of file.)
data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/vehicle-detection.gif (new file, mode 100755)

The image diff is too large to display; view the blob instead.
data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/yolov3-tiny.cfg (new file, mode 100755)
```
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=4
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.001
burn_in=1000
max_batches = 300000
policy=steps
steps=50000,100000
scales=.1,.1

[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=1

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

###########

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=30
activation=linear

[yolo]
mask = 3,4,5
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=5
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

[route]
layers = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
layers = -1, 8

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=30
activation=linear

[yolo]
mask = 0,1,2
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=5
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
```
data/1.OpenCV初阶/7.OpenCV中的深度学习/5.车辆检测/yolov3-tiny.weights (new file, mode 100755)

File added.