Commit c9967a0f authored by 幻灰龙

Merge branch 'zhzhi-master' into 'master'

Upload vehicle detection exercise

See merge request !30
```json
{
  "keywords": [],
  "children": [],
  "export": []
}
```
# opencv-yolo-tiny Vehicle Detection
The `opencv.dnn` module supports inference for most deep learning model formats: it can directly load models trained with common frameworks such as `tensorflow`, `darknet`, and `pytorch`, then run inference to obtain the model's outputs. The `opencv.dnn` module is already used as a model deployment option in real industrial applications.
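As a quick illustration (the model file names below are hypothetical placeholders, not files from this exercise), loading models from different frameworks follows the same pattern:

```python
import cv2

# each reader returns a cv2.dnn.Net; the model files below are placeholders
net_tf   = cv2.dnn.readNetFromTensorflow('frozen_graph.pb', 'graph.pbtxt')  # tensorflow
net_dn   = cv2.dnn.readNetFromDarknet('model.cfg', 'model.weights')         # darknet
net_onnx = cv2.dnn.readNetFromONNX('model.onnx')                            # pytorch, via ONNX export
```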
![](./vehicle-detection.gif)
The workflow for loading and using a model is:

1. Load the network, reading the model weights and network configuration files
2. Create the input; the `opencv.dnn` module requires image input in a specific format (see the note after this list)
3. Run inference
4. Parse the output
5. Use and display the output
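On step 2: `cv2.dnn.blobFromImage` converts an image into the 4-D NCHW blob the network expects, applying scaling, resizing, and an optional BGR-to-RGB channel swap in one call:

```python
# scalefactor=1/255 maps pixel values to [0, 1]; size must match the network input;
# swapRB=True converts OpenCV's BGR channel order to RGB
blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
print(blob.shape)  # -> (1, 3, 416, 416)
```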
Below, the `opencv.dnn` module loads the `yolov3-tiny` vehicle detection model and runs inference on a video. Please fill in the code at the TO-DO marker (this exercise tests parsing the output of `yolo`-family detection models):
```python
import numpy as np
import cv2
import os
import time
from numpy import array

# some variables
weightsPath = './yolov3-tiny.weights'
configPath = './yolov3-tiny.cfg'
labelsPath = './obj.names'
LABELS = open(labelsPath).read().strip().split("\n")
colors = [(255, 255, 0), (255, 0, 255), (0, 255, 255), (0, 255, 0), (255, 0, 255)]
min_score = 0.3
# read darknet weights using opencv.dnn module
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
# read video using opencv
cap = cv2.VideoCapture('./MY_TEST/8.h264')
# loop for inference
while True:
    boxes = []
    confidences = []
    classIDs = []
    start = time.time()
    ret, frame = cap.read()
    if not ret:
        # stop when the video ends or the frame cannot be read
        cap.release()
        break
    frame = cv2.resize(frame, (744, 416), interpolation=cv2.INTER_CUBIC)
    image = frame
    (H, W) = image.shape[0: 2]
    # get output layer names (flatten() works whether getUnconnectedOutLayers()
    # returns an Nx1 array or a 1-D array, which differs across OpenCV versions)
    ln = net.getLayerNames()
    ln = [ln[i - 1] for i in net.getUnconnectedOutLayers().flatten()]
    # create input data package with current frame
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    # set as input
    net.setInput(blob)
    # run!
    layerOutputs = net.forward(ln)
    # post-process
    # parsing the output and run nms
    # TO-DO your code...
    cv2.namedWindow('Image', cv2.WINDOW_NORMAL)
    cv2.imshow("Image", image)
    # print fps
    stop = time.time()
    fps = 1 / (stop - start)
    print('fps>>> :', fps)
    # normal codes when displaying video
    c = cv2.waitKey(1) & 0xff
    if c == 27:
        cap.release()
        break
cv2.destroyAllWindows()
```
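For the TO-DO section it helps to know the layout of each output row: a YOLO detection vector is `[center_x, center_y, width, height, objectness, class_0_score, ..., class_N_score]`, with the box fields normalized to `[0, 1]` relative to the network input. A quick way to confirm this from the code above:

```python
for output in layerOutputs:
    # shape is (num_detections, 5 + num_classes); with 5 classes each row has 10 values
    print(output.shape)
```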
## Answer
```python
for output in layerOutputs:
    for detection in output:
        scores = detection[5:]
        # class id
        classID = np.argmax(scores)
        # get score by classid
        score = scores[classID]
        # ignore if score is too low
        if score >= min_score:
            box = detection[0:4] * np.array([W, H, W, H])
            (centerX, centerY, width, height) = box.astype("int")
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))
            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(score))
            classIDs.append(classID)

# run nms using opencv.dnn module
idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3)
# render on image
idxs = array(idxs)
box_seq = idxs.flatten()
if len(idxs) > 0:
    for seq in box_seq:
        (x, y) = (boxes[seq][0], boxes[seq][1])
        (w, h) = (boxes[seq][2], boxes[seq][3])
        # draw what you want
        color = colors[classIDs[seq]]
        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
        text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq])
        cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1)
```
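For reference, `cv2.dnn.NMSBoxes(bboxes, scores, score_threshold, nms_threshold)` first discards boxes scoring below `score_threshold`, then performs non-maximum suppression, dropping any box whose overlap with a higher-scoring box exceeds `nms_threshold`; it returns the indices of the surviving boxes. The values 0.2 and 0.3 used here are tuning choices, not fixed constants.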
## Incorrect score parsing
```python
for output in layerOutputs:
    for detection in output:
        scores = detection[5:]
        # class id
        classID = np.argmax(scores)
        # get score
        score = detection[4]
        # ignore if score is too low
        if score >= min_score:
            box = detection[0:4] * np.array([W, H, W, H])
            (centerX, centerY, width, height) = box.astype("int")
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))
            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(score))
            classIDs.append(classID)

# run nms using opencv.dnn module
idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3)
# render on image
idxs = array(idxs)
box_seq = idxs.flatten()
if len(idxs) > 0:
    for seq in box_seq:
        (x, y) = (boxes[seq][0], boxes[seq][1])
        (w, h) = (boxes[seq][2], boxes[seq][3])
        # draw what you want
        color = colors[classIDs[seq]]
        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
        text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq])
        cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1)
```
## Box coordinates not rescaled to the original image size
```python
for output in layerOutputs:
    for detection in output:
        scores = detection[5:]
        # class id
        classID = np.argmax(scores)
        # get score by classid
        score = scores[classID]
        # ignore if score is too low
        if score >= min_score:
            box = detection[0:4]
            (centerX, centerY, width, height) = box.astype("int")
            x = int(centerX - (width / 2))
            y = int(centerY - (height / 2))
            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(score))
            classIDs.append(classID)

# run nms using opencv.dnn module
idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3)
# render on image
idxs = array(idxs)
box_seq = idxs.flatten()
if len(idxs) > 0:
    for seq in box_seq:
        (x, y) = (boxes[seq][0], boxes[seq][1])
        (w, h) = (boxes[seq][2], boxes[seq][3])
        # draw what you want
        color = colors[classIDs[seq]]
        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
        text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq])
        cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1)
```
## Incorrect parsing of the box's top-left corner
```python
for output in layerOutputs:
    for detection in output:
        scores = detection[5:]
        # class id
        classID = np.argmax(scores)
        # get score by classid
        score = scores[classID]
        # ignore if score is too low
        if score >= min_score:
            box = detection[0:4] * np.array([W, H, W, H])
            (x, y, width, height) = box.astype("int")
            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(score))
            classIDs.append(classID)

# run nms using opencv.dnn module
idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3)
# render on image
idxs = array(idxs)
box_seq = idxs.flatten()
if len(idxs) > 0:
    for seq in box_seq:
        (x, y) = (boxes[seq][0], boxes[seq][1])
        (w, h) = (boxes[seq][2], boxes[seq][3])
        # draw what you want
        color = colors[classIDs[seq]]
        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
        text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq])
        cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1)
```
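For reference, the complete script with the answer filled in: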
```python
import numpy as np
import cv2
import os
import time
from numpy import array

# some variables
weightsPath = './yolov3-tiny.weights'
configPath = './yolov3-tiny.cfg'
labelsPath = './obj.names'
LABELS = open(labelsPath).read().strip().split("\n")
colors = [(255, 255, 0), (255, 0, 255), (0, 255, 255), (0, 255, 0), (255, 0, 255)]
min_score = 0.3
# read darknet weights using opencv.dnn module
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
# read video using opencv
cap = cv2.VideoCapture('./MY_TEST/8.h264')
# loop for inference
while True:
    boxes = []
    confidences = []
    classIDs = []
    start = time.time()
    ret, frame = cap.read()
    if not ret:
        # stop when the video ends or the frame cannot be read
        cap.release()
        break
    frame = cv2.resize(frame, (744, 416), interpolation=cv2.INTER_CUBIC)
    image = frame
    (H, W) = image.shape[0: 2]
    # get output layer names (flatten() works across OpenCV versions)
    ln = net.getLayerNames()
    ln = [ln[i - 1] for i in net.getUnconnectedOutLayers().flatten()]
    # create input data package with current frame
    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    # set as input
    net.setInput(blob)
    # run!
    layerOutputs = net.forward(ln)
    # post-process
    # parsing the output and run nms
    for output in layerOutputs:
        for detection in output:
            scores = detection[5:]
            # class id
            classID = np.argmax(scores)
            # get score by classid
            score = scores[classID]
            # ignore if score is too low
            if score >= min_score:
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))
                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(score))
                classIDs.append(classID)
    # run nms using opencv.dnn module
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3)
    # render on image
    idxs = array(idxs)
    box_seq = idxs.flatten()
    if len(idxs) > 0:
        for seq in box_seq:
            (x, y) = (boxes[seq][0], boxes[seq][1])
            (w, h) = (boxes[seq][2], boxes[seq][3])
            # draw what you want
            color = colors[classIDs[seq]]
            cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
            text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq])
            cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1)
    cv2.namedWindow('Image', cv2.WINDOW_NORMAL)
    cv2.imshow("Image", image)
    # print fps
    stop = time.time()
    fps = 1 / (stop - start)
    print('fps>>> :', fps)
    # normal codes when displaying video
    c = cv2.waitKey(1) & 0xff
    if c == 27:
        cap.release()
        break
cv2.destroyAllWindows()
```
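For completeness, here is the `yolov3-tiny.cfg` network configuration referenced above (note `classes=5` in both `[yolo]` heads):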
```
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=4
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.001
burn_in=1000
max_batches = 300000
policy=steps
steps=50000,100000
scales=.1,.1

[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=1

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

###########

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=30
activation=linear

[yolo]
mask = 3,4,5
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=5
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

[route]
layers = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
layers = -1, 8

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=30
activation=linear

[yolo]
mask = 0,1,2
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=5
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
```
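A detail worth noting in this config: each `[yolo]` head uses 3 of the 6 anchors (selected by `mask`), and the convolution feeding it must output `(classes + 5)` values per anchor (4 box coordinates, 1 objectness score, and the per-class scores), which is why the layers just before each `[yolo]` block have `filters=30`:

```python
classes = 5
anchors_per_head = 3                        # each [yolo] mask selects 3 anchors
filters = anchors_per_head * (classes + 5)  # 3 * (4 box + 1 objectness + 5 classes)
print(filters)  # 30
```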