diff --git "a/data/1.OpenCV\345\210\235\351\230\266/6.\350\247\206\351\242\221\345\210\206\346\236\220/2.\347\233\256\346\240\207\350\267\237\350\270\252/obj_tracker.md" "b/data/1.OpenCV\345\210\235\351\230\266/6.\350\247\206\351\242\221\345\210\206\346\236\220/2.\347\233\256\346\240\207\350\267\237\350\270\252/obj_tracker.md" index 69fd53277598c6ec2d545f6d64fa78e643fe78ef..2090750da911ade9c1efc4cc88bb97bc31a5868e 100755 --- "a/data/1.OpenCV\345\210\235\351\230\266/6.\350\247\206\351\242\221\345\210\206\346\236\220/2.\347\233\256\346\240\207\350\267\237\350\270\252/obj_tracker.md" +++ "b/data/1.OpenCV\345\210\235\351\230\266/6.\350\247\206\351\242\221\345\210\206\346\236\220/2.\347\233\256\346\240\207\350\267\237\350\270\252/obj_tracker.md" @@ -107,7 +107,9 @@ class Tracker(object): self.trackers.pop(i) ``` -## 未考虑unmatched_detections +## 选项 + +### 未考虑unmatched_detections ```python # 更新matched_tracks @@ -128,7 +130,7 @@ class Tracker(object): self.trackers.pop(i) ``` -## 未考虑移除长时间未检测到的缓存数据 +### 未考虑移除长时间未检测到的缓存数据 ```python # 更新matched_tracks @@ -148,7 +150,7 @@ class Tracker(object): ret.append(np.concatenate((d,[trk.id+1])).reshape(1,-1)) ``` -## 未考虑需要连续检出并匹配self.min_hints次才生效 +### 未考虑需要连续检出并匹配self.min_hints次才生效 ```python # 更新matched_tracks diff --git "a/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/1.\345\233\276\345\203\217\345\210\206\347\261\273/classification.md" "b/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/1.\345\233\276\345\203\217\345\210\206\347\261\273/classification.md" index 44b5a565ccfb0176c4c27b983a71783f76095cb4..8fd55258da5507d5c235427431840c860d7b8551 100755 --- "a/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/1.\345\233\276\345\203\217\345\210\206\347\261\273/classification.md" +++ "b/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/1.\345\233\276\345\203\217\345\210\206\347\261\273/classification.md" @@ -5,6 +5,7 @@ ![](https://gitcode.net/csdn/skill_tree_opencv/-/raw/master/data/1.OpenCV初阶/7.OpenCV中的深度学习/1.图像分类/result.png) 具体到图像分类任务而言,其具体流程如下: + 1. 输入指定大小RGB图像,1/3通道,宽高一般相等 2. 通过卷积神经网络进行多尺度特征提取,生成高维特征值 3. 利用全连接网络、或其他结构对高维特征进行分类,输出各目标分类的概率值(概率和为1) @@ -21,41 +22,42 @@ import cv2 import numpy as np from labels import LABEL_MAP # 1000 labels in imagenet dataset -# caffe model, googlenet aglo -weights = "bvlc_googlenet.caffemodel" -protxt = "bvlc_googlenet.prototxt" - -# read caffe model from disk -net = cv2.dnn.readNetFromCaffe(protxt, weights) +if __name__=='__main__': + # caffe model, googlenet aglo + weights = "bvlc_googlenet.caffemodel" + protxt = "bvlc_googlenet.prototxt" -# create input -image = cv2.imread("ocean-liner.jpg") -blob = cv2.dnn.blobFromImage(image, 1.0, (224, 224), (104, 117, 123), False, crop=False) -result = np.copy(image) + # read caffe model from disk + net = cv2.dnn.readNetFromCaffe(protxt, weights) -# run! -net.setInput(blob) -out = net.forward() + # create input + image = cv2.imread("ocean-liner.jpg") + blob = cv2.dnn.blobFromImage(image, 1.0, (224, 224), (104, 117, 123), False, crop=False) + result = np.copy(image) -# TO-DO your code... + # run! + net.setInput(blob) + out = net.forward() -# time cost -t, _ = net.getPerfProfile() -label = 'cost time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency()) -cv2.putText(result, label, (0, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 0), 2) + # TODO(You): 请在此实现代码 -# render on image -label = '%s: %.4f' % (LABEL_MAP[classId] if LABEL_MAP else 'Class #%d' % classId, confidence) -cv2.putText(result, label, (0, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) + # time cost + t, _ = net.getPerfProfile() + label = 'cost time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency()) + cv2.putText(result, label, (0, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 0), 2) -show_img = np.hstack((image, result)) + # render on image + label = '%s: %.4f' % (LABEL_MAP[classId] if LABEL_MAP else 'Class #%d' % classId, confidence) + cv2.putText(result, label, (0, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) -# normal codes in opencv -cv2.imshow("Image", show_img) -cv2.waitKey(0) + show_img = np.hstack((image, result)) + # normal codes in opencv + cv2.imshow("Image", show_img) + cv2.waitKey(0) ``` +以下对TODO部分实现正确的是? ## 答案 @@ -76,7 +78,6 @@ confidence = out[1] ``` - ## 输出维度理解错误 ```python @@ -86,8 +87,7 @@ confidence = out[classId] ``` - -## 输出理解错误 +## 输出理解错误2 ```python # output probability, find the right index diff --git "a/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/2.\347\233\256\346\240\207\346\243\200\346\265\213/deep_learning_object_detection.md" "b/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/2.\347\233\256\346\240\207\346\243\200\346\265\213/deep_learning_object_detection.md" index 6d26b8d630ebcdaddc3832ad2afcd66271a35a2e..b65b7602088218be99caf91c4f9b442dcbc0fcd8 100644 --- "a/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/2.\347\233\256\346\240\207\346\243\200\346\265\213/deep_learning_object_detection.md" +++ "b/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/2.\347\233\256\346\240\207\346\243\200\346\265\213/deep_learning_object_detection.md" @@ -6,147 +6,160 @@ ![output](https://gitee.com/wanghao1090220084/cloud-image/raw/master/output.jpg) -# 框架代码 +**框架代码**: -```Python +```python import numpy as np import cv2 -if __name__=="__main__": - image_name = '11.jpg' - prototxt = 'MobileNetSSD_deploy.prototxt.txt' - model_path = 'MobileNetSSD_deploy.caffemodel' - confidence_ta = 0.2 - # 初始化MobileNet SSD训练的类标签列表 - # 检测,然后为每个类生成一组边界框颜色 - CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat", - "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", - "dog", "horse", "motorbike", "person", "pottedplant", "sheep", - "sofa", "train", "tvmonitor"] - COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3)) - # load our serialized model from disk - print("[INFO] loading model...") - net = cv2.dnn.readNetFromCaffe(prototxt, model_path) - # 加载输入图像并为图像构造一个输入blob - # 将大小调整为固定的300x300像素。 - # (注意:SSD模型的输入是300x300像素) - image = cv2.imread(image_name) - (h, w) = image.shape[:2] - blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 0.007843, - (300, 300), 127.5) - # 通过网络传递blob并获得检测结果和 - # 预测 - print("[INFO] computing object detections...") - net.setInput(blob) - detections = net.forward() - # 循环检测结果 - - # show the output image - cv2.imshow("Output", image) - cv2.imwrite("output.jpg", image) - cv2.waitKey(0) - +if __name__=="__main__": + image_name = '11.jpg' + prototxt = 'MobileNetSSD_deploy.prototxt.txt' + model_path = 'MobileNetSSD_deploy.caffemodel' + confidence_ta = 0.2 + + # 初始化MobileNet SSD训练的类标签列表 + # 检测,然后为每个类生成一组边界框颜色 + CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat", + "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", + "dog", "horse", "motorbike", "person", "pottedplant", "sheep", + "sofa", "train", "tvmonitor"] + COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3)) + + # load our serialized model from disk + print("[INFO] loading model...") + net = cv2.dnn.readNetFromCaffe(prototxt, model_path) + + # 加载输入图像并为图像构造一个输入blob + # 将大小调整为固定的300x300像素。 + # (注意:SSD模型的输入是300x300像素) + image = cv2.imread(image_name) + (h, w) = image.shape[:2] + blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 0.007843, + (300, 300), 127.5) + + # 通过网络传递blob并获得检测结果和 + # 预测 + print("[INFO] computing object detections...") + net.setInput(blob) + detections = net.forward() + + # TODO(You): 请在此编写循环检测结果 + + # show the output image + cv2.imshow("Output", image) + cv2.imwrite("output.jpg", image) + cv2.waitKey(0) ``` -# 答案: +以下对“循环检测结果”代码实现正确的是? + +## 答案 -```Python +```python for i in np.arange(0, detections.shape[2]): - # 提取与数据相关的置信度(即概率) - # 预测 - confidence = detections[0, 0, i, 2] - # 通过确保“置信度”来过滤掉弱检测 - # 大于最小置信度 - if confidence > confidence_ta: - # 从`detections`中提取类标签的索引, - # 然后计算物体边界框的 (x, y) 坐标 - idx = int(detections[0, 0, i, 1]) - box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) - (startX, startY, endX, endY) = box.astype("int") - # 显示预测 - label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100) - print("[INFO] {}".format(label)) - cv2.rectangle(image, (startX, startY), (endX, endY), - COLORS[idx], 2) - y = startY - 15 if startY - 15 > 15 else startY + 15 - cv2.putText(image, label, (startX, y), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) + # 提取与数据相关的置信度(即概率) + # 预测 + confidence = detections[0, 0, i, 2] + + # 通过确保“置信度”来过滤掉弱检测 + # 大于最小置信度 + if confidence > confidence_ta: + # 从`detections`中提取类标签的索引, + # 然后计算物体边界框的 (x, y) 坐标 + idx = int(detections[0, 0, i, 1]) + box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) + (startX, startY, endX, endY) = box.astype("int") + + # 显示预测 + label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100) + print("[INFO] {}".format(label)) + cv2.rectangle(image, (startX, startY), (endX, endY), + COLORS[idx], 2) + y = startY - 15 if startY - 15 > 15 else startY + 15 + cv2.putText(image, label, (startX, y), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) ``` -# 选项 +## 选项 -## 宽和高颠倒 +### 宽和高颠倒 -```Python +```python for i in np.arange(0, detections.shape[2]): - # 提取与数据相关的置信度(即概率) - # 预测 - confidence = detections[0, 0, i, 2] - # 通过确保“置信度”来过滤掉弱检测 - # 大于最小置信度 - if confidence > confidence_ta: - # 从`detections`中提取类标签的索引, - # 然后计算物体边界框的 (x, y) 坐标 - idx = int(detections[0, 0, i, 1]) - box = detections[0, 0, i, 3:7] * np.array([h, w, h, w]) - (startX, startY, endX, endY) = box.astype("int") - # 显示预测 - label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100) - print("[INFO] {}".format(label)) - cv2.rectangle(image, (startX, startY), (endX, endY), - COLORS[idx], 2) - y = startY - 15 if startY - 15 > 15 else startY + 15 - cv2.putText(image, label, (startX, y), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) + # 提取与数据相关的置信度(即概率) + # 预测 + confidence = detections[0, 0, i, 2] + + # 通过确保“置信度”来过滤掉弱检测 + # 大于最小置信度 + if confidence > confidence_ta: + # 从`detections`中提取类标签的索引, + # 然后计算物体边界框的 (x, y) 坐标 + idx = int(detections[0, 0, i, 1]) + box = detections[0, 0, i, 3:7] * np.array([h, w, h, w]) + (startX, startY, endX, endY) = box.astype("int") + + # 显示预测 + label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100) + print("[INFO] {}".format(label)) + cv2.rectangle(image, (startX, startY), (endX, endY), + COLORS[idx], 2) + y = startY - 15 if startY - 15 > 15 else startY + 15 + cv2.putText(image, label, (startX, y), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) ``` -## box框顺序错误 +### box框顺序错误 -``` +```python for i in np.arange(0, detections.shape[2]): - # 提取与数据相关的置信度(即概率) - # 预测 - confidence = detections[0, 0, i, 2] - # 通过确保“置信度”来过滤掉弱检测 - # 大于最小置信度 - if confidence > confidence_ta: - # 从`detections`中提取类标签的索引, - # 然后计算物体边界框的 (x, y) 坐标 - idx = int(detections[0, 0, i, 1]) - box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) - (startX, endX, startY, endY) = box.astype("int") - # 显示预测 - label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100) - print("[INFO] {}".format(label)) - cv2.rectangle(image, (startX, startY), (endX, endY), - COLORS[idx], 2) - y = startY - 15 if startY - 15 > 15 else startY + 15 - cv2.putText(image, label, (startX, y), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) + # 提取与数据相关的置信度(即概率) + # 预测 + confidence = detections[0, 0, i, 2] + + # 通过确保“置信度”来过滤掉弱检测 + # 大于最小置信度 + if confidence > confidence_ta: + # 从`detections`中提取类标签的索引, + # 然后计算物体边界框的 (x, y) 坐标 + idx = int(detections[0, 0, i, 1]) + box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) + (startX, endX, startY, endY) = box.astype("int") + + # 显示预测 + label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100) + print("[INFO] {}".format(label)) + cv2.rectangle(image, (startX, startY), (endX, endY), + COLORS[idx], 2) + y = startY - 15 if startY - 15 > 15 else startY + 15 + cv2.putText(image, label, (startX, y), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) ``` -## 置信度设置成大于 +### 置信度设置成大于 -``` +```python for i in np.arange(0, detections.shape[2]): - # 提取与数据相关的置信度(即概率) - # 预测 - confidence = detections[0, 0, i, 2] - # 通过确保“置信度”来过滤掉弱检测 - # 大于最小置信度 - if confidence < confidence_ta: - # 从`detections`中提取类标签的索引, - # 然后计算物体边界框的 (x, y) 坐标 - idx = int(detections[0, 0, i, 1]) - box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) - (startX, startY, endX, endY) = box.astype("int") - # 显示预测 - label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100) - print("[INFO] {}".format(label)) - cv2.rectangle(image, (startX, startY), (endX, endY), - COLORS[idx], 2) - y = startY - 15 if startY - 15 > 15 else startY + 15 - cv2.putText(image, label, (startX, y), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) + # 提取与数据相关的置信度(即概率) + # 预测 + confidence = detections[0, 0, i, 2] + + # 通过确保“置信度”来过滤掉弱检测 + # 大于最小置信度 + if confidence < confidence_ta: + # 从`detections`中提取类标签的索引, + # 然后计算物体边界框的 (x, y) 坐标 + idx = int(detections[0, 0, i, 1]) + box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) + (startX, startY, endX, endY) = box.astype("int") + + # 显示预测 + label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100) + print("[INFO] {}".format(label)) + cv2.rectangle(image, (startX, startY), (endX, endY), + COLORS[idx], 2) + y = startY - 15 if startY - 15 > 15 else startY + 15 + cv2.putText(image, label, (startX, y), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2) ``` - diff --git "a/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/3.\344\272\272\350\204\270\346\243\200\346\265\213/detect_faces.md" "b/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/3.\344\272\272\350\204\270\346\243\200\346\265\213/detect_faces.md" index ad881c927fe58da81e20ed2fbcfbd20570c9abac..bbaef29f2be88e6c3e399420937dd0b742c64ea6 100644 --- "a/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/3.\344\272\272\350\204\270\346\243\200\346\265\213/detect_faces.md" +++ "b/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/3.\344\272\272\350\204\270\346\243\200\346\265\213/detect_faces.md" @@ -7,135 +7,146 @@ 遍历代码的逻辑如下: -- 遍历检测结果。 +- 遍历检测结果。 - 然后,我们提取置信度并将其与置信度阈值进行比较。 我们执行此检查以过滤掉弱检测。 如果置信度满足最小阈值,我们继续绘制一个矩形以及检测概率。 - 为此,我们首先计算边界框的 (x, y) 坐标。 然后我们构建包含检测概率的置信文本字符串。 如果我们的文本偏离图像(例如当面部检测发生在图像的最顶部时),我们将其向下移动 10 个像素。 我们的面部矩形和置信文本绘制在图像上。 - 然后,我们再次循环执行该过程后的其他检测。 如果没有检测到,我们准备在屏幕上显示我们的输出图像)。 - 框架代码如下: -``` +```python import numpy as np import cv2 -low_confidence=0.5 -image_path='2.jpg' -proto_txt='deploy.proto.txt' -model_path='res10_300x300_ssd_iter_140000_fp16.caffemodel' -# 加载模型 -print("[INFO] loading model...") -net = cv2.dnn.readNetFromCaffe(proto_txt, model_path) -# 加载输入图像并为图像构建一个输入 blob -# 将大小调整为固定的 300x300 像素,然后对其进行标准化 -image = cv2.imread(image_path) -(h, w) = image.shape[:2] -blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0, - (300, 300), (104.0, 177.0, 123.0)) -# 通过网络传递blob并获得检测和预测 -print("[INFO] computing object detections...") -net.setInput(blob) -detections = net.forward() -# 循环检测 -# TODO(You):请实现循环检测的代码。 - -# 展示图片并保存 -cv2.imshow("Output", image) -cv2.imwrite("01.jpg",image) -cv2.waitKey(0) +if __name__=='__main__': + low_confidence=0.5 + image_path='2.jpg' + proto_txt='deploy.proto.txt' + model_path='res10_300x300_ssd_iter_140000_fp16.caffemodel' + + # 加载模型 + print("[INFO] loading model...") + net = cv2.dnn.readNetFromCaffe(proto_txt, model_path) + + # 加载输入图像并为图像构建一个输入 blob + # 将大小调整为固定的 300x300 像素,然后对其进行标准化 + image = cv2.imread(image_path) + (h, w) = image.shape[:2] + blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0, + (300, 300), (104.0, 177.0, 123.0)) + + # 通过网络传递blob并获得检测和预测 + print("[INFO] computing object detections...") + net.setInput(blob) + detections = net.forward() + + # 循环检测 + # TODO(You):请实现循环检测的代码。 + + # 展示图片并保存 + cv2.imshow("Output", image) + cv2.imwrite("01.jpg",image) + cv2.waitKey(0) ``` -# 答案: +以下对TODO部分代码实现正确的是? -``` +## 答案 + +```python # 循环检测 for i in range(0, detections.shape[2]): - # 提取与相关的置信度(即概率) - # 预测 - confidence = detections[0, 0, i, 2] - # 通过确保“置信度”来过滤掉弱检测 - # 大于最小置信度 - if confidence > low_confidence: - # 计算边界框的 (x, y) 坐标 - box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) - (startX, startY, endX, endY) = box.astype("int") - # 绘制人脸的边界框以及概率 - text = "{:.2f}%".format(confidence * 100) - y = startY - 10 if startY - 10 > 10 else startY + 10 - cv2.rectangle(image, (startX, startY), (endX, endY), - (0, 0, 255), 2) - cv2.putText(image, text, (startX, y), - cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2) + # 提取与相关的置信度(即概率) + # 预测 + confidence = detections[0, 0, i, 2] + # 通过确保“置信度”来过滤掉弱检测 + # 大于最小置信度 + if confidence > low_confidence: + # 计算边界框的 (x, y) 坐标 + box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) + (startX, startY, endX, endY) = box.astype("int") + # 绘制人脸的边界框以及概率 + text = "{:.2f}%".format(confidence * 100) + y = startY - 10 if startY - 10 > 10 else startY + 10 + cv2.rectangle(image, (startX, startY), (endX, endY), + (0, 0, 255), 2) + cv2.putText(image, text, (startX, y), + cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2) ``` -# 选项 +## 选项 -## 宽和高颠倒 +### 宽和高颠倒 -``` +```python # 循环检测 for i in range(0, detections.shape[2]): - # 提取与相关的置信度(即概率) - # 预测 - confidence = detections[0, 0, i, 2] - # 通过确保“置信度”来过滤掉弱检测 - # 大于最小置信度 - if confidence > low_confidence: - # 计算边界框的 (x, y) 坐标 - box = detections[0, 0, i, 3:7] * np.array([h, w, h, w]) - (startX, startY, endX, endY) = box.astype("int") - # 绘制人脸的边界框以及概率 - text = "{:.2f}%".format(confidence * 100) - y = startY - 10 if startY - 10 > 10 else startY + 10 - cv2.rectangle(image, (startX, startY), (endX, endY), - (0, 0, 255), 2) - cv2.putText(image, text, (startX, y), - cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2) + # 提取与相关的置信度(即概率) + # 预测 + confidence = detections[0, 0, i, 2] + + # 通过确保“置信度”来过滤掉弱检测 + # 大于最小置信度 + if confidence > low_confidence: + # 计算边界框的 (x, y) 坐标 + box = detections[0, 0, i, 3:7] * np.array([h, w, h, w]) + (startX, startY, endX, endY) = box.astype("int") + + # 绘制人脸的边界框以及概率 + text = "{:.2f}%".format(confidence * 100) + y = startY - 10 if startY - 10 > 10 else startY + 10 + cv2.rectangle(image, (startX, startY), (endX, endY), + (0, 0, 255), 2) + cv2.putText(image, text, (startX, y), + cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2) ``` -## box框顺序错误 +### box框顺序错误 -``` +```python # 循环检测 for i in range(0, detections.shape[2]): - # 提取与相关的置信度(即概率) - # 预测 - confidence = detections[0, 0, i, 2] - # 通过确保“置信度”来过滤掉弱检测 - # 大于最小置信度 - if confidence > low_confidence: - # 计算边界框的 (x, y) 坐标 - box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) - (startX, endX, startY, endY) = box.astype("int") - # 绘制人脸的边界框以及概率 - text = "{:.2f}%".format(confidence * 100) - y = startY - 10 if startY - 10 > 10 else startY + 10 - cv2.rectangle(image, (startX, startY), (endX, endY), - (0, 0, 255), 2) - cv2.putText(image, text, (startX, y), - cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2) + # 提取与相关的置信度(即概率) + # 预测 + confidence = detections[0, 0, i, 2] + + # 通过确保“置信度”来过滤掉弱检测 + # 大于最小置信度 + if confidence > low_confidence: + # 计算边界框的 (x, y) 坐标 + box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) + (startX, endX, startY, endY) = box.astype("int") + + # 绘制人脸的边界框以及概率 + text = "{:.2f}%".format(confidence * 100) + y = startY - 10 if startY - 10 > 10 else startY + 10 + cv2.rectangle(image, (startX, startY), (endX, endY), + (0, 0, 255), 2) + cv2.putText(image, text, (startX, y), + cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2) ``` -## detections取值错误 +### detections取值错误 -``` +```python # 循环检测 for i in range(0, detections.shape[2]): - # 提取与相关的置信度(即概率) - # 预测 - confidence = detections[0, i, 0, 2] - # 通过确保“置信度”来过滤掉弱检测 - # 大于最小置信度 - if confidence > low_confidence: - # 计算边界框的 (x, y) 坐标 - box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) - (startX, startY, endX, endY) = box.astype("int") - # 绘制人脸的边界框以及概率 - text = "{:.2f}%".format(confidence * 100) - y = startY - 10 if startY - 10 > 10 else startY + 10 - cv2.rectangle(image, (startX, startY), (endX, endY), - (0, 0, 255), 2) - cv2.putText(image, text, (startX, y), - cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2) + # 提取与相关的置信度(即概率) + # 预测 + confidence = detections[0, i, 0, 2] + + # 通过确保“置信度”来过滤掉弱检测 + # 大于最小置信度 + if confidence > low_confidence: + # 计算边界框的 (x, y) 坐标 + box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) + (startX, startY, endX, endY) = box.astype("int") + + # 绘制人脸的边界框以及概率 + text = "{:.2f}%".format(confidence * 100) + y = startY - 10 if startY - 10 > 10 else startY + 10 + cv2.rectangle(image, (startX, startY), (endX, endY), + (0, 0, 255), 2) + cv2.putText(image, text, (startX, y), + cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2) ``` - diff --git "a/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/4.\345\247\277\346\200\201\344\274\260\350\256\241/attitude_estimation.md" "b/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/4.\345\247\277\346\200\201\344\274\260\350\256\241/attitude_estimation.md" index e64fc80d63d0ea6e0b04c0b57edf9c1a443aadcd..5c8018d16d9d40c419f2e7926ffaa69bf999e947 100644 --- "a/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/4.\345\247\277\346\200\201\344\274\260\350\256\241/attitude_estimation.md" +++ "b/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/4.\345\247\277\346\200\201\344\274\260\350\256\241/attitude_estimation.md" @@ -1,7 +1,5 @@ # 使用Python+OpenCV实现姿态估计 - - 姿态估计使用Opencv+Mediapipe来时实现 **什么是Mediapipe?** @@ -10,7 +8,7 @@ Mediapipe是主要用于构建多模式音频,视频或任何时间序列数 安装命令: -``` +```bash pip install mediapipe ``` @@ -28,9 +26,9 @@ Media Pipe Pose是用于高保真人体姿势跟踪的框架,该框架从RGB 我们使用OpenCV+mediapipe实现姿态估计,我已经实现了代码,请大家找出能够正确执行的代码! -# 框架代码 +**框架代码**: -``` +```python import cv2 import mediapipe as mp import time @@ -40,18 +38,19 @@ pose = mpPose.Pose() mpDraw = mp.solutions.drawing_utils cap = cv2.VideoCapture('1.mp4') pTime = 0 -#输出检测结果 + +#TODO(You): 请在此实现并输出检测结果 # do a bit of cleanup cv2.destroyAllWindows() cap.release() - - ``` -# 答案: +以下对TODO部分代码实现正确的是? -``` +## 答案 + +```python while True: success, img = cap.read() if success is False: @@ -76,11 +75,11 @@ while True: key = cv2.waitKey(1) & 0xFF ``` -# 选项 +## 选项 -## 读取帧失败后没有终止逻辑 +### 读取帧失败后没有终止逻辑 -``` +```python while True: success, img = cap.read() imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) @@ -103,9 +102,9 @@ while True: key = cv2.waitKey(1) & 0xFF ``` -## results为None时没有判断 +### results为None时没有判断 -``` +```python while True: success, img = cap.read() if success is False: @@ -128,9 +127,9 @@ while True: key = cv2.waitKey(1) & 0xFF ``` -img的shape顺序不对 +### img的shape顺序不对 -``` +```python while True: success, img = cap.read() if success is False: @@ -154,4 +153,3 @@ while True: cv2.imshow("Image", img) key = cv2.waitKey(1) & 0xFF ``` - diff --git "a/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/5.\350\275\246\350\276\206\346\243\200\346\265\213/opencv-yolo-inference-vehicle.md" "b/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/5.\350\275\246\350\276\206\346\243\200\346\265\213/opencv-yolo-inference-vehicle.md" index 4a91f3f3f727d161d31de12b904207ef52fe3271..879cb1cf417a3100eef67122aa0ea421dedafd20 100755 --- "a/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/5.\350\275\246\350\276\206\346\243\200\346\265\213/opencv-yolo-inference-vehicle.md" +++ "b/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/5.\350\275\246\350\276\206\346\243\200\346\265\213/opencv-yolo-inference-vehicle.md" @@ -68,7 +68,7 @@ while True: # post-process # parsing the output and run nms - # TO-DO your code... + # TODO(You): 请在此实现代码 cv2.namedWindow('Image', cv2.WINDOW_NORMAL) cv2.imshow("Image", image) @@ -87,164 +87,167 @@ while True: cv2.destroyAllWindows() ``` +以下对TODO部分实现正确的是? + ## 答案 ```python - for output in layerOutputs: - for detection in output: - scores = detection[5:] - # class id - classID = np.argmax(scores) - # get score by classid - score = scores[classID] - - # ignore if score is too low - if score >= min_score: - box = detection[0:4] * np.array([W, H, W, H]) - (centerX, centerY, width, height)= box.astype("int") - - x = int(centerX - (width / 2)) - y = int(centerY - (height / 2)) - - boxes.append([x, y, int(width), int(height)]) - confidences.append(float(score)) - classIDs.append(classID) - - # run nms using opencv.dnn module - idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3) - - # render on image - idxs = array(idxs) - box_seq = idxs.flatten() - if len(idxs) > 0: - for seq in box_seq: - (x, y) = (boxes[seq][0], boxes[seq][1]) - (w, h) = (boxes[seq][2], boxes[seq][3]) - - # draw what you want - color = colors[classIDs[seq]] - cv2.rectangle(image, (x, y), (x + w, y + h), color, 2) - text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq]) - cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1) +for output in layerOutputs: + for detection in output: + scores = detection[5:] + # class id + classID = np.argmax(scores) + # get score by classid + score = scores[classID] + + # ignore if score is too low + if score >= min_score: + box = detection[0:4] * np.array([W, H, W, H]) + (centerX, centerY, width, height)= box.astype("int") + + x = int(centerX - (width / 2)) + y = int(centerY - (height / 2)) + + boxes.append([x, y, int(width), int(height)]) + confidences.append(float(score)) + classIDs.append(classID) + +# run nms using opencv.dnn module +idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3) + +# render on image +idxs = array(idxs) +box_seq = idxs.flatten() +if len(idxs) > 0: + for seq in box_seq: + (x, y) = (boxes[seq][0], boxes[seq][1]) + (w, h) = (boxes[seq][2], boxes[seq][3]) + + # draw what you want + color = colors[classIDs[seq]] + cv2.rectangle(image, (x, y), (x + w, y + h), color, 2) + text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq]) + cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1) ``` +## 选项 -## scores解析错误 +### scores解析错误 ```python - for output in layerOutputs: - for detection in output: - scores = detection[5:] - # class id - classID = np.argmax(scores) - # get score - score = detection[4] - - # ignore if score is too low - if score >= min_score: - box = detection[0:4] * np.array([W, H, W, H]) - (centerX, centerY, width, height)= box.astype("int") - - x = int(centerX - (width / 2)) - y = int(centerY - (height / 2)) - - boxes.append([x, y, int(width), int(height)]) - confidences.append(float(score)) - classIDs.append(classID) - - # run nms using opencv.dnn module - idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3) - - # render on image - idxs = array(idxs) - box_seq = idxs.flatten() - if len(idxs) > 0: - for seq in box_seq: - (x, y) = (boxes[seq][0], boxes[seq][1]) - (w, h) = (boxes[seq][2], boxes[seq][3]) - - # draw what you want - color = colors[classIDs[seq]] - cv2.rectangle(image, (x, y), (x + w, y + h), color, 2) - text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq]) - cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1) +for output in layerOutputs: + for detection in output: + scores = detection[5:] + # class id + classID = np.argmax(scores) + # get score + score = detection[4] + + # ignore if score is too low + if score >= min_score: + box = detection[0:4] * np.array([W, H, W, H]) + (centerX, centerY, width, height)= box.astype("int") + + x = int(centerX - (width / 2)) + y = int(centerY - (height / 2)) + + boxes.append([x, y, int(width), int(height)]) + confidences.append(float(score)) + classIDs.append(classID) + +# run nms using opencv.dnn module +idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3) + +# render on image +idxs = array(idxs) +box_seq = idxs.flatten() +if len(idxs) > 0: + for seq in box_seq: + (x, y) = (boxes[seq][0], boxes[seq][1]) + (w, h) = (boxes[seq][2], boxes[seq][3]) + + # draw what you want + color = colors[classIDs[seq]] + cv2.rectangle(image, (x, y), (x + w, y + h), color, 2) + text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq]) + cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1) ``` -## box坐标没有还原到原始输入尺寸 +### box坐标没有还原到原始输入尺寸 ```python - for output in layerOutputs: - for detection in output: - scores = detection[5:] - # class id - classID = np.argmax(scores) - # get score by classid - score = scores[classID] - - # ignore if score is too low - if score >= min_score: - box = detection[0:4] - (centerX, centerY, width, height)= box.astype("int") - - x = int(centerX - (width / 2)) - y = int(centerY - (height / 2)) - - boxes.append([x, y, int(width), int(height)]) - confidences.append(float(score)) - classIDs.append(classID) - - # run nms using opencv.dnn module - idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3) - - # render on image - idxs = array(idxs) - box_seq = idxs.flatten() - if len(idxs) > 0: - for seq in box_seq: - (x, y) = (boxes[seq][0], boxes[seq][1]) - (w, h) = (boxes[seq][2], boxes[seq][3]) - - # draw what you want - color = colors[classIDs[seq]] - cv2.rectangle(image, (x, y), (x + w, y + h), color, 2) - text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq]) - cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1) +for output in layerOutputs: + for detection in output: + scores = detection[5:] + # class id + classID = np.argmax(scores) + # get score by classid + score = scores[classID] + + # ignore if score is too low + if score >= min_score: + box = detection[0:4] + (centerX, centerY, width, height)= box.astype("int") + + x = int(centerX - (width / 2)) + y = int(centerY - (height / 2)) + + boxes.append([x, y, int(width), int(height)]) + confidences.append(float(score)) + classIDs.append(classID) + +# run nms using opencv.dnn module +idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3) + +# render on image +idxs = array(idxs) +box_seq = idxs.flatten() +if len(idxs) > 0: + for seq in box_seq: + (x, y) = (boxes[seq][0], boxes[seq][1]) + (w, h) = (boxes[seq][2], boxes[seq][3]) + + # draw what you want + color = colors[classIDs[seq]] + cv2.rectangle(image, (x, y), (x + w, y + h), color, 2) + text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq]) + cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1) ``` -## box左上角坐标解析错误 +### box左上角坐标解析错误 ```python - for output in layerOutputs: - for detection in output: - scores = detection[5:] - # class id - classID = np.argmax(scores) - # get score by classid - score = scores[classID] - - # ignore if score is too low - if score >= min_score: - box = detection[0:4] * np.array([W, H, W, H]) - (x, y, width, height)= box.astype("int") - - boxes.append([x, y, int(width), int(height)]) - confidences.append(float(score)) - classIDs.append(classID) - - # run nms using opencv.dnn module - idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3) - - # render on image - idxs = array(idxs) - box_seq = idxs.flatten() - if len(idxs) > 0: - for seq in box_seq: - (x, y) = (boxes[seq][0], boxes[seq][1]) - (w, h) = (boxes[seq][2], boxes[seq][3]) - - # draw what you want - color = colors[classIDs[seq]] - cv2.rectangle(image, (x, y), (x + w, y + h), color, 2) - text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq]) - cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1) +for output in layerOutputs: + for detection in output: + scores = detection[5:] + # class id + classID = np.argmax(scores) + # get score by classid + score = scores[classID] + + # ignore if score is too low + if score >= min_score: + box = detection[0:4] * np.array([W, H, W, H]) + (x, y, width, height)= box.astype("int") + + boxes.append([x, y, int(width), int(height)]) + confidences.append(float(score)) + classIDs.append(classID) + +# run nms using opencv.dnn module +idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3) + +# render on image +idxs = array(idxs) +box_seq = idxs.flatten() +if len(idxs) > 0: + for seq in box_seq: + (x, y) = (boxes[seq][0], boxes[seq][1]) + (w, h) = (boxes[seq][2], boxes[seq][3]) + + # draw what you want + color = colors[classIDs[seq]] + cv2.rectangle(image, (x, y), (x + w, y + h), color, 2) + text = "{}: {:.3f}".format(LABELS[classIDs[seq]], confidences[seq]) + cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1) ```