From 49ba9fc7dbf069516ec277736b0ceaaba949afa5 Mon Sep 17 00:00:00 2001 From: feilong Date: Fri, 17 Dec 2021 19:22:33 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A4=84=E7=90=86=E5=9B=BE=E7=89=87=E8=B7=AF?= =?UTF-8?q?=E5=BE=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../classification.md" | 196 +++++++++--------- main.py | 4 +- 2 files changed, 100 insertions(+), 100 deletions(-) diff --git "a/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/1.\345\233\276\345\203\217\345\210\206\347\261\273/classification.md" "b/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/1.\345\233\276\345\203\217\345\210\206\347\261\273/classification.md" index 66617e1..44b5a56 100755 --- "a/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/1.\345\233\276\345\203\217\345\210\206\347\261\273/classification.md" +++ "b/data/1.OpenCV\345\210\235\351\230\266/7.OpenCV\344\270\255\347\232\204\346\267\261\345\272\246\345\255\246\344\271\240/1.\345\233\276\345\203\217\345\210\206\347\261\273/classification.md" @@ -1,98 +1,98 @@ -# opencv.dnn做图像分类 - -图像分类是基于深度学习的计算机视觉任务中最简单、也是最基础的一类,它其中用到的CNN特征提取技术也是目标检测、目标分割等视觉任务的基础。 - -![](./result.png) - -具体到图像分类任务而言,其具体流程如下: -1. 输入指定大小RGB图像,1/3通道,宽高一般相等 -2. 通过卷积神经网络进行多尺度特征提取,生成高维特征值 -3. 利用全连接网络、或其他结构对高维特征进行分类,输出各目标分类的概率值(概率和为1) -4. 选择概率值最高的作为图像分类结果 - -![](./classification.png) - -`opencv.dnn`模块可以直接加载深度学习模型,并进行推理输出运行结果。下面是opencv.dnn模块加载googlenet caffe模型进行图片分类的代码,请你完善其中TO-DO部分的代码。 - -> 代码中LABEL_MAP是图像分类名称字典,给定索引得到具体分类名称(string)。 - -```python -import cv2 -import numpy as np -from labels import LABEL_MAP # 1000 labels in imagenet dataset - -# caffe model, googlenet aglo -weights = "bvlc_googlenet.caffemodel" -protxt = "bvlc_googlenet.prototxt" - -# read caffe model from disk -net = cv2.dnn.readNetFromCaffe(protxt, weights) - -# create input -image = cv2.imread("ocean-liner.jpg") -blob = cv2.dnn.blobFromImage(image, 1.0, (224, 224), (104, 117, 123), False, crop=False) -result = np.copy(image) - -# run! -net.setInput(blob) -out = net.forward() - -# TO-DO your code... - -# time cost -t, _ = net.getPerfProfile() -label = 'cost time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency()) -cv2.putText(result, label, (0, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 0), 2) - -# render on image -label = '%s: %.4f' % (LABEL_MAP[classId] if LABEL_MAP else 'Class #%d' % classId, confidence) -cv2.putText(result, label, (0, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) - -show_img = np.hstack((image, result)) - -# normal codes in opencv -cv2.imshow("Image", show_img) -cv2.waitKey(0) - -``` - - -## 答案 - -```python -# output probability, find the right index -out = out.flatten() -classId = np.argmax(out) -confidence = out[classId] - -``` - -## 输出理解错误 - -```python -# output probability, find the right index -classId = out[0] -confidence = out[1] - -``` - - -## 输出维度理解错误 - -```python -# output probability, find the right index -classId = np.argmax(out) -confidence = out[classId] - -``` - - -## 输出理解错误 - -```python -# output probability, find the right index -out = out.flatten() -classId = np.argmax(out[1:]) -confidence = out[classId + 1] - -``` \ No newline at end of file +# opencv.dnn做图像分类 + +图像分类是基于深度学习的计算机视觉任务中最简单、也是最基础的一类,它其中用到的CNN特征提取技术也是目标检测、目标分割等视觉任务的基础。 + +![](https://gitcode.net/csdn/skill_tree_opencv/-/raw/master/data/1.OpenCV初阶/7.OpenCV中的深度学习/1.图像分类/result.png) + +具体到图像分类任务而言,其具体流程如下: +1. 输入指定大小RGB图像,1/3通道,宽高一般相等 +2. 通过卷积神经网络进行多尺度特征提取,生成高维特征值 +3. 利用全连接网络、或其他结构对高维特征进行分类,输出各目标分类的概率值(概率和为1) +4. 选择概率值最高的作为图像分类结果 + +![](https://gitcode.net/csdn/skill_tree_opencv/-/raw/master/data/1.OpenCV初阶/7.OpenCV中的深度学习/1.图像分类/classification.png) + +`opencv.dnn`模块可以直接加载深度学习模型,并进行推理输出运行结果。下面是opencv.dnn模块加载googlenet caffe模型进行图片分类的代码,请你完善其中TO-DO部分的代码。 + +> 代码中LABEL_MAP是图像分类名称字典,给定索引得到具体分类名称(string)。 + +```python +import cv2 +import numpy as np +from labels import LABEL_MAP # 1000 labels in imagenet dataset + +# caffe model, googlenet aglo +weights = "bvlc_googlenet.caffemodel" +protxt = "bvlc_googlenet.prototxt" + +# read caffe model from disk +net = cv2.dnn.readNetFromCaffe(protxt, weights) + +# create input +image = cv2.imread("ocean-liner.jpg") +blob = cv2.dnn.blobFromImage(image, 1.0, (224, 224), (104, 117, 123), False, crop=False) +result = np.copy(image) + +# run! +net.setInput(blob) +out = net.forward() + +# TO-DO your code... + +# time cost +t, _ = net.getPerfProfile() +label = 'cost time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency()) +cv2.putText(result, label, (0, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 0), 2) + +# render on image +label = '%s: %.4f' % (LABEL_MAP[classId] if LABEL_MAP else 'Class #%d' % classId, confidence) +cv2.putText(result, label, (0, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) + +show_img = np.hstack((image, result)) + +# normal codes in opencv +cv2.imshow("Image", show_img) +cv2.waitKey(0) + +``` + + +## 答案 + +```python +# output probability, find the right index +out = out.flatten() +classId = np.argmax(out) +confidence = out[classId] + +``` + +## 输出理解错误 + +```python +# output probability, find the right index +classId = out[0] +confidence = out[1] + +``` + + +## 输出维度理解错误 + +```python +# output probability, find the right index +classId = np.argmax(out) +confidence = out[classId] + +``` + + +## 输出理解错误 + +```python +# output probability, find the right index +out = out.flatten() +classId = np.argmax(out[1:]) +confidence = out[classId + 1] + +``` diff --git a/main.py b/main.py index c1f1a6e..fa35366 100644 --- a/main.py +++ b/main.py @@ -27,5 +27,5 @@ if __name__ == '__main__': # doc = DocWalker('doc') # doc.walk() - # img = ImgWalker('data') - # img.walk() + img = ImgWalker('data') + img.walk() -- GitLab