Merge pull request #13670 from allnes:dnn_fix_obj_detect_sample

f46cd9db · Alexander Alekhin · 37a5af36 · cca2c4b3 · f46cd9db · f46cd9db
隐藏空白更改
内联并排

Showing with 35 addition and 57 deletion

samples/dnn/object_detection.cpp samples/dnn/object_detection.cpp +27 -39

samples/dnn/object_detection.py samples/dnn/object_detection.py +8 -18

未找到文件。
--- a/samples/dnn/object_detection.cpp
+++ b/samples/dnn/object_detection.cpp
@@ -153,51 +153,39 @@ void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<Rect> boxes;
-    if (net.getLayer(0)->outputNameToIndex("im_info") != -1)  // Faster-RCNN or R-FCN
+    if (outLayerType == "DetectionOutput")
    {
        // Network produces output blob with a shape 1x1xNx7 where N is a number of
        // detections and an every detection is a vector of values
        // [batchId, classId, confidence, left, top, right, bottom]
-        CV_Assert(outs.size() == 1);
-        float* data = (float*)outs[0].data;
-        for (size_t i = 0; i < outs[0].total(); i += 7)
+        CV_Assert(outs.size() > 0);
+        for (size_t k = 0; k < outs.size(); k++)
        {
-            float confidence = data[i + 2];
-            if (confidence > confThreshold)
+            float* data = (float*)outs[k].data;
+            for (size_t i = 0; i < outs[k].total(); i += 7)
            {
-                int left = (int)data[i + 3];
-                int top = (int)data[i + 4];
-                int right = (int)data[i + 5];
-                int bottom = (int)data[i + 6];
-                int width = right - left + 1;
-                int height = bottom - top + 1;
-                classIds.push_back((int)(data[i + 1]) - 1);  // Skip 0th background class id.
-                boxes.push_back(Rect(left, top, width, height));
-                confidences.push_back(confidence);
-            }
-        }
-    }
-    else if (outLayerType == "DetectionOutput")
-    {
-        // Network produces output blob with a shape 1x1xNx7 where N is a number of
-        // detections and an every detection is a vector of values
-        // [batchId, classId, confidence, left, top, right, bottom]
-        CV_Assert(outs.size() == 1);
-        float* data = (float*)outs[0].data;
-        for (size_t i = 0; i < outs[0].total(); i += 7)
-        {
-            float confidence = data[i + 2];
-            if (confidence > confThreshold)
-            {
-                int left = (int)(data[i + 3] * frame.cols);
-                int top = (int)(data[i + 4] * frame.rows);
-                int right = (int)(data[i + 5] * frame.cols);
-                int bottom = (int)(data[i + 6] * frame.rows);
-                int width = right - left + 1;
-                int height = bottom - top + 1;
-                classIds.push_back((int)(data[i + 1]) - 1);  // Skip 0th background class id.
-                boxes.push_back(Rect(left, top, width, height));
-                confidences.push_back(confidence);
+                float confidence = data[i + 2];
+                if (confidence > confThreshold)
+                {
+                    int left   = (int)data[i + 3];
+                    int top    = (int)data[i + 4];
+                    int right  = (int)data[i + 5];
+                    int bottom = (int)data[i + 6];
+                    int width  = right - left + 1;
+                    int height = bottom - top + 1;
+                    if (width * height <= 1)
+                    {
+                        left   = (int)(data[i + 3] * frame.cols);
+                        top    = (int)(data[i + 4] * frame.rows);
+                        right  = (int)(data[i + 5] * frame.cols);
+                        bottom = (int)(data[i + 6] * frame.rows);
+                        width  = right - left + 1;
+                        height = bottom - top + 1;
+                    }
+                    classIds.push_back((int)(data[i + 1]) - 1);  // Skip 0th background class id.
+                    boxes.push_back(Rect(left, top, width, height));
+                    confidences.push_back(confidence);
+                }
            }
        }
    }

--- a/samples/dnn/object_detection.py
+++ b/samples/dnn/object_detection.py
@@ -102,7 +102,7 @@ def postprocess(frame, outs):
    classIds = []
    confidences = []
    boxes = []
-    if net.getLayer(0).outputNameToIndex('im_info') != -1:  # Faster-RCNN or R-FCN
+    if lastLayer.type == 'DetectionOutput':
        # Network produces output blob with a shape 1x1xNx7 where N is a number of
        # detections and an every detection is a vector of values
        # [batchId, classId, confidence, left, top, right, bottom]
@@ -116,23 +116,13 @@ def postprocess(frame, outs):
                    bottom = int(detection[6])
                    width = right - left + 1
                    height = bottom - top + 1
-                    classIds.append(int(detection[1]) - 1)  # Skip background label
-                    confidences.append(float(confidence))
-                    boxes.append([left, top, width, height])
-    elif lastLayer.type == 'DetectionOutput':
-        # Network produces output blob with a shape 1x1xNx7 where N is a number of
-        # detections and an every detection is a vector of values
-        # [batchId, classId, confidence, left, top, right, bottom]
-        for out in outs:
-            for detection in out[0, 0]:
-                confidence = detection[2]
-                if confidence > confThreshold:
-                    left = int(detection[3] * frameWidth)
-                    top = int(detection[4] * frameHeight)
-                    right = int(detection[5] * frameWidth)
-                    bottom = int(detection[6] * frameHeight)
-                    width = right - left + 1
-                    height = bottom - top + 1
+                    if width * height <= 1:
+                        left = int(detection[3] * frameWidth)
+                        top = int(detection[4] * frameHeight)
+                        right = int(detection[5] * frameWidth)
+                        bottom = int(detection[6] * frameHeight)
+                        width = right - left + 1
+                        height = bottom - top + 1
                    classIds.append(int(detection[1]) - 1)  # Skip background label
                    confidences.append(float(confidence))
                    boxes.append([left, top, width, height])