From a208c3bf040521a62a703725de82f71911339cab Mon Sep 17 00:00:00 2001
From: Alexander Nesterov <alexander.nesterov@intel.com>
Date: Wed, 12 Dec 2018 16:42:51 -0100
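Subject: [PATCH] Fix openpose samples

Make the OpenPose DNN samples select the pose model explicitly and expose
the blob scale factor.

* openpose.cpp: add the `dataset` (COCO, MPI, HAND) and `scale` options,
  choose the model index and part/pair counts from `dataset` instead of
  inferring them from the number of output channels, and load the network
  with readNet() instead of readNetFromCaffe().
* openpose.py: add the HAND keypoints and pose pairs, add `--scale`, load
  the network with cv.dnn.readNet(), and accept models whose output has
  more channels than the listed body parts.
* openpose.py: update the Inference Engine setup hint to source
  setupvars.sh.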

---
 samples/dnn/openpose.cpp | 59 ++++++++++++++++------------------------
 samples/dnn/openpose.py  | 36 +++++++++++++++++++-----
 2 files changed, 53 insertions(+), 42 deletions(-)
diff --git a/samples/dnn/openpose.cpp b/samples/dnn/openpose.cpp
index b4934d76e4..48e2dc0475 100644
--- a/samples/dnn/openpose.cpp
+++ b/samples/dnn/openpose.cpp
@@ -57,21 +57,26 @@ const int POSE_PAIRS[3][20][2] = {
 int main(int argc, char **argv)
 {
     CommandLineParser parser(argc, argv,
-        "{ h help           | false | print this help message }"
-        "{ p proto          |       | (required) model configuration, e.g. hand/pose.prototxt }"
-        "{ m model          |       | (required) model weights, e.g. hand/pose_iter_102000.caffemodel }"
-        "{ i image          |       | (required) path to image file (containing a single person, or hand) }"
-        "{ width            |  368  | Preprocess input image by resizing to a specific width. }"
-        "{ height           |  368  | Preprocess input image by resizing to a specific height. }"
-        "{ t threshold      |  0.1  | threshold or confidence value for the heatmap }"
+        "{ h help           | false     | print this help message }"
+        "{ p proto          |           | (required) model configuration, e.g. hand/pose.prototxt }"
+        "{ m model          |           | (required) model weights, e.g. hand/pose_iter_102000.caffemodel }"
+        "{ i image          |           | (required) path to image file (containing a single person, or hand) }"
+        "{ d dataset        |           | specify what kind of model was trained. It could be (COCO, MPI, HAND) depends on dataset. }"
+        "{ width            |  368      | Preprocess input image by resizing to a specific width. }"
+        "{ height           |  368      | Preprocess input image by resizing to a specific height. }"
+        "{ t threshold      |  0.1      | threshold or confidence value for the heatmap }"
+        "{ s scale          |  0.003922 | scale for blob }"
     );
 
     String modelTxt = samples::findFile(parser.get<string>("proto"));
     String modelBin = samples::findFile(parser.get<string>("model"));
     String imageFile = samples::findFile(parser.get<String>("image"));
+    String dataset = parser.get<String>("dataset");
     int W_in = parser.get<int>("width");
     int H_in = parser.get<int>("height");
     float thresh = parser.get<float>("threshold");
+    float scale  = parser.get<float>("scale");
+
     if (parser.get<bool>("help") || modelTxt.empty() || modelBin.empty() || imageFile.empty())
     {
         cout << "A sample app to demonstrate human or hand pose detection with a pretrained OpenPose dnn." << endl;
@@ -79,9 +84,18 @@ int main(int argc, char **argv)
         return 0;
     }
 
-    // read the network model
-    Net net = readNetFromCaffe(modelTxt, modelBin);
+    int midx, npairs, nparts;
+         if (!dataset.compare("COCO")) {  midx = 0; npairs = 17; nparts = 18; }
+    else if (!dataset.compare("MPI"))  {  midx = 1; npairs = 14; nparts = 16; }
+    else if (!dataset.compare("HAND")) {  midx = 2; npairs = 20; nparts = 22; }
+    else
+    {
+        std::cerr << "Can't interpret dataset parameter: " << dataset << std::endl;
+        exit(-1);
+    }
 
+    // read the network model
+    Net net = readNet(modelBin, modelTxt);
     // and the image
     Mat img = imread(imageFile);
     if (img.empty())
@@ -91,39 +105,14 @@ int main(int argc, char **argv)
     }
 
     // send it through the network
-    Mat inputBlob = blobFromImage(img, 1.0 / 255, Size(W_in, H_in), Scalar(0, 0, 0), false, false);
+    Mat inputBlob = blobFromImage(img, scale, Size(W_in, H_in), Scalar(0, 0, 0), false, false);
     net.setInput(inputBlob);
     Mat result = net.forward();
     // the result is an array of "heatmaps", the probability of a body part being in location x,y
 
-    int midx, npairs;
-    int nparts = result.size[1];
     int H = result.size[2];
     int W = result.size[3];
 
-    // find out, which model we have
-    if (nparts == 19)
-    {   // COCO body
-        midx   = 0;
-        npairs = 17;
-        nparts = 18; // skip background
-    }
-    else if (nparts == 16)
-    {   // MPI body
-        midx   = 1;
-        npairs = 14;
-    }
-    else if (nparts == 22)
-    {   // hand
-        midx   = 2;
-        npairs = 20;
-    }
-    else
-    {
-        cerr << "there should be 19 parts for the COCO model, 16 for MPI, or 22 for the hand one, but this model has " << nparts << " parts." << endl;
-        return (0);
-    }
-
     // find the position of the body parts
     vector<Point> points(22);
     for (int n=0; n<nparts; n++)
diff --git a/samples/dnn/openpose.py b/samples/dnn/openpose.py
index 9fcca1350a..e6bb1ba05a 100644
--- a/samples/dnn/openpose.py
+++ b/samples/dnn/openpose.py
@@ -1,5 +1,5 @@
 # To use Inference Engine backend, specify location of plugins:
-# export LD_LIBRARY_PATH=/opt/intel/deeplearning_deploymenttoolkit/deployment_tools/external/mklml_lnx/lib:$LD_LIBRARY_PATH
+# source /opt/intel/computer_vision_sdk/bin/setupvars.sh
 import cv2 as cv
 import numpy as np
 import argparse
@@ -12,10 +12,11 @@ parser.add_argument('--input', help='Path to image or video. Skip to capture fra
 parser.add_argument('--proto', help='Path to .prototxt')
 parser.add_argument('--model', help='Path to .caffemodel')
 parser.add_argument('--dataset', help='Specify what kind of model was trained. '
-                                      'It could be (COCO, MPI) depends on dataset.')
+                                      'It could be (COCO, MPI, HAND) depends on dataset.')
 parser.add_argument('--thr', default=0.1, type=float, help='Threshold value for pose parts heat map')
 parser.add_argument('--width', default=368, type=int, help='Resize input to specific width.')
 parser.add_argument('--height', default=368, type=int, help='Resize input to specific height.')
+parser.add_argument('--scale', default=0.003922, type=float, help='Scale for blob.')
 
 args = parser.parse_args()
 
@@ -30,8 +31,7 @@ if args.dataset == 'COCO':
                    ["Neck", "RHip"], ["RHip", "RKnee"], ["RKnee", "RAnkle"], ["Neck", "LHip"],
                    ["LHip", "LKnee"], ["LKnee", "LAnkle"], ["Neck", "Nose"], ["Nose", "REye"],
                    ["REye", "REar"], ["Nose", "LEye"], ["LEye", "LEar"] ]
-else:
-    assert(args.dataset == 'MPI')
+elif args.dataset == 'MPI':
     BODY_PARTS = { "Head": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4,
                    "LShoulder": 5, "LElbow": 6, "LWrist": 7, "RHip": 8, "RKnee": 9,
                    "RAnkle": 10, "LHip": 11, "LKnee": 12, "LAnkle": 13, "Chest": 14,
@@ -41,11 +41,33 @@ else:
                    ["RElbow", "RWrist"], ["Neck", "LShoulder"], ["LShoulder", "LElbow"],
                    ["LElbow", "LWrist"], ["Neck", "Chest"], ["Chest", "RHip"], ["RHip", "RKnee"],
                    ["RKnee", "RAnkle"], ["Chest", "LHip"], ["LHip", "LKnee"], ["LKnee", "LAnkle"] ]
+else:
+    assert(args.dataset == 'HAND')
+    BODY_PARTS = { "Wrist": 0,
+                   "ThumbMetacarpal": 1, "ThumbProximal": 2, "ThumbMiddle": 3, "ThumbDistal": 4,
+                   "IndexFingerMetacarpal": 5, "IndexFingerProximal": 6, "IndexFingerMiddle": 7, "IndexFingerDistal": 8,
+                   "MiddleFingerMetacarpal": 9, "MiddleFingerProximal": 10, "MiddleFingerMiddle": 11, "MiddleFingerDistal": 12,
+                   "RingFingerMetacarpal": 13, "RingFingerProximal": 14, "RingFingerMiddle": 15, "RingFingerDistal": 16,
+                   "LittleFingerMetacarpal": 17, "LittleFingerProximal": 18, "LittleFingerMiddle": 19, "LittleFingerDistal": 20,
+                 }
+
+    POSE_PAIRS = [ ["Wrist", "ThumbMetacarpal"], ["ThumbMetacarpal", "ThumbProximal"],
+                   ["ThumbProximal", "ThumbMiddle"], ["ThumbMiddle", "ThumbDistal"],
+                   ["Wrist", "IndexFingerMetacarpal"], ["IndexFingerMetacarpal", "IndexFingerProximal"],
+                   ["IndexFingerProximal", "IndexFingerMiddle"], ["IndexFingerMiddle", "IndexFingerDistal"],
+                   ["Wrist", "MiddleFingerMetacarpal"], ["MiddleFingerMetacarpal", "MiddleFingerProximal"],
+                   ["MiddleFingerProximal", "MiddleFingerMiddle"], ["MiddleFingerMiddle", "MiddleFingerDistal"],
+                   ["Wrist", "RingFingerMetacarpal"], ["RingFingerMetacarpal", "RingFingerProximal"],
+                   ["RingFingerProximal", "RingFingerMiddle"], ["RingFingerMiddle", "RingFingerDistal"],
+                   ["Wrist", "LittleFingerMetacarpal"], ["LittleFingerMetacarpal", "LittleFingerProximal"],
+                   ["LittleFingerProximal", "LittleFingerMiddle"], ["LittleFingerMiddle", "LittleFingerDistal"] ]
+
 
 inWidth = args.width
 inHeight = args.height
+inScale = args.scale
 
-net = cv.dnn.readNetFromCaffe(cv.samples.findFile(args.proto), cv.samples.findFile(args.model))
+net = cv.dnn.readNet(cv.samples.findFile(args.proto), cv.samples.findFile(args.model))
 
 cap = cv.VideoCapture(args.input if args.input else 0)
 
@@ -57,12 +79,12 @@ while cv.waitKey(1) < 0:
 
     frameWidth = frame.shape[1]
     frameHeight = frame.shape[0]
-    inp = cv.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
+    inp = cv.dnn.blobFromImage(frame, inScale, (inWidth, inHeight),
                               (0, 0, 0), swapRB=False, crop=False)
     net.setInput(inp)
     out = net.forward()
 
-    assert(len(BODY_PARTS) == out.shape[1])
+    assert(len(BODY_PARTS) <= out.shape[1])
 
     points = []
     for i in range(len(BODY_PARTS)):
-- 
GitLab