Commit 1e9b58e5 authored by OAK中国_官方

Update depthai_blazepose/BlazeposeDepthai.py,...

Update depthai_blazepose/BlazeposeDepthai.py, depthai_blazepose/BlazeposeDepthaiEdge.py, depthai_blazepose/demo.py, depthai_blazepose/BlazeposeRenderer.py, depthai_blazepose/docker_tflite2tensorflow.sh, depthai_blazepose/FPS.py, depthai_blazepose/LICENSE.txt, depthai_blazepose/mediapipe_utils.py, depthai_blazepose/o3d_utils.py, depthai_blazepose/README.md, depthai_blazepose/requirements.txt, depthai_blazepose/template_manager_script.py, depthai_blazepose/.idea/.gitignore, depthai_blazepose/.idea/.name, depthai_blazepose/.idea/depthai_blazepose-main.iml, depthai_blazepose/.idea/misc.xml, depthai_blazepose/.idea/modules.xml, depthai_blazepose/.idea/workspace.xml, depthai_blazepose/.idea/inspectionProfiles/profiles_settings.xml, depthai_blazepose/.idea/inspectionProfiles/Project_Default.xml, depthai_blazepose/__pycache__/BlazeposeDepthaiEdge.cpython-38.pyc, depthai_blazepose/__pycache__/FPS.cpython-38.pyc, depthai_blazepose/__pycache__/BlazeposeRenderer.cpython-38.pyc, depthai_blazepose/__pycache__/mediapipe_utils.cpython-38.pyc, depthai_blazepose/__pycache__/o3d_utils.cpython-38.pyc, depthai_blazepose/custom_models/DetectionBestCandidate_sh1.blob, depthai_blazepose/custom_models/DivideBy255_sh1.blob, depthai_blazepose/custom_models/convert_model.sh, depthai_blazepose/custom_models/DetectionBestCandidate.py, depthai_blazepose/custom_models/DivideBy255.py, depthai_blazepose/custom_models/README.md, depthai_blazepose/examples/semaphore_alphabet/medias/video.mp4, depthai_blazepose/examples/semaphore_alphabet/medias/semaphore.gif, depthai_blazepose/examples/semaphore_alphabet/medias/semaphore.mp4, depthai_blazepose/examples/semaphore_alphabet/demo.py, depthai_blazepose/examples/semaphore_alphabet/README.md, depthai_blazepose/img/pipeline_edge_mode.png, depthai_blazepose/img/full_body_landmarks.png, depthai_blazepose/img/pipeline_host_mode.png, depthai_blazepose/img/3d_visualizations.gif, depthai_blazepose/img/taichi.gif, depthai_blazepose/img/3d_world_visualization.gif, depthai_blazepose/models/convert_models.sh, depthai_blazepose/models/gen_blob_shave.sh, depthai_blazepose/models/pose_landmark_lite_sh4.blob, depthai_blazepose/models/pose_detection_sh4.blob, depthai_blazepose/models/pose_landmark_full_sh4.blob, depthai_blazepose/models/pose_landmark_heavy_sh4.blob files
Deleted depthai_blazepose/.gitkeep
Parent 9134866c
# Default ignored files
/shelf/
/workspace.xml
template_manager_script.py
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.8 (oak)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
</module>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyChainedComparisonsInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoreConstantInTheMiddle" value="true" />
</inspection_tool>
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="7">
<item index="0" class="java.lang.String" itemvalue="opencv-python" />
<item index="1" class="java.lang.String" itemvalue="tensorflow" />
<item index="2" class="java.lang.String" itemvalue="moviepy" />
<item index="3" class="java.lang.String" itemvalue="numpy" />
<item index="4" class="java.lang.String" itemvalue="opencv-contrib-python" />
<item index="5" class="java.lang.String" itemvalue="depthai" />
<item index="6" class="java.lang.String" itemvalue="imutils" />
</list>
</value>
</option>
</inspection_tool>
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="W605" />
<option value="E501" />
<option value="E701" />
<option value="E122" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="N802" />
<option value="N806" />
<option value="N801" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="cv2.line" />
<option value="cv2.resize" />
<option value="cv2.getAffineTransform" />
<option value="cv2.transform" />
<option value="int.__getitem__" />
</list>
</option>
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (oak)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/depthai_blazepose-main.iml" filepath="$PROJECT_DIR$/.idea/depthai_blazepose-main.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="9fc8af4f-996c-4e90-aa09-6d074e70188d" name="Changes" comment="" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="MarkdownSettingsMigration">
<option name="stateVersion" value="1" />
</component>
<component name="ProjectId" id="2Gn4pNnAg7QmKce8KYu35DIdlEP" />
<component name="ProjectViewState">
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent">{
&quot;keyToString&quot;: {
&quot;RunOnceActivity.OpenProjectViewOnStart&quot;: &quot;true&quot;,
&quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
&quot;last_opened_file_path&quot;: &quot;D:/DDSix/最终部署现场/dd_six_kit_alg&quot;,
&quot;settings.editor.selected.configurable&quot;: &quot;com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable&quot;
}
}</component>
<component name="RunManager">
<configuration name="demo" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
<module name="depthai_blazepose-main" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/demo.py" />
<option name="PARAMETERS" value="-e --lm_m lite" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
</component>
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="9fc8af4f-996c-4e90-aa09-6d074e70188d" name="Changes" comment="" />
<created>1667011196090</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1667011196090</updated>
</task>
<servers />
</component>
</project>
This diff has been collapsed.
This diff has been collapsed.
import cv2
import numpy as np
from o3d_utils import Visu3D
import mediapipe_utils as mpu
# LINE_BODY and COLORS_BODY are used when drawing the skeleton in 3D.
rgb = {"right":(0,1,0), "left":(1,0,0), "middle":(1,1,0)}
LINES_BODY = [[9,10],[4,6],[1,3],
[12,14],[14,16],[16,20],[20,18],[18,16],
[12,11],[11,23],[23,24],[24,12],
[11,13],[13,15],[15,19],[19,17],[17,15],
[24,26],[26,28],[32,30],
[23,25],[25,27],[29,31]]
COLORS_BODY = ["middle","right","left",
"right","right","right","right","right",
"middle","middle","middle","middle",
"left","left","left","left","left",
"right","right","right","left","left","left"]
COLORS_BODY = [rgb[x] for x in COLORS_BODY]
class BlazeposeRenderer:
def __init__(self,
tracker,
show_3d=None,
output=None):
self.tracker = tracker
self.show_3d = show_3d
self.frame = None
self.pause = False
# Rendering flags
self.show_rot_rect = False
self.show_landmarks = True
self.show_score = False
self.show_fps = True
self.show_xyz_zone = self.show_xyz = self.tracker.xyz
if self.show_3d == "mixed" and not self.tracker.xyz:
print("'mixed' 3d visualization needs the tracker to be in 'xyz' mode !")
print("3d visualization falling back to 'world' mode.")
self.show_3d = 'world'
if self.show_3d == "image":
self.vis3d = Visu3D(zoom=0.7, segment_radius=3)
z = min(tracker.img_h, tracker.img_w)/3
self.vis3d.create_grid([0,tracker.img_h,-z],[tracker.img_w,tracker.img_h,-z],[tracker.img_w,tracker.img_h,z],[0,tracker.img_h,z],5,2) # Floor
self.vis3d.create_grid([0,0,z],[tracker.img_w,0,z],[tracker.img_w,tracker.img_h,z],[0,tracker.img_h,z],5,2) # Wall
self.vis3d.init_view()
elif self.show_3d == "world":
self.vis3d = Visu3D(bg_color=(0.2, 0.2, 0.2), zoom=1.1, segment_radius=0.01)
self.vis3d.create_grid([-1,1,-1],[1,1,-1],[1,1,1],[-1,1,1],2,2) # Floor
self.vis3d.create_grid([-1,1,1],[1,1,1],[1,-1,1],[-1,-1,1],2,2) # Wall
self.vis3d.init_view()
elif self.show_3d == "mixed":
self.vis3d = Visu3D(bg_color=(0.4, 0.4, 0.4), zoom=0.7, segment_radius=0.01)
half_length = 3
grid_depth = 5
self.vis3d.create_grid([-half_length,1,0],[half_length,1,0],[half_length,1,grid_depth],[-half_length,1,grid_depth],2*half_length,grid_depth) # Floor
self.vis3d.create_grid([-half_length,1,grid_depth],[half_length,1,grid_depth],[half_length,-1,grid_depth],[-half_length,-1,grid_depth],2*half_length,2) # Wall
self.vis3d.create_camera()
self.vis3d.init_view()
if output is None:
self.output = None
else:
fourcc = cv2.VideoWriter_fourcc(*"MJPG")
self.output = cv2.VideoWriter(output,fourcc,tracker.video_fps,(tracker.img_w, tracker.img_h))
def is_present(self, body, lm_id):
return body.presence[lm_id] > self.tracker.presence_threshold
def draw_landmarks(self, body):
if self.show_rot_rect:
cv2.polylines(self.frame, [np.array(body.rect_points)], True, (0,255,255), 2, cv2.LINE_AA)
if self.show_landmarks:
list_connections = LINES_BODY
lines = [np.array([body.landmarks[point,:2] for point in line]) for line in list_connections if self.is_present(body, line[0]) and self.is_present(body, line[1])]
cv2.polylines(self.frame, lines, False, (255, 180, 90), 2, cv2.LINE_AA)
# for i,x_y in enumerate(body.landmarks_padded[:,:2]):
for i,x_y in enumerate(body.landmarks[:self.tracker.nb_kps,:2]):
if self.is_present(body, i):
if i > 10:
color = (0,255,0) if i%2==0 else (0,0,255)
elif i == 0:
color = (0,255,255)
elif i in [4,5,6,8,10]:
color = (0,255,0)
else:
color = (0,0,255)
cv2.circle(self.frame, (x_y[0], x_y[1]), 4, color, -11)
if self.show_score:
h, w = self.frame.shape[:2]
cv2.putText(self.frame, f"Landmark score: {body.lm_score:.2f}",
(20, h-60),
cv2.FONT_HERSHEY_PLAIN, 2, (255,255,0), 2)
if self.show_xyz and body.xyz_ref:
x0, y0 = body.xyz_ref_coords_pixel.astype(int)
x0 -= 50
y0 += 40
cv2.rectangle(self.frame, (x0,y0), (x0+100, y0+85), (220,220,240), -1)
cv2.putText(self.frame, f"X:{body.xyz[0]/10:3.0f} cm", (x0+10, y0+20), cv2.FONT_HERSHEY_PLAIN, 1, (20,180,0), 2)
cv2.putText(self.frame, f"Y:{body.xyz[1]/10:3.0f} cm", (x0+10, y0+45), cv2.FONT_HERSHEY_PLAIN, 1, (255,0,0), 2)
cv2.putText(self.frame, f"Z:{body.xyz[2]/10:3.0f} cm", (x0+10, y0+70), cv2.FONT_HERSHEY_PLAIN, 1, (0,0,255), 2)
if self.show_xyz_zone and body.xyz_ref:
# Show zone on which the spatial data were calculated
cv2.rectangle(self.frame, tuple(body.xyz_zone[0:2]), tuple(body.xyz_zone[2:4]), (180,0,180), 2)
def draw_3d(self, body):
self.vis3d.clear()
self.vis3d.try_move()
self.vis3d.add_geometries()
if body is not None:
points = body.landmarks if self.show_3d == "image" else body.landmarks_world
draw_skeleton = True
if self.show_3d == "mixed":
if body.xyz_ref:
"""
Beware, the y value of landmarks_world coordinates is negative for landmarks
above the mid hips (like shoulders) and positive for landmarks below (like feet).
The y value of (x,y,z) coordinates given by depth sensor is negative in the lower part
of the image and positive in the upper part.
"""
translation = body.xyz / 1000
translation[1] = -translation[1]
if body.xyz_ref == "mid_hips":
points = points + translation
elif body.xyz_ref == "mid_shoulders":
mid_hips_to_mid_shoulders = np.mean([
points[mpu.KEYPOINT_DICT['right_shoulder']],
points[mpu.KEYPOINT_DICT['left_shoulder']]],
axis=0)
points = points + translation - mid_hips_to_mid_shoulders
else:
draw_skeleton = False
if draw_skeleton:
lines = LINES_BODY
colors = COLORS_BODY
for i,a_b in enumerate(lines):
a, b = a_b
if self.is_present(body, a) and self.is_present(body, b):
self.vis3d.add_segment(points[a], points[b], color=colors[i])
self.vis3d.render()
def draw(self, frame, body):
if not self.pause:
self.frame = frame
if body:
self.draw_landmarks(body)
self.body = body
elif self.frame is None:
self.frame = frame
self.body = None
# else: self.frame points to previous frame
if self.show_3d:
self.draw_3d(self.body)
return self.frame
def exit(self):
if self.output:
self.output.release()
def waitKey(self, delay=1):
if self.show_fps:
self.tracker.fps.draw(self.frame, orig=(50,50), size=1, color=(240,180,100))
cv2.imshow("Blazepose", self.frame)
if self.output:
self.output.write(self.frame)
key = cv2.waitKey(delay)
if key == 32:
# Pause on space bar
self.pause = not self.pause
elif key == ord('r'):
self.show_rot_rect = not self.show_rot_rect
elif key == ord('l'):
self.show_landmarks = not self.show_landmarks
elif key == ord('s'):
self.show_score = not self.show_score
elif key == ord('f'):
self.show_fps = not self.show_fps
elif key == ord('x'):
if self.tracker.xyz:
self.show_xyz = not self.show_xyz
elif key == ord('z'):
if self.tracker.xyz:
self.show_xyz_zone = not self.show_xyz_zone
return key
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 22 15:29:32 2017
@author: geaxx
"""
import time
import cv2
def now():
return time.perf_counter()
class FPS: # To measure the number of frames per second
def __init__(self, mean_nb_frames=10):
self.nbf = -1
self.fps = 0
self.start = 0
self.stop = 0
self.local_start = 0
self.mean_nb_frames = mean_nb_frames
def update(self):
if self.nbf%self.mean_nb_frames == 0:
if self.start != 0:
self.stop = now()
self.fps = self.mean_nb_frames/(self.stop-self.local_start)
self.local_start = self.stop
else :
self.start = self.local_start = now()
self.nbf+=1
def get(self):
return self.fps
def get_global(self):
if self.stop == 0: self.stop = now()
return self.nbf/(self.stop-self.start)
def draw(self, win, orig=(10,30), font=cv2.FONT_HERSHEY_SIMPLEX, size=2, color=(0,255,0), thickness=2):
cv2.putText(win,f"FPS={self.get():.2f}",orig,font,size,color,thickness)
MIT License
Copyright (c) [2021] [geaxgx]
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# Blazepose tracking with DepthAI
Running Google Mediapipe single body pose tracking models on [DepthAI](https://docs.luxonis.com/en/gen2/) hardware (OAK-1, OAK-D, ...).
The Blazepose landmark models available in this repository are the "full", "lite" and "heavy" versions of mediapipe 0.8.6 (2021/07).
The pose detection model comes from mediapipe 0.8.4 and is compatible with the 3 landmark models (the 0.8.6 version currently cannot be converted into a Myriad blob).
<p align="center"> <img src="img/taichi.gif" alst="Demo"></p>
For the challenger Movenet on DepthAI, please visit: [depthai_movenet](https://github.com/geaxgx/depthai_movenet)
For an OpenVINO version of Blazepose, please visit: [openvino_blazepose](https://github.com/geaxgx/openvino_blazepose)
- [Blazepose tracking with DepthAI](#blazepose-tracking-with-depthai)
- [Architecture: Host mode vs Edge mode](#architecture-host-mode-vs-edge-mode)
- [Inferred 3D vs Measured 3D](#inferred-3d-vs-measured-3d)
- [Install](#install)
- [Run](#run)
- [Mediapipe models](#mediapipe-models)
- [Custom models](#custom-models)
- [Landmarks](#landmarks)
- [Code](#code)
- [Examples](#examples)
- [Credits](#credits)
## Architecture: Host mode vs Edge mode
Two modes are available:
- **Host mode:** aside from the neural networks that run on the device, almost all the processing is run on the host (the only processing done on the device is the letterboxing operation before the pose detection network when using the device camera as video source). **Use this mode when you want to run inference on an external input source (videos, images).**
- **Edge mode:** most of the processing (neural networks, post-processing, image manipulations) is run on the device thanks to the depthai scripting node feature. It works only with the device camera but is **definitely the best option when working with the internal camera** (much faster than in Host mode). The data exchanged between the host and the device is minimal: the landmarks of the detected body (~3kB/frame). In Edge mode, you can choose not to send the camera video frame to the host (by specifying 'rgb_laconic' as input).
|Landmark model (Edge mode)| FPS (FPS with 'xyz' option)|
|-|-|
|Full|20 (18)|
|Lite|26 (22)|
|Heavy|8 (7)|
<br>
<p align="center"> <img src="img/pipeline_host_mode.png" alt="Host mode"></p>
<p align="center"> <img src="img/pipeline_edge_mode.png" alt="Edge mode"></p>
For depth-capable devices, when measuring the 3D location of a reference point, additional nodes are used that are not represented here (2 mono cameras, stereo node, spatial location calculator).
*Note: the Edge mode schema is missing a custom NeuralNetwork node between the ImageManip node on the right and the landmark NeuralNetwork. This custom NeuralNetwork runs a very simple model that normalizes (divides by 255) the output image from the ImageManip node. This is a temporary fix that should be removed once the depthai ImageManip node supports setFrameType(RGBF16F16F16p).*
## Inferred 3D vs Measured 3D
* **Inferred 3D**: the landmark model is able to infer 3D (x,y,z) landmarks. Actually, the 0.8.6 model yields 2 outputs with 3D landmarks: Identity (accessed via *body.landmarks* with the API) and Identity_4 (accessed via *body.landmarks_world*). It may sound like redundant information, but there is a difference: *world landmarks* are real-world 3D coordinates in meters with the origin at the center between the hips. *World landmarks* share the same landmark topology as *landmarks*. However, *landmarks* provide the coordinates (in pixels) of a 3D object projected onto the 2D image surface, while *world landmarks* provide the coordinates (in meters) of the 3D object itself.
* **Measured 3D**: for devices able to measure depth (like OAK-D), we can determine the real 3D location of a point of the image in the camera coordinate system. So one idea would be to measure the 3D location of each inferred 2D body landmark. It turns out this is not a good idea in practice, for at least 2 reasons. First, an inferred keypoint may land "outside" of its counterpart in the image and therefore in the aligned depth frame. This probably happens more frequently with extremities, and can be explained by the inaccuracy of the model, which is never 100% perfect. Secondly, we can't get depth for hidden keypoints. An alternative solution, implemented here, is to combine the inferred 3D *world landmarks* with the measured 3D location of one reference point, the center between the hips. Compared to extremities, this reference point can be measured more robustly.
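To make this combination concrete, here is a minimal sketch of the operation, mirroring what `BlazeposeRenderer.draw_3d` does in this repository when the reference point is the middle of the hips (the function name is illustrative):
```
import numpy as np

def mix_world_landmarks(body):
    # body.landmarks_world: inferred 3D landmarks in metres, origin between the hips
    # body.xyz: measured (x, y, z) of the reference point in mm (camera coordinate system)
    translation = np.array(body.xyz) / 1000       # mm -> m
    translation[1] = -translation[1]              # the y axes of the two systems are opposed
    return body.landmarks_world + translation     # world skeleton placed at its measured location
```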
The image below demonstrates the 3 modes of 3D visualization:
1) **Image mode** (top-right), based on *body.landmarks*. Note that the size of the drawn skeleton depends on the camera-body distance, and that the mid hips reference point is restricted and can only move inside a plane parallel to the wall grid;
2) **World mode** (bottom-left), based on *body.landmarks_world*. Note that the mid hips reference point is fixed and the size of the skeleton does not change;
3) **Mixed mode** (bottom-right), mixing *body.landmarks_world* with the measured 3D location of the reference point. Like in World mode, the size of the skeleton does not change, but the mid hips reference point is no longer restricted.
<p align="center"> <img src="img/3d_visualizations.gif" alt="3D visualizations"></p>
## Install
Install the python packages (depthai, opencv, open3d) with the following command:
```
python3 -m pip install -r requirements.txt
```
## Run
**Usage:**
```
-> python3 demo.py -h
usage: demo.py [-h] [-e] [-i INPUT] [--pd_m PD_M] [--lm_m LM_M] [-xyz] [-c]
[--no_smoothing] [-f INTERNAL_FPS]
[--internal_frame_height INTERNAL_FRAME_HEIGHT] [-s] [-t]
[--force_detection] [-3 {None,image,mixed,world}]
[-o OUTPUT]
optional arguments:
-h, --help show this help message and exit
-e, --edge Use Edge mode (postprocessing runs on the device)
Tracker arguments:
-i INPUT, --input INPUT
'rgb' or 'rgb_laconic' or path to video/image file to
use as input (default=rgb)
--pd_m PD_M Path to an .blob file for pose detection model
--lm_m LM_M Landmark model ('full' or 'lite' or 'heavy') or path
to an .blob file
-xyz, --xyz Get (x,y,z) coords of reference body keypoint in
camera coord system (only for compatible devices)
-c, --crop Center crop frames to a square shape before feeding
pose detection model
--no_smoothing Disable smoothing filter
-f INTERNAL_FPS, --internal_fps INTERNAL_FPS
Fps of internal color camera. Too high value lower NN
fps (default= depends on the model)
--internal_frame_height INTERNAL_FRAME_HEIGHT
Internal color camera frame height in pixels
(default=640)
-s, --stats Print some statistics at exit
-t, --trace Print some debug messages
--force_detection Force person detection on every frame (never use
landmarks from previous frame to determine ROI)
Renderer arguments:
-3 {None,image,mixed,world}, --show_3d {None,image,mixed,world}
Display skeleton in 3d in a separate window. See
README for description.
-o OUTPUT, --output OUTPUT
Path to output video file
```
**Examples :**
- To use default internal color camera as input with the model "full" in Host mode:
```python3 demo.py```
- To use default internal color camera as input with the model "full" in Edge mode [**preferred**]:
```python3 demo.py -e```
- To use a file (video or image) as input :
```python3 demo.py -i filename```
- To use the model "lite" :
```python3 demo.py --lm_m lite```
- To measure body spatial location in camera coordinate system (only for depth-capable device like OAK-D):
```python3 demo.py -e -xyz```
The measure is made only on one reference point:
- the middle of the hips if both hips are visible;
- the middle of the shoulders if hips are not visible and both shoulders are visible.
- To show the skeleton in 3D 'world' mode (-xyz flag needed):
```python3 demo.py -e -xyz -3 world```
<p align="center"> <img src="img/3d_world_visualization.gif" alt="World mode"></p>
Note that the floor and wall grids do not correspond to a real floor and wall. Each grid square is 1m x 1m.
- When using the internal camera, to change its FPS to 15 :
```python3 demo.py --internal_fps 15```
Note: the default internal camera FPS depends on the model, the mode (Edge vs Host) and the use of depth ("-xyz"). These default values are based on my own observations. **Please, don't hesitate to play with this parameter to find the optimal value.** If you observe that your FPS is well below the default value, you should lower the FPS with this option until the set FPS is just above the observed FPS.
- When using the internal camera, you probably don't need to work with the full resolution. You can set a lower resolution (and gain a bit of FPS) by using this option:
```python3 demo.py --internal_frame_height 450```
Note: currently, depthai supports only certain values for this argument. The value you specify will be replaced by the closest supported value (here 432 instead of 450).
- By default, temporal filters smooth the landmark positions. Use *--no_smoothing* to disable the filter.
|Keypress in OpenCV window|Function|
|-|-|
|*Esc*|Exit|
|*space*|Pause|
|r|Show/hide the bounding rotated rectangle around the body|
|l|Show/hide landmarks|
|s|Show/hide landmark score|
|f|Show/hide FPS|
|x|Show/hide (x,y,z) coordinates (only on depth-capable devices and if using "-xyz" flag)|
|z|Show/hide the square zone used to measure depth (only on depth-capable devices and if using "-xyz" flag)|
If using a 3D visualization mode ("-3" or "--show_3d"):
|Keypress in Open3d window|Function|
|-|-|
|o|Oscillate (rotate the view back and forth)|
|r|Rotate the view continuously|
|s|Stop oscillating or rotating|
|*Up*|Increase the rotating or oscillating speed|
|*Down*|Decrease the rotating or oscillating speed|
|*Right* or *Left*|Change the point of view to a predefined position|
|*Mouse*|Freely change the point of view|
## Mediapipe models
You can directly find the model files (.blob) under the 'models' directory. Below I describe how to get the files in case you need to regenerate the models.
1) Clone this github repository in a local directory (DEST_DIR)
2) In DEST_DIR/models directory, download the tflite models from [this archive](https://drive.google.com/file/d/1bEL4zmh2PEFsRfmFOofP0rbGNo-O1p5_/view?usp=sharing). The archive contains:
* Pose detection model from Mediapipe 0.8.4,
* Full, Lite and Heavy pose landmark models from Mediapipe 0.8.6.
*Note: the Pose detection model from Mediapipe 0.8.6 can't currently be converted (more info [here](https://github.com/PINTO0309/tflite2tensorflow/issues/11)).*
3) Install the amazing [PINTO's tflite2tensorflow tool](https://github.com/PINTO0309/tflite2tensorflow). Use the docker installation, which includes many packages, including a recent version of Openvino.
4) From DEST_DIR, run the tflite2tensorflow container: ```./docker_tflite2tensorflow.sh```
5) From the running container:
```
cd workdir/models
./convert_models.sh
```
The *convert_models.sh* script converts the tflite models to tensorflow (.pb), then converts the pb files into Openvino IR format (.xml and .bin), and finally compiles the IR files into MyriadX format (.blob).
6) By default, the number of SHAVES associated with the blob files is 4. In case you want to generate new blobs with a different number of shaves, you can use the script *gen_blob_shave.sh*:
```
# Example: to generate blobs for 6 shaves
./gen_blob_shave.sh -m pd -n 6 # will generate pose_detection_sh6.blob
./gen_blob_shave.sh -m full -n 6 # will generate pose_landmark_full_sh6.blob
```
**Explanation of the Model Optimizer params:**
- The preview of the OAK-* color camera outputs BGR [0, 255] frames. The original tflite pose detection model expects RGB [-1, 1] frames. ```--reverse_input_channels``` converts BGR to RGB. ```--mean_values [127.5,127.5,127.5] --scale_values [127.5,127.5,127.5]``` normalizes the frames to [-1, 1].
- The original landmark models expect RGB [0, 1] frames. Therefore, ```--reverse_input_channels``` is also used, but unlike the detection model, we choose to do the normalization in the python code rather than in the models (i.e. without ```--scale_values```). Indeed, we have observed better accuracy with FP16 models when the normalization of the inputs is done outside of the models ([a possible explanation](https://github.com/PINTO0309/tflite2tensorflow/issues/9#issuecomment-842460014)).
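For illustration, a minimal sketch of this host-side normalization (a hypothetical helper; in Edge mode the same division is performed on-device by the DivideBy255 custom model described in the next section):
```
import numpy as np

def normalize_lm_input(img_256x256_bgr_u8):
    # The channel reversal (BGR -> RGB) is handled inside the model (--reverse_input_channels);
    # only the scaling of the [0, 255] values to [0, 1] is done here, outside of the model.
    return img_256x256_bgr_u8.astype(np.float32) / 255.0
```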
## Custom models
The `custom_models` directory contains the code to build the following custom models:
- DetectionBestCandidate: this model processes the outputs of the pose detection network (a 1x2254x1 tensor for the scores and a 1x2254x12 tensor for the regressors) and yields the regressor with the highest score.
- DivideBy255: this model transforms a 256x256 RGB888p ([0, 255]) image into a 256x256 RGBF16F16F16p ([0., 1.]) image.
The method used to build these models is well explained on [rahulrav's blog](https://rahulrav.com/blog/depthai_camera.html).
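For reference, here is a numpy sketch of the computation performed by DetectionBestCandidate (the actual model is built with pytorch and exported to ONNX in `custom_models/DetectionBestCandidate.py`; the function name below is illustrative):
```
import numpy as np

def best_candidate(scores, regressors, anchors, input_length=224):
    # scores: 1 x nb_anchors x 1, regressors: 1 x nb_anchors x 12,
    # anchors: nb_anchors x 4 [x_center, y_center, width, height] (width = height = 1)
    best_id = int(np.argmax(scores))
    score = 1 / (1 + np.exp(-scores[0, best_id, 0]))   # sigmoid of the best raw score
    reg = regressors[0, best_id] / input_length
    anchor_xy = anchors[best_id, :2]
    sqn_rr_center = reg[4:6] + anchor_xy   # keypoint 1: center of the rotated rectangle
    sqn_scale = reg[6:8] + anchor_xy       # keypoint 2: used to derive its size and rotation
    return score, sqn_rr_center, sqn_scale
```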
## Landmarks
<p align="center">
<img src="img/full_body_landmarks.png">
</p>
<p align="center">
<a href="https://google.github.io/mediapipe/solutions/pose.html#pose-landmark-model-blazepose-ghum-3d">Source</a>
</p>
## Code
To facilitate reusability, the code is split into 2 classes:
- **BlazeposeDepthai**, which is responsible for computing the body landmarks. The class to import depends on the mode:
```
# For Host mode:
from BlazeposeDepthai import BlazeposeDepthai
```
```
# For Edge mode:
from BlazeposeDepthaiEdge import BlazeposeDepthai
```
- **BlazeposeRenderer**, which is responsible for rendering the landmarks and the skeleton on the video frame.
This way, you can replace the renderer from this repository and write your own personalized renderer (for some projects, you may not even need a renderer).
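For instance, a minimal sketch of a replacement renderer (a hypothetical class; it only relies on attributes already used by BlazeposeRenderer, like `tracker.nb_kps` and `body.landmarks`):
```
import cv2

class MinimalRenderer:
    def __init__(self, tracker):
        self.tracker = tracker
        self.frame = None
    def draw(self, frame, body):
        # Draw a dot on each landmark of the detected body
        if body:
            for x, y in body.landmarks[:self.tracker.nb_kps, :2]:
                cv2.circle(frame, (int(x), int(y)), 3, (0, 255, 0), -1)
        self.frame = frame
        return frame
    def waitKey(self, delay=1):
        cv2.imshow("Minimal renderer", self.frame)
        return cv2.waitKey(delay)
    def exit(self):
        pass
```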
The file ```demo.py``` is a representative example of how to use these classes:
```
from BlazeposeDepthaiEdge import BlazeposeDepthai
from BlazeposeRenderer import BlazeposeRenderer
# The argparse stuff has been removed to keep only the important code
tracker = BlazeposeDepthai(input_src=args.input,
pd_model=args.pd_m,
lm_model=args.lm_m,
smoothing=not args.no_smoothing,
xyz=args.xyz,
crop=args.crop,
internal_fps=args.internal_fps,
internal_frame_height=args.internal_frame_height,
force_detection=args.force_detection,
stats=args.stats,
trace=args.trace)
renderer = BlazeposeRenderer(
tracker,
show_3d=args.show_3d,
output=args.output)
while True:
# Run blazepose on next frame
frame, body = tracker.next_frame()
if frame is None: break
# Draw 2d skeleton
frame = renderer.draw(frame, body)
key = renderer.waitKey(delay=1)
if key == 27 or key == ord('q'):
break
renderer.exit()
tracker.exit()
```
For more information on:
- the arguments of the tracker, please refer to the docstring of class `BlazeposeDepthai` or `BlazeposeDepthaiEdge` in `BlazeposeDepthai.py` or `BlazeposeDepthaiEdge.py`;
- the attributes of the 'body' element you can exploit in your program, please refer to the docstring of class `Body` in `mediapipe_utils.py`.
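For example, continuing the loop of the demo.py snippet above, here is a sketch of a few 'body' attributes you could exploit (only attributes that appear in this repository are used; landmark index 0 is the nose in the Mediapipe topology):
```
frame, body = tracker.next_frame()
if body:
    print(f"Landmark score: {body.lm_score:.2f}")
    nose_x, nose_y = body.landmarks[0, :2]        # pixel coordinates in the image
    nose_world = body.landmarks_world[0]          # metres, origin between the hips
    if tracker.xyz and body.xyz_ref:
        print("Reference point (mm):", body.xyz)  # measured by the depth sensor
```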
## Examples
|||
|-|-|
|[Semaphore alphabet](examples/semaphore_alphabet) |<img src="examples/semaphore_alphabet/medias/semaphore.gif" alt="Semaphore alphabet" width="200"/>|
## Credits
* [Google Mediapipe](https://github.com/google/mediapipe)
* Katsuya Hyodo a.k.a [Pinto](https://github.com/PINTO0309), the Wizard of Model Conversion !
* [Tai Chi Step by Step For Beginners Training Session 4](https://www.youtube.com/watch?v=oawZ_7wNWrU&ab_channel=MasterSongKungFu)
* [Semaphore with The RCR Museum](https://www.youtube.com/watch?v=DezaTjQYPh0&ab_channel=TheRoyalCanadianRegimentMuseum)
import torch
import torch.nn as nn
import sys, os
sys.path.insert(1, os.path.realpath(os.path.pardir))
from mediapipe_utils import generate_blazepose_anchors
import numpy as np
from math import pi
anchors = generate_blazepose_anchors()
print(f"Nb anchors: {len(anchors)}")
detection_input_length = 224
class DetectionBestCandidate(nn.Module):
def __init__(self):
super(DetectionBestCandidate, self).__init__()
# anchors shape is nb_anchorsx4 [x_center, y_center, width, height]
# Here: width and height is always 1, so we keep just [x_center, y_center]
self.anchors = torch.from_numpy(anchors[:,:2])
self.plus_anchor_center = np.array([[1,0,0,0,1,0,1,0,1,0,1,0], [0,1,0,0,0,1,0,1,0,1,0,1]], dtype=float)
self.plus_anchor_center = torch.from_numpy(self.plus_anchor_center)
def forward(self, x, y):
# x.shape: 1xnb_anchorsx1
# y.shape: 1xnb_anchorsx12
# decode_bboxes
best_id = torch.argmax(x)
score = x[0][best_id]
score = torch.sigmoid(score)
bbox = y[0,best_id]
bbox = bbox/detection_input_length + torch.mm(self.anchors[best_id].unsqueeze(0), self.plus_anchor_center)[0]
# bbox : the 4 first elements describe the square bounding box around the head (not used for determining rotated rectangle around the body)
# The 8 following corresponds to 4 (x,y) normalized keypoints coordinates (useful for determining rotated rectangle)
# For full body, keypoints 1 and 2 are used.
# For upper body, keypoints 3 and 4 are used.
# Here we are only interested in full body
# detections_to_rect (1st part)
sqn_rr_center_xy = bbox[4:6]
sqn_scale_xy = bbox[6:8]
return torch.cat((score, sqn_rr_center_xy, sqn_scale_xy))
def test():
model = DetectionBestCandidate()
X = torch.randn(1, len(anchors), 1, dtype=torch.float)
Y = torch.randn(1, len(anchors), 12, dtype=torch.float)
result = model(X, Y)
print(result)
def export_onnx():
"""
Exports the model to an ONNX file.
"""
model = DetectionBestCandidate()
X = torch.randn(1, len(anchors), 1, dtype=torch.float)
Y = torch.randn(1, len(anchors), 12, dtype=torch.float)
onnx_name = "DetectionBestCandidate.onnx"
print(f"Generating {onnx_name}")
torch.onnx.export(
model,
(X, Y),
onnx_name,
opset_version=10,
do_constant_folding=True,
# verbose=True,
input_names=['Identity_1', 'Identity'],
output_names=['result']
)
if __name__ == "__main__":
test()
export_onnx()
\ No newline at end of file
import torch
import torch.nn as nn
import sys, os
sys.path.insert(1, os.path.realpath(os.path.pardir))
import numpy as np
from math import pi
detection_input_length = 224
class DivideBy255(nn.Module):
def __init__(self):
super(DivideBy255, self).__init__()
def forward(self, x):
return x / 255.0
def test():
model = DivideBy255()
X = torch.ones(1, 3, 256, 256, dtype=torch.float)
result = model(X)
print(result)
def export_onnx():
"""
Exports the model to an ONNX file.
"""
model = DivideBy255()
X = torch.randn(1, 3, 256, 256, dtype=torch.float)
onnx_name = "DivideBy255.onnx"
print(f"Generating {onnx_name}")
torch.onnx.export(
model,
(X),
onnx_name,
opset_version=10,
do_constant_folding=True,
# verbose=True,
# input_names=['Identity_1', 'Identity'],
output_names=['input_1']
)
if __name__ == "__main__":
test()
export_onnx()
\ No newline at end of file
## Generation of DetectionBestCandidate.blob
1) pytorch is needed
2) Generate the ONNX model *DetectionBestCandidate.onnx* :
```
python DetectionBestCandidate.py
```
3) Start the tflite2tensorflow docker container:
```
../docker_tflite2tensorflow.sh
```
4) From the container shell:
```
cd workdir
./convert_model.sh -m DetectionBestCandidate # Use `-n #` if you want to change the number of shaves
```
## Generation of DivideBy255.blob
Same as above with 'DivideBy255' instead of 'DetectionBestCandidate'.
# This script has to be run from the docker container started by ./docker_tflite2tensorflow.sh
usage ()
{
echo "Generate a blob from an ONNX model with a specified number of shaves and cmx (nb cmx = nb shaves)"
echo
echo "Usage: ${0} [-m model] [-n nb_shaves]"
echo
echo "model: DetectionBestCandidate or DivideBy255 or other"
echo "nb_shaves must be between 1 and 13 (default=1)"
}
while getopts ":hm:n:" opt; do
case ${opt} in
h )
usage
exit 0
;;
m )
model=$OPTARG
;;
n )
nb_shaves=$OPTARG
;;
: )
echo "Error: -$OPTARG requires an argument."
usage
exit 1
;;
\? )
echo "Invalid option: -$OPTARG"
usage
exit 1
;;
esac
done
if [ -z "$model" ]
then
usage
exit 1
fi
if [ ! -f $model_onnx ]
then
echo "The model ${model_onnx} does not exist"
echo "Have you run 'python DetectionBestCandidate.py' ?"
exit 1
fi
if [ -z "$nb_shaves" ]
then
nb_shaves=1
fi
if [ $nb_shaves -lt 1 -o $nb_shaves -gt 13 ]
then
echo "Invalid number of shaves !"
usage
exit 1
fi
if [ $model = DivideBy255 ]
then
precision=U8
else
precision=FP16
fi
model_onnx="${model}.onnx"
model_xml="${model}.xml"
model_blob="${model}_sh${nb_shaves}.blob"
echo Model: $model_xml $model_blob
echo Shaves $nb_shaves
ARG=$1
source /opt/intel/openvino_2021/bin/setupvars.sh
# Use FP16 and make the batch_size explicit.
python /opt/intel/openvino_2021/deployment_tools/model_optimizer/mo_onnx.py \
--input_model $model_onnx --data_type half
/opt/intel/openvino_2021/deployment_tools/tools/compile_tool/compile_tool -d MYRIAD \
-m $model_xml \
-ip $precision \
-VPU_NUMBER_OF_SHAVES $nb_shaves \
-VPU_NUMBER_OF_CMX_SLICES $nb_shaves \
-o $model_blob
#!/usr/bin/env python3
from BlazeposeRenderer import BlazeposeRenderer
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('-e', '--edge', action="store_true",
help="Use Edge mode (postprocessing runs on the device)")
parser_tracker = parser.add_argument_group("Tracker arguments")
parser_tracker.add_argument('-i', '--input', type=str, default="rgb",
help="'rgb' or 'rgb_laconic' or path to video/image file to use as input (default=%(default)s)")
parser_tracker.add_argument("--pd_m", type=str,
help="Path to an .blob file for pose detection model")
parser_tracker.add_argument("--lm_m", type=str,
help="Landmark model ('full' or 'lite' or 'heavy') or path to an .blob file")
parser_tracker.add_argument('-xyz', '--xyz', action="store_true",
help="Get (x,y,z) coords of reference body keypoint in camera coord system (only for compatible devices)")
parser_tracker.add_argument('-c', '--crop', action="store_true",
help="Center crop frames to a square shape before feeding pose detection model")
parser_tracker.add_argument('--no_smoothing', action="store_true",
help="Disable smoothing filter")
parser_tracker.add_argument('-f', '--internal_fps', type=int,
help="Fps of internal color camera. Too high value lower NN fps (default= depends on the model)")
parser_tracker.add_argument('--internal_frame_height', type=int, default=640,
help="Internal color camera frame height in pixels (default=%(default)i)")
parser_tracker.add_argument('-s', '--stats', action="store_true",
help="Print some statistics at exit")
parser_tracker.add_argument('-t', '--trace', action="store_true",
help="Print some debug messages")
parser_tracker.add_argument('--force_detection', action="store_true",
help="Force person detection on every frame (never use landmarks from previous frame to determine ROI)")
parser_renderer = parser.add_argument_group("Renderer arguments")
parser_renderer.add_argument('-3', '--show_3d', choices=[None, "image", "world", "mixed"], default=None,
help="Display skeleton in 3d in a separate window. See README for description.")
parser_renderer.add_argument("-o","--output",
help="Path to output video file")
args = parser.parse_args()
if args.edge:
from BlazeposeDepthaiEdge import BlazeposeDepthai
else:
from BlazeposeDepthai import BlazeposeDepthai
tracker = BlazeposeDepthai(input_src=args.input,
pd_model=args.pd_m,
lm_model=args.lm_m,
smoothing=not args.no_smoothing,
xyz=args.xyz,
crop=args.crop,
internal_fps=args.internal_fps,
internal_frame_height=args.internal_frame_height,
force_detection=args.force_detection,
stats=True,
trace=args.trace)
renderer = BlazeposeRenderer(
tracker,
show_3d=args.show_3d,
output=args.output)
while True:
# Run blazepose on next frame
frame, body = tracker.next_frame()
if frame is None: break
# Draw 2d skeleton
frame = renderer.draw(frame, body)
key = renderer.waitKey(delay=1)
if key == 27 or key == ord('q'):
break
renderer.exit()
tracker.exit()
\ No newline at end of file
# Used to convert original Mediapipe tflite models to Openvino IR and MyriadX blob
# https://github.com/PINTO0309/tflite2tensorflow
#
# First run: docker pull pinto0309/tflite2tensorflow
docker run -it --rm \
-v `pwd`:/home/user/workdir \
-v /tmp/.X11-unix/:/tmp/.X11-unix:rw \
--device /dev/video0:/dev/video0:mwr \
--net=host \
-e XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR \
-e DISPLAY=$DISPLAY \
--privileged \
pinto0309/tflite2tensorflow:latest bash
# Semaphore alphabet with Blazepose on DepthAI
This demo demonstrates the recognition of the semaphore alphabet.
For each arm (shoulder-elbow segment), its angle with the vertical axis is calculated. The pair of right and left arm angles gives the letter.
![Semaphore alphabet](medias/semaphore.gif)
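Below is a sketch of the quantization used in demo.py: each arm angle (in degrees, measured from the vertical axis of the image) is mapped to one of 8 sectors of 45°, and the (right, left) sector pair indexes the letter table:
```
from math import atan2, degrees

def arm_sector(shoulder_xy, elbow_xy):
    # Angle of the shoulder -> elbow segment with the vertical axis of the image
    vx, vy = elbow_xy[0] - shoulder_xy[0], elbow_xy[1] - shoulder_xy[1]
    angle = degrees(atan2(vx, vy))
    # Quantize into one of 8 sectors of 45 degrees (same formula as in demo.py)
    return int((angle + 202.5) / 45) % 8
```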
## Usage
```
-> python3 demo.py -h
usage: demo.py [-h] [-m {full,lite,831}] [-i INPUT] [-o OUTPUT]
optional arguments:
-h, --help show this help message and exit
-m {full,lite,831}, --model {full,lite,831}
Landmark model to use (default=full)
-i INPUT, --input INPUT
'rgb' or 'rgb_laconic' or path to video/image file to
use as input (default: rgb)
-o OUTPUT, --output OUTPUT
Path to output video file
```
## Credits
* [Semaphore with The RCR Museum](https://www.youtube.com/watch?v=DezaTjQYPh0&ab_channel=TheRoyalCanadianRegimentMuseum)
\ No newline at end of file
import cv2
from math import atan2, degrees
import sys
sys.path.append("../..")
from BlazeposeDepthai import BlazeposeDepthai
from BlazeposeRenderer import BlazeposeRenderer
from mediapipe_utils import KEYPOINT_DICT
import argparse
# For gesture demo
semaphore_flag = {
(3,4):'A', (2,4):'B', (1,4):'C', (0,4):'D',
(4,7):'E', (4,6):'F', (4,5):'G', (2,3):'H',
(0,3):'I', (0,6):'J', (3,0):'K', (3,7):'L',
(3,6):'M', (3,5):'N', (2,1):'O', (2,0):'P',
(2,7):'Q', (2,6):'R', (2,5):'S', (1,0):'T',
(1,7):'U', (0,5):'V', (7,6):'W', (7,5):'X',
(1,6):'Y', (5,6):'Z',
}
def recognize_gesture(b):
# b: body
def angle_with_y(v):
# v: 2d vector (x,y)
# Returns angle in degree of v with y-axis of image plane
if v[1] == 0:
return 90
angle = atan2(v[0], v[1])
return degrees(angle)
# For the demo, we want to recognize the flag semaphore alphabet
# For this task, we just need to measure the angles of both arms with vertical
right_arm_angle = angle_with_y(b.landmarks[KEYPOINT_DICT['right_elbow'],:2] - b.landmarks[KEYPOINT_DICT['right_shoulder'],:2])
left_arm_angle = angle_with_y(b.landmarks[KEYPOINT_DICT['left_elbow'],:2] - b.landmarks[KEYPOINT_DICT['left_shoulder'],:2])
right_pose = int((right_arm_angle +202.5) / 45) % 8
left_pose = int((left_arm_angle +202.5) / 45) % 8
letter = semaphore_flag.get((right_pose, left_pose), None)
return letter
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model", type=str, choices=['full', 'lite', '831'], default='full',
help="Landmark model to use (default=%(default)s")
parser.add_argument('-i', '--input', type=str, default='rgb',
help="'rgb' or 'rgb_laconic' or path to video/image file to use as input (default: %(default)s)")
parser.add_argument("-o","--output",
help="Path to output video file")
args = parser.parse_args()
pose = BlazeposeDepthai(input_src=args.input, lm_model=args.model)
renderer = BlazeposeRenderer(pose, output=args.output)
while True:
# Run blazepose on next frame
frame, body = pose.next_frame()
if frame is None: break
# Draw 2d skeleton
frame = renderer.draw(frame, body)
# Gesture recognition
if body:
letter = recognize_gesture(body)
if letter:
cv2.putText(frame, letter, (frame.shape[1] // 2, 100), cv2.FONT_HERSHEY_PLAIN, 5, (0,190,255), 3)
key = renderer.waitKey(delay=1)
if key == 27 or key == ord('q'):
break
renderer.exit()
pose.exit()
The image diff is too large to display. You can view the blob instead.
The image diff is too large to display. You can view the blob instead.
This diff has been collapsed.
# This script has to be run from the docker container started by ./docker_tflite2tensorflow.sh
source /opt/intel/openvino_2021/bin/setupvars.sh
replace () { # Replace in file $1 occurences of the string $2 by $3
sed "s/${2}/${3}/" $1 > tmpf
mv tmpf $1
}
convert_model () {
model_name=$1
if [ -z "$2" ]
then
arg_mean_values=""
else
arg_mean_values="--mean_values ${2}"
fi
if [ -z "$3" ]
then
arg_scale_values=""
else
arg_scale_values="--scale_values ${3}"
fi
mean_values=$2
scale_values=$3
input_precision=$4
tflite2tensorflow \
--model_path ${model_name}.tflite \
--model_output_path ${model_name} \
--flatc_path ~/flatc \
--schema_path ~/schema.fbs \
--output_pb \
--optimizing_for_openvino_and_myriad
# For generating Openvino "non normalized input" models (the normalization would need to be done explicitly in the code):
#tflite2tensorflow \
# --model_path ${model_name}.tflite \
# --model_output_path ${model_name} \
# --flatc_path ~/flatc \
# --schema_path ~/schema.fbs \
# --output_openvino_and_myriad
# Generate Openvino "normalized input" models
/opt/intel/openvino_2021/deployment_tools/model_optimizer/mo_tf.py \
--saved_model_dir ${model_name} \
--model_name ${model_name} \
--data_type FP16 \
${arg_mean_values} \
${arg_scale_values} \
--reverse_input_channels
# For Interpolate layers, replace "half_pixel" by "align_corners" in coordinate_transformation_mode (Model Optimizer bug)
if [ "$model_name" != "pose_detection" ]
then
replace ${model_name}.xml half_pixel align_corners
fi
/opt/intel/openvino_2021/deployment_tools/inference_engine/lib/intel64/myriad_compile \
-m ${model_name}.xml \
-ip $input_precision \
-VPU_NUMBER_OF_SHAVES 4 \
-VPU_NUMBER_OF_CMX_SLICES 4 \
-o ${model_name}_sh4.blob
}
convert_model pose_detection "[127.5,127.5,127.5]" "[127.5,127.5,127.5]" "u8"
convert_model pose_landmark_full "" "" "fp16"
convert_model pose_landmark_lite "" "" "fp16"
convert_model pose_landmark_heavy "" "" "fp16"
# This script has to be run from the docker container started by ./docker_tflite2tensorflow.sh
usage ()
{
echo "Generate a new blob with a specified number of shaves and cmx (nb cmx = nb shaves)"
echo "Usage: ${0} -m model -n nb_shaves"
echo "model = 'pd' for pose detection, 'full' or 'lite' or 'heavy' for landmarks"
echo "nb_shaves must be between 1 and 13"
}
while getopts ":hm:n:" opt; do
case ${opt} in
h )
usage
exit 0
;;
m )
model=$OPTARG
;;
n )
nb_shaves=$OPTARG
;;
: )
echo "Error: -$OPTARG requires an argument."
usage
exit 1
;;
\? )
echo "Invalid option: -$OPTARG"
usage
exit 1
;;
esac
done
if [ -z "$model" ]
then
echo "Model not specified !"
usage
exit 1
fi
if [ $model == "pd" ]
then
model="pose_detection"
input_precision="u8"
elif [ $model == "full" ]
then
model="pose_landmark_full"
input_precision="fp16"
elif [ $model == "lite" ]
then
model="pose_landmark_lite"
input_precision="fp16"
elif [ $model == "heavy" ]
then
model="pose_landmark_heavy"
input_precision="fp16"
else
echo "Invalid model !"
usage
exit 1
fi
model_xml="${model}.xml"
if [ ! -f $model_xml ]
then
echo "The model ${model_xml} does not exist"
echo "Have you run convert_models.sh ?"
exit 1
fi
if [ -z "$nb_shaves" ]
then
echo "The number of shaves must be specified with -n"
usage
exit 1
fi
if [ $nb_shaves -lt 1 -o $nb_shaves -gt 13 ]
then
echo "Invalid number of shaves !"
usage
exit 1
fi
model_blob="${model}_sh${nb_shaves}.blob"
echo Model: $model_xml $model_blob
echo Shaves $nb_shaves
source /opt/intel/openvino_2021/bin/setupvars.sh
/opt/intel/openvino_2021/deployment_tools/inference_engine/lib/intel64/myriad_compile -m $model_xml -ip $input_precision -VPU_NUMBER_OF_SHAVES $nb_shaves -VPU_NUMBER_OF_CMX_SLICES $nb_shaves -o $model_blob
import open3d as o3d
import numpy as np
# From : https://stackoverflow.com/a/59026582/8574085
def calculate_zy_rotation_for_arrow(v):
"""
Calculates the rotation required to align the z axis with the vector v.
The first rotation is around the z axis (it brings v into the XZ plane),
then the second one is around the y axis.
Returns the combined rotation matrix (Rz @ Ry).
Args:
- v: 3d vector (x, y, z)
"""
# Rotation over z axis
gamma = np.arctan(v[1]/v[0])
Rz = np.array([[np.cos(gamma),-np.sin(gamma),0],
[np.sin(gamma),np.cos(gamma),0],
[0,0,1]])
# Rotate v to calculate next rotation
v = Rz.T@v.reshape(-1,1)
v = v.reshape(-1)
# Rotation over y axis
beta = np.arctan(v[0]/v[2])
Ry = np.array([[np.cos(beta),0,np.sin(beta)],
[0,1,0],
[-np.sin(beta),0,np.cos(beta)]])
return Rz @ Ry
def create_cylinder(height=1, radius=None, resolution=20):
"""
Create a cylinder in Open3D
"""
radius = height/20 if radius is None else radius
mesh_frame = o3d.geometry.TriangleMesh.create_cylinder(
radius=radius,
height=height,
resolution=resolution)
return(mesh_frame)
def create_segment(a, b, radius=0.05, color=(1,1,0), resolution=20):
"""
Creates a line (cylinder) from point a to point b.
Args:
- a, b: end points [x,y,z]
- radius: cylinder radius
"""
a = np.array(a)
b = np.array(b)
T = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])
T[:3, -1] = a
v = b-a
height = np.linalg.norm(v)
if height == 0: return None
R = calculate_zy_rotation_for_arrow(v)
mesh = create_cylinder(height, radius)
mesh.rotate(R, center=np.array([0, 0, 0]))
mesh.translate((a+b)/2)
mesh.paint_uniform_color(color)
mesh.compute_vertex_normals()
return mesh
def create_tetra(p1, p2, p3, p4, color=(1,1,0)):
vertices = o3d.utility.Vector3dVector([p1, p2, p3, p4])
tetras = o3d.utility.Vector4iVector([[0, 1, 2, 3]])
mesh = o3d.geometry.TetraMesh(vertices, tetras)
mesh.paint_uniform_color(color)
return mesh
def create_grid(p0, p1, p2, p3, ni1, ni2, color=(0,0,0)):
'''
p0, p1, p2, p3 : points defining a quadrilateral
ni1: nb of equidistant intervals on segments p0p1 and p3p2
ni2: nb of equidistant intervals on segments p1p2 and p0p3
'''
p0 = np.array(p0)
p1 = np.array(p1)
p2 = np.array(p2)
p3 = np.array(p3)
vertices = [p0, p1, p2, p3]
lines = [[0,1],[0,3],[1,2],[2,3]]
for i in range(1,ni1):
l = len(vertices)
vertices.append((p0*(ni1-i)+p1*i)/ni1)
vertices.append((p3*(ni1-i)+p2*i)/ni1)
lines.append([l,l+1])
for i in range(1,ni2):
l = len(vertices)
vertices.append((p1*(ni2-i)+p2*i)/ni2)
vertices.append((p0*(ni2-i)+p3*i)/ni2)
lines.append([l,l+1])
vertices = o3d.utility.Vector3dVector(vertices)
lines = o3d.utility.Vector2iVector(lines)
mesh = o3d.geometry.LineSet(vertices, lines)
mesh.paint_uniform_color(color)
return mesh
def create_coord_frame(origin=[0, 0, 0],size=1):
mesh = o3d.geometry.TriangleMesh.create_coordinate_frame(size=size)
mesh.translate(origin)
return mesh
# Visu3d : custom class used to visualize 3d skeleton
class Visu3D:
def __init__(self, bg_color=[0,0,0], zoom=1, segment_radius=1):
self.vis = o3d.visualization.VisualizerWithKeyCallback()
self.vis.create_window()
opt = self.vis.get_render_option()
opt.background_color = np.asarray(bg_color)
# Defining callbacks - Key codes: https://www.glfw.org/docs/latest/group__keys.html
self.vis.register_key_callback(ord("R"), self.start_rotating)
self.vis.register_key_callback(ord("O"), self.start_oscillating)
self.vis.register_key_callback(ord("S"), self.stop_moving)
self.vis.register_key_callback(262, self.turn_view_right) # Right arrow
self.vis.register_key_callback(263, self.turn_view_left) # Left arrow
self.vis.register_key_callback(265, self.incr_rot_speed) # Up arrow
self.vis.register_key_callback(264, self.decr_rot_speed) # Down arrow
self.view_control = self.vis.get_view_control()
self.zoom = zoom
self.segment_radius = segment_radius
self.move = "oscillate"
self.angle = 0
self.direction = 1
self.oscillate_angle = 200
self.geometries = []
def set_view(self):
if self.angle_view % 4 == 0:
ax = 0
elif self.angle_view <= 3:
ax = 1
else:
ax = -1
if self.angle_view == 2 or self.angle_view == 6:
az = 0
elif 3 <= self.angle_view <= 5:
az = 1
else:
az = -1
self.view_control.set_front(np.array([ax,0,az]))
self.view_control.set_up(np.array([0,-1,0]))
def init_view(self):
self.angle_view = 0
self.rot_speed = 2
self.set_view()
self.view_control.set_zoom(self.zoom)
def create_grid(self, p0, p1, p2, p3, ni1, ni2, color=(1,1,1)):
'''
p0, p1, p2, p3 : points defining a quadrilateral
ni1: nb of equidistant intervals on segments p0p1 and p3p2
ni2: nb of equidistant intervals on segments p1p2 and p0p3
'''
grid = create_grid(p0, p1, p2, p3, ni1, ni2, color)
self.vis.add_geometry(grid)
self.geometries.append(grid)
def create_camera(self):
cam = o3d.geometry.TriangleMesh.create_arrow(cylinder_radius=0.02, cone_radius=0.03, cylinder_height=0.1, cone_height=0.08)
cam.paint_uniform_color([0.2,0.7,1])
cam.compute_vertex_normals()
self.geometries.append(cam)
def add_geometries(self):
for geo in self.geometries:
self.vis.add_geometry(geo, reset_bounding_box=False)
def add_segment(self, p1, p2, radius=None, color=[1,1,1]):
radius = self.segment_radius if radius is None else radius
line = create_segment(p1, p2, radius=radius, color=color)
if line: self.vis.add_geometry(line, reset_bounding_box=False)
def clear(self):
self.vis.clear_geometries()
# Callback
def incr_rot_speed(self, vis):
if self.move == "rotate":
if self.rot_speed * self.direction == -1:
self.direction = 1
else:
self.rot_speed += self.direction
else:
self.rot_speed += 1
# Callback
def decr_rot_speed(self, vis):
if self.move == "rotate":
if self.rot_speed * self.direction == 1:
self.direction = -1
else:
self.rot_speed -= self.direction
else:
self.rot_speed = max (1, self.rot_speed-1)
# Callback
def turn_view_right(self, vis):
self.angle_view = (self.angle_view + 1) %8
self.set_view()
self.move = None
# Callback
def turn_view_left(self, vis):
self.angle_view = (self.angle_view - 1) %8
self.set_view()
self.move = None
# Callback
def start_rotating(self, vis):
self.move = "rotate"
# Callback
def start_oscillating(self, vis):
self.move = "oscillate"
self.angle = 0
# Callback
def stop_moving(self, vis):
self.move = None
def try_move(self):
if self.move == "rotate":
self.view_control.rotate(self.rot_speed * self.direction,0)
elif self.move == "oscillate":
self.view_control.rotate(self.rot_speed * self.direction,0)
self.angle += self.rot_speed * self.direction
if abs(self.angle) >= self.oscillate_angle:
self.direction = - self.direction
def render(self):
self.vis.poll_events()
self.vis.update_renderer()
if __name__ == "__main__":
line = create_segment([0, 0, 0], [1, 0, 0], color=(1,0,0))
line2 = create_segment([1, 0, 0], [1, 1, 0], color=(0,1,0))
line3 = create_segment([1, 1, 0], [0, 0, 0], radius=0.1)
grid = create_grid([0,0,0],[0,0,1],[0,1,1],[0,1,0], 3, 2)
frame =create_coord_frame()
print(grid)
# Draw everything
o3d.visualization.draw_geometries([line, line2, line3, grid, frame])
\ No newline at end of file
opencv-python >= 4.5.1.48
open3d
depthai >= 2.10
"""
This file is the template of the scripting node source code in Edge mode.
Substitutions are made in BlazeposeDepthaiEdge.py.
In the following:
rrn_ : normalized [0:1] coordinates in the rotated rectangle coordinate system
sqn_ : normalized [0:1] coordinates in the squared input image
"""
${_TRACE} ("Starting manager script node")
import marshal
from math import sin, cos, atan2, pi, hypot, degrees, floor
# Indexes of some keypoints
left_shoulder = 11
right_shoulder = 12
left_hip = 23
right_hip = 24
${_IF_XYZ}
# We use a filter for smoothing the reference point (mid hips or mid shoulders)
# for which we fetch the (x,y,z). Without this filter, the reference point is very shaky
class SmoothingFilter:
def __init__(self, alpha):
self.alpha = alpha
self.initialized = False
def apply(self, x):
if self.initialized:
result = self.alpha * x + (1 - self.alpha) * self.prev_x
else:
result = x
self.initialized = True
self.prev_x = result
return int(result)
def reset(self):
self.initialized = False
filter_x = SmoothingFilter(0.5)
filter_y = SmoothingFilter(0.5)
${_IF_XYZ}
# BufferMgr is used to statically allocate buffers once
# (replace dynamic allocation).
# These buffers are used for sending result to host
class BufferMgr:
def __init__(self):
self._bufs = {}
def __call__(self, size):
try:
buf = self._bufs[size]
except KeyError:
buf = self._bufs[size] = Buffer(size)
${_TRACE} (f"New buffer allocated: {size}")
return buf
buffer_mgr = BufferMgr()
def send_result(type, lm_score=0, rect_center_x=0, rect_center_y=0, rect_size=0, rotation=0, lms=0, lms_world=0, xyz_ref=0, xyz=0, xyz_zone=0):
# type : 0, 1 or 2
# 0 : pose detection only (detection score < threshold)
# 1 : pose detection + landmark regression
# 2 : landmark regression only (ROI computed from previous landmarks)
result = dict([("type", type), ("lm_score", lm_score), ("rotation", rotation),
("rect_center_x", rect_center_x), ("rect_center_y", rect_center_y), ("rect_size", rect_size),
("lms", lms), ('lms_world', lms_world),
("xyz_ref", xyz_ref), ("xyz", xyz), ("xyz_zone", xyz_zone)])
result_serial = marshal.dumps(result, 2)
buffer = buffer_mgr(len(result_serial))
buffer.getData()[:] = result_serial
${_TRACE} ("len result:"+str(len(result_serial)))
node.io['host'].send(buffer)
${_TRACE} ("Manager sent result to host")
def rr2img(rrn_x, rrn_y):
# Convert a point (rrn_x, rrn_y) expressed in normalized rotated rectangle (rrn)
# into (X, Y) expressed in normalized image (sqn)
X = sqn_rr_center_x + sqn_rr_size * ((rrn_x - 0.5) * cos_rot + (0.5 - rrn_y) * sin_rot)
Y = sqn_rr_center_y + sqn_rr_size * ((rrn_y - 0.5) * cos_rot + (rrn_x - 0.5) * sin_rot)
return X, Y
norm_pad_size = ${_pad_h} / ${_frame_size}
def is_visible(lm_id):
# Is the landmark lm_id visible?
# Here visibility means inferred visibility from the landmark model
return lms[lm_id*5+3] > ${_visibility_threshold}
def is_in_image(sqn_x, sqn_y):
# Is the point (sqn_x, sqn_y) included in the useful part of the image (excluding the pads)?
return norm_pad_size < sqn_y < 1 - norm_pad_size
# send_new_frame_to_branch defines on which branch new incoming frames are sent
# 1 = pose detection branch
# 2 = landmark branch
send_new_frame_to_branch = 1
next_roi_lm_idx = 33*5
cfg_pre_pd = ImageManipConfig()
cfg_pre_pd.setResizeThumbnail(224, 224, 0, 0, 0)
while True:
if send_new_frame_to_branch == 1: # Routing frame to pd
node.io['pre_pd_manip_cfg'].send(cfg_pre_pd)
${_TRACE} ("Manager sent thumbnail config to pre_pd manip")
# Wait for pd post processing's result
detection = node.io['from_post_pd_nn'].get().getLayerFp16("result")
${_TRACE} ("Manager received pd result: "+str(detection))
pd_score, sqn_rr_center_x, sqn_rr_center_y, sqn_scale_x, sqn_scale_y = detection
if pd_score < ${_pd_score_thresh}:
send_result(0)
continue
scale_center_x = sqn_scale_x - sqn_rr_center_x
scale_center_y = sqn_scale_y - sqn_rr_center_y
sqn_rr_size = 2 * ${_rect_transf_scale} * hypot(scale_center_x, scale_center_y)
rotation = 0.5 * pi - atan2(-scale_center_y, scale_center_x)
rotation = rotation - 2 * pi *floor((rotation + pi) / (2 * pi))
${_IF_XYZ}
filter_x.reset()
filter_y.reset()
${_IF_XYZ}
# Routing frame to lm
sin_rot = sin(rotation) # We will often need these values later
cos_rot = cos(rotation)
# Tell pre_lm_manip how to crop body region
rr = RotatedRect()
rr.center.x = sqn_rr_center_x
rr.center.y = (sqn_rr_center_y * ${_frame_size} - ${_pad_h}) / ${_img_h}
rr.size.width = sqn_rr_size
rr.size.height = sqn_rr_size * ${_frame_size} / ${_img_h}
rr.angle = degrees(rotation)
if rr.size.width > 4 or rr.size.height > 7:
send_result(0)
send_new_frame_to_branch = 1
${_TRACE}("Avoid troubleshooting")
continue
cfg = ImageManipConfig()
cfg.setCropRotatedRect(rr, True)
cfg.setResize(256, 256)
node.io['pre_lm_manip_cfg'].send(cfg)
${_TRACE} ("Manager sent config to pre_lm manip")
# Wait for lm's result
lm_result = node.io['from_lm_nn'].get()
${_TRACE} ("Manager received result from lm nn")
lm_score = lm_result.getLayerFp16("Identity_1")[0]
if lm_score > ${_lm_score_thresh}:
lms = lm_result.getLayerFp16("Identity")
lms_world = lm_result.getLayerFp16("Identity_4")[:99]
xyz = 0
xyz_zone = 0
xyz_ref = 0
# Query xyz
${_IF_XYZ}
# Choosing the reference point: mid hips if hips visible, or mid shoulders otherwise
# xyz_ref codes the reference point, 1 if mid hips, 2 if mid shoulders, 0 if no reference point
if is_visible(right_hip) and is_visible(left_hip):
kp1 = right_hip
kp2 = left_hip
rrn_xyz_ref_x = (lms[5*kp1] + lms[5*kp2]) / 512 # 512 = 256*2 (256 for normalizing, 2 for the mean)
rrn_xyz_ref_y = (lms[5*kp1+1] + lms[5*kp2+1]) / 512
sqn_xyz_ref_x, sqn_xyz_ref_y = rr2img(rrn_xyz_ref_x, rrn_xyz_ref_y)
if is_in_image(sqn_xyz_ref_x, sqn_xyz_ref_y):
xyz_ref = 1
if xyz_ref == 0 and is_visible(right_shoulder) and is_visible(left_shoulder):
kp1 = right_shoulder
kp2 = left_shoulder
rrn_xyz_ref_x = (lms[5*kp1] + lms[5*kp2]) / 512 # 512 = 256*2 (256 for normalizing, 2 for the mean)
rrn_xyz_ref_y = (lms[5*kp1+1] + lms[5*kp2+1]) / 512
sqn_xyz_ref_x, sqn_xyz_ref_y = rr2img(rrn_xyz_ref_x, rrn_xyz_ref_y)
if is_in_image(sqn_xyz_ref_x, sqn_xyz_ref_y):
xyz_ref = 2
if xyz_ref:
cfg = SpatialLocationCalculatorConfig()
conf_data = SpatialLocationCalculatorConfigData()
conf_data.depthThresholds.lowerThreshold = 100
conf_data.depthThresholds.upperThreshold = 10000
half_zone_size = max(int(sqn_rr_size * ${_frame_size} / 90), 4)
xc = filter_x.apply(sqn_xyz_ref_x * ${_frame_size} + ${_crop_w})
yc = filter_y.apply(sqn_xyz_ref_y * ${_frame_size} - ${_pad_h})
roi_left = max(0, xc - half_zone_size)
roi_right = min(${_img_w}-1, xc + half_zone_size)
roi_top = max(0, yc - half_zone_size)
roi_bottom = min(${_img_h}-1, yc + half_zone_size)
roi_topleft = Point2f(roi_left, roi_top)
roi_bottomright = Point2f(roi_right, roi_bottom)
conf_data.roi = Rect(roi_topleft, roi_bottomright)
cfg = SpatialLocationCalculatorConfig()
cfg.addROI(conf_data)
node.io['spatial_location_config'].send(cfg)
${_TRACE} ("Manager sent ROI to spatial_location_config")
# Wait xyz response
xyz_data = node.io['spatial_data'].get().getSpatialLocations()
${_TRACE} ("Manager received spatial_location")
coords = xyz_data[0].spatialCoordinates
xyz = [float(coords.x), float(coords.y), float(coords.z)]
roi = xyz_data[0].config.roi
xyz_zone = [int(roi.topLeft().x - ${_crop_w}), int(roi.topLeft().y), int(roi.bottomRight().x - ${_crop_w}), int(roi.bottomRight().y)]
else:
xyz = [0.0] * 3
xyz_zone = [0] * 4
${_IF_XYZ}
# Send result to host
send_result(send_new_frame_to_branch, lm_score, sqn_rr_center_x, sqn_rr_center_y, sqn_rr_size, rotation, lms, lms_world, xyz_ref, xyz, xyz_zone)
if not ${_force_detection}:
send_new_frame_to_branch = 2
# Calculate the ROI for next frame
rrn_rr_center_x = lms[next_roi_lm_idx] / 256
rrn_rr_center_y = lms[next_roi_lm_idx+1] / 256
rrn_scale_x = lms[next_roi_lm_idx+5] / 256
rrn_scale_y = lms[next_roi_lm_idx+6] / 256
sqn_scale_x, sqn_scale_y = rr2img(rrn_scale_x, rrn_scale_y)
sqn_rr_center_x, sqn_rr_center_y = rr2img(rrn_rr_center_x, rrn_rr_center_y)
scale_center_x = sqn_scale_x - sqn_rr_center_x
scale_center_y = sqn_scale_y - sqn_rr_center_y
sqn_rr_size = 2 * ${_rect_transf_scale} * hypot(scale_center_x, scale_center_y)
rotation = 0.5 * pi - atan2(-scale_center_y, scale_center_x)
rotation = rotation - 2 * pi *floor((rotation + pi) / (2 * pi))
else:
send_result(send_new_frame_to_branch, lm_score)
send_new_frame_to_branch = 1