diff --git a/.gitignore b/.gitignore
index 369e31b4aa2f480b1905c62f998489159cec6001..2df6ebfd274c28d16085843f8f1a5bd34f62dcea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,8 @@
 **/__pycache__
 **/__pycache__/**
 
-.vscode
\ No newline at end of file
+.vscode
+
+build/
+**/build
+**/build/**
diff --git a/models/face_detection_yunet/CMakeLists.txt b/models/face_detection_yunet/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c82939a01520c730f6ba5432193aec62513ecad7
--- /dev/null
+++ b/models/face_detection_yunet/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 3.24.0)
+project(opencv_zoo_face_detection_yunet)
+
+set(OPENCV_VERSION "4.7.0")
+set(OPENCV_INSTALLATION_PATH "" CACHE PATH "Where to look for OpenCV installation")
+
+# Find OpenCV
+find_package(OpenCV ${OPENCV_VERSION} REQUIRED HINTS ${OPENCV_INSTALLATION_PATH})
+
+add_executable(demo demo.cpp)
+target_link_libraries(demo ${OpenCV_LIBS})
diff --git a/models/face_detection_yunet/README.md b/models/face_detection_yunet/README.md
index f7af42f5215b9eda77466b492cbf8028a46b1c76..934c1d6f1c716d368e4270f1958835c73b0f7223 100644
--- a/models/face_detection_yunet/README.md
+++ b/models/face_detection_yunet/README.md
@@ -20,6 +20,8 @@ Results of accuracy evaluation with [tools/eval](../../tools/eval).
 
 ## Demo
 
+### Python
+
 Run the following command to try the demo:
 
 ```shell
@@ -32,6 +34,23 @@ python demo.py --input /path/to/image
 python demo.py --help
 ```
 
+### C++
+
+Install the latest OpenCV and CMake >= 3.24.0 to get started:
+
+```shell
+# A typical and default installation path of OpenCV is /usr/local
+cmake -B build -D OPENCV_INSTALLATION_PATH=/path/to/opencv/installation .
+cmake --build build
+
+# detect on camera input
+./build/demo
+# detect on an image
+./build/demo -i=/path/to/image
+# get help messages
+./build/demo -h
+```
+
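+As a concrete sketch (assuming OpenCV was installed under its default prefix `/usr/local`, per the comment above), the configure-and-build step would be:
+
+```shell
+cmake -B build -D OPENCV_INSTALLATION_PATH=/usr/local .
+cmake --build build
+```
+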
 ### Example outputs
 
 ![webcam demo](./examples/yunet_demo.gif)
diff --git a/models/face_detection_yunet/demo.cpp b/models/face_detection_yunet/demo.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..41b75d122e04744631c3190cf4160fc65140249c
--- /dev/null
+++ b/models/face_detection_yunet/demo.cpp
@@ -0,0 +1,220 @@
+#include "opencv2/opencv.hpp"
+
+#include <map>
+#include <string>
+#include <vector>
+#include <iostream>
+
+// Map human-readable CLI values to OpenCV DNN backend/target enums
+const std::map<std::string, int> str2backend{
+    {"opencv", cv::dnn::DNN_BACKEND_OPENCV}, {"cuda", cv::dnn::DNN_BACKEND_CUDA},
+    {"timvx",  cv::dnn::DNN_BACKEND_TIMVX},  {"cann", cv::dnn::DNN_BACKEND_CANN}
+};
+const std::map<std::string, int> str2target{
+    {"cpu", cv::dnn::DNN_TARGET_CPU}, {"cuda", cv::dnn::DNN_TARGET_CUDA},
+    {"npu", cv::dnn::DNN_TARGET_NPU}, {"cuda_fp16", cv::dnn::DNN_TARGET_CUDA_FP16}
+};
+
+class YuNet
+{
+public:
+    YuNet(const std::string& model_path,
+          const cv::Size& input_size = cv::Size(320, 320),
+          float conf_threshold = 0.6f,
+          float nms_threshold = 0.3f,
+          int top_k = 5000,
+          int backend_id = 0,
+          int target_id = 0)
+        : model_path_(model_path), input_size_(input_size),
+          conf_threshold_(conf_threshold), nms_threshold_(nms_threshold),
+          top_k_(top_k), backend_id_(backend_id), target_id_(target_id)
+    {
+        model = cv::FaceDetectorYN::create(model_path_, "", input_size_, conf_threshold_, nms_threshold_, top_k_, backend_id_, target_id_);
+    }
+
+    void setBackendAndTarget(int backend_id, int target_id)
+    {
+        backend_id_ = backend_id;
+        target_id_ = target_id;
+        model = cv::FaceDetectorYN::create(model_path_, "", input_size_, conf_threshold_, nms_threshold_, top_k_, backend_id_, target_id_);
+    }
+
+    /* Overwrite the input size when creating the model. Size format: [Width, Height]. */
+    void setInputSize(const cv::Size& input_size)
+    {
+        input_size_ = input_size;
+        model->setInputSize(input_size_);
+    }
+
+    cv::Mat infer(const cv::Mat& image)
+    {
+        cv::Mat res;
+        model->detect(image, res);
+        return res;
+    }
+
+private:
+    cv::Ptr<cv::FaceDetectorYN> model;
+
+    std::string model_path_;
+    cv::Size input_size_;
+    float conf_threshold_;
+    float nms_threshold_;
+    int top_k_;
+    int backend_id_;
+    int target_id_;
+};
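+
+/* A note on the detection output (a sketch inferred from the column indexing
+   used below; see also the cv::FaceDetectorYN documentation): each row of the
+   cv::Mat returned by infer() encodes one face as 15 floats:
+     [0..3]   bounding box (x, y, w, h)
+     [4..13]  five landmarks as (x, y) pairs: right eye, left eye, nose tip,
+              right mouth corner, left mouth corner
+     [14]     confidence score */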
+
+cv::Mat visualize(const cv::Mat& image, const cv::Mat& faces, float fps = -1.f)
+{
+    static cv::Scalar box_color{0, 255, 0};
+    static std::vector<cv::Scalar> landmark_color{
+        cv::Scalar(255,   0,   0), // right eye
+        cv::Scalar(  0,   0, 255), // left eye
+        cv::Scalar(  0, 255,   0), // nose tip
+        cv::Scalar(255,   0, 255), // right mouth corner
+        cv::Scalar(  0, 255, 255)  // left mouth corner
+    };
+    static cv::Scalar text_color{0, 255, 0};
+
+    auto output_image = image.clone();
+
+    if (fps >= 0)
+    {
+        cv::putText(output_image, cv::format("FPS: %.2f", fps), cv::Point(0, 15), cv::FONT_HERSHEY_SIMPLEX, 0.5, text_color, 2);
+    }
+
+    for (int i = 0; i < faces.rows; ++i)
+    {
+        // Draw bounding boxes
+        int x1 = static_cast<int>(faces.at<float>(i, 0));
+        int y1 = static_cast<int>(faces.at<float>(i, 1));
+        int w  = static_cast<int>(faces.at<float>(i, 2));
+        int h  = static_cast<int>(faces.at<float>(i, 3));
+        cv::rectangle(output_image, cv::Rect(x1, y1, w, h), box_color, 2);
+
+        // Confidence as text
+        float conf = faces.at<float>(i, 14);
+        cv::putText(output_image, cv::format("%.4f", conf), cv::Point(x1, y1 + 12), cv::FONT_HERSHEY_DUPLEX, 0.5, text_color);
+
+        // Draw landmarks
+        for (int j = 0; j < static_cast<int>(landmark_color.size()); ++j)
+        {
+            int x = static_cast<int>(faces.at<float>(i, 2 * j + 4));
+            int y = static_cast<int>(faces.at<float>(i, 2 * j + 5));
+            cv::circle(output_image, cv::Point(x, y), 2, landmark_color[j], 2);
+        }
+    }
+    return output_image;
+}
+
+int main(int argc, char** argv)
+{
+    cv::CommandLineParser parser(argc, argv,
+        "{help h         |                                   | Print this message}"
+        "{input i        |                                   | Set input to a certain image; omit if using camera}"
+        "{model m        | face_detection_yunet_2022mar.onnx | Set path to the model}"
+        "{backend b      | opencv                            | Set DNN backend}"
+        "{target t       | cpu                               | Set DNN target}"
+        "{save s         | false                             | Whether to save the result image}"
+        "{vis v          | false                             | Whether to visualize the result image}"
+        /* model params below */
+        "{conf_threshold | 0.9                               | Set the minimum confidence for the model to identify a face; faces with conf < conf_threshold are filtered out}"
+        "{nms_threshold  | 0.3                               | Set the threshold to suppress overlapping boxes: if IoU(box1, box2) >= nms_threshold, only the box with the higher score is kept}"
+        "{top_k          | 5000                              | Keep top_k bounding boxes before NMS; setting a lower value may help speed up postprocessing}"
+    );
+    if (parser.has("help"))
+    {
+        parser.printMessage();
+        return 0;
+    }
+
+    std::string input_path = parser.get<std::string>("input");
+    std::string model_path = parser.get<std::string>("model");
+    std::string backend    = parser.get<std::string>("backend");
+    std::string target     = parser.get<std::string>("target");
+    bool save_flag = parser.get<bool>("save");
+    bool vis_flag  = parser.get<bool>("vis");
+
+    // model params
+    float conf_threshold = parser.get<float>("conf_threshold");
+    float nms_threshold  = parser.get<float>("nms_threshold");
+    int top_k = parser.get<int>("top_k");
+    const int backend_id = str2backend.at(backend);
+    const int target_id  = str2target.at(target);
+
+    // Instantiate YuNet
+    YuNet model(model_path, cv::Size(320, 320), conf_threshold, nms_threshold, top_k, backend_id, target_id);
+
+    // If input is an image
+    if (!input_path.empty())
+    {
+        auto image = cv::imread(input_path);
+
+        // Inference
+        model.setInputSize(image.size());
+        auto faces = model.infer(image);
+
+        // Print faces
+        std::cout << cv::format("%d faces detected:\n", faces.rows);
+        for (int i = 0; i < faces.rows; ++i)
+        {
+            int x1 = static_cast<int>(faces.at<float>(i, 0));
+            int y1 = static_cast<int>(faces.at<float>(i, 1));
+            int w  = static_cast<int>(faces.at<float>(i, 2));
+            int h  = static_cast<int>(faces.at<float>(i, 3));
+            float conf = faces.at<float>(i, 14);
+            std::cout << cv::format("%d: x1=%d, y1=%d, w=%d, h=%d, conf=%.4f\n", i, x1, y1, w, h, conf);
+        }
+
+        // Draw results on the input image
+        if (save_flag || vis_flag)
+        {
+            auto res_image = visualize(image, faces);
+            if (save_flag)
+            {
+                std::cout << "Results are saved to result.jpg\n";
+                cv::imwrite("result.jpg", res_image);
+            }
+            if (vis_flag)
+            {
+                cv::namedWindow(input_path, cv::WINDOW_AUTOSIZE);
+                cv::imshow(input_path, res_image);
+                cv::waitKey(0);
+            }
+        }
+    }
+    else // Use the default camera
+    {
+        int device_id = 0;
+        auto cap = cv::VideoCapture(device_id);
+        int w = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_WIDTH));
+        int h = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_HEIGHT));
+        model.setInputSize(cv::Size(w, h));
+
+        auto tick_meter = cv::TickMeter();
+        cv::Mat frame;
+        while (cv::waitKey(1) < 0)
+        {
+            bool has_frame = cap.read(frame);
+            if (!has_frame)
+            {
+                std::cout << "No frames grabbed! Exiting ...\n";
+                break;
+            }
+
+            // Inference
+            tick_meter.start();
+            cv::Mat faces = model.infer(frame);
+            tick_meter.stop();
+
+            // Draw results on the input image
+            auto res_image = visualize(frame, faces, static_cast<float>(tick_meter.getFPS()));
+            // Visualize in a new window
+            cv::imshow("YuNet Demo", res_image);
+
+            tick_meter.reset();
+        }
+    }
+
+    return 0;
+}
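+
+/* Example invocations (a sketch; the short flag names come from the parser
+   keys defined in main above):
+     ./demo                            detect on the default camera
+     ./demo -i=/path/to/image -v=true  detect on an image and visualize it
+     ./demo -b=cuda -t=cuda            run with the CUDA backend and target */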