From f5035150824c49d3a2d7cb7ce69ac1ffbdf8c63d Mon Sep 17 00:00:00 2001 From: Dmitry Kurtaev Date: Sat, 2 Dec 2017 23:52:35 +0300 Subject: [PATCH] JavaScript bindings for dnn module --- .../protobuf/src/google/protobuf/stubs/port.h | 3 +- .../dnn_javascript/dnn_javascript.markdown | 44 ++++ .../dnn/table_of_content_dnn.markdown | 16 ++ modules/dnn/CMakeLists.txt | 2 +- .../dnn/include/opencv2/dnn/all_layers.hpp | 20 -- modules/dnn/include/opencv2/dnn/dnn.hpp | 8 +- modules/dnn/src/dnn.cpp | 7 +- modules/dnn/src/layers/eltwise_layer.cpp | 23 +- modules/dnn/src/layers/lrn_layer.cpp | 11 +- modules/dnn/src/layers/pooling_layer.cpp | 29 ++- modules/js/src/core_bindings.cpp | 8 +- modules/js/src/embindgen.py | 5 +- platforms/js/build_js.py | 2 +- samples/dnn/js_face_recognition.html | 205 ++++++++++++++++++ 14 files changed, 326 insertions(+), 57 deletions(-) create mode 100644 doc/tutorials/dnn/dnn_javascript/dnn_javascript.markdown create mode 100644 samples/dnn/js_face_recognition.html diff --git a/3rdparty/protobuf/src/google/protobuf/stubs/port.h b/3rdparty/protobuf/src/google/protobuf/stubs/port.h index 376be5f7ab..88bc28654b 100644 --- a/3rdparty/protobuf/src/google/protobuf/stubs/port.h +++ b/3rdparty/protobuf/src/google/protobuf/stubs/port.h @@ -224,8 +224,7 @@ static const uint64 kuint64max = GOOGLE_ULONGLONG(0xFFFFFFFFFFFFFFFF); #if defined(__clang__) && defined(__has_cpp_attribute) \ && !defined(GOOGLE_PROTOBUF_OS_APPLE) -# if defined(GOOGLE_PROTOBUF_OS_NACL) || defined(EMSCRIPTEN) || \ - __has_cpp_attribute(clang::fallthrough) +# if defined(GOOGLE_PROTOBUF_OS_NACL) || __has_cpp_attribute(clang::fallthrough) # define GOOGLE_FALLTHROUGH_INTENDED [[clang::fallthrough]] # endif #endif diff --git a/doc/tutorials/dnn/dnn_javascript/dnn_javascript.markdown b/doc/tutorials/dnn/dnn_javascript/dnn_javascript.markdown new file mode 100644 index 0000000000..0a9caeae35 --- /dev/null +++ b/doc/tutorials/dnn/dnn_javascript/dnn_javascript.markdown @@ -0,0 +1,44 @@ +# How to 
run deep networks in a browser {#tutorial_dnn_javascript} + +## Introduction +This tutorial will show us how to run deep learning models using OpenCV.js right +in a browser. The tutorial refers to a sample pipeline of face detection and face recognition +models. + +## Face detection +The face detection network gets a BGR image as input and produces a set of bounding boxes +that might contain faces. All we need to do is select the boxes with a strong +confidence. + +## Face recognition +The network is called OpenFace (project https://github.com/cmusatyalab/openface). +The face recognition model receives an RGB face image of size `96x96`. Then it returns +a `128`-dimensional unit vector that represents the input face as a point on the unit +multidimensional sphere. So the difference between two faces is the angle between two +output vectors. + +## Sample +The whole sample is an HTML page that has JavaScript code to use OpenCV.js functionality. +You can see this page embedded below. Press the `Start` button to begin the demo. +Press `Add a person` to name a person that is recognized as an unknown one. +Next we'll discuss the main parts of the code. + +@htmlinclude js_face_recognition.html + +-# Run face detection network to detect faces on input image. +@snippet dnn/js_face_recognition.html Run face detection model +You may play with input blob sizes to balance detection quality and efficiency. +The bigger the input blob, the smaller the faces that can be detected. + +-# Run face recognition network to obtain a `128`-dimensional unit feature vector for the input face image. +@snippet dnn/js_face_recognition.html Get 128 floating points feature vector + +-# Perform a recognition. +@snippet dnn/js_face_recognition.html Recognize +Match a new feature vector with registered ones. Return the name of the best matched person. + +-# The main loop. +@snippet dnn/js_face_recognition.html Define frames processing +The main loop of our application receives frames from a camera and recognizes +every detected face in the frame. 
We start this function once OpenCV.js has been +initialized and the deep learning models have been downloaded. diff --git a/doc/tutorials/dnn/table_of_content_dnn.markdown b/doc/tutorials/dnn/table_of_content_dnn.markdown index 2385f14fd0..94b818c41a 100644 --- a/doc/tutorials/dnn/table_of_content_dnn.markdown +++ b/doc/tutorials/dnn/table_of_content_dnn.markdown @@ -25,6 +25,14 @@ Deep Neural Networks (dnn module) {#tutorial_table_of_content_dnn} In this tutorial we describe the ways to schedule your networks using Halide backend in OpenCV deep learning module. +- @subpage tutorial_dnn_android + + *Compatibility:* \> OpenCV 3.3 + + *Author:* Dmitry Kurtaev + + This tutorial will show you how to run a deep learning model using OpenCV on an Android device. + - @subpage tutorial_dnn_yolo *Compatibility:* \> OpenCV 3.3.1 @@ -32,3 +40,11 @@ Deep Neural Networks (dnn module) {#tutorial_table_of_content_dnn} *Author:* Alessandro de Oliveira Faria In this tutorial you will learn how to use opencv_dnn module using yolo_object_detection with device capture, video file or image. + +- @subpage tutorial_dnn_javascript + + *Compatibility:* \> OpenCV 3.3.1 + + *Author:* Dmitry Kurtaev + + In this tutorial we'll run deep learning models in a browser using OpenCV.js. diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 2ca831a424..b36bfe5193 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -15,7 +15,7 @@ set(the_description "Deep neural network module. 
It allows to load models from d ocv_add_dispatched_file("layers/layers_common" AVX AVX2) -ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java) +ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java js) ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-shadow -Wno-parentheses -Wmaybe-uninitialized -Wsign-promo -Wmissing-declarations -Wmissing-prototypes ) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 3dc256de10..4822918e90 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -221,11 +221,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN class CV_EXPORTS LRNLayer : public Layer { public: - enum Type - { - CHANNEL_NRM, - SPATIAL_NRM - }; int type; int size; @@ -238,14 +233,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN class CV_EXPORTS PoolingLayer : public Layer { public: - enum Type - { - MAX, - AVE, - STOCHASTIC, - ROI - }; - int type; Size kernel, stride, pad; bool globalPooling; @@ -474,13 +461,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN class CV_EXPORTS EltwiseLayer : public Layer { public: - enum EltwiseOp - { - PROD = 0, - SUM = 1, - MAX = 2, - }; - static Ptr create(const LayerParams ¶ms); }; diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index fa26388124..f365a2667d 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -423,8 +423,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN * @param outputBlobs contains all output blobs for each layer specified in @p outBlobNames. * @param outBlobNames names for layers which outputs are needed to get */ - CV_WRAP void forward(std::vector >& outputBlobs, - const std::vector& outBlobNames); + void forward(std::vector >& outputBlobs, + const std::vector& outBlobNames); //TODO: /** @brief Optimized forward. 
@@ -467,7 +467,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN * @note If updating blob is not empty then @p blob must have the same shape, * because network reshaping is not implemented yet. */ - CV_WRAP void setInput(const Mat &blob, const String& name = ""); + CV_WRAP void setInput(InputArray blob, const String& name = ""); /** @brief Sets the new value for the learned param of the layer. * @param layer name or id of the layer. @@ -733,7 +733,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN * If @p crop is false, direct resize without cropping and preserving aspect ratio is performed. * @returns 4-dimansional Mat with NCHW dimensions order. */ - CV_EXPORTS_W Mat blobFromImage(const Mat& image, double scalefactor=1.0, const Size& size = Size(), + CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(), const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true); /** @brief Creates 4-dimensional blob from series of images. Optionally resizes and * crops @p images from center, subtract @p mean values, scales values by @p scalefactor, diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index d91598ce2b..f2c52eb518 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -84,11 +84,11 @@ static String toString(const T &v) return ss.str(); } -Mat blobFromImage(const Mat& image, double scalefactor, const Size& size, +Mat blobFromImage(InputArray image, double scalefactor, const Size& size, const Scalar& mean, bool swapRB, bool crop) { CV_TRACE_FUNCTION(); - std::vector images(1, image); + std::vector images(1, image.getMat()); return blobFromImages(images, scalefactor, size, mean, swapRB, crop); } @@ -1910,7 +1910,7 @@ void Net::setInputsNames(const std::vector &inputBlobNames) impl->netInputLayer->setNames(inputBlobNames); } -void Net::setInput(const Mat &blob_, const String& name) +void Net::setInput(InputArray blob, const String& name) { CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); @@ -1930,6 
+1930,7 @@ void Net::setInput(const Mat &blob_, const String& name) ld.umat_outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) ); ld.outputBlobsWrappers.resize(ld.outputBlobs.size()); MatShape prevShape = shape(ld.outputBlobs[pin.oid]); + Mat blob_ = blob.getMat(); bool oldShape = prevShape == shape(blob_); if (oldShape) { diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index b98537b91d..7e2214ea46 100644 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -52,22 +52,27 @@ namespace dnn class EltwiseLayerImpl : public EltwiseLayer { public: - EltwiseOp op; + enum EltwiseOp + { + PROD = 0, + SUM = 1, + MAX = 2, + } op; std::vector coeffs; EltwiseLayerImpl(const LayerParams& params) { setParamsFrom(params); - op = EltwiseLayer::SUM; + op = SUM; if (params.has("operation")) { String operation = params.get("operation").toLowerCase(); if (operation == "prod") - op = EltwiseLayer::PROD; + op = PROD; else if (operation == "sum") - op = EltwiseLayer::SUM; + op = SUM; else if (operation == "max") - op = EltwiseLayer::MAX; + op = MAX; else CV_Error(cv::Error::StsBadArg, "Unknown operaticon type \"" + operation + "\""); } @@ -122,7 +127,7 @@ public: int channels; size_t planeSize; - EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(EltwiseLayer::PROD), nstripes(0), activ(0), channels(0), planeSize(0) {} + EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(PROD), nstripes(0), activ(0), channels(0), planeSize(0) {} static void run(const Mat** srcs, int nsrcs, Mat& dst, const std::vector& coeffs, EltwiseOp op, @@ -150,7 +155,7 @@ public: CV_Assert(dst.total() == dst.size[0] * p.channels * p.planeSize); bool simpleCoeffs = true; - if( op == EltwiseLayer::SUM && !coeffs.empty() ) + if( op == SUM && !coeffs.empty() ) { CV_Assert( coeffs.size() == (size_t)nsrcs ); @@ -192,7 +197,7 @@ public: const float* srcptr0 = srcs[0]->ptr() + globalDelta; float* 
dstptr = dstptr0 + globalDelta; - if( op == EltwiseLayer::PROD ) + if( op == PROD ) { for( k = 1; k < n; k++ ) { @@ -204,7 +209,7 @@ public: srcptr0 = (const float*)dstptr; } } - else if( op == EltwiseLayer::MAX ) + else if( op == MAX ) { for( k = 1; k < n; k++ ) { diff --git a/modules/dnn/src/layers/lrn_layer.cpp b/modules/dnn/src/layers/lrn_layer.cpp index 50c0ae1dfc..0f869015bf 100644 --- a/modules/dnn/src/layers/lrn_layer.cpp +++ b/modules/dnn/src/layers/lrn_layer.cpp @@ -67,9 +67,9 @@ public: type = -1; String nrmType = params.get("norm_region", "ACROSS_CHANNELS"); if (nrmType == "ACROSS_CHANNELS") - type = LRNLayer::CHANNEL_NRM; + type = CHANNEL_NRM; else if (nrmType == "WITHIN_CHANNEL") - type = LRNLayer::SPATIAL_NRM; + type = SPATIAL_NRM; else CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\""); @@ -397,6 +397,13 @@ public: } return flops; } + +private: + enum Type + { + CHANNEL_NRM, + SPATIAL_NRM + }; }; Ptr LRNLayer::create(const LayerParams& params) diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index 6c51f61f10..c2e8ffc272 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -63,7 +63,7 @@ class PoolingLayerImpl : public PoolingLayer public: PoolingLayerImpl(const LayerParams& params) { - type = PoolingLayer::MAX; + type = MAX; computeMaxIdx = true; globalPooling = false; @@ -71,11 +71,11 @@ public: { String pool = params.get("pool").toLowerCase(); if (pool == "max") - type = PoolingLayer::MAX; + type = MAX; else if (pool == "ave") - type = PoolingLayer::AVE; + type = AVE; else if (pool == "stochastic") - type = PoolingLayer::STOCHASTIC; + type = STOCHASTIC; else CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling, @@ -83,7 +83,7 @@ public: } else if (params.has("pooled_w") || params.has("pooled_h") || params.has("spatial_scale")) { - type = 
PoolingLayer::ROI; + type = ROI; } setParamsFrom(params); ceilMode = params.get("ceil_mode", true); @@ -115,8 +115,7 @@ public: { return backendId == DNN_BACKEND_DEFAULT || backendId == DNN_BACKEND_HALIDE && haveHalide() && - (type == PoolingLayer::MAX || - type == PoolingLayer::AVE && !pad.width && !pad.height); + (type == MAX || type == AVE && !pad.width && !pad.height); } #ifdef HAVE_OPENCL @@ -200,9 +199,9 @@ public: virtual Ptr initHalide(const std::vector > &inputs) { - if (type == PoolingLayer::MAX) + if (type == MAX) return initMaxPoolingHalide(inputs); - else if (type == PoolingLayer::AVE) + else if (type == AVE) return initAvePoolingHalide(inputs); else return Ptr(); @@ -221,7 +220,7 @@ public: float spatialScale; PoolingInvoker() : src(0), rois(0), dst(0), mask(0), nstripes(0), - computeMaxIdx(0), poolingType(PoolingLayer::MAX), spatialScale(0) {} + computeMaxIdx(0), poolingType(MAX), spatialScale(0) {} static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel, Size stride, Size pad, int poolingType, float spatialScale, @@ -698,7 +697,7 @@ public: out.height = 1; out.width = 1; } - else if (type == PoolingLayer::ROI) + else if (type == ROI) { out.height = pooledSize.height; out.width = pooledSize.width; @@ -757,6 +756,14 @@ public: } return flops; } +private: + enum Type + { + MAX, + AVE, + STOCHASTIC, + ROI + }; }; Ptr PoolingLayer::create(const LayerParams& params) diff --git a/modules/js/src/core_bindings.cpp b/modules/js/src/core_bindings.cpp index 5cd5eb205e..18427d18fe 100644 --- a/modules/js/src/core_bindings.cpp +++ b/modules/js/src/core_bindings.cpp @@ -73,11 +73,13 @@ #include "opencv2/video/tracking.hpp" #include "opencv2/video/background_segm.hpp" #include "opencv2/objdetect.hpp" +#include "opencv2/dnn.hpp" #include using namespace emscripten; using namespace cv; +using namespace dnn; namespace binding_utils { @@ -339,12 +341,12 @@ EMSCRIPTEN_BINDINGS(binding_utils) .constructor() 
.constructor(&binding_utils::createMat, allow_raw_pointers()) - .class_function("eye", select_overload(&binding_utils::matEye)) .class_function("eye", select_overload(&binding_utils::matEye)) - .class_function("ones", select_overload(&binding_utils::matOnes)) + .class_function("eye", select_overload(&binding_utils::matEye)) .class_function("ones", select_overload(&binding_utils::matOnes)) - .class_function("zeros", select_overload(&binding_utils::matZeros)) + .class_function("ones", select_overload(&binding_utils::matOnes)) .class_function("zeros", select_overload(&binding_utils::matZeros)) + .class_function("zeros", select_overload(&binding_utils::matZeros)) .property("rows", &cv::Mat::rows) .property("cols", &cv::Mat::cols) diff --git a/modules/js/src/embindgen.py b/modules/js/src/embindgen.py index 124ab9054c..a27fba2f21 100644 --- a/modules/js/src/embindgen.py +++ b/modules/js/src/embindgen.py @@ -125,6 +125,9 @@ video = {'': ['CamShift', 'calcOpticalFlowFarneback', 'calcOpticalFlowPyrLK', 'c 'BackgroundSubtractorMOG2': ['BackgroundSubtractorMOG2', 'apply'], 'BackgroundSubtractor': ['apply', 'getBackgroundImage']} +dnn = {'dnn_Net': ['setInput', 'forward'], + '': ['readNetFromCaffe', 'readNetFromTensorflow', 'readNetFromTorch', 'readNetFromDarknet', 'blobFromImage']} + def makeWhiteList(module_list): wl = {} for m in module_list: @@ -135,7 +138,7 @@ def makeWhiteList(module_list): wl[k] = m[k] return wl -white_list = makeWhiteList([core, imgproc, objdetect, video]) +white_list = makeWhiteList([core, imgproc, objdetect, video, dnn]) # Features to be exported export_enums = False diff --git a/platforms/js/build_js.py b/platforms/js/build_js.py index f58625d172..34b579a7a9 100644 --- a/platforms/js/build_js.py +++ b/platforms/js/build_js.py @@ -134,7 +134,7 @@ class Builder: "-DBUILD_ZLIB=ON", "-DBUILD_opencv_apps=OFF", "-DBUILD_opencv_calib3d=OFF", - "-DBUILD_opencv_dnn=OFF", + "-DBUILD_opencv_dnn=ON", "-DBUILD_opencv_features2d=OFF", "-DBUILD_opencv_flann=OFF", 
"-DBUILD_opencv_ml=OFF", diff --git a/samples/dnn/js_face_recognition.html b/samples/dnn/js_face_recognition.html new file mode 100644 index 0000000000..8cdfb57ba8 --- /dev/null +++ b/samples/dnn/js_face_recognition.html @@ -0,0 +1,205 @@ + + + + + + + + + + + + + + +
+ + + + + +
+ + + + -- GitLab