G-API: Introduce streaming::desync and infer(ROI)

- desync() is a new (and for now, the only one) intrinsic which splits the graph execution into asynchronous parts when running in Streaming mode; - desync() makes no effect when compiling in Traditional mode; - Added tests on desync() working in various scenarios; - Extended GStreamingExecutor to support desync(); also extended GStreamingCompiled() with a new version of pull() returning a vector of optional values; - Fixed various issues with storing the type information & proper construction callbacks for GArray<> and GOpaque; - Introduced a new infer(Roi,GMat) overload with a sample; - Introduced an internal API for Islands to control fusion procedure (to fuse or not to fuse); - Introduced handleStopStream() callback for island executables; - Added GCompileArgs to metadata of the graph (required for other features).

G-API: Introduce streaming::desync and infer(ROI)
- desync() is a new (and for now, the only one) intrinsic which splits the graph execution into asynchronous parts when running in Streaming mode; - desync() makes no effect when compiling in Traditional mode; - Added tests on desync() working in various scenarios; - Extended GStreamingExecutor to support desync(); also extended GStreamingCompiled() with a new version of pull() returning a vector of optional values; - Fixed various issues with storing the type information & proper construction callbacks for GArray<> and GOpaque; - Introduced a new infer(Roi,GMat) overload with a sample; - Introduced an internal API for Islands to control fusion procedure (to fuse or not to fuse); - Introduced handleStopStream() callback for island executables; - Added GCompileArgs to metadata of the graph (required for other features).
ca8bb8d0 · Dmitry Matveev · f345ed56 · ca8bb8d0 · ca8bb8d0 · ca8bb8d0
40 changed file
--- a/modules/gapi/CMakeLists.txt
+++ b/modules/gapi/CMakeLists.txt
@@ -74,6 +74,7 @@ set(gapi_srcs
    src/api/kernels_imgproc.cpp
    src/api/kernels_video.cpp
    src/api/kernels_nnparsers.cpp
+    src/api/kernels_streaming.cpp
    src/api/render.cpp
    src/api/render_ocv.cpp
    src/api/ginfer.cpp
@@ -97,6 +98,7 @@ set(gapi_srcs
    src/compiler/passes/pattern_matching.cpp
    src/compiler/passes/perform_substitution.cpp
    src/compiler/passes/streaming.cpp
+    src/compiler/passes/intrin.cpp

    # Executor
    src/executor/gexecutor.cpp

--- a/modules/gapi/include/opencv2/gapi.hpp
+++ b/modules/gapi/include/opencv2/gapi.hpp
@@ -33,4 +33,8 @@
 #include <opencv2/gapi/gkernel.hpp>
 #include <opencv2/gapi/operators.hpp>

+// Include this file here to avoid cyclic dependency between
+// Desync & GKernel & GComputation & GStreamingCompiled.
+#include <opencv2/gapi/streaming/desync.hpp>
+
 #endif // OPENCV_GAPI_HPP
--- a/modules/gapi/include/opencv2/gapi/garray.hpp
+++ b/modules/gapi/include/opencv2/gapi/garray.hpp
@@ -284,6 +284,14 @@ namespace detail
            return static_cast<VectorRefT<T>&>(*m_ref).rref();
        }

+        // Check if was created for/from std::vector<T>
+        template <typename T> bool holds() const
+        {
+            if (!m_ref) return false;
+            using U = typename std::decay<T>::type;
+            return dynamic_cast<VectorRefT<U>*>(m_ref.get()) != nullptr;
+        }
+
        void mov(VectorRef &v)
        {
            m_ref->mov(*v.m_ref);
@@ -341,15 +349,18 @@ public:
    explicit GArray(detail::GArrayU &&ref) // GArrayU-based constructor
        : m_ref(ref) { putDetails(); }     //   (used by GCall, not for users)

-    detail::GArrayU strip() const { return m_ref; }
-
-private:
-    static void VCTor(detail::VectorRef& vref) {
+    /// @private
+    detail::GArrayU strip() const {
+        return m_ref;
+    }
+    /// @private
+    static void VCtor(detail::VectorRef& vref) {
        vref.reset<HT>();
-        vref.storeKind<HT>();
    }
+
+private:
    void putDetails() {
-        m_ref.setConstructFcn(&VCTor);
+        m_ref.setConstructFcn(&VCtor);
        m_ref.specifyType<HT>();  // FIXME: to unify those 2 to avoid excessive dynamic_cast
        m_ref.storeKind<HT>();    //
    }

--- a/modules/gapi/include/opencv2/gapi/gkernel.hpp
+++ b/modules/gapi/include/opencv2/gapi/gkernel.hpp
@@ -28,6 +28,7 @@ namespace cv {

 using GShapes = std::vector<GShape>;
 using GKinds = std::vector<cv::detail::OpaqueKind>;
+using GCtors  = std::vector<detail::HostCtor>;

 // GKernel describes kernel API to the system
 // FIXME: add attributes of a kernel, (e.g. number and types
@@ -41,6 +42,7 @@ struct GAPI_EXPORTS GKernel
    M           outMeta;    // generic adaptor to API::outMeta(...)
    GShapes     outShapes;  // types (shapes) kernel's outputs
    GKinds      inKinds;    // kinds of kernel's inputs (fixme: below)
+    GCtors      outCtors;   // captured constructors for template output types
 };
 // TODO: It's questionable if inKinds should really be here. Instead,
 // this information could come from meta.
@@ -60,30 +62,27 @@ namespace detail
    // yield() is used in graph construction time as a generic method to obtain
    // lazy "return value" of G-API operations
    //
-    namespace
+    template<typename T> struct Yield;
+    template<> struct Yield<cv::GMat>
    {
-        template<typename T> struct Yield;
-        template<> struct Yield<cv::GMat>
-        {
-            static inline cv::GMat yield(cv::GCall &call, int i) { return call.yield(i); }
-        };
-        template<> struct Yield<cv::GMatP>
-        {
-            static inline cv::GMatP yield(cv::GCall &call, int i) { return call.yieldP(i); }
-        };
-        template<> struct Yield<cv::GScalar>
-        {
-            static inline cv::GScalar yield(cv::GCall &call, int i) { return call.yieldScalar(i); }
-        };
-        template<typename U> struct Yield<cv::GArray<U> >
-        {
-            static inline cv::GArray<U> yield(cv::GCall &call, int i) { return call.yieldArray<U>(i); }
-        };
-        template<typename U> struct Yield<cv::GOpaque<U> >
-        {
-            static inline cv::GOpaque<U> yield(cv::GCall &call, int i) { return call.yieldOpaque<U>(i); }
-        };
-    } // anonymous namespace
+        static inline cv::GMat yield(cv::GCall &call, int i) { return call.yield(i); }
+    };
+    template<> struct Yield<cv::GMatP>
+    {
+        static inline cv::GMatP yield(cv::GCall &call, int i) { return call.yieldP(i); }
+    };
+    template<> struct Yield<cv::GScalar>
+    {
+        static inline cv::GScalar yield(cv::GCall &call, int i) { return call.yieldScalar(i); }
+    };
+    template<typename U> struct Yield<cv::GArray<U> >
+    {
+        static inline cv::GArray<U> yield(cv::GCall &call, int i) { return call.yieldArray<U>(i); }
+    };
+    template<typename U> struct Yield<cv::GOpaque<U> >
+    {
+        static inline cv::GOpaque<U> yield(cv::GCall &call, int i) { return call.yieldOpaque<U>(i); }
+    };

    ////////////////////////////////////////////////////////////////////////////
    // Helper classes which brings outputMeta() marshalling to kernel
@@ -215,7 +214,8 @@ public:
                              , K::tag()
                              , &K::getOutMeta
                              , {detail::GTypeTraits<R>::shape...}
-                              , {detail::GTypeTraits<Args>::op_kind...}});
+                              , {detail::GTypeTraits<Args>::op_kind...}
+                              , {detail::GObtainCtor<R>::get()...}});
        call.pass(args...); // TODO: std::forward() here?
        return yield(call, typename detail::MkSeq<sizeof...(R)>::type());
    }
@@ -240,7 +240,8 @@ public:
                              , K::tag()
                              , &K::getOutMeta
                              , {detail::GTypeTraits<R>::shape}
-                              , {detail::GTypeTraits<Args>::op_kind...}});
+                              , {detail::GTypeTraits<Args>::op_kind...}
+                              , {detail::GObtainCtor<R>::get()}});
        call.pass(args...);
        return detail::Yield<R>::yield(call, 0);
    }
@@ -459,11 +460,6 @@ namespace gapi {
        std::vector<GTransform> m_transformations;

    protected:
-        /// @private
-        // Check if package contains ANY implementation of a kernel API
-        // by API textual id.
-        bool includesAPI(const std::string &id) const;
-
        /// @private
        // Remove ALL implementations of the given API (identified by ID)
        void removeAPI(const std::string &id);
@@ -566,6 +562,9 @@ namespace gapi {
            return includesAPI(KAPI::id());
        }

+        /// @private
+        bool includesAPI(const std::string &id) const;
+
        // FIXME: The below comment is wrong, and who needs this function?
        /**
         * @brief Find a kernel (by its API)

--- a/modules/gapi/include/opencv2/gapi/gopaque.hpp
+++ b/modules/gapi/include/opencv2/gapi/gopaque.hpp
@@ -295,25 +295,27 @@ namespace detail
 template<typename T> class GOpaque
 {
 public:
-    GOpaque() { putDetails(); }              // Empty constructor
-    explicit GOpaque(detail::GOpaqueU &&ref) // GOpaqueU-based constructor
-        : m_ref(ref) { putDetails(); }       // (used by GCall, not for users)
-
-    detail::GOpaqueU strip() const { return m_ref; }
-
-private:
    // Host type (or Flat type) - the type this GOpaque is actually
    // specified to.
    using HT = typename detail::flatten_g<util::decay_t<T>>::type;

-    static void CTor(detail::OpaqueRef& ref) {
+    GOpaque() { putDetails(); }              // Empty constructor
+    explicit GOpaque(detail::GOpaqueU &&ref) // GOpaqueU-based constructor
+        : m_ref(ref) { putDetails(); }       // (used by GCall, not for users)
+
+    /// @private
+    detail::GOpaqueU strip() const {
+        return m_ref;
+    }
+    /// @private
+    static void Ctor(detail::OpaqueRef& ref) {
        ref.reset<HT>();
-        ref.storeKind<HT>();
    }
+private:
    void putDetails() {
-        m_ref.setConstructFcn(&CTor);
-        m_ref.specifyType<HT>(); // FIXME: to unify those 2 to avoid excessive dynamic_cast
-        m_ref.storeKind<HT>();   //
+        m_ref.setConstructFcn(&Ctor);
+        m_ref.specifyType<HT>();
+        m_ref.storeKind<HT>();
    }

    detail::GOpaqueU m_ref;

--- a/modules/gapi/include/opencv2/gapi/gstreaming.hpp
+++ b/modules/gapi/include/opencv2/gapi/gstreaming.hpp
@@ -8,15 +8,99 @@
 #ifndef OPENCV_GAPI_GSTREAMING_COMPILED_HPP
 #define OPENCV_GAPI_GSTREAMING_COMPILED_HPP

+#include <memory>
 #include <vector>

 #include <opencv2/gapi/opencv_includes.hpp>
 #include <opencv2/gapi/own/assert.hpp>
+#include <opencv2/gapi/util/optional.hpp>
 #include <opencv2/gapi/garg.hpp>
 #include <opencv2/gapi/streaming/source.hpp>

 namespace cv {

+template<class T> using optional = cv::util::optional<T>;
+
+namespace detail {
+template<typename T> struct wref_spec {
+    using type = T;
+};
+template<typename T> struct wref_spec<std::vector<T> > {
+    using type = T;
+};
+
+template<typename RefHolder>
+struct OptRef {
+    struct OptHolder {
+        virtual void mov(RefHolder &h) = 0;
+        virtual void reset() = 0;
+        virtual ~OptHolder() = default;
+        using Ptr = std::shared_ptr<OptHolder>;
+    };
+    template<class T> struct Holder final: OptHolder {
+        std::reference_wrapper<cv::optional<T> > m_opt_ref;
+
+        explicit Holder(cv::optional<T>& opt) : m_opt_ref(std::ref(opt)) {
+        }
+        virtual void mov(RefHolder &h) override {
+            using U = typename wref_spec<T>::type;
+            m_opt_ref.get() = cv::util::make_optional(std::move(h.template wref<U>()));
+        }
+        virtual void reset() override {
+            m_opt_ref.get().reset();
+        }
+    };
+    template<class T>
+    explicit OptRef(cv::optional<T>& t) : m_opt{new Holder<T>(t)} {}
+    void mov(RefHolder &h) { m_opt->mov(h); }
+    void reset()           { m_opt->reset();}
+private:
+    typename OptHolder::Ptr m_opt;
+};
+using OptionalVectorRef = OptRef<cv::detail::VectorRef>;
+using OptionalOpaqueRef = OptRef<cv::detail::OpaqueRef>;
+} // namespace detail
+
+// TODO: Keep it in sync with GRunArgP (derive the type automatically?)
+using GOptRunArgP = util::variant<
+    optional<cv::Mat>*,
+    optional<cv::RMat>*,
+    optional<cv::Scalar>*,
+    cv::detail::OptionalVectorRef,
+    cv::detail::OptionalOpaqueRef
+>;
+using GOptRunArgsP = std::vector<GOptRunArgP>;
+
+namespace detail {
+
+template<typename T> inline GOptRunArgP wrap_opt_arg(optional<T>& arg) {
+    // By default, T goes to an OpaqueRef. All other types are specialized
+    return GOptRunArgP{OptionalOpaqueRef(arg)};
+}
+
+template<typename T> inline GOptRunArgP wrap_opt_arg(optional<std::vector<T> >& arg) {
+    return GOptRunArgP{OptionalVectorRef(arg)};
+}
+
+template<> inline GOptRunArgP wrap_opt_arg(optional<cv::Mat> &m) {
+    return GOptRunArgP{&m};
+}
+
+template<> inline GOptRunArgP wrap_opt_arg(optional<cv::Scalar> &s) {
+    return GOptRunArgP{&s};
+}
+
+} // namespace detail
+
+// Now cv::gout() may produce an empty vector (see "dynamic graphs"), so
+// there may be a conflict between these two. State here that Opt version
+// _must_ have at least one input for this overload
+template<typename T, typename... Ts>
+inline GOptRunArgsP gout(optional<T>&arg, optional<Ts>&... args)
+{
+    return GOptRunArgsP{ detail::wrap_opt_arg(arg), detail::wrap_opt_arg(args)... };
+}
+
 /**
 * \addtogroup gapi_main_classes
 * @{
@@ -169,6 +253,44 @@ public:
    // NB: Used from python
    GAPI_WRAP std::tuple<bool, cv::GRunArgs> pull();

+    /**
+     * @brief Get some next available data from the pipeline.
+     *
+     * This method takes a vector of cv::optional object. An object is
+     * assigned to some value if this value is available (ready) at
+     * the time of the call, and resets the object to empty() if it is
+     * not.
+     *
+     * This is a blocking method which guarantees that some data has
+     * been written to the output vector on return.
+     *
+     * Using this method only makes sense if the graph has
+     * desynchronized parts (see cv::gapi::desync). If there is no
+     * desynchronized parts in the graph, the behavior of this
+     * method is identical to the regular pull() (all data objects are
+     * produced synchronously in the output vector).
+     *
+     * Use gout() to create an output parameter vector.
+     *
+     * Output vectors must have the same number of elements as defined
+     * in the cv::GComputation protocol (at the moment of its
+     * construction). Shapes of elements also must conform to protocol
+     * (e.g. cv::optional<cv::Mat> needs to be passed where cv::GMat
+     * has been declared as output, and so on). Run-time exception is
+     * generated on type mismatch.
+     *
+     * This method writes new data into objects passed via output
+     * vector.  If there is no data ready yet, this method blocks. Use
+     * try_pull() if you need a non-blocking version.
+     *
+     * @param outs vector of output parameters to obtain.
+     * @return true if next result has been obtained,
+     *    false marks end of the stream.
+     *
+     * @sa cv::gapi::desync
+     */
+    bool pull(cv::GOptRunArgsP &&outs);
+
    /**
     * @brief Try to get the next processed frame from the pipeline.
     *

--- a/modules/gapi/include/opencv2/gapi/gtype_traits.hpp
+++ b/modules/gapi/include/opencv2/gapi/gtype_traits.hpp
@@ -191,6 +191,29 @@ namespace detail

    template<typename T> using wrap_gapi_helper = WrapValue<typename std::decay<T>::type>;
    template<typename T> using wrap_host_helper = WrapValue<typename std::decay<g_type_of_t<T> >::type>;
+
+// Union type for various user-defined type constructors (GArray<T>,
+// GOpaque<T>, etc)
+//
+// TODO: Replace construct-only API with a more generic one (probably
+//    with bits of introspection)
+//
+// Not required for non-user-defined types (GMat, GScalar, etc)
+using HostCtor = util::variant
+    < util::monostate
+    , detail::ConstructVec
+    , detail::ConstructOpaque
+    >;
+
+template<typename T> struct GObtainCtor {
+    static HostCtor get() { return HostCtor{}; }
+};
+template<typename T> struct GObtainCtor<GArray<T> > {
+    static HostCtor get() { return HostCtor{ConstructVec{&GArray<T>::VCtor}}; };
+};
+template<typename T> struct GObtainCtor<GOpaque<T> > {
+    static HostCtor get() { return HostCtor{ConstructOpaque{&GOpaque<T>::Ctor}}; };
+};
 } // namespace detail
 } // namespace cv


--- a/modules/gapi/include/opencv2/gapi/infer.hpp
+++ b/modules/gapi/include/opencv2/gapi/infer.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2019-2020 Intel Corporation


 #ifndef OPENCV_GAPI_INFER_HPP
@@ -77,6 +77,9 @@ public:

    using ResultL = std::tuple< cv::GArray<R>... >;
    using APIList = std::function<ResultL(cv::GArray<cv::Rect>, Args...)>;
+
+    // FIXME: Args... must be limited to a single GMat
+    using APIRoi = std::function<Result(cv::GOpaque<cv::Rect>, Args...)>;
 };

 // Single-return-value network definition (specialized base class)
@@ -92,6 +95,9 @@ public:

    using ResultL = cv::GArray<R>;
    using APIList = std::function<ResultL(cv::GArray<cv::Rect>, Args...)>;
+
+    // FIXME: Args... must be limited to a single GMat
+    using APIRoi = std::function<Result(cv::GOpaque<cv::Rect>, Args...)>;
 };

 // APIList2 is also template to allow different calling options
@@ -114,10 +120,10 @@ struct InferAPIList2 {
 // a particular backend, not by a network itself.
 struct GInferBase {
    static constexpr const char * id() {
-        return "org.opencv.dnn.infer";     // Universal stub
+        return "org.opencv.dnn.infer";            // Universal stub
    }
    static GMetaArgs getOutMeta(const GMetaArgs &, const GArgs &) {
-        return GMetaArgs{};                // One more universal stub
+        return GMetaArgs{};                       // One more universal stub
    }
 };

@@ -164,15 +170,25 @@ private:
    std::shared_ptr<Priv> m_priv;
 };
 /** @} */
+// Base "InferROI" kernel.
+// All notes from "Infer" kernel apply here as well.
+struct GInferROIBase {
+    static constexpr const char * id() {
+        return "org.opencv.dnn.infer-roi";        // Universal stub
+    }
+    static GMetaArgs getOutMeta(const GMetaArgs &, const GArgs &) {
+        return GMetaArgs{};                       // One more universal stub
+    }
+};

 // Base "Infer list" kernel.
 // All notes from "Infer" kernel apply here as well.
 struct GInferListBase {
    static constexpr const char * id() {
-        return "org.opencv.dnn.infer-roi";      // Universal stub
+        return "org.opencv.dnn.infer-roi-list-1"; // Universal stub
    }
    static GMetaArgs getOutMeta(const GMetaArgs &, const GArgs &) {
-        return GMetaArgs{};                     // One more universal stub
+        return GMetaArgs{};                       // One more universal stub
    }
 };

@@ -180,10 +196,10 @@ struct GInferListBase {
 // All notes from "Infer" kernel apply here as well.
 struct GInferList2Base {
    static constexpr const char * id() {
-        return "org.opencv.dnn.infer-roi-list"; // Universal stub
+        return "org.opencv.dnn.infer-roi-list-2"; // Universal stub
    }
    static GMetaArgs getOutMeta(const GMetaArgs &, const GArgs &) {
-        return GMetaArgs{};                     // One more universal stub
+        return GMetaArgs{};                       // One more universal stub
    }
 };

@@ -200,6 +216,19 @@ struct GInfer final
    static constexpr const char* tag() { return Net::tag(); }
 };

+// A specific roi-inference kernel. API (::on()) is fixed here and
+// verified against Net.
+template<typename Net>
+struct GInferROI final
+    : public GInferROIBase
+    , public detail::KernelTypeMedium< GInferROI<Net>
+                                     , typename Net::APIRoi > {
+    using GInferROIBase::getOutMeta; // FIXME: name lookup conflict workaround?
+
+    static constexpr const char* tag() { return Net::tag(); }
+};
+
+
 // A generic roi-list inference kernel. API (::on()) is derived from
 // the Net template parameter (see more in infer<> overload).
 template<typename Net>
@@ -238,6 +267,23 @@ struct GInferList2 final
 namespace cv {
 namespace gapi {

+/** @brief Calculates response for the specified network (template
+ *     parameter) for the specified region in the source image.
+ *     Currently expects a single-input network only.
+ *
+ * @tparam A network type defined with G_API_NET() macro.
+ * @param in input image where to take ROI from.
+ * @param roi an object describing the region of interest
+ *   in the source image. May be calculated in the same graph dynamically.
+ * @return an object of return type as defined in G_API_NET().
+ *   If a network has multiple return values (defined with a tuple), a tuple of
+ *   objects of appropriate type is returned.
+ * @sa  G_API_NET()
+ */
+template<typename Net>
+typename Net::Result infer(cv::GOpaque<cv::Rect> roi, cv::GMat in) {
+    return GInferROI<Net>::on(roi, in);
+}

 /** @brief Calculates responses for the specified network (template
 *     parameter) for every region in the source image.
@@ -328,7 +374,8 @@ infer(const std::string& tag, const GInferInputs& inputs)
                tag,
                GInferBase::getOutMeta,
                {}, // outShape will be filled later
-                std::move(kinds)
+                std::move(kinds),
+                {}, // outCtors will be filled later
            });

    call->setArgs(std::move(input_args));

--- a/modules/gapi/include/opencv2/gapi/streaming/desync.hpp
+++ b/modules/gapi/include/opencv2/gapi/streaming/desync.hpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2020 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GSTREAMING_DESYNC_HPP
+#define OPENCV_GAPI_GSTREAMING_DESYNC_HPP
+
+#include <tuple>
+
+#include <opencv2/gapi/util/util.hpp>
+#include <opencv2/gapi/gtype_traits.hpp>
+#include <opencv2/gapi/garg.hpp>
+#include <opencv2/gapi/gcall.hpp>
+#include <opencv2/gapi/gkernel.hpp>
+
+namespace cv {
+namespace gapi {
+namespace streaming {
+
+namespace detail {
+struct GDesync {
+    static const char *id() {
+        return "org.opencv.streaming.desync";
+    }
+
+    // An universal yield for desync.
+    // Yields output objects according to the input Types...
+    // Reuses gkernel machinery.
+    // FIXME: This function can be generic and declared in gkernel.hpp
+    //        (it is there already, but a part of GKernelType[M]
+    template<typename... R, int... IIs>
+    static std::tuple<R...> yield(cv::GCall &call, cv::detail::Seq<IIs...>) {
+        return std::make_tuple(cv::detail::Yield<R>::yield(call, IIs)...);
+    }
+};
+
+template<typename G>
+G desync(const G &g) {
+    cv::GKernel k{
+          GDesync::id()                                     // kernel id
+        , ""                                                // kernel tag
+        , [](const GMetaArgs &a, const GArgs &) {return a;} // outMeta callback
+        , {cv::detail::GTypeTraits<G>::shape}               // output Shape
+        , {cv::detail::GTypeTraits<G>::op_kind}             // input data kinds
+        , {cv::detail::GObtainCtor<G>::get()}               // output template ctors
+    };
+    cv::GCall call(std::move(k));
+    call.pass(g);
+    return std::get<0>(GDesync::yield<G>(call, cv::detail::MkSeq<1>::type()));
+}
+} // namespace detail
+
+/**
+ * @brief Starts a desynchronized branch in the graph.
+ *
+ * This operation takes a single G-API data object and returns a
+ * graph-level "duplicate" of this object.
+ *
+ * Operations which use this data object can be desynchronized
+ * from the rest of the graph.
+ *
+ * This operation has no effect when a GComputation is compiled with
+ * regular cv::GComputation::compile(), since cv::GCompiled objects
+ * always produce their full output vectors.
+ *
+ * This operation only makes sense when a GComputation is compiled in
+ * straming mode with cv::GComputation::compileStreaming(). If this
+ * operation is used and there are desynchronized outputs, the user
+ * should use a special version of cv::GStreamingCompiled::pull()
+ * which produces an array of cv::util::optional<> objects.
+ *
+ * @note This feature is highly experimental now and is currently
+ * limited to a single GMat argument only.
+ */
+GAPI_EXPORTS GMat desync(const GMat &g);
+
+} // namespace streaming
+} // namespace gapi
+} // namespace cv
+
+#endif // OPENCV_GAPI_GSTREAMING_DESYNC_HPP
--- a/modules/gapi/samples/infer_single_roi.cpp
+++ b/modules/gapi/samples/infer_single_roi.cpp
+#include <algorithm>
+#include <iostream>
+#include <sstream>
+
+#include <opencv2/imgproc.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/gapi.hpp>
+#include <opencv2/gapi/core.hpp>
+#include <opencv2/gapi/imgproc.hpp>
+#include <opencv2/gapi/infer.hpp>
+#include <opencv2/gapi/render.hpp>
+#include <opencv2/gapi/infer/ie.hpp>
+#include <opencv2/gapi/cpu/gcpukernel.hpp>
+#include <opencv2/gapi/streaming/cap.hpp>
+#include <opencv2/highgui.hpp>
+
+const std::string keys =
+    "{ h help |                              | Print this help message }"
+    "{ input  |                              | Path to the input video file }"
+    "{ facem  | face-detection-adas-0001.xml | Path to OpenVINO IE face detection model (.xml) }"
+    "{ faced  | CPU                          | Target device for face detection model (e.g. CPU, GPU, VPU, ...) }"
+    "{ r roi  | -1,-1,-1,-1                  | Region of interest (ROI) to use for inference. Identified automatically when not set }";
+
+namespace {
+
+std::string weights_path(const std::string &model_path) {
+    const auto EXT_LEN = 4u;
+    const auto sz = model_path.size();
+    CV_Assert(sz > EXT_LEN);
+
+    auto ext = model_path.substr(sz - EXT_LEN);
+    std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c){
+            return static_cast<unsigned char>(std::tolower(c));
+        });
+    CV_Assert(ext == ".xml");
+    return model_path.substr(0u, sz - EXT_LEN) + ".bin";
+}
+
+cv::util::optional<cv::Rect> parse_roi(const std::string &rc) {
+    cv::Rect rv;
+    char delim[3];
+
+    std::stringstream is(rc);
+    is >> rv.x >> delim[0] >> rv.y >> delim[1] >> rv.width >> delim[2] >> rv.height;
+    if (is.bad()) {
+        return cv::util::optional<cv::Rect>(); // empty value
+    }
+    const auto is_delim = [](char c) {
+        return c == ',';
+    };
+    if (!std::all_of(std::begin(delim), std::end(delim), is_delim)) {
+        return cv::util::optional<cv::Rect>(); // empty value
+
+    }
+    if (rv.x < 0 || rv.y < 0 || rv.width <= 0 || rv.height <= 0) {
+        return cv::util::optional<cv::Rect>(); // empty value
+    }
+    return cv::util::make_optional(std::move(rv));
+}
+
+} // namespace
+
+namespace custom {
+
+G_API_NET(FaceDetector,   <cv::GMat(cv::GMat)>, "face-detector");
+
+using GDetections = cv::GArray<cv::Rect>;
+using GRect       = cv::GOpaque<cv::Rect>;
+using GSize       = cv::GOpaque<cv::Size>;
+using GPrims      = cv::GArray<cv::gapi::wip::draw::Prim>;
+
+G_API_OP(GetSize, <GSize(cv::GMat)>, "sample.custom.get-size") {
+    static cv::GOpaqueDesc outMeta(const cv::GMatDesc &) {
+        return cv::empty_gopaque_desc();
+    }
+};
+
+G_API_OP(LocateROI, <GRect(cv::GMat)>, "sample.custom.locate-roi") {
+    static cv::GOpaqueDesc outMeta(const cv::GMatDesc &) {
+        return cv::empty_gopaque_desc();
+    }
+};
+
+G_API_OP(ParseSSD, <GDetections(cv::GMat, GRect, GSize)>, "sample.custom.parse-ssd") {
+    static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GOpaqueDesc &, const cv::GOpaqueDesc &) {
+        return cv::empty_array_desc();
+    }
+};
+
+G_API_OP(BBoxes, <GPrims(GDetections, GRect)>, "sample.custom.b-boxes") {
+    static cv::GArrayDesc outMeta(const cv::GArrayDesc &, const cv::GOpaqueDesc &) {
+        return cv::empty_array_desc();
+    }
+};
+
+GAPI_OCV_KERNEL(OCVGetSize, GetSize) {
+    static void run(const cv::Mat &in, cv::Size &out) {
+        out = {in.cols, in.rows};
+    }
+};
+
+GAPI_OCV_KERNEL(OCVLocateROI, LocateROI) {
+    // This is the place where we can run extra analytics
+    // on the input image frame and select the ROI (region
+    // of interest) where we want to detect our objects (or
+    // run any other inference).
+    //
+    // Currently it doesn't do anything intelligent,
+    // but only crops the input image to square (this is
+    // the most convenient aspect ratio for detectors to use)
+
+    static void run(const cv::Mat &in_mat, cv::Rect &out_rect) {
+
+        // Identify the central point & square size (- some padding)
+        const auto center = cv::Point{in_mat.cols/2, in_mat.rows/2};
+        auto sqside = std::min(in_mat.cols, in_mat.rows);
+
+        // Now build the central square ROI
+        out_rect = cv::Rect{ center.x - sqside/2
+                           , center.y - sqside/2
+                           , sqside
+                           , sqside
+                           };
+    }
+};
+
+GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) {
+    static void run(const cv::Mat &in_ssd_result,
+                    const cv::Rect &in_roi,
+                    const cv::Size &in_parent_size,
+                    std::vector<cv::Rect> &out_objects) {
+        const auto &in_ssd_dims = in_ssd_result.size;
+        CV_Assert(in_ssd_dims.dims() == 4u);
+
+        const int MAX_PROPOSALS = in_ssd_dims[2];
+        const int OBJECT_SIZE   = in_ssd_dims[3];
+        CV_Assert(OBJECT_SIZE  == 7); // fixed SSD object size
+
+        const cv::Size up_roi = in_roi.size();
+        const cv::Rect surface({0,0}, in_parent_size);
+
+        out_objects.clear();
+
+        const float *data = in_ssd_result.ptr<float>();
+        for (int i = 0; i < MAX_PROPOSALS; i++) {
+            const float image_id   = data[i * OBJECT_SIZE + 0];
+            const float label      = data[i * OBJECT_SIZE + 1];
+            const float confidence = data[i * OBJECT_SIZE + 2];
+            const float rc_left    = data[i * OBJECT_SIZE + 3];
+            const float rc_top     = data[i * OBJECT_SIZE + 4];
+            const float rc_right   = data[i * OBJECT_SIZE + 5];
+            const float rc_bottom  = data[i * OBJECT_SIZE + 6];
+            (void) label; // unused
+
+            if (image_id < 0.f) {
+                break;    // marks end-of-detections
+            }
+            if (confidence < 0.5f) {
+                continue; // skip objects with low confidence
+            }
+
+            // map relative coordinates to the original image scale
+            // taking the ROI into account
+            cv::Rect rc;
+            rc.x      = static_cast<int>(rc_left   * up_roi.width);
+            rc.y      = static_cast<int>(rc_top    * up_roi.height);
+            rc.width  = static_cast<int>(rc_right  * up_roi.width)  - rc.x;
+            rc.height = static_cast<int>(rc_bottom * up_roi.height) - rc.y;
+            rc.x += in_roi.x;
+            rc.y += in_roi.y;
+            out_objects.emplace_back(rc & surface);
+        }
+    }
+};
+
+GAPI_OCV_KERNEL(OCVBBoxes, BBoxes) {
+    // This kernel converts the rectangles into G-API's
+    // rendering primitives
+    static void run(const std::vector<cv::Rect> &in_face_rcs,
+                    const             cv::Rect  &in_roi,
+                          std::vector<cv::gapi::wip::draw::Prim> &out_prims) {
+        out_prims.clear();
+        const auto cvt = [](const cv::Rect &rc, const cv::Scalar &clr) {
+            return cv::gapi::wip::draw::Rect(rc, clr, 2);
+        };
+        out_prims.emplace_back(cvt(in_roi, CV_RGB(0,255,255))); // cyan
+        for (auto &&rc : in_face_rcs) {
+            out_prims.emplace_back(cvt(rc, CV_RGB(0,255,0)));   // green
+        }
+    }
+};
+
+} // namespace custom
+
+int main(int argc, char *argv[])
+{
+    cv::CommandLineParser cmd(argc, argv, keys);
+    if (cmd.has("help")) {
+        cmd.printMessage();
+        return 0;
+    }
+
+    // Prepare parameters first
+    const std::string input = cmd.get<std::string>("input");
+    const auto opt_roi = parse_roi(cmd.get<std::string>("roi"));
+
+    const auto face_model_path = cmd.get<std::string>("facem");
+    auto face_net = cv::gapi::ie::Params<custom::FaceDetector> {
+        face_model_path,                 // path to topology IR
+        weights_path(face_model_path),   // path to weights
+        cmd.get<std::string>("faced"),   // device specifier
+    };
+    auto kernels = cv::gapi::kernels
+        < custom::OCVGetSize
+        , custom::OCVLocateROI
+        , custom::OCVParseSSD
+        , custom::OCVBBoxes>();
+    auto networks = cv::gapi::networks(face_net);
+
+    // Now build the graph. The graph structure may vary
+    // pased on the input parameters
+    cv::GStreamingCompiled pipeline;
+    auto inputs = cv::gin(cv::gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(input));
+
+    if (opt_roi.has_value()) {
+        // Use the value provided by user
+        std::cout << "Will run inference for static region "
+                  << opt_roi.value()
+                  << " only"
+                  << std::endl;
+        cv::GMat in;
+        cv::GOpaque<cv::Rect> in_roi;
+        auto blob = cv::gapi::infer<custom::FaceDetector>(in_roi, in);
+        auto  rcs = custom::ParseSSD::on(blob, in_roi, custom::GetSize::on(in));
+        auto  out = cv::gapi::wip::draw::render3ch(in, custom::BBoxes::on(rcs, in_roi));
+        pipeline  = cv::GComputation(cv::GIn(in, in_roi), cv::GOut(out))
+            .compileStreaming(cv::compile_args(kernels, networks));
+
+        // Since the ROI to detect is manual, make it part of the input vector
+        inputs.push_back(cv::gin(opt_roi.value())[0]);
+    } else {
+        // Automatically detect ROI to infer. Make it output parameter
+        std::cout << "ROI is not set or invalid. Locating it automatically"
+                  << std::endl;
+        cv::GMat in;
+        cv::GOpaque<cv::Rect> roi = custom::LocateROI::on(in);
+        auto blob = cv::gapi::infer<custom::FaceDetector>(roi, in);
+        auto  rcs = custom::ParseSSD::on(blob, roi, custom::GetSize::on(in));
+        auto  out = cv::gapi::wip::draw::render3ch(in, custom::BBoxes::on(rcs, roi));
+        pipeline  = cv::GComputation(cv::GIn(in), cv::GOut(out))
+            .compileStreaming(cv::compile_args(kernels, networks));
+    }
+
+    // The execution part
+    pipeline.setSource(std::move(inputs));
+    pipeline.start();
+
+    cv::Mat out;
+    while (pipeline.pull(cv::gout(out))) {
+        cv::imshow("Out", out);
+        cv::waitKey(1);
+    }
+    return 0;
+}
--- a/modules/gapi/src/api/gbackend.cpp
+++ b/modules/gapi/src/api/gbackend.cpp
@@ -67,6 +67,21 @@ cv::gapi::GKernelPackage cv::gapi::GBackend::Priv::auxiliaryKernels() const
    return {};
 }

+bool cv::gapi::GBackend::Priv::controlsMerge() const
+{
+    return false;
+}
+
+bool cv::gapi::GBackend::Priv::allowsMerge(const cv::gimpl::GIslandModel::Graph &,
+                                           const ade::NodeHandle &,
+                                           const ade::NodeHandle &,
+                                           const ade::NodeHandle &) const
+{
+    GAPI_Assert(controlsMerge());
+    return true;
+}
+
+
 // GBackend public implementation //////////////////////////////////////////////
 cv::gapi::GBackend::GBackend()
 {

--- a/modules/gapi/src/api/gbackend_priv.hpp
+++ b/modules/gapi/src/api/gbackend_priv.hpp
@@ -19,7 +19,7 @@
 #include "opencv2/gapi/gkernel.hpp"

 #include "compiler/gmodel.hpp"
-
+#include "compiler/gislandmodel.hpp"

 namespace cv
 {
@@ -68,6 +68,22 @@ public:

    virtual cv::gapi::GKernelPackage auxiliaryKernels() const;

+    // Ask backend if it has a custom control over island fusion process
+    // This method is quite redundant but there's nothing better fits
+    // the current fusion process. By default, [existing] backends don't
+    // control the merge.
+    // FIXME: Refactor to a single entity?
+    virtual bool controlsMerge() const;
+
+    // Ask backend if it is ok to merge these two islands connected
+    // via a data slot. By default, [existing] backends allow to merge everything.
+    // FIXME: Refactor to a single entity?
+    // FIXME: Strip down the type details form graph? (make it ade::Graph?)
+    virtual bool allowsMerge(const cv::gimpl::GIslandModel::Graph &g,
+                             const ade::NodeHandle &a_nh,
+                             const ade::NodeHandle &slot_nh,
+                             const ade::NodeHandle &b_nh) const;
+
    virtual ~Priv() = default;
 };


--- a/modules/gapi/src/api/ginfer.cpp
+++ b/modules/gapi/src/api/ginfer.cpp
@@ -70,7 +70,10 @@ cv::GMat cv::GInferOutputs::at(const std::string& name)
    auto it = m_priv->out_blobs.find(name);
    if (it == m_priv->out_blobs.end()) {
        // FIXME: Avoid modifying GKernel
+        // Expect output to be always GMat
        m_priv->call->kernel().outShapes.push_back(cv::GShape::GMAT);
+        // ...so _empty_ constructor is passed here.
+        m_priv->call->kernel().outCtors.emplace_back(cv::util::monostate{});
        int out_idx = static_cast<int>(m_priv->out_blobs.size());
        it = m_priv->out_blobs.emplace(name, m_priv->call->yield(out_idx)).first;
        m_priv->info->out_names.push_back(name);

--- a/modules/gapi/src/api/kernels_streaming.cpp
+++ b/modules/gapi/src/api/kernels_streaming.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2020 Intel Corporation
+
+#include "precomp.hpp"
+
+#include <opencv2/gapi/streaming/desync.hpp>
+#include <opencv2/gapi/core.hpp>
+
+cv::GMat cv::gapi::streaming::desync(const cv::GMat &g) {
+    // FIXME: this is a limited implementation of desync
+    // The real implementation must be generic (template) and
+    // reside in desync.hpp (and it is detail::desync<>())
+
+    // FIXME: Put a copy here to solve the below problem
+    // FIXME: Because of the copy, the desync functionality is limited
+    // to GMat only (we don't have generic copy kernel for other
+    // object types)
+    return cv::gapi::copy(detail::desync(g));
+
+    // FIXME
+    //
+    // If consumed by multiple different islands (OCV and Fluid by
+    // example, an object needs to be desynchronized individually
+    // for every path.
+    //
+    // This is a limitation of the current implementation. It works
+    // this way: every "desync" link from the main path to a new
+    // desync path gets its "DesyncQueue" object which stores only the
+    // last value written before of the desync object (DO) it consumes
+    // (the container of type "last written value" or LWV.
+    //
+    //                         LWV
+    // [Sync path] -> desync() - - > DO -> [ISL0 @ Desync path #1]
+    //
+    // At the same time, generally, every island in the streaming
+    // graph gets its individual input as a queue (so normally, a
+    // writer pushes the same output MULTIPLE TIMES if it has mutliple
+    // readers):
+    //
+    //                         LWV
+    // [Sync path] -> desync() - - > DO1 -> [ISL0 @ Desync path #1]
+    //                       : LWV
+    //                       ' - - > DO2 -> [ISL1 @ Desync path #1]
+    //
+    // For users, it may seem legit to use desync here only once, and
+    // it MUST BE legit once the problem is fixed.
+    // But the problem with the current implementation is that islands
+    // on the same desync path get different desync queues and in fact
+    // stay desynchronized between each other. One shouldn't consider
+    // this as a single desync path anymore.
+    // If these two ISLs are then merged e.g. with add(a,b), the
+    // results will be inconsistent, given that the latency of ISL0
+    // and ISL1 may be different. This is not the same frame anymore
+    // coming as `a` and `b` to add(a,b) because of it.
+    //
+    // To make things clear, we forbid this now and ask to call
+    // desync one more time to allow that. It is bad since the graph
+    // structure and island layout depends on kernel packages used,
+    // not on the sole GComputation structure. This needs to be fixed!
+    // Here's the working configuration:
+    //
+    //                         LWV
+    // [Sync path] -> desync() - - > DO1 -> [ISL0 @ Desync path #1]
+    //            :            LWV
+    //            '-> desync() - - > DO2 -> [ISL1 @ Desync path #2] <-(!)
+    //
+    // Put an operation right after desync() is a quick workaround to
+    // this synchronization problem. There will be one "last_written_value"
+    // connected to a desynchronized data object, and this sole last_written_value
+    // object will feed both branches of the streaming executable.
+}
--- a/modules/gapi/src/backends/ie/giebackend.cpp
+++ b/modules/gapi/src/backends/ie/giebackend.cpp
@@ -519,6 +519,65 @@ struct Infer: public cv::detail::KernelTag {
    }
 };

+struct InferROI: public cv::detail::KernelTag {
+    using API = cv::GInferROIBase;
+    static cv::gapi::GBackend backend()  { return cv::gapi::ie::backend(); }
+    static KImpl kernel()                { return KImpl{outMeta, run}; }
+
+    static cv::GMetaArgs outMeta(const ade::Graph      &gr,
+                                 const ade::NodeHandle &nh,
+                                 const cv::GMetaArgs   &in_metas,
+                                 const cv::GArgs       &/*in_args*/) {
+        cv::GMetaArgs result;
+
+        GConstGIEModel gm(gr);
+        const auto &uu = gm.metadata(nh).get<IEUnit>();
+
+        // Initialize input information
+        // FIXME: So far it is pretty limited
+        GAPI_Assert(1u == uu.params.input_names.size());
+        GAPI_Assert(2u == in_metas.size());
+
+        // 0th is ROI, 1st is in0put image
+        auto       &&ii = uu.inputs.at(uu.params.input_names.at(0));
+        const auto &meta = util::get<cv::GMatDesc>(in_metas.at(1));
+        ii->setPrecision(toIE(meta.depth));
+        ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR);
+
+        // FIXME: It would be nice here to have an exact number of network's
+        // input/output parameters. Probably GCall should store it here for us.
+        // It doesn't, as far as I know..
+        for (const auto &out_name : uu.params.output_names) {
+            // NOTE: our output_names vector follows the API order
+            // of this operation's outputs
+            const IE::DataPtr& ie_out = uu.outputs.at(out_name);
+            const IE::SizeVector dims = ie_out->getTensorDesc().getDims();
+
+            cv::GMatDesc outm(toCV(ie_out->getPrecision()),
+                              toCV(ie_out->getTensorDesc().getDims()));
+            result.emplace_back(outm);
+        }
+        return result;
+    }
+
+    static void run(IECompiled &iec, const IEUnit &uu, IECallContext &ctx) {
+        // non-generic version for now, per the InferROI's definition
+        GAPI_Assert(uu.params.num_in == 1);
+        const auto& this_roi = ctx.inArg<cv::detail::OpaqueRef>(0).rref<cv::Rect>();
+        const auto  this_mat = ctx.inMat(1);
+        IE::Blob::Ptr this_blob = wrapIE(this_mat, cv::gapi::ie::TraitAs::IMAGE);
+        IE::Blob::Ptr roi_blob = IE::make_shared_blob(this_blob, toIE(this_roi));
+        iec.this_request.SetBlob(*uu.params.input_names.begin(), roi_blob);
+        iec.this_request.Infer();
+        for (auto i : ade::util::iota(uu.params.num_out)) {
+            cv::Mat& out_mat = ctx.outMatR(i);
+            IE::Blob::Ptr out_blob = iec.this_request.GetBlob(uu.params.output_names[i]);
+            copyFromIE(out_blob, out_mat);
+        }
+    }
+};
+
+
 struct InferList: public cv::detail::KernelTag {
    using API = cv::GInferListBase;
    static cv::gapi::GBackend backend()  { return cv::gapi::ie::backend(); }
@@ -780,6 +839,7 @@ namespace {

        virtual cv::gapi::GKernelPackage auxiliaryKernels() const override {
            return cv::gapi::kernels< cv::gimpl::ie::Infer
+                                    , cv::gimpl::ie::InferROI
                                    , cv::gimpl::ie::InferList
                                    , cv::gimpl::ie::InferList2
                                    >();

--- a/modules/gapi/src/backends/ocl/goclbackend.cpp
+++ b/modules/gapi/src/backends/ocl/goclbackend.cpp
@@ -272,4 +272,8 @@ void cv::gimpl::GOCLExecutable::run(std::vector<InObj>  &&input_objs,
            GAPI_Assert((out_arg_data == (mag_mat.getMat(ACCESS_RW).data)) && " data for output parameters was reallocated ?");
        }
    }
+
+    // In/Out args clean-up is mandatory now with RMat
+    for (auto &it : input_objs) magazine::unbind(m_res, it.first);
+    for (auto &it : output_objs) magazine::unbind(m_res, it.first);
 }
--- a/modules/gapi/src/compiler/gcompiler.cpp
+++ b/modules/gapi/src/compiler/gcompiler.cpp
@@ -238,6 +238,11 @@ cv::gimpl::GCompiler::GCompiler(const cv::GComputation &c,
                                                      // (no compound backend present here)
    m_e.addPass("kernels", "check_islands_content", passes::checkIslandsContent);

+    // Special stage for intrinsics handling
+    m_e.addPassStage("intrin");
+    m_e.addPass("intrin", "desync",         passes::intrinDesync);
+    m_e.addPass("intrin", "finalizeIntrin", passes::intrinFinalize);
+
    //Input metas may be empty when a graph is compiled for streaming
    m_e.addPassStage("meta");
    if (!m_metas.empty())
@@ -384,6 +389,9 @@ cv::gimpl::GCompiler::GPtr cv::gimpl::GCompiler::generateGraph()
    {
        GModel::Graph(*g).metadata().set(OriginalInputMeta{m_metas});
    }
+    // FIXME: remove m_args, remove GCompileArgs from backends' method signatures,
+    // rework backends to access GCompileArgs from graph metadata
+    GModel::Graph(*g).metadata().set(CompileArgs{m_args});
    return g;
 }


--- a/modules/gapi/src/compiler/gislandmodel.cpp
+++ b/modules/gapi/src/compiler/gislandmodel.cpp
@@ -175,13 +175,26 @@ void GIslandModel::generateInitial(GIslandModel::Graph &g,
        {
            auto src_data_nh = in_edge->srcNode();
            auto isl_slot_nh = data_to_slot.at(src_data_nh);
-            g.link(isl_slot_nh, nh); // no other data stored yet
+            auto isl_new_eh  = g.link(isl_slot_nh, nh); // no other data stored yet
+            // Propagate some special metadata from the GModel to GIslandModel
+            // TODO: Make it a single place (a function) for both inputs/outputs?
+            // (since it is duplicated in the below code block)
+            if (src_g.metadata(in_edge).contains<DesyncEdge>())
+            {
+                const auto idx = src_g.metadata(in_edge).get<DesyncEdge>().index;
+                g.metadata(isl_new_eh).set(DesyncIslEdge{idx});
+            }
        }
        for (auto out_edge : src_op_nh->outEdges())
        {
            auto dst_data_nh = out_edge->dstNode();
            auto isl_slot_nh = data_to_slot.at(dst_data_nh);
-            g.link(nh, isl_slot_nh);
+            auto isl_new_eh  = g.link(nh, isl_slot_nh);
+            if (src_g.metadata(out_edge).contains<DesyncEdge>())
+            {
+                const auto idx = src_g.metadata(out_edge).get<DesyncEdge>().index;
+                g.metadata(isl_new_eh).set(DesyncIslEdge{idx});
+            }
        }
    } // for(all_operations)
 }
@@ -254,6 +267,9 @@ void GIslandModel::syncIslandTags(Graph &g, ade::Graph &orig_g)
 void GIslandModel::compileIslands(Graph &g, const ade::Graph &orig_g, const GCompileArgs &args)
 {
    GModel::ConstGraph gm(orig_g);
+    if (gm.metadata().contains<HasIntrinsics>()) {
+        util::throw_error(std::logic_error("FATAL: The graph has unresolved intrinsics"));
+    }

    auto original_sorted = gm.metadata().get<ade::passes::TopologicalSortData>();
    for (auto nh : g.nodes())

--- a/modules/gapi/src/compiler/gislandmodel.hpp
+++ b/modules/gapi/src/compiler/gislandmodel.hpp
@@ -142,6 +142,14 @@ public:
    // at that stage.
    virtual void handleNewStream() {}; // do nothing here by default

+    // This method is called for every IslandExecutable when
+    // the stream-based execution is stopped.
+    // All processing is guaranteed to be stopped by this moment,
+    // with no pending or running 'run()' processes ran in background.
+    // FIXME: This method is tightly bound to the GStreamingExecutor
+    // now.
+    virtual void handleStopStream() {} // do nothing here by default
+
    virtual ~GIslandExecutable() = default;
 };

@@ -222,8 +230,19 @@ struct IslandsCompiled
    static const char *name() { return "IslandsCompiled"; }
 };

+// This flag marks an edge in an GIslandModel as "desynchronized"
+// i.e. it starts a new desynchronized subgraph
+struct DesyncIslEdge
+{
+    static const char *name() { return "DesynchronizedIslandEdge"; }
+
+    // Projection from GModel/DesyncEdge.index
+    int index;
+};
+
 namespace GIslandModel
 {
+
    using Graph = ade::TypedGraph
        < NodeKind
        , FusedIsland
@@ -232,6 +251,7 @@ namespace GIslandModel
        , Emitter
        , Sink
        , IslandsCompiled
+        , DesyncIslEdge
        , ade::passes::TopologicalSortData
        >;

@@ -244,6 +264,7 @@ namespace GIslandModel
        , Emitter
        , Sink
        , IslandsCompiled
+        , DesyncIslEdge
        , ade::passes::TopologicalSortData
        >;


--- a/modules/gapi/src/compiler/gmodel.cpp
+++ b/modules/gapi/src/compiler/gmodel.cpp
@@ -77,7 +77,7 @@ ade::NodeHandle GModel::mkDataNode(GModel::Graph &g, const GShape shape)
    return data_h;
 }

-void GModel::linkIn(Graph &g, ade::NodeHandle opH, ade::NodeHandle objH, std::size_t in_port)
+ade::EdgeHandle GModel::linkIn(Graph &g, ade::NodeHandle opH, ade::NodeHandle objH, std::size_t in_port)
 {
    // Check if input is already connected
    for (const auto& in_e : opH->inEdges())
@@ -96,9 +96,11 @@ void GModel::linkIn(Graph &g, ade::NodeHandle opH, ade::NodeHandle objH, std::si

    // Replace an API object with a REF (G* -> GOBJREF)
    op.args[in_port] = cv::GArg(RcDesc{gm.rc, gm.shape, {}});
+
+    return eh;
 }

-void GModel::linkOut(Graph &g, ade::NodeHandle opH, ade::NodeHandle objH, std::size_t out_port)
+ade::EdgeHandle GModel::linkOut(Graph &g, ade::NodeHandle opH, ade::NodeHandle objH, std::size_t out_port)
 {
    // FIXME: check validity using kernel prototype

@@ -121,6 +123,8 @@ void GModel::linkOut(Graph &g, ade::NodeHandle opH, ade::NodeHandle objH, std::s
    const auto min_out_size = std::max(op.outs.size(), storage_with_port);
    op.outs.resize(min_out_size, RcDesc{-1,GShape::GMAT,{}}); // FIXME: Invalid shape instead?
    op.outs[out_port] = RcDesc{gm.rc, gm.shape, {}};
+
+    return eh;
 }

 std::vector<ade::NodeHandle> GModel::orderedInputs(const ConstGraph &g, ade::NodeHandle nh)
@@ -210,26 +214,29 @@ ade::NodeHandle GModel::detail::dataNodeOf(const ConstLayoutGraph &g, const GOri
    return g.metadata().get<Layout>().object_nodes.at(origin);
 }

-void GModel::redirectReaders(Graph &g, ade::NodeHandle from, ade::NodeHandle to)
+std::vector<ade::EdgeHandle> GModel::redirectReaders(Graph &g, ade::NodeHandle from, ade::NodeHandle to)
 {
    std::vector<ade::EdgeHandle> ehh(from->outEdges().begin(), from->outEdges().end());
+    std::vector<ade::EdgeHandle> ohh;
+    ohh.reserve(ehh.size());
    for (auto e : ehh)
    {
        auto dst = e->dstNode();
        auto input = g.metadata(e).get<Input>();
        g.erase(e);
-        linkIn(g, dst, to, input.port);
+        ohh.push_back(linkIn(g, dst, to, input.port));
    }
+    return ohh;
 }

-void GModel::redirectWriter(Graph &g, ade::NodeHandle from, ade::NodeHandle to)
+ade::EdgeHandle GModel::redirectWriter(Graph &g, ade::NodeHandle from, ade::NodeHandle to)
 {
    GAPI_Assert(from->inEdges().size() == 1);
    auto e = from->inEdges().front();
    auto op = e->srcNode();
    auto output = g.metadata(e).get<Output>();
    g.erase(e);
-    linkOut(g, op, to, output.port);
+    return linkOut(g, op, to, output.port);
 }

 GMetaArgs GModel::collectInputMeta(const GModel::ConstGraph &cg, ade::NodeHandle node)

--- a/modules/gapi/src/compiler/gmodel.hpp
+++ b/modules/gapi/src/compiler/gmodel.hpp
@@ -211,6 +211,58 @@ struct CustomMetaFunction
    CM customOutMeta;
 };

+// This is a general flag indicating that this GModel has intrinsics.
+// In the beginning of the compilation, it is a quick check to
+// indicate there are intrinsics.
+//
+// In the end of the compilation, having this flag is fatal -- all
+// intrinsics must be resolved.
+struct HasIntrinsics
+{
+    static const char *name() { return "HasIntrinsicsFlag"; }
+};
+
+// This is a special tag for both DATA and OP nodes indicating
+// which desynchronized path this node belongs to.
+// This tag is set by a special complex pass intrinDesync/accept.
+struct DesyncPath
+{
+    static const char *name() { return "DesynchronizedPath"; }
+
+    // A zero-based index of the desynchronized path in the graph.
+    // Set by intrinDesync() compiler pass
+    int index;
+};
+
+// This is a special tag for graph Edges indicating that this
+// particular edge starts a desynchronized path in the graph.
+// At the execution stage, the data coming "through" these edges
+// (virtually, of course, since our GModel edges never transfer the
+// actual data, they just represent these transfers) is desynchronized
+// from the rest of the pipeline, i.e. may be "lost" (stay unconsumed
+// and then overwritten with some new data when streaming).
+struct DesyncEdge
+{
+    static const char *name() { return "DesynchronizedEdge"; }
+
+    // A zero-based index of the desynchronized path in the graph.
+    // Set by intrinDesync/apply() compiler pass
+    int index;
+};
+
+// This flag marks the island graph as "desynchronized"
+struct Desynchronized
+{
+    static const char *name() { return "Desynchronized"; }
+};
+
+// Reference to compile args of the computation
+struct CompileArgs
+{
+    static const char *name() { return "CompileArgs"; }
+    GCompileArgs args;
+};
+
 namespace GModel
 {
    using Graph = ade::TypedGraph
@@ -232,6 +284,11 @@ namespace GModel
        , CustomMetaFunction
        , Streaming
        , Deserialized
+        , HasIntrinsics
+        , DesyncPath
+        , DesyncEdge
+        , Desynchronized
+        , CompileArgs
        >;

    // FIXME: How to define it based on GModel???
@@ -254,6 +311,11 @@ namespace GModel
        , CustomMetaFunction
        , Streaming
        , Deserialized
+        , HasIntrinsics
+        , DesyncPath
+        , DesyncEdge
+        , Desynchronized
+        , CompileArgs
        >;

    // FIXME:
@@ -278,11 +340,11 @@ namespace GModel
    // Clears logged messages of a node.
    GAPI_EXPORTS void log_clear(Graph &g, ade::NodeHandle node);

-    GAPI_EXPORTS void linkIn   (Graph &g, ade::NodeHandle op,     ade::NodeHandle obj, std::size_t in_port);
-    GAPI_EXPORTS void linkOut  (Graph &g, ade::NodeHandle op,     ade::NodeHandle obj, std::size_t out_port);
+    GAPI_EXPORTS ade::EdgeHandle linkIn   (Graph &g, ade::NodeHandle op,     ade::NodeHandle obj, std::size_t in_port);
+    GAPI_EXPORTS ade::EdgeHandle linkOut  (Graph &g, ade::NodeHandle op,     ade::NodeHandle obj, std::size_t out_port);

-    GAPI_EXPORTS void redirectReaders(Graph &g, ade::NodeHandle from, ade::NodeHandle to);
-    GAPI_EXPORTS void redirectWriter (Graph &g, ade::NodeHandle from, ade::NodeHandle to);
+    GAPI_EXPORTS std::vector<ade::EdgeHandle> redirectReaders(Graph &g, ade::NodeHandle from, ade::NodeHandle to);
+    GAPI_EXPORTS             ade::EdgeHandle  redirectWriter (Graph &g, ade::NodeHandle from, ade::NodeHandle to);

    GAPI_EXPORTS std::vector<ade::NodeHandle> orderedInputs (const ConstGraph &g, ade::NodeHandle nh);
    GAPI_EXPORTS std::vector<ade::NodeHandle> orderedOutputs(const ConstGraph &g, ade::NodeHandle nh);

--- a/modules/gapi/src/compiler/gmodelbuilder.cpp
+++ b/modules/gapi/src/compiler/gmodelbuilder.cpp
@@ -134,12 +134,19 @@ cv::gimpl::Unrolled cv::gimpl::unrollExpr(const GProtoArgs &ins,

                // Put the outputs object description of the node
                // so that they are not lost if they are not consumed by other operations
+                GAPI_Assert(call_p.m_k.outCtors.size() == call_p.m_k.outShapes.size());
                for (const auto &it : ade::util::indexed(call_p.m_k.outShapes))
                {
                    std::size_t port  = ade::util::index(it);
                    GShape shape      = ade::util::value(it);

-                    GOrigin org { shape, node, port, {}, origin.kind };
+                    // FIXME: then use ZIP
+                    HostCtor ctor     = call_p.m_k.outCtors[port];
+
+                    // NB: Probably this fixes all other "missing host ctor"
+                    // problems.
+                    // TODO: Clean-up the old workarounds if it really is.
+                    GOrigin org {shape, node, port, std::move(ctor), origin.kind};
                    origins.insert(org);
                }


--- a/modules/gapi/src/compiler/gobjref.hpp
+++ b/modules/gapi/src/compiler/gobjref.hpp
@@ -16,15 +16,9 @@ namespace cv

 namespace gimpl
 {
-    // Union type for various user-defined type constructors (GArray<T>, GOpaque<T>, etc)
-    // FIXME: Replace construct-only API with a more generic one
-    //    (probably with bits of introspection)
-    // Not required for non-user-defined types (GMat, GScalar, etc)
-    using HostCtor = util::variant
-    < util::monostate
-    , detail::ConstructVec
-    , detail::ConstructOpaque
-    >;
+    // HostCtor was there, but then moved to public
+    // Redeclare here to avoid changing tons of code
+    using HostCtor = cv::detail::HostCtor;

    using ConstVal = util::variant
    < util::monostate

--- a/modules/gapi/src/compiler/gstreaming.cpp
+++ b/modules/gapi/src/compiler/gstreaming.cpp
@@ -69,6 +69,11 @@ bool cv::GStreamingCompiled::Priv::pull(cv::GRunArgsP &&outs)
    return m_exec->pull(std::move(outs));
 }

+bool cv::GStreamingCompiled::Priv::pull(cv::GOptRunArgsP &&outs)
+{
+    return m_exec->pull(std::move(outs));
+}
+
 bool cv::GStreamingCompiled::Priv::try_pull(cv::GRunArgsP &&outs)
 {
    return m_exec->try_pull(std::move(outs));
@@ -113,6 +118,7 @@ bool cv::GStreamingCompiled::pull(cv::GRunArgsP &&outs)

 std::tuple<bool, cv::GRunArgs> cv::GStreamingCompiled::pull()
 {
+    // FIXME: Why it is not @ priv??
    GRunArgs run_args;
    GRunArgsP outs;
    const auto& out_shapes = m_priv->outShapes();
@@ -144,6 +150,11 @@ std::tuple<bool, cv::GRunArgs> cv::GStreamingCompiled::pull()
    return std::make_tuple(is_over, run_args);
 }

+bool cv::GStreamingCompiled::pull(cv::GOptRunArgsP &&outs)
+{
+    return m_priv->pull(std::move(outs));
+}
+
 bool cv::GStreamingCompiled::try_pull(cv::GRunArgsP &&outs)
 {
    return m_priv->try_pull(std::move(outs));

--- a/modules/gapi/src/compiler/gstreaming_priv.hpp
+++ b/modules/gapi/src/compiler/gstreaming_priv.hpp
@@ -42,6 +42,7 @@ public:
    void setSource(GRunArgs &&args);
    void start();
    bool pull(cv::GRunArgsP &&outs);
+    bool pull(cv::GOptRunArgsP &&outs);
    bool try_pull(cv::GRunArgsP &&outs);
    void stop();


--- a/modules/gapi/src/compiler/passes/exec.cpp
+++ b/modules/gapi/src/compiler/passes/exec.cpp
@@ -20,6 +20,7 @@
 #include <opencv2/gapi/util/optional.hpp>  // util::optional
 #include "logger.hpp"    // GAPI_LOG

+#include "api/gbackend_priv.hpp" // for canMerge()
 #include "compiler/gmodel.hpp"
 #include "compiler/gislandmodel.hpp"
 #include "compiler/passes/passes.hpp"
@@ -54,11 +55,28 @@ namespace
        // Also check the cases backend can't handle
        // (e.x. GScalar connecting two fluid ops should split the graph)
        const GModel::ConstGraph g(src_graph);
+        if (g.metadata().contains<Desynchronized>()) {
+            // Fusion of a graph having a desynchronized path is
+            // definitely non-trivial
+            return false;
+        }
        const auto& active_backends = g.metadata().get<ActiveBackends>().backends;
-        return active_backends.size() == 1 &&
-                ade::util::all_of(g.nodes(), [&](ade::NodeHandle nh) {
-            return !g.metadata(nh).contains<Island>();
-        });
+        if (active_backends.size() != 1u) {
+            // More than 1 backend involved - non-trivial
+            return false;
+        }
+        const auto& has_island_tags = [&](ade::NodeHandle nh) {
+            return g.metadata(nh).contains<Island>();
+        };
+        if (ade::util::any_of(g.nodes(), has_island_tags)) {
+            // There are user-defined islands - non-trivial
+            return false;
+        }
+        if (active_backends.begin()->priv().controlsMerge()) {
+            // If the only backend controls Island Fusion on its own - non-trivial
+            return false;
+        }
+        return true;
    }

    void fuseTrivial(GIslandModel::Graph &g, const ade::Graph &src_graph)
@@ -125,9 +143,9 @@ namespace
    };

    bool canMerge(const GIslandModel::Graph &g,
-                  const ade::NodeHandle a_nh,
-                  const ade::NodeHandle /*slot_nh*/,
-                  const ade::NodeHandle b_nh,
+                  const ade::NodeHandle &a_nh,
+                  const ade::NodeHandle &slot_nh,
+                  const ade::NodeHandle &b_nh,
                  const MergeContext &ctx = MergeContext())
    {
        auto a_ptr = g.metadata(a_nh).get<FusedIsland>().object;
@@ -142,8 +160,8 @@ namespace
        // Islands which cause a cycle can't be merged as well
        // (since the flag is set, the procedure already tried to
        // merge these islands in the past)
-        if (ade::util::contains(ctx.cycle_causers, std::make_pair(a_ptr, b_ptr))||
-            ade::util::contains(ctx.cycle_causers, std::make_pair(b_ptr, a_ptr)))
+        if (   ade::util::contains(ctx.cycle_causers, std::make_pair(a_ptr, b_ptr))
+            || ade::util::contains(ctx.cycle_causers, std::make_pair(b_ptr, a_ptr)))
            return false;

        // There may be user-defined islands. Initially user-defined
@@ -163,7 +181,13 @@ namespace
                return false;
        }

-        // FIXME: add a backend-specified merge checker
+        // If available, run the backend-specified merge checker
+        const auto &this_backend_p = a_ptr->backend().priv();
+        if (    this_backend_p.controlsMerge()
+            && !this_backend_p.allowsMerge(g, a_nh, slot_nh, b_nh))
+        {
+            return false;
+        }
        return true;
    }

@@ -205,10 +229,31 @@ namespace
    {
        using namespace std::placeholders;

+        // Before checking for candidates, find and ban neighbor nodes
+        // (input or outputs) which are connected via desynchronized
+        // edges.
+        GIsland::node_set nodes_with_desync_edges;
+        for (const auto& in_eh : nh->inEdges()) {
+            if (g.metadata(in_eh).contains<DesyncIslEdge>()) {
+                nodes_with_desync_edges.insert(in_eh->srcNode());
+            }
+        }
+        for (const auto& output_data_nh : nh->outNodes()) {
+            for (const auto &out_reader_eh : output_data_nh->outEdges()) {
+                if (g.metadata(out_reader_eh).contains<DesyncIslEdge>()) {
+                    nodes_with_desync_edges.insert(out_reader_eh->dstNode());
+                }
+            }
+        }
+
        // Find a first matching candidate GIsland for merge
        // among inputs
-        for (const auto& input_data_nh : nh->inNodes())
+        for (const auto& in_eh : nh->inEdges())
        {
+            if (ade::util::contains(nodes_with_desync_edges, in_eh->srcNode())) {
+                continue; // desync edges can never be fused
+            }
+            const auto& input_data_nh = in_eh->srcNode();
            if (input_data_nh->inNodes().size() != 0)
            {
                // Data node must have a single producer only
@@ -224,14 +269,17 @@ namespace
        // Ok, now try to find it among the outputs
        for (const auto& output_data_nh : nh->outNodes())
        {
-            auto mergeTest = [&](ade::NodeHandle cons_nh) -> bool {
-                return canMerge(g, nh, output_data_nh, cons_nh, ctx);
+            auto mergeTest = [&](ade::EdgeHandle cons_eh) -> bool {
+                if (ade::util::contains(nodes_with_desync_edges, cons_eh->dstNode())) {
+                    return false;  // desync edges can never be fused
+                }
+                return canMerge(g, nh, output_data_nh, cons_eh->dstNode(), ctx);
            };
-            auto cand_it = std::find_if(output_data_nh->outNodes().begin(),
-                                        output_data_nh->outNodes().end(),
+            auto cand_it = std::find_if(output_data_nh->outEdges().begin(),
+                                        output_data_nh->outEdges().end(),
                                        mergeTest);
-            if (cand_it != output_data_nh->outNodes().end())
-                return std::make_tuple(*cand_it,
+            if (cand_it != output_data_nh->outEdges().end())
+                return std::make_tuple((*cand_it)->dstNode(),
                                       output_data_nh,
                                       Direction::Out);
        } // for(outNodes)
@@ -251,6 +299,7 @@ namespace
        ade::NodeHandle m_slot;
        ade::NodeHandle m_cons;

+        using Change = ChangeT<DesyncIslEdge>;
        Change::List m_changes;

        struct MergeObjects
@@ -423,10 +472,10 @@ namespace
        auto backend = m_gim.metadata(m_prod).get<FusedIsland>()
            .object->backend();
        auto merged = std::make_shared<GIsland>(backend,
-                                                           std::move(mo.all),
-                                                           std::move(mo.in_ops),
-                                                           std::move(mo.out_ops),
-                                                           std::move(maybe_user_tag));
+                                                std::move(mo.all),
+                                                std::move(mo.in_ops),
+                                                std::move(mo.out_ops),
+                                                std::move(maybe_user_tag));
        // FIXME: move this debugging to some user-controllable log-level
 #ifdef DEBUG_MERGE
        merged->debug();
@@ -440,7 +489,9 @@ namespace
                                                 m_prod->inEdges().end());
        for (auto in_edge : input_edges)
        {
-            m_changes.enqueue<Change::NewLink>(m_g, in_edge->srcNode(), new_nh);
+            // FIXME: Introduce a Relink primitive instead?
+            // (combining the both actions into one?)
+            m_changes.enqueue<Change::NewLink>(m_g, in_edge->srcNode(), new_nh, in_edge);
            m_changes.enqueue<Change::DropLink>(m_g, m_prod, in_edge);
        }

@@ -450,7 +501,7 @@ namespace
                                                  m_cons->outEdges().end());
        for (auto out_edge : output_edges)
        {
-            m_changes.enqueue<Change::NewLink>(m_g, new_nh, out_edge->dstNode());
+            m_changes.enqueue<Change::NewLink>(m_g, new_nh, out_edge->dstNode(), out_edge);
            m_changes.enqueue<Change::DropLink>(m_g, m_cons, out_edge);
        }

@@ -491,6 +542,10 @@ namespace
                    m_changes.enqueue<Change::DropLink>(m_g, non_opt_slot_nh, eh);
                }
            }
+            // FIXME: No metadata copied here (from where??)
+            // For DesyncIslEdges it still works, as these tags are
+            // placed to Data->Op edges and this one is an Op->Data
+            // edge.
            m_changes.enqueue<Change::NewLink>(m_g, new_nh, non_opt_slot_nh);
        }

@@ -502,7 +557,7 @@ namespace
             m_prod->outEdges().end());
        for (auto extra_out : prod_extra_out_edges)
        {
-            m_changes.enqueue<Change::NewLink>(m_g, new_nh, extra_out->dstNode());
+            m_changes.enqueue<Change::NewLink>(m_g, new_nh, extra_out->dstNode(), extra_out);
            m_changes.enqueue<Change::DropLink>(m_g, m_prod, extra_out);
        }

@@ -514,7 +569,7 @@ namespace
             m_cons->inEdges().end());
        for (auto extra_in : cons_extra_in_edges)
        {
-            m_changes.enqueue<Change::NewLink>(m_g, extra_in->srcNode(), new_nh);
+            m_changes.enqueue<Change::NewLink>(m_g, extra_in->srcNode(), new_nh, extra_in);
            m_changes.enqueue<Change::DropLink>(m_g, m_cons, extra_in);
        }

@@ -557,10 +612,10 @@ namespace
            there_was_a_merge = false;

            // FIXME: move this debugging to some user-controllable log level
-    #ifdef DEBUG_MERGE
+#ifdef DEBUG_MERGE
            GAPI_LOG_INFO(NULL, "Before next merge attempt " << iteration << "...");
            merge_debug(g, iteration);
-    #endif
+#endif
            iteration++;
            auto sorted = pass_helpers::topoSort(im);
            for (auto nh : sorted)
@@ -600,9 +655,9 @@ namespace
                                          "merge(" << l_obj->name() << "," << r_obj->name() <<
                                          ") was successful!");
                            action.commit();
-    #ifdef DEBUG_MERGE
+#ifdef DEBUG_MERGE
                            GIslandModel::syncIslandTags(gim, g);
-    #endif
+#endif
                            there_was_a_merge = true;
                            break; // start do{}while from the beginning
                        }

--- a/modules/gapi/src/compiler/passes/intrin.cpp
+++ b/modules/gapi/src/compiler/passes/intrin.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2020 Intel Corporation
+
+
+#include "precomp.hpp"
+
+#include <ade/util/algorithm.hpp>
+#include <ade/util/zip_range.hpp>
+#include <opencv2/gapi/streaming/desync.hpp>// GDesync intrinsic
+
+#include "compiler/gmodel.hpp"
+#include "compiler/passes/passes.hpp"
+
+namespace desync {
+namespace {
+
+// Drop the desynchronized node `nh` from the graph, reconnect the
+// graph structure properly.  This is a helper function which is used
+// in both drop(g) and apply(g) passes.
+//
+// @return a vector of new edge handles connecting the "main" graph
+// with its desynchronized part.
+std::vector<ade::EdgeHandle> drop(cv::gimpl::GModel::Graph &g,
+                                  ade::NodeHandle nh) {
+    using namespace cv::gimpl;
+
+    // What we need to do here:
+    // 1. Connect the readers of its produced data objects
+    //    to the input data objects of desync;
+    // 2. Drop the data object it produces.
+    // 3. Drop the desync operation itself;
+    std::vector<ade::NodeHandle> in_data_objs = GModel::orderedInputs(g, nh);
+    std::vector<ade::NodeHandle> out_data_objs = GModel::orderedOutputs(g, nh);
+    std::vector<ade::EdgeHandle> new_links;
+    GAPI_Assert(in_data_objs.size() == out_data_objs.size());
+    GAPI_DbgAssert(ade::util::all_of
+                   (out_data_objs,
+                    [&](const ade::NodeHandle &oh) {
+                       return g.metadata(oh).contains<Data>();
+                   }));
+    // (1)
+    for (auto &&it: ade::util::zip(ade::util::toRange(in_data_objs),
+                                   ade::util::toRange(out_data_objs))) {
+        auto these_new_links = GModel::redirectReaders(g,
+                                                       std::get<1>(it),
+                                                       std::get<0>(it));
+        new_links.insert(new_links.end(),
+                         these_new_links.begin(),
+                         these_new_links.end());
+    }
+    // (2)
+    for (auto &&old_out_nh : out_data_objs) {
+        g.erase(old_out_nh);
+    }
+    // (3)
+    g.erase(nh);
+
+    return new_links;
+}
+
+// Tracing a desynchronizing subgraph is somewhat tricky and happens
+// in both directions: downwards and upwards.
+//
+// The downward process is the basic one: we start with a "desync"
+// OP node and go down to the graph using the "output" edges. We check
+// if all nodes on this path [can] belong to this desynchronized path
+// and don't overlap with others.
+//
+// An important contract to maintain is that the desynchronized part
+// can't have any input references from the "main" graph part or any
+// other desynchronized part in the graph. This contract is validated
+// by checking every node's input which must belong to the same
+// desynchronized part.
+//
+// Here is the pitfall of this check:
+//
+//       v
+//     GMat_0
+//       v
+//   +----------+
+//   | desync() |      <- This point originates the traceDown process
+//   +----------+
+//       v
+//     GMat_0'         <- This node will be tagged for this desync at
+//       :--------.       step 0/1
+//       v        :    <- The order how output nodes are visited is not
+//   +----------+ :       specified, we can visit Op2() first (as there
+//   | Op1()    | :       is a direct link) bypassing visiting and tagging
+//   +----------+ :       Op1() and GMat_1
+//       v        :
+//     GMat_1     :
+//       :    .---'
+//       v    v        <- When we visit Op2() via the 2nd edge on this
+//   +----------+         graph, we check if all inputs belong to the same
+//   | Op2()    |         desynchronized graph and GMat_1 fails this check
+//   +----------+         (since the traceDown() process haven't visited
+//                        it yet).
+//
+// Cases like this originate the traceUp() process: if we find an
+// input node in our desynchronized path which doesn't belong to this
+// path YET, it is not 100% a problem, and we need to trace it back
+// (upwards) to see if it is really a case.
+
+// This recursive function checks the desync_id in the graph upwards.
+// The process doesn't continue for nodes which have a valid
+// desync_id already.
+// The process only continues for nodes which have no desync_id
+// assigned. If there's no such nodes anymore, the procedure is
+// considered complete and a list of nodes to tag is returned to the
+// caller.
+//
+// If NO inputs of this node have a valid desync_id, the desync
+// invariant is broken and the function throws.
+void traceUp(cv::gimpl::GModel::Graph &g,
+             const ade::NodeHandle &nh,
+             int desync_id,
+             std::vector<ade::NodeHandle> &path) {
+    using namespace cv::gimpl;
+
+    GAPI_Assert(!nh->inNodes().empty()
+                && "traceUp: a desynchronized part of the graph is not isolated?");
+
+    if (g.metadata(nh).contains<DesyncPath>()) {
+        // We may face nodes which have DesyncPath already visited during
+        // this recursive process (e.g. via some other output or branch in the
+        // subgraph)
+        if (g.metadata(nh).get<DesyncPath>().index != desync_id) {
+            GAPI_Assert(false && "Desynchronization can't be nested!");
+        }
+        return; // This object belongs to the desync path - exit early.
+    }
+
+    // Regardless of the result, put this nh to the path
+    path.push_back(nh);
+
+    // Check if the input nodes are OK
+    std::vector<ade::NodeHandle> nodes_to_trace;
+    nodes_to_trace.reserve(nh->inNodes().size());
+    for (auto &&in_nh : nh->inNodes()) {
+        if (g.metadata(in_nh).contains<DesyncPath>()) {
+            // We may face nodes which have DesyncPath already visited during
+            // this recursive process (e.g. via some other output or branch in the
+            // subgraph)
+            GAPI_Assert(g.metadata(in_nh).get<DesyncPath>().index == desync_id
+                        && "Desynchronization can't be nested!");
+        } else {
+            nodes_to_trace.push_back(in_nh);
+        }
+    }
+
+    // If there are nodes to trace, continue the recursion
+    for (auto &&up_nh : nodes_to_trace) {
+        traceUp(g, up_nh, desync_id, path);
+    }
+}
+
+// This recursive function propagates the desync_id down to the graph
+// starting at nh, and also checks:
+// - if this desync path is isolated;
+// - if this desync path is not overlapped.
+// It also originates the traceUp() process at the points of
+// uncertainty (as described in the comment above).
+void traceDown(cv::gimpl::GModel::Graph &g,
+               const ade::NodeHandle &nh,
+               int desync_id) {
+    using namespace cv::gimpl;
+
+    if (g.metadata(nh).contains<DesyncPath>()) {
+        // We may face nodes which have DesyncPath already visited during
+        // this recursive process (e.g. via some other output or branch in the
+        // subgraph)
+        GAPI_Assert(g.metadata(nh).get<DesyncPath>().index == desync_id
+                    && "Desynchronization can't be nested!");
+    } else {
+        g.metadata(nh).set(DesyncPath{desync_id});
+    }
+
+    // All inputs of this data object must belong to the same
+    // desync path.
+    for (auto &&in_nh : nh->inNodes()) {
+        // If an input object is not assigned to this desync path,
+        // it does not means that the object doesn't belong to
+        // this path. Check it.
+        std::vector<ade::NodeHandle> path_up;
+        traceUp(g, in_nh, desync_id, path_up);
+        // We get here on success. Just set the proper tags for
+        // the identified input path.
+        for (auto &&up_nh : path_up) {
+            g.metadata(up_nh).set(DesyncPath{desync_id});
+        }
+    }
+
+    // Propagate the tag & check down
+    for (auto &&out_nh : nh->outNodes()) {
+        traceDown(g, out_nh, desync_id);
+    }
+}
+
+// Streaming case: ensure the graph has proper isolation of the
+// desynchronized parts, set proper Edge metadata hints for
+// GStreamingExecutable
+void apply(cv::gimpl::GModel::Graph &g) {
+    using namespace cv::gimpl;
+
+    // Stage 0. Trace down the desync operations in the graph.
+    // Tag them with their unique (per graph) identifiers.
+    int total_desync = 0;
+    for (auto &&nh : g.nodes()) {
+        if (g.metadata(nh).get<NodeType>().t == NodeType::OP) {
+            const auto &op = g.metadata(nh).get<Op>();
+            if (op.k.name == cv::gapi::streaming::detail::GDesync::id()) {
+                GAPI_Assert(!g.metadata(nh).contains<DesyncPath>()
+                            && "Desynchronization can't be nested!");
+                const int this_desync_id = total_desync++;
+                g.metadata(nh).set(DesyncPath{this_desync_id});
+                for (auto &&out_nh: nh->outNodes()) {
+                    traceDown(g, out_nh, this_desync_id);
+                }
+            } // if (desync)
+        } // if(OP)
+    } // for(nodes)
+
+    // Tracing is done for all desync ops in the graph now.
+    // Stage 1. Drop the desync operations from the graph, but mark
+    // the desynchronized edges a special way.
+    // The desynchronized edge is the edge which connects a main
+    // subgraph data with a desynchronized subgraph data.
+    std::vector<ade::NodeHandle> nodes(g.nodes().begin(), g.nodes().end());
+    for (auto &&nh : nodes) {
+        if (nh == nullptr) {
+            // Some nodes could be dropped already during the procedure
+            // thanks ADE their NodeHandles updated automatically
+            continue;
+        }
+        if (g.metadata(nh).get<NodeType>().t == NodeType::OP) {
+            const auto &op = g.metadata(nh).get<Op>();
+            if (op.k.name == cv::gapi::streaming::detail::GDesync::id()) {
+                auto index = g.metadata(nh).get<DesyncPath>().index;
+                auto new_links = drop(g, nh);
+                for (auto &&eh : new_links) {
+                    g.metadata(eh).set(DesyncEdge{index});
+                }
+            } // if (desync)
+        } // if (Op)
+    } // for(nodes)
+
+    // Stage 2. Put a synchronized tag if there were changes applied
+    if (total_desync > 0) {
+        g.metadata().set(Desynchronized{});
+    }
+}
+
+// Probably the simplest case: desync makes no sense in the regular
+// compilation process, so just drop all its occurences in the graph,
+// reconnecting nodes properly.
+void drop(cv::gimpl::GModel::Graph &g) {
+    // FIXME: LOG here that we're dropping the desync operations as
+    // they have no sense when compiling in the regular mode.
+    using namespace cv::gimpl;
+    std::vector<ade::NodeHandle> nodes(g.nodes().begin(), g.nodes().end());
+    for (auto &&nh : nodes) {
+        if (nh == nullptr) {
+            // Some nodes could be dropped already during the procedure
+            // thanks ADE their NodeHandles updated automatically
+            continue;
+        }
+        if (g.metadata(nh).get<NodeType>().t == NodeType::OP) {
+            const auto &op = g.metadata(nh).get<Op>();
+            if (op.k.name == cv::gapi::streaming::detail::GDesync::id()) {
+                drop(g, nh);
+            } // if (desync)
+        } // if (Op)
+    } // for(nodes)
+}
+
+} // anonymous namespace
+} // namespace desync
+
+void cv::gimpl::passes::intrinDesync(ade::passes::PassContext &ctx) {
+    GModel::Graph gr(ctx.graph);
+    if (!gr.metadata().contains<HasIntrinsics>())
+        return;
+
+    gr.metadata().contains<Streaming>()
+        ? desync::apply(gr) // Streaming compilation
+        : desync::drop(gr); // Regular compilation
+}
+
+// Clears the HasIntrinsics flag if all intrinsics have been handled.
+void cv::gimpl::passes::intrinFinalize(ade::passes::PassContext &ctx) {
+    GModel::Graph gr(ctx.graph);
+    for (auto &&nh : gr.nodes()) {
+        if (gr.metadata(nh).get<NodeType>().t == NodeType::OP) {
+            const auto &op = gr.metadata(nh).get<Op>();
+            if (is_intrinsic(op.k.name)) {
+                return;
+            }
+        }
+    }
+    // If reached here, really clear the flag
+    gr.metadata().erase<HasIntrinsics>();
+}
--- a/modules/gapi/src/compiler/passes/kernels.cpp
+++ b/modules/gapi/src/compiler/passes/kernels.cpp
@@ -14,6 +14,7 @@
 #include <opencv2/gapi/gcompoundkernel.hpp> // compound::backend()
 #include <opencv2/gapi/gkernel.hpp>         // GKernelPackage
 #include <opencv2/gapi/infer.hpp>           // GNetPackage
+#include <opencv2/gapi/streaming/desync.hpp>// GDesync intrinsic

 #include "compiler/gmodel.hpp"
 #include "compiler/passes/passes.hpp"
@@ -24,6 +25,20 @@
 #include "logger.hpp"    // GAPI_LOG
 #include "api/gproto_priv.hpp" // is_dynamic, rewrap

+namespace
+{
+    // FIXME: This may be not the right design choice, but so far it works
+    const std::vector<std::string> known_intrinsics = {
+        cv::gapi::streaming::detail::GDesync::id()
+    };
+}
+bool cv::gimpl::is_intrinsic(const std::string &s) {
+    // FIXME: This search might be better in time once we start using string
+    return std::find(known_intrinsics.begin(),
+                     known_intrinsics.end(),
+                     s) != known_intrinsics.end();
+}
+
 namespace
 {
    struct ImplInfo
@@ -130,8 +145,13 @@ void cv::gimpl::passes::bindNetParams(ade::passes::PassContext &ctx,
    }
 }

-// This pass, given the kernel package, selects a kernel implementation
-// for every operation in the graph
+// This pass, given the kernel package, selects a kernel
+// implementation for every operation in the graph
+//
+// Starting OpenCV 4.3, G-API may have some special "intrinsic"
+// operations.  Those can be implemented by backends as regular
+// kernels, but if not, they are handled by the framework itself in
+// its optimization/execution passes.
 void cv::gimpl::passes::resolveKernels(ade::passes::PassContext   &ctx,
                                       const gapi::GKernelPackage &kernels)
 {
@@ -142,7 +162,25 @@ void cv::gimpl::passes::resolveKernels(ade::passes::PassContext   &ctx,
    {
        if (gr.metadata(nh).get<NodeType>().t == NodeType::OP)
        {
+            // If the operation is known to be intrinsic and is NOT
+            // implemented in the package, just skip it - there should
+            // be some pass which handles it.
            auto &op = gr.metadata(nh).get<Op>();
+            if (is_intrinsic(op.k.name) && !kernels.includesAPI(op.k.name)) {
+                gr.metadata().set(HasIntrinsics{});
+                continue;
+            }
+            // FIXME: And this logic is terribly wrong. The right
+            // thing is to assign an intrinsic to a particular island
+            // if and only if it is:
+            // (a) surrounded by nodes of backend X, AND
+            // (b) is supported by backend X.
+            // Here we may have multiple backends supporting an
+            // intrinsic but only one of those gets selected.  And
+            // this is exactly a situation we need multiple versions
+            // of the same kernel to be presented in the kernel
+            // package (as it was designed originally).
+
            cv::gapi::GBackend selected_backend;
            cv::GKernelImpl    selected_impl;
            std::tie(selected_backend, selected_impl) = kernels.lookup(op.k.name);
@@ -181,6 +219,12 @@ void cv::gimpl::passes::expandKernels(ade::passes::PassContext &ctx, const gapi:
            if (gr.metadata(nh).get<NodeType>().t == NodeType::OP)
            {
                const auto& op = gr.metadata(nh).get<Op>();
+                // FIXME: Essentially the same problem as in the above resolveKernels
+                if (is_intrinsic(op.k.name) && !kernels.includesAPI(op.k.name)) {
+                    // Note: There's no need to set HasIntrinsics flag here
+                    // since resolveKernels would do it later.
+                    continue;
+                }

                cv::gapi::GBackend selected_backend;
                cv::GKernelImpl    selected_impl;

--- a/modules/gapi/src/compiler/passes/passes.hpp
+++ b/modules/gapi/src/compiler/passes/passes.hpp
@@ -31,7 +31,11 @@ namespace gapi {
    struct GNetPackage;
 }  // namespace gapi

-namespace gimpl { namespace passes {
+namespace gimpl {
+
+bool is_intrinsic(const std::string &op_name);
+
+namespace passes {

 void dumpDot(const ade::Graph &g, std::ostream& os);
 void dumpDot(ade::passes::PassContext &ctx, std::ostream& os);
@@ -66,6 +70,9 @@ void applyTransformations(ade::passes::PassContext &ctx,

 void addStreaming(ade::passes::PassContext &ctx);

+void intrinDesync(ade::passes::PassContext &ctx);
+void intrinFinalize(ade::passes::PassContext &ctx);
+
 }} // namespace gimpl::passes

 } // namespace cv

--- a/modules/gapi/src/compiler/transactions.hpp
+++ b/modules/gapi/src/compiler/transactions.hpp
@@ -14,6 +14,7 @@

 #include <ade/graph.hpp>

+#include "opencv2/gapi/util/util.hpp" // Seq
 #include "opencv2/gapi/own/assert.hpp"

 enum class Direction: int {Invalid, In, Out};
@@ -21,8 +22,50 @@ enum class Direction: int {Invalid, In, Out};
 ////////////////////////////////////////////////////////////////////////////
 ////
 // TODO: Probably it can be moved to ADE
-
-namespace Change
+template<class H, class... Metatypes>
+class Preserved
+{
+    using S = typename cv::detail::MkSeq<sizeof...(Metatypes)>::type;
+    std::tuple<cv::util::optional<Metatypes>...> m_data;
+
+    template<class T>
+    cv::util::optional<T> get(ade::ConstTypedGraph<Metatypes...> g, H h) {
+        return g.metadata(h).template contains<T>()
+            ? cv::util::make_optional(g.metadata(h).template get<T>())
+            : cv::util::optional<T>{};
+    }
+    template<std::size_t Id>
+    int set(ade::TypedGraph<Metatypes...> &g, H &h) {
+        const auto &opt = std::get<Id>(m_data);
+        if (opt.has_value())
+            g.metadata(h).set(opt.value());
+        return 0;
+    }
+    template<int... IIs>
+    void copyTo_impl(ade::TypedGraph<Metatypes...> &g, H h, cv::detail::Seq<IIs...>) {
+        int unused[] = {0, set<IIs>(g, h)...};
+        (void) unused;
+    }
+public:
+    Preserved(const ade::Graph &g, H h) {
+        ade::ConstTypedGraph<Metatypes...> tg(g);
+        m_data = std::make_tuple(get<Metatypes>(tg, h)...);
+    }
+    void copyTo(ade::Graph &g, H h) {
+        ade::TypedGraph<Metatypes...> tg(g);
+        copyTo_impl(tg, h, S{});
+    }
+};
+// Do nothing if there's no metadata
+template<class H>
+class Preserved<H> {
+public:
+    Preserved(const ade::Graph &, H) {}
+    void copyTo(ade::Graph &, H) {}
+};
+
+template<class... Metatypes>
+struct ChangeT
 {
    struct Base
    {
@@ -31,6 +74,8 @@ namespace Change
        virtual ~Base() = default;
    };

+    template<typename H> using Preserved = ::Preserved<H, Metatypes...>;
+
    class NodeCreated final: public Base
    {
        ade::NodeHandle m_node;
@@ -39,11 +84,7 @@ namespace Change
        virtual void rollback(ade::Graph &g) override { g.erase(m_node); }
    };

-    // NB: Drops all metadata stored in the EdgeHandle,
-    // which is not restored even in the rollback
-
-    // FIXME: either add a way for users to preserve meta manually
-    // or extend ADE to manipulate with meta such way
+    // FIXME: maybe extend ADE to clone/copy the whole metadata?
    class DropLink final: public Base
    {
        ade::NodeHandle m_node;
@@ -51,13 +92,15 @@ namespace Change

        ade::NodeHandle m_sibling;

+        Preserved<ade::EdgeHandle> m_meta;
+
    public:
        DropLink(ade::Graph &g,
                 const ade::NodeHandle &node,
                 const ade::EdgeHandle &edge)
-            : m_node(node), m_dir(node == edge->srcNode()
-                                  ? Direction::Out
-                                  : Direction::In)
+            : m_node(node)
+            , m_dir(node == edge->srcNode() ? Direction::Out : Direction::In)
+            , m_meta(g, edge)
        {
            m_sibling = (m_dir == Direction::In
                         ? edge->srcNode()
@@ -67,12 +110,17 @@ namespace Change

        virtual void rollback(ade::Graph &g) override
        {
+            // FIXME: Need to preserve metadata here!
+            // GIslandModel edges now have metadata
+            ade::EdgeHandle eh;
            switch(m_dir)
            {
-            case Direction::In:  g.link(m_sibling, m_node); break;
-            case Direction::Out: g.link(m_node, m_sibling); break;
+            case Direction::In:  eh = g.link(m_sibling, m_node); break;
+            case Direction::Out: eh = g.link(m_node, m_sibling); break;
            default: GAPI_Assert(false);
            }
+            GAPI_Assert(eh != nullptr);
+            m_meta.copyTo(g, eh);
        }
    };

@@ -82,10 +130,15 @@ namespace Change

    public:
        NewLink(ade::Graph &g,
-                  const ade::NodeHandle &prod,
-                  const ade::NodeHandle &cons)
+                const ade::NodeHandle &prod,
+                const ade::NodeHandle &cons,
+                const ade::EdgeHandle &copy_from = ade::EdgeHandle())
            : m_edge(g.link(prod, cons))
        {
+            if (copy_from != nullptr)
+            {
+                Preserved<ade::EdgeHandle>(g, copy_from).copyTo(g, m_edge);
+            }
        }

        virtual void rollback(ade::Graph &g) override
@@ -141,7 +194,7 @@ namespace Change
            }
        }
    };
-} // namespace Change
+}; // struct Change
 ////////////////////////////////////////////////////////////////////////////

 #endif // OPENCV_GAPI_COMPILER_TRANSACTIONS_HPP
--- a/modules/gapi/src/executor/conc_queue.hpp
+++ b/modules/gapi/src/executor/conc_queue.hpp
@@ -119,8 +119,7 @@ void concurrent_bounded_queue<T>::set_capacity(std::size_t capacity) {
 // Clear the queue. Similar to the TBB version, this method is not
 // thread-safe.
 template<typename T>
-void concurrent_bounded_queue<T>::clear()
-{
+void concurrent_bounded_queue<T>::clear() {
    m_data = std::queue<T>{};
 }


--- a/modules/gapi/src/executor/gstreamingexecutor.cpp
+++ b/modules/gapi/src/executor/gstreamingexecutor.cpp
--- a/modules/gapi/src/executor/gstreamingexecutor.hpp
+++ b/modules/gapi/src/executor/gstreamingexecutor.hpp
@@ -14,6 +14,8 @@

 #include <memory> // unique_ptr, shared_ptr
 #include <thread> // thread
+#include <vector>
+#include <unordered_map>

 #if defined(HAVE_TBB)
 #  include <tbb/concurrent_queue.h> // FIXME: drop it from here!
@@ -22,6 +24,7 @@ template<typename T> using QueueClass = tbb::concurrent_bounded_queue<T>;
 #  include "executor/conc_queue.hpp"
 template<typename T> using QueueClass = cv::gapi::own::concurrent_bounded_queue<T>;
 #endif // TBB
+#include "executor/last_value.hpp"

 #include <ade/graph.hpp>

@@ -40,14 +43,61 @@ struct Stop {
    cv::GRunArg cdata; // const data for CNST stop
 };

+struct Result {
+    cv::GRunArgs      args;  // Full results vector
+    std::vector<bool> flags; // Availability flags (in case of desync)
+};
+
 using Cmd = cv::util::variant
    < cv::util::monostate
    , Start        // Tells emitters to start working. Not broadcasted to workers.
    , Stop         // Tells emitters to stop working. Broadcasted to workers.
    , cv::GRunArg  // Workers data payload to process.
-    , cv::GRunArgs // Full results vector
+    , Result       // Pipeline's data for gout()
    >;
-using Q = QueueClass<Cmd>;
+
+// Interface over a queue. The underlying queue implementation may be
+// different. This class is mainly introduced to bring some
+// abstraction over the real queues (bounded in-order) and a
+// desynchronized data slots (see required to implement
+// cv::gapi::desync)
+
+class Q {
+public:
+    virtual void push(const Cmd &cmd) = 0;
+    virtual void pop(Cmd &cmd) = 0;
+    virtual bool try_pop(Cmd &cmd) = 0;
+    virtual void clear() = 0;
+    virtual ~Q() = default;
+};
+
+// A regular queue implementation
+class SyncQueue final: public Q {
+    QueueClass<Cmd> m_q;    // FIXME: OWN or WRAP??
+
+public:
+    virtual void push(const Cmd &cmd) override { m_q.push(cmd); }
+    virtual void pop(Cmd &cmd)        override { m_q.pop(cmd);  }
+    virtual bool try_pop(Cmd &cmd)    override { return m_q.try_pop(cmd); }
+    virtual void clear()              override { m_q.clear(); }
+
+    void set_capacity(std::size_t c) { m_q.set_capacity(c);}
+};
+
+// Desynchronized "queue" implementation
+// Every push overwrites value which is not yet popped
+// This container can hold 0 or 1 element
+// Special handling for Stop is implemented (FIXME: not really)
+class DesyncQueue final: public Q {
+    cv::gapi::own::last_written_value<Cmd> m_v;
+
+public:
+    virtual void push(const Cmd &cmd) override { m_v.push(cmd); }
+    virtual void pop(Cmd &cmd)        override { m_v.pop(cmd);  }
+    virtual bool try_pop(Cmd &cmd)    override { return m_v.try_pop(cmd); }
+    virtual void clear()              override { m_v.clear(); }
+};
+
 } // namespace stream

 // FIXME: Currently all GExecutor comments apply also
@@ -87,6 +137,7 @@ protected:
    util::optional<bool> m_reshapable;

    cv::gimpl::GIslandModel::Graph m_gim; // FIXME: make const?
+    const bool m_desync;

    // FIXME: Naive executor details are here for now
    // but then it should be moved to another place
@@ -117,11 +168,27 @@ protected:
    std::vector<ade::NodeHandle> m_sinks;

    std::vector<std::thread> m_threads;
-    std::vector<stream::Q>   m_emitter_queues;
-    std::vector<stream::Q*>  m_const_emitter_queues; // a view over m_emitter_queues
-    std::vector<stream::Q*>  m_sink_queues;
-    std::unordered_set<stream::Q*> m_internal_queues;
-    stream::Q m_out_queue;
+    std::vector<stream::SyncQueue>   m_emitter_queues;
+
+    // a view over m_emitter_queues
+    std::vector<stream::SyncQueue*>  m_const_emitter_queues;
+
+    std::vector<stream::Q*>          m_sink_queues;
+
+    // desync path tags for outputs. -1 means that output
+    // doesn't belong to a desync path
+    std::vector<int>                 m_sink_sync;
+
+    std::unordered_set<stream::Q*>   m_internal_queues;
+    stream::SyncQueue m_out_queue;
+
+    // Describes mapping from desync paths to collector threads
+    struct CollectorThreadInfo {
+        std::vector<stream::Q*>  queues;
+        std::vector<int> mapping;
+    };
+    std::unordered_map<int, CollectorThreadInfo> m_collector_map;
+

    void wait_shutdown();

@@ -132,6 +199,7 @@ public:
    void setSource(GRunArgs &&args);
    void start();
    bool pull(cv::GRunArgsP &&outs);
+    bool pull(cv::GOptRunArgsP &&outs);
    bool try_pull(cv::GRunArgsP &&outs);
    void stop();
    bool running() const;

--- a/modules/gapi/src/executor/last_value.hpp
+++ b/modules/gapi/src/executor/last_value.hpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2020 Intel Corporation
+
+#ifndef OPENCV_GAPI_EXECUTOR_LAST_VALUE_HPP
+#define OPENCV_GAPI_EXECUTOR_LAST_VALUE_HPP
+
+#include <mutex>
+#include <condition_variable>
+
+#include <opencv2/gapi/util/optional.hpp>
+#include <opencv2/gapi/own/assert.hpp>
+
+namespace cv {
+namespace gapi {
+namespace own {
+
+// This class implements a "Last Written Value" thing.  Writer threads
+// (in our case, it is just one) can write as many values there as it
+// can.
+//
+// The reader thread gets only a value it gets at the time (or blocks
+// if there was no value written since the last read).
+//
+// Again, the implementation is highly inefficient right now.
+template<class T>
+class last_written_value {
+    cv::util::optional<T> m_data;
+
+    std::mutex m_mutex;
+    std::condition_variable m_cond_empty;
+
+    void unsafe_pop(T &t);
+
+public:
+    last_written_value() {}
+    last_written_value(const last_written_value<T> &cc)
+        : m_data(cc.m_data) {
+        // FIXME: what to do with all that locks, etc?
+    }
+    last_written_value(last_written_value<T> &&cc)
+        : m_data(std::move(cc.m_data)) {
+        // FIXME: what to do with all that locks, etc?
+    }
+
+    // FIXME: && versions
+    void push(const T &t);
+    void pop(T &t);
+    bool try_pop(T &t);
+
+    // Not thread-safe
+    void clear();
+};
+
+// Internal: do shared pop things assuming the lock is already there
+template<typename T>
+void last_written_value<T>::unsafe_pop(T &t) {
+    GAPI_Assert(m_data.has_value());
+    t = std::move(m_data.value());
+    m_data.reset();
+}
+
+// Push an element to the queue. Blocking if there's no space left
+template<typename T>
+void last_written_value<T>::push(const T& t) {
+    std::unique_lock<std::mutex> lock(m_mutex);
+    m_data = cv::util::make_optional(t);
+    lock.unlock();
+    m_cond_empty.notify_one();
+}
+
+// Pop an element from the queue. Blocking if there's no items
+template<typename T>
+void last_written_value<T>::pop(T &t) {
+    std::unique_lock<std::mutex> lock(m_mutex);
+    if (!m_data.has_value()) {
+        // if there is no data, wait
+        m_cond_empty.wait(lock, [&](){return m_data.has_value();});
+    }
+    unsafe_pop(t);
+}
+
+// Try pop an element from the queue. Returns false if queue is empty
+template<typename T>
+bool last_written_value<T>::try_pop(T &t) {
+    std::unique_lock<std::mutex> lock(m_mutex);
+    if (!m_data.has_value()) {
+        // if there is no data, return
+        return false;
+    }
+    unsafe_pop(t);
+    return true;
+}
+
+// Clear the value holder. This method is not thread-safe.
+template<typename T>
+void last_written_value<T>::clear() {
+    m_data.reset();
+}
+
+}}} // namespace cv::gapi::own
+
+#endif //  OPENCV_GAPI_EXECUTOR_CONC_QUEUE_HPP
--- a/modules/gapi/test/internal/gapi_int_gmodel_builder_test.cpp
+++ b/modules/gapi/test/internal/gapi_int_gmodel_builder_test.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2020 Intel Corporation


 #include "../test_precomp.hpp"
@@ -29,7 +29,9 @@ namespace
                                    , ""
                                    , nullptr
                                    , { GShape::GMAT }
-                                    , { D::OpaqueKind::CV_UNKNOWN } }).pass(m).yield(0);
+                                    , { D::OpaqueKind::CV_UNKNOWN }
+                                    , { cv::detail::HostCtor{cv::util::monostate{}} }
+                                    }).pass(m).yield(0);
    }

    cv::GMat binaryOp(cv::GMat m1, cv::GMat m2)
@@ -38,7 +40,9 @@ namespace
                                    , ""
                                    , nullptr
                                    , { GShape::GMAT }
-                                    , { D::OpaqueKind::CV_UNKNOWN, D::OpaqueKind::CV_UNKNOWN } }).pass(m1, m2).yield(0);
+                                    , { D::OpaqueKind::CV_UNKNOWN, D::OpaqueKind::CV_UNKNOWN }
+                                    , { cv::detail::HostCtor{cv::util::monostate{}} }
+                                    }).pass(m1, m2).yield(0);
    }

    std::vector<ade::NodeHandle> collectOperations(const cv::gimpl::GModel::Graph& gr)

--- a/modules/gapi/test/internal/gapi_int_island_fusion_tests.cpp
+++ b/modules/gapi/test/internal/gapi_int_island_fusion_tests.cpp
@@ -513,7 +513,65 @@ TEST(IslandFusion, Regression_ShouldFuseAll)
    EXPECT_EQ(1u, isl_nhs.size());  // 1 island
 }

-// FIXME: add more tests on mixed (hetero) graphs
+TEST(IslandFusion, Test_Desync_NoFuse)
+{
+    cv::GMat in;
+    cv::GMat tmp1 = in*0.5f;
+    cv::GMat tmp2 = tmp1 + in;
+
+    cv::GMat tmp3 = cv::gapi::streaming::desync(tmp1);
+    cv::GMat tmp4 = tmp3*0.1f;
+
+    const auto in_meta = cv::GMetaArg(cv::GMatDesc{CV_8U,1,cv::Size(32,32)});
+    cv::GComputation comp(cv::GIn(in), cv::GOut(tmp2, tmp4));
+
+    //////////////////////////////////////////////////////////////////
+    // Compile the graph in "regular" mode, it should produce a single island
+    {
+        using namespace cv::gimpl;
+
+        GCompiler compiler(comp, {in_meta}, cv::compile_args());
+        GCompiler::GPtr graph = compiler.generateGraph();
+        compiler.runPasses(*graph);
+
+        auto isl_model = GModel::ConstGraph(*graph).metadata()
+            .get<IslandModel>().model;
+        GIslandModel::ConstGraph gim(*isl_model);
+
+        const auto is_island = [&](ade::NodeHandle nh) {
+            return (NodeKind::ISLAND == gim.metadata(nh).get<NodeKind>().k);
+        };
+        const auto num_isl = std::count_if(gim.nodes().begin(),
+                                           gim.nodes().end(),
+                                           is_island);
+        EXPECT_EQ(1, num_isl);
+    }
+    //////////////////////////////////////////////////////////////////
+    // Now compile the graph in the streaming mode.
+    // It has to produce two islands
+    {
+        using namespace cv::gimpl;
+
+        GCompiler compiler(comp, {in_meta}, cv::compile_args());
+        GCompiler::GPtr graph = compiler.generateGraph();
+        GModel::Graph(*graph).metadata().set(Streaming{});
+        compiler.runPasses(*graph);
+
+        auto isl_model = GModel::ConstGraph(*graph).metadata()
+             .get<IslandModel>().model;
+        GIslandModel::ConstGraph gim(*isl_model);
+
+        const auto is_island = [&](ade::NodeHandle nh) {
+            return (NodeKind::ISLAND == gim.metadata(nh).get<NodeKind>().k);
+        };
+        const auto num_isl = std::count_if(gim.nodes().begin(),
+                                           gim.nodes().end(),
+                                           is_island);
+        EXPECT_EQ(2, num_isl);
+    }
+}
+
+// Fixme: add more tests on mixed (hetero) graphs
 // ADE-222, ADE-223

 // FIXME: add test on combination of user-specified island

--- a/modules/gapi/test/internal/gapi_transactions_test.cpp
+++ b/modules/gapi/test/internal/gapi_transactions_test.cpp
@@ -2,11 +2,14 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018 - 2020 Intel Corporation


 #include "../test_precomp.hpp"
+
 #include <ade/graph.hpp>
+#include <ade/typed_graph.hpp>
+
 #include "compiler/transactions.hpp"

 namespace opencv_test
@@ -33,10 +36,11 @@ struct SimpleGraph

    enum { node_nums = 5 };
    ade::Graph        graph;
-    ade::NodeHandle   fused_nh;                     /* For check that fusion  node is connected to the
-                                                               inputs of the prod and the outputs of the cons */
+    ade::NodeHandle   fused_nh;  // For check that fusion  node is connected to the
+                                 // inputs of the prod and the outputs of the cons
    std::array<ade::NodeHandle, node_nums>     nhs;
    std::array<ade::EdgeHandle, node_nums - 1> ehs;
+    using Change = ChangeT<>;
    Change::List changes;

    SimpleGraph()
@@ -192,8 +196,6 @@ TEST_F(Transactions, DropNode_Commit)

 TEST_F(Transactions, Fusion_Commit)
 {
-    namespace C = Change;
-
    fuse();
    commit();

@@ -204,8 +206,6 @@ TEST_F(Transactions, Fusion_Commit)

 TEST_F(Transactions, Fusion_RollBack)
 {
-    namespace C = Change;
-
    fuse();
    rollback();

@@ -219,4 +219,151 @@ TEST_F(Transactions, Fusion_RollBack)
    }
 }

+namespace
+{
+    struct MetaInt {
+        static const char *name() { return "int_meta"; }
+        int x;
+    };
+
+    struct MetaStr {
+        static const char *name() { return "string_meta"; }
+        std::string s;
+    };
+}
+
+TEST(PreservedMeta, TestMetaCopy_Full)
+{
+    ade::Graph g;
+    ade::TypedGraph<MetaInt, MetaStr> tg(g);
+
+    auto src_nh = tg.createNode();
+    tg.metadata(src_nh).set(MetaInt{42});
+    tg.metadata(src_nh).set(MetaStr{"hi"});
+
+    auto dst_nh = tg.createNode();
+
+    EXPECT_FALSE(tg.metadata(dst_nh).contains<MetaInt>());
+    EXPECT_FALSE(tg.metadata(dst_nh).contains<MetaStr>());
+
+    // Here we specify all the meta types we know about the src node
+    // Assume Preserved copies its all for us
+    Preserved<ade::NodeHandle, MetaInt, MetaStr>(g, src_nh).copyTo(g, dst_nh);
+
+    ASSERT_TRUE(tg.metadata(dst_nh).contains<MetaInt>());
+    ASSERT_TRUE(tg.metadata(dst_nh).contains<MetaStr>());
+
+    EXPECT_EQ(42,   tg.metadata(dst_nh).get<MetaInt>().x);
+    EXPECT_EQ("hi", tg.metadata(dst_nh).get<MetaStr>().s);
+}
+
+
+TEST(PreservedMeta, TestMetaCopy_Partial_Dst)
+{
+    ade::Graph g;
+    ade::TypedGraph<MetaInt, MetaStr> tg(g);
+
+    auto tmp_nh1 = tg.createNode();
+    auto tmp_nh2 = tg.createNode();
+    auto src_eh  = tg.link(tmp_nh1, tmp_nh2);
+
+    tg.metadata(src_eh).set(MetaInt{42});
+    tg.metadata(src_eh).set(MetaStr{"hi"});
+
+    auto tmp_nh3 = tg.createNode();
+    auto tmp_nh4 = tg.createNode();
+    auto dst_eh  = tg.link(tmp_nh3, tmp_nh4);
+
+    EXPECT_FALSE(tg.metadata(dst_eh).contains<MetaInt>());
+    EXPECT_FALSE(tg.metadata(dst_eh).contains<MetaStr>());
+
+    // Here we specify just a single meta type for the src node
+    // Assume Preserved copies only this type and nothing else
+    Preserved<ade::EdgeHandle, MetaStr>(g, src_eh).copyTo(g, dst_eh);
+
+    ASSERT_FALSE(tg.metadata(dst_eh).contains<MetaInt>());
+    ASSERT_TRUE (tg.metadata(dst_eh).contains<MetaStr>());
+
+    EXPECT_EQ("hi", tg.metadata(dst_eh).get<MetaStr>().s);
+}
+
+TEST(PreservedMeta, TestMetaCopy_Partial_Src)
+{
+    ade::Graph g;
+    ade::TypedGraph<MetaInt, MetaStr> tg(g);
+
+    auto src_nh = tg.createNode();
+    tg.metadata(src_nh).set(MetaInt{42});
+
+    auto dst_nh = tg.createNode();
+
+    EXPECT_FALSE(tg.metadata(dst_nh).contains<MetaInt>());
+    EXPECT_FALSE(tg.metadata(dst_nh).contains<MetaStr>());
+
+    // Here we specify all the meta types we know about the src node
+    // but the src node has just one of them.
+    // A valid situation, only MetaInt to be copied.
+    Preserved<ade::NodeHandle, MetaInt, MetaStr>(g, src_nh).copyTo(g, dst_nh);
+
+    ASSERT_TRUE (tg.metadata(dst_nh).contains<MetaInt>());
+    ASSERT_FALSE(tg.metadata(dst_nh).contains<MetaStr>());
+
+    EXPECT_EQ(42, tg.metadata(dst_nh).get<MetaInt>().x);
+}
+
+TEST(PreservedMeta, TestMetaCopy_Nothing)
+{
+    ade::Graph g;
+    ade::TypedGraph<MetaInt, MetaStr> tg(g);
+
+    auto src_nh = tg.createNode();
+    auto dst_nh = tg.createNode();
+
+    EXPECT_FALSE(tg.metadata(src_nh).contains<MetaInt>());
+    EXPECT_FALSE(tg.metadata(src_nh).contains<MetaStr>());
+
+    EXPECT_FALSE(tg.metadata(dst_nh).contains<MetaInt>());
+    EXPECT_FALSE(tg.metadata(dst_nh).contains<MetaStr>());
+
+    // Here we specify all the meta types we know about the src node
+    // but the src node has none of those. See how it works now
+    Preserved<ade::NodeHandle, MetaInt, MetaStr>(g, src_nh).copyTo(g, dst_nh);
+
+    ASSERT_FALSE(tg.metadata(dst_nh).contains<MetaInt>());
+    ASSERT_FALSE(tg.metadata(dst_nh).contains<MetaStr>());
+}
+
+TEST(PreservedMeta, DropEdge)
+{
+    ade::Graph g;
+    ade::TypedGraph<MetaInt, MetaStr> tg(g);
+
+    auto nh1 = tg.createNode();
+    auto nh2 = tg.createNode();
+    auto eh  = tg.link(nh1, nh2);
+
+    tg.metadata(eh).set(MetaInt{42});
+    tg.metadata(eh).set(MetaStr{"hi"});
+
+    // Drop an edge using the transaction API
+    using Change = ChangeT<MetaInt, MetaStr>;
+    Change::List changes;
+    changes.enqueue<Change::DropLink>(g, nh1, eh);
+
+    EXPECT_EQ(0u,      nh1->outNodes().size());
+    EXPECT_EQ(nullptr, eh);
+
+    // Now restore the edge and check if it's meta was restored
+    changes.rollback(g);
+
+    ASSERT_EQ(1u,      nh1->outNodes().size());
+    eh = *nh1->outEdges().begin();
+
+    ASSERT_TRUE(tg.metadata(eh).contains<MetaInt>());
+    ASSERT_TRUE(tg.metadata(eh).contains<MetaStr>());
+
+    EXPECT_EQ(42,   tg.metadata(eh).get<MetaInt>().x);
+    EXPECT_EQ("hi", tg.metadata(eh).get<MetaStr>().s);
+}
+
 } // opencv_test
--- a/modules/gapi/test/own/conc_queue_tests.cpp
+++ b/modules/gapi/test/own/conc_queue_tests.cpp
@@ -55,7 +55,7 @@ TEST(ConcQueue, Clear)
    EXPECT_FALSE(q.try_pop(x));
 }

-// In this test, every writer thread produce its own range of integer
+// In this test, every writer thread produces its own range of integer
 // numbers, writing those to a shared queue.
 //
 // Every reader thread pops elements from the queue (until -1 is
@@ -64,12 +64,12 @@ TEST(ConcQueue, Clear)
 // Finally, the master thread waits for completion of all other
 // threads and verifies that all the necessary data is
 // produced/obtained.
+namespace
+{
 using StressParam = std::tuple<int           // Num writer threads
                              ,int           // Num elements per writer
                              ,int           // Num reader threads
                              ,std::size_t>; // Queue capacity
-namespace
-{
 constexpr int STOP_SIGN = -1;
 constexpr int BASE      = 1000;
 }

--- a/modules/gapi/test/own/last_written_value_tests.cpp
+++ b/modules/gapi/test/own/last_written_value_tests.cpp
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2020 Intel Corporation
+
+#include "../test_precomp.hpp"
+
+#include <unordered_set>
+#include <thread>
+
+#include "executor/last_value.hpp"
+
+namespace opencv_test {
+using namespace cv::gapi;
+
+TEST(LastValue, PushPop) {
+    own::last_written_value<int> v;
+    for (int i = 0; i < 100; i++) {
+        v.push(i);
+
+        int x = 1;
+        v.pop(x);
+        EXPECT_EQ(x, i);
+    }
+}
+
+TEST(LastValue, TryPop) {
+    own::last_written_value<int> v;
+    int x = 0;
+    EXPECT_FALSE(v.try_pop(x));
+
+    v.push(1);
+    EXPECT_TRUE(v.try_pop(x));
+    EXPECT_EQ(1, x);
+}
+
+TEST(LastValue, Clear) {
+    own::last_written_value<int> v;
+    v.push(42);
+    v.clear();
+
+    int x = 0;
+    EXPECT_FALSE(v.try_pop(x));
+}
+
+TEST(LastValue, Overwrite) {
+    own::last_written_value<int> v;
+    v.push(42);
+    v.push(0);
+
+    int x = -1;
+    EXPECT_TRUE(v.try_pop(x));
+    EXPECT_EQ(0, x);
+}
+
+// In this test, every writer thread produces its own range of integer
+// numbers, writing those to a shared queue.
+//
+// Every reader thread pops elements from the queue (until -1 is
+// reached) and stores those in its own associated set.
+//
+// Finally, the master thread waits for completion of all other
+// threads and verifies that all the necessary data is
+// produced/obtained.
+namespace {
+using StressParam = std::tuple<int   // Num writer threads
+                              ,int   // Num elements per writer
+                              ,int>; // Num reader threads
+constexpr int STOP_SIGN = -1;
+constexpr int BASE      = 1000;
+}
+struct LastValue_: public ::testing::TestWithParam<StressParam> {
+    using V = own::last_written_value<int>;
+    using S = std::unordered_set<int>;
+
+    static void writer(int base, int writes, V& v) {
+        for (int i = 0; i < writes; i++) {
+            if (i % 2) {
+                std::this_thread::sleep_for(std::chrono::milliseconds{1});
+            }
+            v.push(base + i);
+        }
+        v.push(STOP_SIGN);
+    }
+
+    static void reader(V& v, S& s) {
+        int x = 0;
+        while (true) {
+            v.pop(x);
+            if (x == STOP_SIGN) {
+                // If this thread was lucky enough to read this STOP_SIGN,
+                // push it back to v to make other possible readers able
+                // to read it again (note due to the last_written_value
+                // semantic, those STOP_SIGN could be simply lost i.e.
+                // overwritten.
+                v.push(STOP_SIGN);
+                return;
+            }
+            s.insert(x);
+        }
+    }
+};
+
+TEST_P(LastValue_, Test)
+{
+    int num_writers = 0;
+    int num_writes  = 0;
+    int num_readers = 0;
+    std::tie(num_writers, num_writes, num_readers) = GetParam();
+
+    CV_Assert(num_writers <   20);
+    CV_Assert(num_writes  < BASE);
+
+    V v;
+
+    // Start reader threads
+    std::vector<S> storage(num_readers);
+    std::vector<std::thread> readers;
+    for (S& s : storage) {
+        readers.emplace_back(reader, std::ref(v), std::ref(s));
+    }
+
+    // Start writer threads, also pre-generate reference numbers
+    S reference;
+    std::vector<std::thread> writers;
+    for (int w = 0; w < num_writers; w++) {
+        writers.emplace_back(writer, w*BASE, num_writes, std::ref(v));
+        for (int r = 0; r < num_writes; r++) {
+            reference.insert(w*BASE + r);
+        }
+    }
+
+    // Wait for completions
+    for (auto &t : readers) t.join();
+    for (auto &t : writers) t.join();
+
+    // Validate the result. Some values are read, and the values are
+    // correct (i.e. such values have been written)
+    std::size_t num_values_read = 0u;
+    for (const auto &s : storage) {
+        num_values_read += s.size();
+        for (auto &x : s) {
+            EXPECT_TRUE(reference.count(x) > 0);
+        }
+    }
+    // NOTE: Some combinations may end-up in 0 values read
+    // it is normal, the main thing is that the test shouldn't hang!
+    EXPECT_LE(0u, num_values_read);
+}
+
+INSTANTIATE_TEST_CASE_P(LastValueStress, LastValue_,
+                        Combine( Values(1, 2, 4, 8, 16)  // writers
+                               , Values(32, 96, 256)     // writes
+                               , Values(1, 2, 10)));     // readers
+} // namespace opencv_test
--- a/modules/gapi/test/streaming/gapi_streaming_tests.cpp
+++ b/modules/gapi/test/streaming/gapi_streaming_tests.cpp
@@ -2,11 +2,13 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2019 Intel Corporation
+// Copyright (C) 2019-2020 Intel Corporation


 #include "../test_precomp.hpp"

+#include <thread> // sleep_for (Delay)
+
 #include <opencv2/gapi/cpu/core.hpp>
 #include <opencv2/gapi/cpu/imgproc.hpp>

@@ -18,6 +20,7 @@
 #include <opencv2/gapi/ocl/imgproc.hpp>

 #include <opencv2/gapi/streaming/cap.hpp>
+#include <opencv2/gapi/streaming/desync.hpp>

 namespace opencv_test
 {
@@ -100,6 +103,16 @@ struct GAPI_Streaming: public ::testing::TestWithParam<KernelPackage> {
    }
 };

+G_API_OP(Delay, <cv::GMat(cv::GMat, int)>, "org.opencv.test.delay") {
+    static cv::GMatDesc outMeta(const cv::GMatDesc &in, int) { return in; }
+};
+GAPI_OCV_KERNEL(OCVDelay, Delay) {
+    static void run(const cv::Mat &in, int ms, cv::Mat &out) {
+        std::this_thread::sleep_for(std::chrono::milliseconds{ms});
+        in.copyTo(out);
+    }
+};
+
 } // anonymous namespace

 TEST_P(GAPI_Streaming, SmokeTest_ConstInput_GMat)
@@ -794,6 +807,104 @@ TEST(GAPI_Streaming_Types, OutputVector)
    EXPECT_LT(0u, num_frames);
 }

+G_API_OP(DimsChans,
+         <std::tuple<cv::GArray<int>, cv::GOpaque<int>>(cv::GMat)>,
+         "test.streaming.dims_chans") {
+    static std::tuple<cv::GArrayDesc, cv::GOpaqueDesc> outMeta(const cv::GMatDesc &) {
+        return std::make_tuple(cv::empty_array_desc(),
+                               cv::empty_gopaque_desc());
+    }
+};
+
+GAPI_OCV_KERNEL(OCVDimsChans, DimsChans) {
+    static void run(const cv::Mat &in, std::vector<int> &ov, int &oi) {
+        ov = {in.cols, in.rows};
+        oi = in.channels();
+    }
+};
+
+struct GAPI_Streaming_TemplateTypes: ::testing::Test {
+    // There was a problem in GStreamingExecutor
+    // when outputs were formally not used by the graph
+    // but still should be in place as operation need
+    // to produce them, and host data type constructors
+    // were missing for GArray and GOpaque in this case.
+    // This test tests exactly this.
+
+    GAPI_Streaming_TemplateTypes() {
+        // Prepare everything for the test:
+        // Graph itself
+        blur = cv::gapi::boxFilter(in, -1, cv::Size(3,3));
+
+        cv::GMat blur_d = cv::gapi::streaming::desync(blur);
+        std::tie(vec, opq) = DimsChans::on(blur_d);
+
+        // Kernel package
+        pkg = cv::gapi::kernels<OCVDimsChans>();
+
+        // Input mat
+        in_mat = cv::Mat::eye(cv::Size(320,240), CV_8UC3);
+    }
+
+    cv::GMat in;
+    cv::GMat blur;
+    cv::GArray<int> vec;
+    cv::GOpaque<int> opq;
+    cv::gapi::GKernelPackage pkg;
+    cv::Mat in_mat;
+};
+
+TEST_F(GAPI_Streaming_TemplateTypes, UnusedVectorIsOK)
+{
+    // Declare graph without listing vec as output
+    auto sc = cv::GComputation(cv::GIn(in), cv::GOut(blur, opq))
+        .compileStreaming(cv::compile_args(pkg));
+    sc.setSource(cv::gin(in_mat));
+    sc.start();
+
+    cv::optional<cv::Mat> out_mat;
+    cv::optional<int> out_int;
+
+    int counter = 0;
+    while (sc.pull(cv::gout(out_mat, out_int))) {
+        if (counter++ == 10) {
+            // Stop the test after 10 iterations
+            sc.stop();
+            break;
+        }
+        GAPI_Assert(out_mat || out_int);
+        if (out_int) {
+            EXPECT_EQ(  3, out_int.value());
+        }
+    }
+}
+
+TEST_F(GAPI_Streaming_TemplateTypes, UnusedOpaqueIsOK)
+{
+    // Declare graph without listing opq as output
+    auto sc = cv::GComputation(cv::GIn(in), cv::GOut(blur, vec))
+        .compileStreaming(cv::compile_args(pkg));
+    sc.setSource(cv::gin(in_mat));
+    sc.start();
+
+    cv::optional<cv::Mat> out_mat;
+    cv::optional<std::vector<int> > out_vec;
+
+    int counter = 0;
+    while (sc.pull(cv::gout(out_mat, out_vec))) {
+        if (counter++ == 10) {
+            // Stop the test after 10 iterations
+            sc.stop();
+            break;
+        }
+        GAPI_Assert(out_mat || out_vec);
+        if (out_vec) {
+            EXPECT_EQ(320, out_vec.value()[0]);
+            EXPECT_EQ(240, out_vec.value()[1]);
+        }
+    }
+}
+
 struct GAPI_Streaming_Unit: public ::testing::Test {
    cv::Mat m;

@@ -882,7 +993,7 @@ TEST_F(GAPI_Streaming_Unit, StartStopStart_NoSetSource)
    EXPECT_NO_THROW(sc.setSource(cv::gin(m, m)));
    EXPECT_NO_THROW(sc.start());
    EXPECT_NO_THROW(sc.stop());
-    EXPECT_ANY_THROW(sc.start()); // Should fails since setSource was not called
+    EXPECT_ANY_THROW(sc.start()); // Should fail since setSource was not called
 }

 TEST_F(GAPI_Streaming_Unit, StartStopStress_Const)
@@ -1018,4 +1129,380 @@ TEST(Streaming, Python_Pull_Overload)
    EXPECT_FALSE(ccomp.running());
 }

+TEST(GAPI_Streaming_Desync, SmokeTest_Regular)
+{
+    cv::GMat in;
+    cv::GMat tmp1 = cv::gapi::boxFilter(in, -1, cv::Size(3,3));
+    cv::GMat out1 = cv::gapi::Canny(tmp1, 32, 128, 3);
+
+    // FIXME: Unary desync should not require tie!
+    cv::GMat tmp2 = cv::gapi::streaming::desync(tmp1);
+    cv::GMat out2 = tmp2 / cv::gapi::Sobel(tmp2, CV_8U, 1, 1);;
+
+    cv::Mat test_in = cv::Mat::eye(cv::Size(32,32), CV_8UC3);
+    cv::Mat test_out1, test_out2;
+    cv::GComputation(cv::GIn(in), cv::GOut(out1, out2))
+        .apply(cv::gin(test_in), cv::gout(test_out1, test_out2));
+}
+
+TEST(GAPI_Streaming_Desync, SmokeTest_Streaming)
+{
+    initTestDataPath();
+
+    cv::GMat in;
+    cv::GMat tmp1 = cv::gapi::boxFilter(in, -1, cv::Size(3,3));
+    cv::GMat out1 = cv::gapi::Canny(tmp1, 32, 128, 3);
+
+    cv::GMat tmp2 = cv::gapi::streaming::desync(tmp1);
+    cv::GMat out2 = Delay::on(tmp2,10) / cv::gapi::Sobel(tmp2, CV_8U, 1, 1);
+
+    auto sc = cv::GComputation(cv::GIn(in), cv::GOut(out1, out2))
+        .compileStreaming(cv::compile_args(cv::gapi::kernels<OCVDelay>()));
+    auto sc_file = findDataFile("cv/video/768x576.avi");
+    auto sc_src = gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(sc_file);
+    sc.setSource(cv::gin(sc_src));
+    sc.start();
+
+    std::size_t out1_hits = 0u;
+    std::size_t out2_hits = 0u;
+    cv::optional<cv::Mat> test_out1, test_out2;
+    while (sc.pull(cv::gout(test_out1, test_out2))) {
+        GAPI_Assert(test_out1 || test_out2);
+        if (test_out1) out1_hits++;
+        if (test_out2) out2_hits++;
+    }
+    EXPECT_EQ(100u, out1_hits);      // out1 must be available for all frames
+    EXPECT_LE(out2_hits, out1_hits); // out2 must appear less times than out1
+    std::cout << "Got " << out1_hits << " out1's and " << out2_hits << " out2's" << std::endl;
+}
+
+TEST(GAPI_Streaming_Desync, SmokeTest_Streaming_TwoParts)
+{
+    initTestDataPath();
+
+    cv::GMat in;
+    cv::GMat tmp1 = cv::gapi::boxFilter(in, -1, cv::Size(3,3));
+    cv::GMat out1 = cv::gapi::Canny(tmp1, 32, 128, 3);
+
+    // Desynchronized path 1
+    cv::GMat tmp2 = cv::gapi::streaming::desync(tmp1);
+    cv::GMat out2 = tmp2 / cv::gapi::Sobel(tmp2, CV_8U, 1, 1);
+
+    // Desynchronized path 2
+    cv::GMat tmp3 = cv::gapi::streaming::desync(tmp1);
+    cv::GMat out3 = 0.5*tmp3 +  0.5*cv::gapi::medianBlur(tmp3, 7);
+
+    // The code should compile and execute well (desynchronized parts don't cross)
+    auto sc = cv::GComputation(cv::GIn(in), cv::GOut(out1, out2, out3))
+        .compileStreaming();
+    auto sc_file = findDataFile("cv/video/768x576.avi");
+    auto sc_src = gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(sc_file);
+    sc.setSource(cv::gin(sc_src));
+    sc.start();
+
+    std::size_t test_frames = 0u;
+    cv::optional<cv::Mat> test_out1, test_out2, test_out3;
+    while (sc.pull(cv::gout(test_out1, test_out2, test_out3))) {
+        GAPI_Assert(test_out1 || test_out2 || test_out3);
+        if (test_out1) {
+            // count frames only for synchronized output
+            test_frames++;
+        }
+    }
+    EXPECT_EQ(100u, test_frames);
+}
+
+TEST(GAPI_Streaming_Desync, Negative_NestedDesync_Tier0)
+{
+    cv::GMat in;
+    cv::GMat tmp1 = cv::gapi::boxFilter(in, -1, cv::Size(3,3));
+
+    // Desynchronized path 1
+    cv::GMat tmp2 = cv::gapi::streaming::desync(tmp1);
+    cv::GMat out1 = cv::gapi::medianBlur(tmp2, 3);
+
+    // Desynchronized path 2, nested from 1 (directly from desync)
+    cv::GMat tmp3 = cv::gapi::streaming::desync(tmp2);
+    cv::GMat out2 = 0.5*tmp3;
+
+    // This shouldn't compile
+    EXPECT_ANY_THROW(cv::GComputation(cv::GIn(in), cv::GOut(out1, out2))
+                     .compileStreaming());
+}
+
+TEST(GAPI_Streaming_Desync, Negative_NestedDesync_Tier1)
+{
+    cv::GMat in;
+    cv::GMat tmp1 = cv::gapi::boxFilter(in, -1, cv::Size(3,3));
+
+    // Desynchronized path 1
+    cv::GMat tmp2 = cv::gapi::streaming::desync(tmp1);
+    cv::GMat out1 = cv::gapi::medianBlur(tmp2, 3);
+
+    // Desynchronized path 2, nested from 1 (indirectly from desync)
+    cv::GMat tmp3 = cv::gapi::streaming::desync(out1);
+    cv::GMat out2 = 0.5*tmp3;
+
+    // This shouldn't compile
+    EXPECT_ANY_THROW(cv::GComputation(cv::GIn(in), cv::GOut(out1, out2))
+                     .compileStreaming());
+}
+
+TEST(GAPI_Streaming_Desync, Negative_CrossMainPart_Tier0)
+{
+    cv::GMat in;
+    cv::GMat tmp1 = cv::gapi::boxFilter(in, -1, cv::Size(3,3));
+
+    // Desynchronized path: depends on both tmp1 and tmp2
+    cv::GMat tmp2 = cv::gapi::streaming::desync(tmp1);
+    cv::GMat out1 = 0.5*tmp1 + 0.5*tmp2;
+
+    // This shouldn't compile
+    EXPECT_ANY_THROW(cv::GComputation(in, out1).compileStreaming());
+}
+
+TEST(GAPI_Streaming_Desync, Negative_CrossMainPart_Tier1)
+{
+    cv::GMat in;
+    cv::GMat tmp1 = cv::gapi::boxFilter(in, -1, cv::Size(3,3));
+
+    // Desynchronized path: depends on both tmp1 and tmp2
+    cv::GMat tmp2 = cv::gapi::streaming::desync(tmp1);
+    cv::GMat out1 = 0.5*tmp1 + 0.5*cv::gapi::medianBlur(tmp2, 3);
+
+    // This shouldn't compile
+    EXPECT_ANY_THROW(cv::GComputation(in, out1).compileStreaming());
+}
+
+TEST(GAPI_Streaming_Desync, Negative_CrossOtherDesync_Tier0)
+{
+    cv::GMat in;
+    cv::GMat tmp1 = cv::gapi::boxFilter(in, -1, cv::Size(3,3));
+
+    // Desynchronized path 1
+    cv::GMat tmp2 = cv::gapi::streaming::desync(tmp1);
+    cv::GMat out1 = 0.5*tmp2;
+
+    // Desynchronized path 2 (depends on 1)
+    cv::GMat tmp3 = cv::gapi::streaming::desync(tmp1);
+    cv::GMat out2 = 0.5*tmp3 + tmp2;
+
+    // This shouldn't compile
+    EXPECT_ANY_THROW(cv::GComputation(cv::GIn(in), cv::GOut(out1, out2))
+                     .compileStreaming());
+}
+
+TEST(GAPI_Streaming_Desync, Negative_CrossOtherDesync_Tier1)
+{
+    cv::GMat in;
+    cv::GMat tmp1 = cv::gapi::boxFilter(in, -1, cv::Size(3,3));
+
+    // Desynchronized path 1
+    cv::GMat tmp2 = cv::gapi::streaming::desync(tmp1);
+    cv::GMat out1 = 0.5*tmp2;
+
+    // Desynchronized path 2 (depends on 1)
+    cv::GMat tmp3 = cv::gapi::streaming::desync(tmp1);
+    cv::GMat out2 = 0.5*cv::gapi::medianBlur(tmp3,3) + 1.0*tmp2;
+
+    // This shouldn't compile
+    EXPECT_ANY_THROW(cv::GComputation(cv::GIn(in), cv::GOut(out1, out2))
+                     .compileStreaming());
+}
+
+TEST(GAPI_Streaming_Desync, Negative_SynchronizedPull)
+{
+    initTestDataPath();
+
+    cv::GMat in;
+    cv::GMat out1 = cv::gapi::boxFilter(in, -1, cv::Size(3,3));
+
+    cv::GMat tmp1 = cv::gapi::streaming::desync(out1);
+    cv::GMat out2 = 0.5*tmp1;
+
+    auto sc = cv::GComputation(cv::GIn(in), cv::GOut(out1, out2))
+        .compileStreaming();
+
+    auto sc_file = findDataFile("cv/video/768x576.avi");
+    auto sc_src = gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(sc_file);
+    sc.setSource(cv::gin(sc_src));
+    sc.start();
+
+    cv::Mat o1, o2;
+    EXPECT_ANY_THROW(sc.pull(cv::gout(o1, o2)));
+}
+
+TEST(GAPI_Streaming_Desync, UseSpecialPull)
+{
+    initTestDataPath();
+
+    cv::GMat in;
+    cv::GMat out1 = cv::gapi::boxFilter(in, -1, cv::Size(3,3));
+
+    cv::GMat tmp1 = cv::gapi::streaming::desync(out1);
+    cv::GMat out2 = 0.5*tmp1;
+
+    auto sc = cv::GComputation(cv::GIn(in), cv::GOut(out1, out2))
+        .compileStreaming();
+
+    auto sc_file = findDataFile("cv/video/768x576.avi");
+    auto sc_src = gapi::wip::make_src<cv::gapi::wip::GCaptureSource>(sc_file);
+    sc.setSource(cv::gin(sc_src));
+    sc.start();
+
+    cv::optional<cv::Mat> o1, o2;
+    std::size_t num_frames = 0u;
+
+    while (sc.pull(cv::gout(o1, o2))) {
+        if (o1) num_frames++;
+    }
+    EXPECT_EQ(100u, num_frames);
+}
+
+G_API_OP(ProduceVector, <cv::GArray<int>(cv::GMat)>, "test.desync.vector") {
+    static cv::GArrayDesc outMeta(const cv::GMatDesc &) {
+        return cv::empty_array_desc();
+    }
+};
+
+G_API_OP(ProduceOpaque, <cv::GOpaque<int>(cv::GMat)>, "test.desync.opaque") {
+    static cv::GOpaqueDesc outMeta(const cv::GMatDesc &) {
+        return cv::empty_gopaque_desc();
+    }
+};
+
+GAPI_OCV_KERNEL(OCVVector, ProduceVector) {
+    static void run(const cv::Mat& in, std::vector<int> &out) {
+        out = {in.cols, in.rows};
+    }
+};
+
+GAPI_OCV_KERNEL(OCVOpaque, ProduceOpaque) {
+    static void run(const cv::Mat &in, int &v) {
+        v = in.channels();
+    }
+};
+
+namespace {
+cv::GStreamingCompiled desyncTestObject() {
+    cv::GMat in;
+    cv::GMat blur = cv::gapi::boxFilter(in, -1, cv::Size(3,3));
+
+    cv::GMat blur_d = cv::gapi::copy(cv::gapi::streaming::desync(blur));
+    cv::GMat d1 = Delay::on(blur_d, 10);
+    cv::GMat d2 = Delay::on(blur_d, 30);
+
+    cv::GArray<int>  vec = ProduceVector::on(d1);
+    cv::GOpaque<int> opq = ProduceOpaque::on(d2);
+
+    auto pkg = cv::gapi::kernels<OCVDelay, OCVVector, OCVOpaque>();
+    return cv::GComputation(cv::GIn(in), cv::GOut(blur, vec, opq))
+        .compileStreaming(cv::compile_args(pkg));
+}
+} // anonymous namespace
+
+TEST(GAPI_Streaming_Desync, MultipleDesyncOutputs_1) {
+    auto sc = desyncTestObject();
+    const cv::Mat in_mat = cv::Mat::eye(cv::Size(320,240), CV_8UC3);
+
+    sc.setSource(cv::gin(in_mat));
+    sc.start();
+
+    cv::optional<cv::Mat> out_mat;
+    cv::optional<std::vector<int> > out_vec;
+    cv::optional<int> out_int;
+
+    int counter = 0;
+    while (sc.pull(cv::gout(out_mat, out_vec, out_int))) {
+        if (counter++ == 1000) {
+            // Stop the test after 1000 iterations
+            sc.stop();
+            break;
+        }
+        GAPI_Assert(out_mat || out_vec || out_int);
+
+        // out_vec and out_int are on the same desynchronized path
+        // they MUST arrive together. If one is available, the other
+        // also must be available.
+        if (out_vec) { ASSERT_TRUE(out_int.has_value()); }
+        if (out_int) { ASSERT_TRUE(out_vec.has_value()); }
+
+        if (out_vec || out_int) {
+            EXPECT_EQ(320, out_vec.value()[0]);
+            EXPECT_EQ(240, out_vec.value()[1]);
+            EXPECT_EQ(  3, out_int.value());
+        }
+    }
+}
+
+TEST(GAPI_Streaming_Desync, StartStop_Stress) {
+    auto sc = desyncTestObject();
+    const cv::Mat in_mat = cv::Mat::eye(cv::Size(320,240), CV_8UC3);
+
+    cv::optional<cv::Mat> out_mat;
+    cv::optional<std::vector<int> > out_vec;
+    cv::optional<int> out_int;
+
+    for (int i = 0; i < 10; i++) {
+        sc.setSource(cv::gin(in_mat));
+        sc.start();
+        int counter = 0;
+        while (counter++ < 100) {
+            sc.pull(cv::gout(out_mat, out_vec, out_int));
+            GAPI_Assert(out_mat || out_vec || out_int);
+            if (out_vec) { ASSERT_TRUE(out_int.has_value()); }
+            if (out_int) { ASSERT_TRUE(out_vec.has_value()); }
+        }
+        sc.stop();
+    }
+}
+
+GAPI_FLUID_KERNEL(FluidCopy, cv::gapi::core::GCopy, false) {
+    static const int Window = 1;
+
+    static void run(const cv::gapi::fluid::View &in,
+                          cv::gapi::fluid::Buffer &out) {
+        const uint8_t *in_ptr = in.InLineB(0);
+        uint8_t *out_ptr = out.OutLineB(0);
+
+        const auto in_type = CV_MAKETYPE(in.meta().depth, in.meta().chan);
+        const auto out_type = CV_MAKETYPE(out.meta().depth, out.meta().chan);
+        GAPI_Assert(in_type == out_type);
+        std::copy_n(in_ptr, in.length()*CV_ELEM_SIZE(in_type), out_ptr);
+    }
+};
+
+
+TEST(GAPI_Streaming_Desync, DesyncObjectConsumedByTwoIslandsViaSeparateDesync) {
+    // See comment in the implementation of cv::gapi::streaming::desync (.cpp)
+    cv::GMat in;
+    cv::GMat tmp = cv::gapi::boxFilter(in, -1, cv::Size(3,3));
+
+    cv::GMat tmp1 = cv::gapi::streaming::desync(tmp);
+    cv::GMat out1 = cv::gapi::copy(tmp1); // ran via Fluid backend
+
+    cv::GMat tmp2 = cv::gapi::streaming::desync(tmp);
+    cv::GMat out2 = tmp2 * 0.5;           // ran via OCV backend
+
+    auto c = cv::GComputation(cv::GIn(in), cv::GOut(out1, out2));
+    auto p = cv::gapi::kernels<FluidCopy>();
+
+    EXPECT_NO_THROW(c.compileStreaming(cv::compile_args(p)));
+}
+
+TEST(GAPI_Streaming_Desync, DesyncObjectConsumedByTwoIslandsViaSameDesync) {
+    // See comment in the implementation of cv::gapi::streaming::desync (.cpp)
+    cv::GMat in;
+    cv::GMat tmp = cv::gapi::boxFilter(in, -1, cv::Size(3,3));
+
+    cv::GMat tmp1 = cv::gapi::streaming::desync(tmp);
+    cv::GMat out1 = cv::gapi::copy(tmp1); // ran via Fluid backend
+    cv::GMat out2 = out1 - 0.5*tmp1;      // ran via OCV backend
+
+    auto c = cv::GComputation(cv::GIn(in), cv::GOut(out1, out2));
+    auto p = cv::gapi::kernels<FluidCopy>();
+
+    EXPECT_NO_THROW(c.compileStreaming(cv::compile_args(p)));
+}
+
 } // namespace opencv_test