From dc31e20925064e9434cd5f879fc354ae51e355aa Mon Sep 17 00:00:00 2001
From: Anatoliy Talamanov
Date: Thu, 18 Mar 2021 23:33:19 +0300
Subject: [PATCH] Merge pull request #19709 from TolyaTalamanov:at/async-inferlist-infer2

G-API: Implement async version for InferList & Infer2

* Implement async version for InferList & Infer2
* Fix warning
* Fix bug with ROI ordering
* Post the input meta instead of an empty one
* Address review comments
---
 modules/gapi/src/backends/ie/giebackend.cpp    | 299 +++++++++---------
 .../gapi/test/infer/gapi_infer_ie_test.cpp     |   2 -
 2 files changed, 149 insertions(+), 152 deletions(-)

diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp
index 3af09d4381..0fb7062fce 100644
--- a/modules/gapi/src/backends/ie/giebackend.cpp
+++ b/modules/gapi/src/backends/ie/giebackend.cpp
@@ -18,6 +18,7 @@
 #include 
 #include 
+#include <atomic>
 #include 
@@ -289,7 +290,7 @@ public:
     }
 
     template<typename T>
-    std::vector<T>& outVecR(std::size_t output) {
+    std::vector<T>& outVecR(std::size_t output) {
         return outVecRef(output).wref<T>();
     }
@@ -298,8 +299,9 @@ public:
     const cv::Mat&        inMat  (std::size_t input) const;
     const cv::MediaFrame& inFrame(std::size_t input) const;
 
-    cv::Mat&     outMatR(std::size_t idx);
-    cv::GRunArgP output (std::size_t idx);
+    const cv::GRunArg& input  (std::size_t idx) const;
+    cv::GRunArgP       output (std::size_t idx);
+    cv::Mat&           outMatR(std::size_t idx);
 
     const IEUnit                          &uu;
     cv::gimpl::GIslandExecutable::IOutput &out;
@@ -385,6 +387,10 @@ cv::GRunArgP IECallContext::output(std::size_t idx) {
     return m_output_objs[idx].second;
 };
 
+const cv::GRunArg& IECallContext::input(std::size_t idx) const {
+    return m_input_objs[idx].second;
+}
+
 cv::detail::VectorRef& IECallContext::outVecRef(std::size_t idx) {
     return cv::util::get<cv::detail::VectorRef>(m_results.at(idx));
 }
@@ -424,7 +430,6 @@ cv::GArg IECallContext::packArg(const cv::GArg &arg) {
     }
 }
 
-
 struct IECallable {
     static const char *name() { return "IERequestCallable"; }
     using Run = std::function<void(std::shared_ptr<IECallContext>, cv::gimpl::ie::RequestPool&)>;
@@ -513,7 +518,7 @@ public:
     explicit RequestPool(std::vector<InferenceEngine::InferRequest>&& requests);
 
-    void execute(Task&& t, bool async = true);
+    void execute(Task&& t);
     void waitAndShutdown();
 
 private:
@@ -531,23 +536,12 @@ cv::gimpl::ie::RequestPool::RequestPool(std::vector(it), std::get<1>(it)); }
@@ -752,12 +746,65 @@ static void PostOutputs(InferenceEngine::InferRequest &request,
         IE::Blob::Ptr this_blob = request.GetBlob(ctx->uu.params.output_names[i]);
         copyFromIE(this_blob, out_mat);
         auto output = ctx->output(i);
-        ctx->out.meta(output, cv::GRunArg::Meta{});
+        ctx->out.meta(output, ctx->input(0).meta);
         ctx->out.post(std::move(output));
     }
 }
 
+class PostOutputsList {
+public:
+    PostOutputsList(size_t size,
+                    std::shared_ptr<IECallContext> ctx,
+                    std::vector<std::vector<int>>&& cached_dims);
+
+    void operator()(InferenceEngine::InferRequest &request, size_t pos) const;
+
+private:
+    struct Priv {
+        size_t size;
+        std::atomic<size_t> finished{0u};
+        std::shared_ptr<IECallContext> ctx;
+        std::vector<std::vector<int>> cached_dims;
+    };
+    std::shared_ptr<Priv> m_priv;
+};
+
+PostOutputsList::PostOutputsList(size_t size,
+                                 std::shared_ptr<IECallContext> ctx,
+                                 std::vector<std::vector<int>>&& cached_dims)
+    : m_priv(new Priv()) {
+    m_priv->size = size;
+    m_priv->ctx = ctx;
+    m_priv->cached_dims = std::move(cached_dims);
+}
+
+void PostOutputsList::operator()(InferenceEngine::InferRequest &req, size_t pos) const {
+    auto&& ctx         = m_priv->ctx;
+    auto&& cached_dims = m_priv->cached_dims;
+    auto&& finished    = m_priv->finished;
+    auto&& size        = m_priv->size;
+    for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
+        std::vector<cv::Mat> &out_vec = ctx->outVecR<cv::Mat>(i);
+
+        IE::Blob::Ptr out_blob = req.GetBlob(ctx->uu.params.output_names[i]);
+        GAPI_Assert(out_blob);
+
+        // FIXME: Avoid data copy. Not sure if it is possible though
+        out_vec[pos].create(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision()));
+        copyFromIE(out_blob, out_vec[pos]);
+    }
+    ++finished;
+
+    if (finished == size) {
+        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
+            auto output = ctx->output(i);
+            ctx->out.meta(output, ctx->input(0).meta);
+            ctx->out.post(std::move(output));
+        }
+    }
+}
+
 struct Infer: public cv::detail::KernelTag {
     using API = cv::GInferBase;
     static cv::gapi::GBackend backend()  { return cv::gapi::ie::backend(); }
@@ -977,65 +1024,44 @@ struct InferList: public cv::detail::KernelTag {
 
     static void run(std::shared_ptr<IECallContext> ctx,
                     cv::gimpl::ie::RequestPool &reqPool) {
-        using namespace std::placeholders;
-        reqPool.execute(
-            cv::gimpl::ie::RequestPool::Task {
-                [ctx](InferenceEngine::InferRequest &req) {
-                    // non-generic version for now:
-                    // - assumes zero input is always ROI list
-                    // - assumes all inputs/outputs are always Mats
-                    const auto& in_roi_vec = ctx->inArg<cv::detail::VectorRef>(0u).rref<cv::Rect>();
-                    // NB: In case there is no input data, the outputs still need to be posted
-                    if (in_roi_vec.empty()) {
-                        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                            auto output = ctx->output(i);
-                            ctx->out.meta(output, cv::GRunArg::Meta{});
-                            ctx->out.post(std::move(output));
-                        }
-                        return;
-                    }
-
-                    IE::Blob::Ptr this_blob = extractBlob(*ctx, 1);
-
-                    // FIXME: This could be done ONCE at graph compile stage!
-                    std::vector<std::vector<int>> cached_dims(ctx->uu.params.num_out);
-                    for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                        const IE::DataPtr& ie_out = ctx->uu.outputs.at(ctx->uu.params.output_names[i]);
-                        cached_dims[i] = toCV(ie_out->getTensorDesc().getDims());
-                        // FIXME: Shouldn't this be done automatically
-                        // by some resetInternalData(), etc.? (Probably at the GExecutor level)
-                        ctx->outVecR<cv::Mat>(i).clear();
-                    }
-
-                    for (auto&& rc : in_roi_vec) {
-                        IE::Blob::Ptr roi_blob = IE::make_shared_blob(this_blob, toIE(rc));
-                        req.SetBlob(ctx->uu.params.input_names[0u], roi_blob);
-
-                        req.Infer();
-
-                        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                            std::vector<cv::Mat> &out_vec = ctx->outVecR<cv::Mat>(i);
-
-                            IE::Blob::Ptr out_blob = req.GetBlob(ctx->uu.params.output_names[i]);
-                            GAPI_Assert(out_blob);
+        const auto& in_roi_vec = ctx->inArg<cv::detail::VectorRef>(0u).rref<cv::Rect>();
+        // NB: In case there is no input data, the outputs still need to be posted
+        if (in_roi_vec.empty()) {
+            for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
+                auto output = ctx->output(i);
+                ctx->out.meta(output, ctx->input(0).meta);
+                ctx->out.post(std::move(output));
+            }
+            return;
+        }
 
-                            cv::Mat out_mat(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision()));
-                            // FIXME: Avoid data copy. Not sure if it is possible though
-                            copyFromIE(out_blob, out_mat);
-                            out_vec.push_back(std::move(out_mat));
-                        }
-                    }
+        IE::Blob::Ptr this_blob = extractBlob(*ctx, 1);
+        std::vector<std::vector<int>> cached_dims(ctx->uu.params.num_out);
+        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
+            const IE::DataPtr& ie_out = ctx->uu.outputs.at(ctx->uu.params.output_names[i]);
+            cached_dims[i] = toCV(ie_out->getTensorDesc().getDims());
+            // FIXME: Shouldn't this be done automatically
+            // by some resetInternalData(), etc.? (Probably at the GExecutor level)
+            auto& out_vec = ctx->outVecR<cv::Mat>(i);
+            out_vec.clear();
+            out_vec.resize(in_roi_vec.size());
+        }
 
-                    for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                        auto output = ctx->output(i);
-                        ctx->out.meta(output, cv::GRunArg::Meta{});
-                        ctx->out.post(std::move(output));
-                    }
+        PostOutputsList callback(in_roi_vec.size(), ctx, std::move(cached_dims));
+        for (auto&& it : ade::util::indexed(in_roi_vec)) {
+                  auto  pos = ade::util::index(it);
+            const auto& rc  = ade::util::value(it);
+            reqPool.execute(
+                cv::gimpl::ie::RequestPool::Task {
+                    [ctx, rc, this_blob](InferenceEngine::InferRequest &req) {
+                        IE::Blob::Ptr roi_blob = IE::make_shared_blob(this_blob, toIE(rc));
+                        req.SetBlob(ctx->uu.params.input_names[0u], roi_blob);
+                        req.StartAsync();
                     },
-                [](InferenceEngine::InferRequest &) { /* do nothing */ }
-            },
-            false /* not async */
-        );
+                    std::bind(callback, std::placeholders::_1, pos)
+                }
+            );
+        }
     }
 };
@@ -1136,86 +1162,59 @@ struct InferList2: public cv::detail::KernelTag {
 
     static void run(std::shared_ptr<IECallContext> ctx,
                     cv::gimpl::ie::RequestPool &reqPool) {
-        reqPool.execute(
-            cv::gimpl::ie::RequestPool::Task {
-                [ctx](InferenceEngine::InferRequest &req) {
-                    GAPI_Assert(ctx->inArgs().size() > 1u
-                                && "This operation must have at least two arguments");
-
-                    IE::Blob::Ptr blob_0 = extractBlob(*ctx, 0);
-
-                    // Take the next argument, which must be a vector (of any kind).
-                    // Use it only to obtain the ROI list size (sizes of all other
-                    // vectors must be equal to this one)
-                    const auto list_size = ctx->inArg<cv::detail::VectorRef>(1u).size();
-                    if (list_size == 0u) {
-                        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                            auto output = ctx->output(i);
-                            ctx->out.meta(output, cv::GRunArg::Meta{});
-                            ctx->out.post(std::move(output));
-                        }
-                        return;
-                    }
-
-                    for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                        ctx->outVecR<cv::Mat>(i).resize(list_size);
-                    }
-
-                    // FIXME: This could be done ONCE at graph compile stage!
-                    std::vector< std::vector<int> > cached_dims(ctx->uu.params.num_out);
-                    for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                        const IE::DataPtr& ie_out = ctx->uu.outputs.at(ctx->uu.params.output_names[i]);
-                        cached_dims[i] = toCV(ie_out->getTensorDesc().getDims());
-                        // FIXME: Shouldn't this be done automatically
-                        // by some resetInternalData(), etc.? (Probably at the GExecutor level)
-                        ctx->outVecR<cv::Mat>(i).clear();
-                    }
+        GAPI_Assert(ctx->inArgs().size() > 1u
+                    && "This operation must have at least two arguments");
+        IE::Blob::Ptr blob_0 = extractBlob(*ctx, 0);
+        const auto list_size = ctx->inArg<cv::detail::VectorRef>(1u).size();
+        if (list_size == 0u) {
+            for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
+                auto output = ctx->output(i);
+                ctx->out.meta(output, ctx->input(0).meta);
+                ctx->out.post(std::move(output));
+            }
+            return;
+        }
+        // FIXME: This could be done ONCE at graph compile stage!
+        std::vector< std::vector<int> > cached_dims(ctx->uu.params.num_out);
+        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
+            const IE::DataPtr& ie_out = ctx->uu.outputs.at(ctx->uu.params.output_names[i]);
+            cached_dims[i] = toCV(ie_out->getTensorDesc().getDims());
+            // FIXME: Shouldn't this be done automatically
+            // by some resetInternalData(), etc.? (Probably at the GExecutor level)
+            auto& out_vec = ctx->outVecR<cv::Mat>(i);
+            out_vec.clear();
+            out_vec.resize(list_size);
+        }
 
-                    for (const auto &list_idx : ade::util::iota(list_size)) {
-                        for (auto in_idx : ade::util::iota(ctx->uu.params.num_in)) {
-                            const auto &this_vec = ctx->inArg<cv::detail::VectorRef>(in_idx+1u);
-                            GAPI_Assert(this_vec.size() == list_size);
-                            IE::Blob::Ptr this_blob;
-                            if (this_vec.getKind() == cv::detail::OpaqueKind::CV_RECT) {
-                                const auto &vec = this_vec.rref<cv::Rect>();
-                                this_blob = IE::make_shared_blob(blob_0, toIE(vec[list_idx]));
-                            } else if (this_vec.getKind() == cv::detail::OpaqueKind::CV_MAT) {
-                                const auto &vec = this_vec.rref<cv::Mat>();
-                                const auto &mat = vec[list_idx];
-                                this_blob = wrapIE(mat, cv::gapi::ie::TraitAs::TENSOR);
-                            } else {
-                                GAPI_Assert(false &&
-                                            "Only Rect and Mat types are supported for infer list 2!");
-                            }
-
-                            req.SetBlob(ctx->uu.params.input_names[in_idx], this_blob);
-                        }
-
-                        req.Infer();
-
-                        for (auto i : ade::util::iota(ctx->uu.params.num_out)) {
-                            std::vector<cv::Mat> &out_vec = ctx->outVecR<cv::Mat>(i);
-
-                            IE::Blob::Ptr out_blob = req.GetBlob(ctx->uu.params.output_names[i]);
-                            GAPI_Assert(out_blob);
-
-                            cv::Mat out_mat(cached_dims[i], toCV(out_blob->getTensorDesc().getPrecision()));
-                            // FIXME: Avoid data copy. Not sure if it is possible though
-                            copyFromIE(out_blob, out_mat);
-                            out_vec.push_back(std::move(out_mat));
-                        }
-                    }
+        PostOutputsList callback(list_size, ctx, std::move(cached_dims));
+        for (const auto &list_idx : ade::util::iota(list_size)) {
+            reqPool.execute(
+                cv::gimpl::ie::RequestPool::Task {
+                    [ctx, list_idx, list_size, blob_0](InferenceEngine::InferRequest &req) {
+                        for (auto in_idx : ade::util::iota(ctx->uu.params.num_in)) {
+                            const auto &this_vec = ctx->inArg<cv::detail::VectorRef>(in_idx+1u);
+                            GAPI_Assert(this_vec.size() == list_size);
+                            IE::Blob::Ptr this_blob;
+                            if (this_vec.getKind() == cv::detail::OpaqueKind::CV_RECT) {
+                                const auto &vec = this_vec.rref<cv::Rect>();
+                                this_blob = IE::make_shared_blob(blob_0, toIE(vec[list_idx]));
+                            } else if (this_vec.getKind() == cv::detail::OpaqueKind::CV_MAT) {
+                                const auto &vec = this_vec.rref<cv::Mat>();
+                                const auto &mat = vec[list_idx];
+                                this_blob = wrapIE(mat, cv::gapi::ie::TraitAs::TENSOR);
+                            } else {
+                                GAPI_Assert(false &&
+                                            "Only Rect and Mat types are supported for infer list 2!");
+                            }
+                            req.SetBlob(ctx->uu.params.input_names[in_idx], this_blob);
+                        }
+                        req.StartAsync();
+                    },
-                [](InferenceEngine::InferRequest &) { /* do nothing */ }
-            },
-            false /* not async */
+                    std::bind(callback, std::placeholders::_1, list_idx)
+                } // task
             );
+        } // for
     }
 };
diff --git a/modules/gapi/test/infer/gapi_infer_ie_test.cpp b/modules/gapi/test/infer/gapi_infer_ie_test.cpp
index b950a9a0c4..d81b80a4d8 100644
--- a/modules/gapi/test/infer/gapi_infer_ie_test.cpp
+++ b/modules/gapi/test/infer/gapi_infer_ie_test.cpp
@@ -1255,7 +1255,6 @@ TEST(InferList, TestStreamingInfer)
     }.cfgOutputLayers({ "age_conv3", "prob" })
      .cfgNumRequests(4u);
 
-
     std::size_t num_frames = 0u;
     std::size_t max_frames = 10u;
@@ -1308,7 +1307,6 @@ TEST(InferList, TestStreamingInfer)
         ++num_frames;
         cap >> in_mat;
     }
-
     pipeline.stop();
 }
 
 TEST(Infer2, TestStreamingInfer)
-- 
GitLab
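
The core of this patch is the PostOutputsList completion callback: each ROI gets its own asynchronous infer request, each request writes its result into a pre-sized slot (which is what fixes the ROI-ordering bug), and a shared atomic counter triggers the final posting only after the last request completes. The following standalone sketch is not part of the patch and not OpenCV code; the names (OnAllDone, publish) and the use of plain std::thread in place of InferenceEngine's async requests are illustrative assumptions. It shows the same pattern in isolation:

#include <atomic>
#include <cstddef>
#include <functional>
#include <iostream>
#include <memory>
#include <thread>
#include <vector>

// Shared-state callback: copies are cheap and all refer to one Priv,
// mirroring how PostOutputsList keeps its state behind a shared_ptr.
class OnAllDone {
public:
    OnAllDone(std::size_t size, std::function<void(const std::vector<int>&)> publish)
        : m_priv(std::make_shared<Priv>()) {
        m_priv->size = size;
        m_priv->results.resize(size);   // one slot per request keeps order stable
        m_priv->publish = std::move(publish);
    }

    // Invoked from the worker that ran request number `pos`.
    void operator()(int result, std::size_t pos) const {
        m_priv->results[pos] = result;  // each worker writes only its own slot
        // fetch_add returns the previous value, so exactly one caller,
        // the last to finish, sees the counter reach `size` and publishes.
        if (m_priv->finished.fetch_add(1u) + 1u == m_priv->size) {
            m_priv->publish(m_priv->results);
        }
    }

private:
    struct Priv {
        std::size_t size = 0u;
        std::atomic<std::size_t> finished{0u};
        std::vector<int> results;
        std::function<void(const std::vector<int>&)> publish;
    };
    std::shared_ptr<Priv> m_priv;
};

int main() {
    const std::size_t n = 4u;
    OnAllDone callback(n, [](const std::vector<int>& out) {
        for (int v : out) std::cout << v << ' ';
        std::cout << '\n';              // always "0 10 20 30", whatever the finish order
    });

    // Four "async requests" completing in arbitrary order.
    std::vector<std::thread> workers;
    for (std::size_t pos = 0u; pos < n; ++pos) {
        workers.emplace_back([callback, pos] { callback(static_cast<int>(pos) * 10, pos); });
    }
    for (auto &t : workers) t.join();
    return 0;
}

One design note on the sketch: it tests the value returned by fetch_add rather than doing a separate increment and load, which guarantees that exactly one thread observes the transition to `size`, so the results are published once even when two requests finish back-to-back.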