/*M ///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2017, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "../precomp.hpp"
#include "../op_cuda.hpp"
#include <opencv2/dnn/shape_utils.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include "../nms.inl.hpp"

#ifdef HAVE_OPENCL
#include "opencl_kernels_dnn.hpp"
#endif

#ifdef HAVE_DNN_NGRAPH
#include "../ie_ngraph.hpp"
#endif

#ifdef HAVE_CUDA
#include "../cuda4dnn/primitives/region.hpp"
using namespace cv::dnn::cuda4dnn;
#endif


namespace cv
{
namespace dnn
{

class RegionLayerImpl CV_FINAL : public RegionLayer
{
public:
    int coords, classes, anchors, classfix;
    float thresh, scale_x_y;
    int new_coords;
    bool useSoftmax, useLogistic;
#ifdef HAVE_OPENCL
    UMat blob_umat;
#endif

    RegionLayerImpl(const LayerParams& params)
    {
        setParamsFrom(params);
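        // blobs[0] stores the anchor biases: 2*anchors floats laid out as (w, h) pairs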
        CV_Assert(blobs.size() == 1);

        thresh = params.get<float>("thresh", 0.2);
        coords = params.get<int>("coords", 4);
        classes = params.get<int>("classes", 0);
        anchors = params.get<int>("anchors", 5);
        classfix = params.get<int>("classfix", 0);
        useSoftmax = params.get<bool>("softmax", false);
        useLogistic = params.get<bool>("logistic", false);
        nmsThreshold = params.get<float>("nms_threshold", 0.4);
        scale_x_y = params.get<float>("scale_x_y", 1.0); // Yolov4
        new_coords = params.get<int>("new_coords", 0); // Yolov4x-mish

        CV_Assert(nmsThreshold >= 0.);
        CV_Assert(coords == 4);
        CV_Assert(classes >= 1);
        CV_Assert(anchors >= 1);
        CV_Assert(useLogistic || useSoftmax);
        if (params.get<bool>("softmax_tree", false))
            CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");
    }

    bool getMemoryShapes(const std::vector<MatShape> &inputs,
                         const int requiredOutputs,
                         std::vector<MatShape> &outputs,
                         std::vector<MatShape> &internals) const CV_OVERRIDE
    {
        CV_Assert(inputs.size() > 0);
        // channels == cell_size*anchors
        CV_Assert(inputs[0][3] == (1 + coords + classes)*anchors);
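        // output: one row per (grid cell, anchor) pair; each row holds a cell_size vector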
        int batch_size = inputs[0][0];
        if(batch_size > 1)
            outputs = std::vector<MatShape>(1, shape(batch_size, inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
        else
            outputs = std::vector<MatShape>(1, shape(inputs[0][1] * inputs[0][2] * anchors, inputs[0][3] / anchors));
        return false;
    }

    virtual bool supportBackend(int backendId) CV_OVERRIDE
    {
#ifdef HAVE_DNN_NGRAPH
        if (backendId == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH)
        return INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2020_2) && preferableTarget != DNN_TARGET_MYRIAD && new_coords == 0;
#endif
#ifdef HAVE_CUDA
        if (backendId == DNN_BACKEND_CUDA)
            return true;
#endif
        return backendId == DNN_BACKEND_OPENCV;
    }

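    // Logistic (sigmoid) activation used for box offsets, objectness and,
    // in the Yolo v3 path, per-class scores.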
    float logistic_activate(float x) { return 1.F / (1.F + exp(-x)); }

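    // Numerically stable softmax over n scores: subtract the maximum before
    // exponentiating. 'temp' is a softmax temperature; the caller in this layer passes 1.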
    void softmax_activate(const float* input, const int n, const float temp, float* output)
    {
        int i;
        float sum = 0;
        float largest = -FLT_MAX;
        for (i = 0; i < n; ++i) {
            if (input[i] > largest) largest = input[i];
        }
        for (i = 0; i < n; ++i) {
            float e = exp((input[i] - largest) / temp);
            sum += e;
            output[i] = e;
        }
        for (i = 0; i < n; ++i) {
            output[i] /= sum;
        }
    }

#ifdef HAVE_OPENCL
    bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
    {
        if (blob_umat.empty())
            blobs[0].copyTo(blob_umat);

        std::vector<UMat> inputs;
        std::vector<UMat> outputs;

        // TODO: apply a logistic activation to the classification scores.
        if (useLogistic || inps.depth() == CV_16S)
            return false;

        inps.getUMatVector(inputs);
        outs.getUMatVector(outputs);

        CV_Assert(inputs.size() >= 1);
        int const cell_size = classes + coords + 1;

        for (size_t ii = 0; ii < outputs.size(); ii++)
        {
            UMat& inpBlob = inputs[ii];
            UMat& outBlob = outputs[ii];

            int batch_size = inpBlob.size[0];
            int rows = inpBlob.size[1];
            int cols = inpBlob.size[2];

            // channels == cell_size*anchors, see the assert in getMemoryShapes()
            int sample_size = cell_size*rows*cols*anchors;

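            // Run the logistic activation kernel over every (cell, anchor)
            // prediction of the batch.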
            ocl::Kernel logistic_kernel("logistic_activ", ocl::dnn::region_oclsrc);
            size_t nanchors = rows*cols*anchors*batch_size;
            logistic_kernel.set(0, (int)nanchors);
            logistic_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
            logistic_kernel.set(2, (int)cell_size);
            logistic_kernel.set(3, ocl::KernelArg::PtrWriteOnly(outBlob));
            logistic_kernel.run(1, &nanchors, NULL, false);

            if (useSoftmax)
            {
                // Yolo v2
                // softmax activation for Probability, for each grid cell (X x Y x Anchor-index)
                ocl::Kernel softmax_kernel("softmax_activ", ocl::dnn::region_oclsrc);
                size_t nanchors = rows*cols*anchors*batch_size;
                softmax_kernel.set(0, (int)nanchors);
                softmax_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
                softmax_kernel.set(2, ocl::KernelArg::PtrReadOnly(blob_umat));
                softmax_kernel.set(3, (int)cell_size);
                softmax_kernel.set(4, (int)classes);
                softmax_kernel.set(5, (int)classfix);
                softmax_kernel.set(6, (int)rows);
                softmax_kernel.set(7, (int)cols);
                softmax_kernel.set(8, (int)anchors);
                softmax_kernel.set(9, (float)thresh);
                softmax_kernel.set(10, ocl::KernelArg::PtrWriteOnly(outBlob));
                if (!softmax_kernel.run(1, &nanchors, NULL, false))
                    return false;
            }

            if (nmsThreshold > 0) {
                Mat mat = outBlob.getMat(ACCESS_WRITE);
                float *dstData = mat.ptr<float>();
                for (int b = 0; b < batch_size; ++b)
                    do_nms_sort(dstData + b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
            }

        }

        return true;
    }
#endif

    void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE
    {
        CV_TRACE_FUNCTION();
        CV_TRACE_ARG_VALUE(name, "name", name.c_str());

        CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                   forward_ocl(inputs_arr, outputs_arr, internals_arr))

        if (inputs_arr.depth() == CV_16S)
        {
            forward_fallback(inputs_arr, outputs_arr, internals_arr);
            return;
        }

        std::vector<Mat> inputs, outputs, internals;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        internals_arr.getMatVector(internals);

        CV_Assert(inputs.size() >= 1);
        CV_Assert(outputs.size() == 1);
        int const cell_size = classes + coords + 1;

        const float* biasData = blobs[0].ptr<float>();

        for (size_t ii = 0; ii < outputs.size(); ii++)
        {
            Mat &inpBlob = inputs[ii];
            Mat &outBlob = outputs[ii];

            int batch_size = inpBlob.size[0];
            int rows = inpBlob.size[1];
            int cols = inpBlob.size[2];

            // number of floats per image in the batch, same for input and output
            int sample_size = cell_size*rows*cols*anchors;

            // assert that the comment above is true
            CV_Assert(sample_size*batch_size == inpBlob.total());
            CV_Assert(sample_size*batch_size == outBlob.total());

            CV_Assert(inputs.size() < 2 || inputs[1].dims == 4);
            int hNorm = inputs.size() > 1 ? inputs[1].size[2] : rows;
            int wNorm = inputs.size() > 1 ? inputs[1].size[3] : cols;

            const float *srcData = inpBlob.ptr<float>();
            float *dstData = outBlob.ptr<float>();

            if (new_coords == 0) {
                // logistic activation for t0, for each grid cell (X x Y x Anchor-index)
                for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
                    int index = cell_size*i;
                    float x = srcData[index + 4];
                    dstData[index + 4] = logistic_activate(x);	// logistic activation
                }

                if (useSoftmax) {  // Yolo v2
                    for (int i = 0; i < batch_size*rows*cols*anchors; ++i) {
                        int index = cell_size*i;
                        softmax_activate(srcData + index + 5, classes, 1, dstData + index + 5);
                    }
                }
                else if (useLogistic) {  // Yolo v3
                    for (int i = 0; i < batch_size*rows*cols*anchors; ++i){
                        int index = cell_size*i;
                        const float* input = srcData + index + 5;
                        float* output = dstData + index + 5;
                        for (int c = 0; c < classes; ++c)
                            output[c] = logistic_activate(input[c]);
                    }
                }
            }
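
            // Decode every (cell, anchor) prediction: box center = grid cell
            // index plus activated offset, box size = anchor prior scaled by
            // the predicted term; class scores are gated by objectness and thresh.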
            for (int b = 0; b < batch_size; ++b)
                for (int x = 0; x < cols; ++x)
                    for(int y = 0; y < rows; ++y)
                        for (int a = 0; a < anchors; ++a) {
                            // relative start address for image b within the batch data
                            int index_sample_offset = sample_size*b;
                            int index = (y*cols + x)*anchors + a;  // index for each grid-cell & anchor
                            int p_index = index_sample_offset + index * cell_size + 4;
                            float scale = dstData[p_index];
                            if (classfix == -1 && scale < .5)
                            {
                                scale = 0;  // if(t0 < 0.5) t0 = 0;
                            }
                            int box_index = index_sample_offset + index * cell_size;

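                            // with new_coords == 1 the inputs come pre-activated:
                            // offsets are used directly and w/h use a squared
                            // term instead of exp()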
                            if (new_coords == 1) {
                                float x_tmp = (srcData[box_index + 0] - 0.5f) * scale_x_y + 0.5f;
                                float y_tmp = (srcData[box_index + 1] - 0.5f) * scale_x_y + 0.5f;
                                dstData[box_index + 0] = (x + x_tmp) / cols;
                                dstData[box_index + 1] = (y + y_tmp) / rows;
                                dstData[box_index + 2] = (srcData[box_index + 2]) * (srcData[box_index + 2]) * 4 * biasData[2 * a] / wNorm;
                                dstData[box_index + 3] = (srcData[box_index + 3]) * (srcData[box_index + 3]) * 4 * biasData[2 * a + 1] / hNorm;
                                dstData[box_index + 4] = srcData[p_index];

                                scale = srcData[p_index];
                                if (classfix == -1 && scale < thresh)
                                {
                                    scale = 0;  // if(t0 < thresh) t0 = 0;
                                }

                                int class_index = index_sample_offset + index * cell_size + 5;
                                for (int j = 0; j < classes; ++j) {
                                    float prob = scale*srcData[class_index + j];  // prob = IoU(box, object) = t0 * class-probability
                                    dstData[class_index + j] = (prob > thresh) ? prob : 0;  // if (IoU < threshold) IoU = 0;
                                }
                            }
                            else
                            {
                                float x_tmp = (logistic_activate(srcData[box_index + 0]) - 0.5f) * scale_x_y + 0.5f;
                                float y_tmp = (logistic_activate(srcData[box_index + 1]) - 0.5f) * scale_x_y + 0.5f;
                                dstData[box_index + 0] = (x + x_tmp) / cols;
                                dstData[box_index + 1] = (y + y_tmp) / rows;
                                dstData[box_index + 2] = exp(srcData[box_index + 2]) * biasData[2 * a] / wNorm;
                                dstData[box_index + 3] = exp(srcData[box_index + 3]) * biasData[2 * a + 1] / hNorm;

                                int class_index = index_sample_offset + index * cell_size + 5;
                                for (int j = 0; j < classes; ++j) {
                                    float prob = scale*dstData[class_index + j];  // prob = IoU(box, object) = t0 * class-probability
                                    dstData[class_index + j] = (prob > thresh) ? prob : 0;  // if (IoU < threshold) IoU = 0;
                                }
                            }
                        }
            if (nmsThreshold > 0) {
                for (int b = 0; b < batch_size; ++b){
                    do_nms_sort(dstData+b*sample_size, rows*cols*anchors, thresh, nmsThreshold);
                }
            }
        }
    }

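    // Greedy per-class NMS over the flat detection buffer: each detection is
    // (classes + coords + 1) floats laid out as [x, y, w, h, t0, class scores...].
    // Scores of suppressed boxes are zeroed in place.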
    void do_nms_sort(float *detections, int total, float score_thresh, float nms_thresh)
    {
        std::vector<Rect2d> boxes(total);
        std::vector<float> scores(total);

        for (int i = 0; i < total; ++i)
        {
            Rect2d &b = boxes[i];
            int box_index = i * (classes + coords + 1);
            b.width = detections[box_index + 2];
            b.height = detections[box_index + 3];
            b.x = detections[box_index + 0] - b.width / 2;
            b.y = detections[box_index + 1] - b.height / 2;
        }

        std::vector<int> indices;
        for (int k = 0; k < classes; ++k)
        {
            for (int i = 0; i < total; ++i)
            {
                int box_index = i * (classes + coords + 1);
                int class_index = box_index + 5;
                scores[i] = detections[class_index + k];
                detections[class_index + k] = 0;
            }
            NMSBoxes(boxes, scores, score_thresh, nms_thresh, indices);
            for (int i = 0, n = indices.size(); i < n; ++i)
            {
                int box_index = indices[i] * (classes + coords + 1);
                int class_index = box_index + 5;
                detections[class_index + k] = scores[indices[i]];
            }
        }
    }

#ifdef HAVE_CUDA
    Ptr<BackendNode> initCUDA(
        void *context_,
        const std::vector<Ptr<BackendWrapper>>& inputs,
        const std::vector<Ptr<BackendWrapper>>& outputs
    ) override
    {
        auto context = reinterpret_cast<csl::CSLContext*>(context_);

        if (coords != 4)
            CV_Error(Error::StsNotImplemented, "Only upright rectangular boxes are supported in RegionLayer.");

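        /* box sizes are normalized by the second input's spatial size when
           present, otherwise by this layer's own grid size */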
        std::size_t height_norm, width_norm;
        if (inputs.size() == 1)
        {
            auto input_wrapper = inputs[0].dynamicCast<CUDABackendWrapper>();
            auto input_shape = input_wrapper->getShape();
            height_norm = input_shape[1];
            width_norm = input_shape[2];
        }
        else
        {
            auto input_wrapper = inputs[1].dynamicCast<CUDABackendWrapper>();
            auto input_shape = input_wrapper->getShape();
            CV_Assert(input_shape.size() == 4);
            height_norm = input_shape[2];
            width_norm = input_shape[3];
        }

        cuda4dnn::SquashMethod squash_method;
        if(useLogistic)
            squash_method = cuda4dnn::SquashMethod::SIGMOID;
        else if (useSoftmax)
            squash_method = cuda4dnn::SquashMethod::SOFTMAX;

        /* exactly one must be true */
        CV_Assert((useLogistic || useSoftmax) && !(useLogistic && useSoftmax));

        cuda4dnn::RegionConfiguration<float> config;
        config.squash_method = squash_method;
        config.classes = classes;
        config.boxes_per_cell = anchors;

        config.height_norm = height_norm;
        config.width_norm = width_norm;

        config.scale_x_y = scale_x_y;

        config.object_prob_cutoff = (classfix == -1) ? thresh : 0.f;
        config.class_prob_cutoff = thresh;

        config.nms_iou_threshold = nmsThreshold;

        config.new_coords = (new_coords == 1);
        return make_cuda_node<cuda4dnn::RegionOp>(preferableTarget, std::move(context->stream), blobs[0], config);
    }
#endif

    virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                           const std::vector<MatShape> &outputs) const CV_OVERRIDE
    {
        CV_UNUSED(outputs); // suppress unused variable warning

        int64 flops = 0;
        for(int i = 0; i < inputs.size(); i++)
        {
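            // crude cost model: roughly 60 floating-point ops per input element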
            flops += 60*total(inputs[i]);
        }
        return flops;
    }

#ifdef HAVE_DNN_NGRAPH
    virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
                                        const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
    {
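        // Mirror the CPU path in nGraph: RegionYolo applies the activations,
        // then box centers/sizes and gated class scores are rebuilt explicitly
        // and reshaped to the same flattened layout that forward() produces.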
        auto& input = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
        auto parent_shape = input->get_shape();
        int64_t b = parent_shape[0];
        int64_t h = parent_shape[1];
        int64_t w = parent_shape[2];
        int64_t c = parent_shape[3];

        int64_t cols = b * h * w * anchors;
        int64_t rows = c / anchors;
        auto shape_node = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2},  std::vector<int64_t>{cols, rows});
        auto tr_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{1, 0});

        std::shared_ptr<ngraph::Node> input2d;
        {
            input2d = std::make_shared<ngraph::op::v1::Reshape>(input, shape_node, true);
            input2d = std::make_shared<ngraph::op::Transpose>(input2d, tr_axes);
        }

        std::shared_ptr<ngraph::Node> region;
        {
            auto new_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{4}, std::vector<int64_t>{0, 3, 1, 2});
            auto tr_input = std::make_shared<ngraph::op::Transpose>(input, new_axes);

            std::vector<float> anchors_vec(blobs[0].ptr<float>(), blobs[0].ptr<float>() + blobs[0].total());
            std::vector<int64_t> mask(anchors, 1);
            region = std::make_shared<ngraph::op::RegionYolo>(tr_input, coords, classes, anchors, useSoftmax, mask, 1, 3, anchors_vec);

            auto tr_shape = tr_input->get_shape();
            auto shape_as_inp = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
                                                                       ngraph::Shape{tr_shape.size()},
                                                                       std::vector<int64_t>(tr_shape.begin(), tr_shape.end()));

            region = std::make_shared<ngraph::op::v1::Reshape>(region, shape_as_inp, true);
            new_axes = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{4}, std::vector<int64_t>{0, 2, 3, 1});
            region = std::make_shared<ngraph::op::Transpose>(region, new_axes);

            region = std::make_shared<ngraph::op::v1::Reshape>(region, shape_node, true);
            region = std::make_shared<ngraph::op::Transpose>(region, tr_axes);
        }

        auto strides = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{1, 1});
        std::vector<int64_t> boxes_shape{b, anchors, h, w};
        auto shape_3d = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{boxes_shape.size()}, boxes_shape.data());

        ngraph::Shape box_broad_shape{1, (size_t)anchors, (size_t)h, (size_t)w};
        auto scale_x_y_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &scale_x_y);
        auto shift_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, std::vector<float>{0.5});

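        // split the transposed 2D view along its rows into x, y, w, h and the
        // remaining (objectness + class) channels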
        auto axis = ngraph::op::Constant::create<int64_t>(ngraph::element::i64, ngraph::Shape{}, {0});
        auto splits = ngraph::op::Constant::create<int64_t>(ngraph::element::i64, ngraph::Shape{5}, {1, 1, 1, 1, rows - 4});
        auto split = std::make_shared<ngraph::op::v1::VariadicSplit>(input2d, axis, splits);
        std::shared_ptr<ngraph::Node> box_x;
        {
            box_x = std::make_shared<ngraph::op::Sigmoid>(split->output(0));
            box_x = std::make_shared<ngraph::op::v1::Subtract>(box_x, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
            box_x = std::make_shared<ngraph::op::v1::Multiply>(box_x, scale_x_y_node, ngraph::op::AutoBroadcastType::NUMPY);
            box_x = std::make_shared<ngraph::op::v1::Add>(box_x, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
            box_x = std::make_shared<ngraph::op::v1::Reshape>(box_x, shape_3d, true);

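            // grid x-offsets: the column index repeated per anchor,
            // then replicated across all rows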
            std::vector<float> x_indices(w * h * anchors);
            auto begin = x_indices.begin();
            for (int i = 0; i < w; i++)
            {
                std::fill(begin + i * anchors, begin + (i + 1) * anchors, i);
            }

            for (int j = 1; j < h; j++)
            {
                std::copy(begin, begin + w * anchors, begin + j * w * anchors);
            }
            auto horiz = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, box_broad_shape, x_indices.data());
            box_x = std::make_shared<ngraph::op::v1::Add>(box_x, horiz, ngraph::op::AutoBroadcastType::NUMPY);

            auto cols_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, std::vector<float>{float(w)});
            box_x = std::make_shared<ngraph::op::v1::Divide>(box_x, cols_node, ngraph::op::AutoBroadcastType::NUMPY);
        }

        std::shared_ptr<ngraph::Node> box_y;
        {
            box_y = std::make_shared<ngraph::op::Sigmoid>(split->output(1));
            box_y = std::make_shared<ngraph::op::v1::Subtract>(box_y, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
            box_y = std::make_shared<ngraph::op::v1::Multiply>(box_y, scale_x_y_node, ngraph::op::AutoBroadcastType::NUMPY);
            box_y = std::make_shared<ngraph::op::v1::Add>(box_y, shift_node, ngraph::op::AutoBroadcastType::NUMPY);
            box_y = std::make_shared<ngraph::op::v1::Reshape>(box_y, shape_3d, true);

            std::vector<float> y_indices(h * anchors);
            for (int i = 0; i < h; i++)
            {
                std::fill(y_indices.begin() + i * anchors, y_indices.begin() + (i + 1) * anchors, i);
            }

            auto vert = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1, (size_t)anchors, (size_t)h, 1}, y_indices.data());
            box_y = std::make_shared<ngraph::op::v1::Add>(box_y, vert, ngraph::op::AutoBroadcastType::NUMPY);
            auto rows_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, std::vector<float>{float(h)});
            box_y = std::make_shared<ngraph::op::v1::Divide>(box_y, rows_node, ngraph::op::AutoBroadcastType::NUMPY);
        }

        std::shared_ptr<ngraph::Node> box_w, box_h;
        {
            int hNorm, wNorm;
            if (nodes.size() > 1)
            {
                auto node_1_shape = nodes[1].dynamicCast<InfEngineNgraphNode>()->node->get_shape();
                hNorm = node_1_shape[2];
                wNorm = node_1_shape[3];
            }
            else
            {
                hNorm = h;
                wNorm = w;
            }

            std::vector<float> anchors_w(anchors), anchors_h(anchors);
            for (size_t a = 0; a < anchors; ++a)
            {
                anchors_w[a] = blobs[0].at<float>(0, 2 * a) / wNorm;
                anchors_h[a] = blobs[0].at<float>(0, 2 * a + 1) / hNorm;
            }

            std::vector<float> bias_w(w * h * anchors), bias_h(w * h * anchors);
            for (int j = 0; j < h; j++)
            {
                std::copy(anchors_w.begin(), anchors_w.end(), bias_w.begin() + j * anchors);
                std::copy(anchors_h.begin(), anchors_h.end(), bias_h.begin() + j * anchors);
            }

            for (int i = 1; i < w; i++)
            {
                std::copy(bias_w.begin(), bias_w.begin() + h * anchors, bias_w.begin() + i * h * anchors);
                std::copy(bias_h.begin(), bias_h.begin() + h * anchors, bias_h.begin() + i * h * anchors);
            }

            box_w = std::make_shared<ngraph::op::v0::Exp>(split->output(2));
            box_w = std::make_shared<ngraph::op::v1::Reshape>(box_w, shape_3d, true);
            auto anchor_w_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, box_broad_shape, bias_w.data());
            box_w = std::make_shared<ngraph::op::v1::Multiply>(box_w, anchor_w_node, ngraph::op::AutoBroadcastType::NUMPY);

            box_h = std::make_shared<ngraph::op::v0::Exp>(split->output(3));
            box_h = std::make_shared<ngraph::op::v1::Reshape>(box_h, shape_3d, true);
            auto anchor_h_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, box_broad_shape, bias_h.data());
            box_h = std::make_shared<ngraph::op::v1::Multiply>(box_h, anchor_h_node, ngraph::op::AutoBroadcastType::NUMPY);
        }

        auto region_splits = ngraph::op::Constant::create<int64_t>(ngraph::element::i64, ngraph::Shape{3}, {4, 1, rows - 5});
        auto region_split = std::make_shared<ngraph::op::v1::VariadicSplit>(region, axis, region_splits);

        std::shared_ptr<ngraph::Node> scale;
        {
            float thr = classfix == -1 ? 0.5 : 0;
            auto thresh_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, std::vector<float>{thr});
            auto mask = std::make_shared<ngraph::op::v1::Less>(region_split->output(1), thresh_node);
            auto zero_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, mask->get_shape(), std::vector<float>(cols, 0));
            scale = std::make_shared<ngraph::op::v1::Select>(mask, zero_node, region_split->output(1));
        }

        std::shared_ptr<ngraph::Node> probs;
        {
            probs = std::make_shared<ngraph::op::v1::Multiply>(region_split->output(2), scale, ngraph::op::AutoBroadcastType::NUMPY);
            auto thresh_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1}, &thresh);
            auto mask = std::make_shared<ngraph::op::v1::Greater>(probs, thresh_node);
            auto zero_node = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, mask->get_shape(), std::vector<float>((rows - 5) * cols, 0));
            probs = std::make_shared<ngraph::op::v1::Select>(mask, probs, zero_node);
        }


        auto concat_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{1, cols});
        box_x = std::make_shared<ngraph::op::v1::Reshape>(box_x, concat_shape, true);
        box_y = std::make_shared<ngraph::op::v1::Reshape>(box_y, concat_shape, true);
        box_w = std::make_shared<ngraph::op::v1::Reshape>(box_w, concat_shape, true);
        box_h = std::make_shared<ngraph::op::v1::Reshape>(box_h, concat_shape, true);

        ngraph::NodeVector inp_nodes{box_x, box_y, box_w, box_h, scale, probs};
        std::shared_ptr<ngraph::Node> result = std::make_shared<ngraph::op::Concat>(inp_nodes, 0);
        result = std::make_shared<ngraph::op::Transpose>(result, tr_axes);
        if (b > 1)
        {
            std::vector<int64_t> sizes{b, static_cast<int64_t>(result->get_shape()[0]) / b, static_cast<int64_t>(result->get_shape()[1])};
            auto shape_node = std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{sizes.size()}, sizes.data());
            result = std::make_shared<ngraph::op::v1::Reshape>(result, shape_node, true);
        }

        return Ptr<BackendNode>(new InfEngineNgraphNode(result));
    }
#endif  // HAVE_DNN_NGRAPH

};

Ptr<RegionLayer> RegionLayer::create(const LayerParams& params)
{
    return Ptr<RegionLayer>(new RegionLayerImpl(params));
}

}  // namespace dnn
}  // namespace cv