// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

// Copyright (C) 2018, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.

#include "../precomp.hpp"
#include <opencv2/dnn/shape_utils.hpp>

#include <opencv2/core/utils/logger.defines.hpp>
#undef CV_LOG_STRIP_LEVEL
#define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1
#include <opencv2/core/utils/logger.hpp>

#ifdef HAVE_PROTOBUF

#include <iostream>
#include <fstream>
#include <string>
#include <limits>
#include <algorithm>


#if defined(__GNUC__) && __GNUC__ >= 5
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsuggest-override"
#endif
#include "opencv-onnx.pb.h"
#if defined(__GNUC__) && __GNUC__ >= 5
#pragma GCC diagnostic pop
#endif

#include "onnx_graph_simplifier.hpp"

namespace cv {
namespace dnn {
CV__DNN_EXPERIMENTAL_NS_BEGIN


class ONNXImporter
{
    opencv_onnx::ModelProto model_proto;
    struct LayerInfo {
        int layerId;
        int outputId;
        LayerInfo(int _layerId = 0, int _outputId = 0) : layerId(_layerId), outputId(_outputId) {}
    };

    std::map<std::string, Mat> getGraphTensors(
                                    const opencv_onnx::GraphProto& graph_proto);
    Mat getBlob(const opencv_onnx::NodeProto& node_proto, int index);
    Mat getBlob(const std::string& input_name);

    LayerParams getLayerParams(const opencv_onnx::NodeProto& node_proto);
    bool isCeilMode(const LayerParams& layerParams);

    void addConstant(const std::string& name, const Mat& blob);
    void addLayer(LayerParams& layerParams,
                  const opencv_onnx::NodeProto& node_proto);

public:

    ONNXImporter(Net& net, const char *onnxFile)
        : dstNet(net)
    {
        CV_Assert(onnxFile);
        CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFile);

        std::fstream input(onnxFile, std::ios::in | std::ios::binary);
        if (!input)
        {
            CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", onnxFile));
        }

        if (!model_proto.ParseFromIstream(&input))
        {
            CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX model: %s", onnxFile));
        }

        populateNet();
    }

    ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer)
        : dstNet(net)
    {
        CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)");

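        // Expose the caller's byte buffer as a read-only std::streambuf so protobuf
        // can parse the model directly from memory without copying it.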
        struct _Buf : public std::streambuf
        {
            _Buf(const char* buffer, size_t sizeBuffer)
            {
                char* p = const_cast<char*>(buffer);
                setg(p, p, p + sizeBuffer);
            }
        };

        _Buf buf(buffer, sizeBuffer);
        std::istream input(&buf);

        if (!model_proto.ParseFromIstream(&input))
            CV_Error(Error::StsUnsupportedFormat, "Failed to parse onnx model from in-memory byte array.");

        populateNet();
    }

    void populateNet();

protected:
    Net& dstNet;

    opencv_onnx::GraphProto graph_proto;
    std::string framework_name;

    std::map<std::string, Mat> constBlobs;

    std::map<std::string, MatShape> outShapes;  // List of internal blobs shapes.
    typedef std::map<std::string, MatShape>::iterator IterShape_t;

    std::map<std::string, LayerInfo> layer_id;
    typedef std::map<std::string, LayerInfo>::iterator IterLayerId_t;

    void handleNode(const opencv_onnx::NodeProto& node_proto);
};

inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, const String& newKey)
{
    if (layerParams.has(oldKey)) {
        layerParams.set(newKey, layerParams.get(oldKey));
        layerParams.erase(oldKey);
    }
}

void releaseONNXTensor(opencv_onnx::TensorProto& tensor_proto)
{
    if (!tensor_proto.raw_data().empty()) {
        delete tensor_proto.release_raw_data();
    }
}

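// Instantiates a single layer from 'params', infers its output shapes from the input
// shapes, allocates output/internal buffers and runs one forward pass.
// Used during import to fold operations whose inputs are all constant.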
void runLayer(LayerParams& params, const std::vector<Mat>& inputs,
              std::vector<Mat>& outputs)
{
    Ptr<Layer> layer = LayerFactory::createLayerInstance(params.type, params);
    CV_Assert((bool)layer);

    std::vector<MatShape> inpShapes(inputs.size());
    int ddepth = CV_32F;
    for (size_t i = 0; i < inputs.size(); ++i)
    {
        inpShapes[i] = shape(inputs[i]);
        if (i > 0 && ddepth != inputs[i].depth())
            CV_Error(Error::StsNotImplemented, "Mixed input data types.");
        ddepth = inputs[i].depth();
    }

    std::vector<MatShape> outShapes, internalShapes;
    layer->getMemoryShapes(inpShapes, 0, outShapes, internalShapes);

    std::vector<Mat> internals(internalShapes.size());
    outputs.resize(outShapes.size());
    for (size_t i = 0; i < outShapes.size(); ++i)
        outputs[i].create(outShapes[i], ddepth);
    for (size_t i = 0; i < internalShapes.size(); ++i)
        internals[i].create(internalShapes[i], ddepth);

    layer->finalize(inputs, outputs);
    layer->forward(inputs, outputs, internals);
}

std::map<std::string, Mat> ONNXImporter::getGraphTensors(
                                        const opencv_onnx::GraphProto& graph_proto)
{
  opencv_onnx::TensorProto tensor_proto;
  std::map<std::string, Mat> layers_weights;

  for (int i = 0; i < graph_proto.initializer_size(); i++)
  {
    tensor_proto = graph_proto.initializer(i);
    Mat mat = getMatFromTensor(tensor_proto);
    releaseONNXTensor(tensor_proto);
    layers_weights.insert(std::make_pair(tensor_proto.name(), mat));
  }
  return layers_weights;
}

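// ONNX stores integer list attributes as int64; convert them to 32-bit ints for DictValue.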
static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protobuf::int64>& src) {
    std::vector<int32_t> dst(src.size());
    convertInt64ToInt32(src, dst, src.size());
    return DictValue::arrayInt(&dst[0], src.size());
}

LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto)
{
    LayerParams lp;
    for(int i = 0; i < node_proto.attribute_size(); i++)
    {
        opencv_onnx::AttributeProto attribute_proto = node_proto.attribute(i);
        std::string attribute_name = attribute_proto.name();

        if(attribute_name == "kernel_shape")
        {
            CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
            lp.set("kernel_size", parse(attribute_proto.ints()));
        }
        else if(attribute_name == "strides")
        {
            CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
            lp.set("stride", parse(attribute_proto.ints()));
        }
        else if(attribute_name == "pads")
        {
            if (node_proto.op_type() == "Pad")
            {
                // Padding layer.
                // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN.
                // We need to shuffle it to begin0, end0, begin1, end1, ...
                CV_Assert(attribute_proto.ints_size() % 2 == 0);
                const int dims = attribute_proto.ints_size() / 2;
                std::vector<int32_t> paddings;
                paddings.reserve(attribute_proto.ints_size());
                for (int i = 0; i < dims; ++i)
                {
                    paddings.push_back(attribute_proto.ints(i));
                    paddings.push_back(attribute_proto.ints(dims + i));
                }
                lp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size()));
            }
            else
            {
                // Convolution or pooling.
                CV_Assert(attribute_proto.ints_size() == 4 || attribute_proto.ints_size() == 6);
                lp.set("pad", parse(attribute_proto.ints()));
            }
        }
        else if(attribute_name == "auto_pad")
        {
            if (attribute_proto.s() == "SAME_UPPER" || attribute_proto.s() == "SAME_LOWER") {
                lp.set("pad_mode",  "SAME");
            }
            else if (attribute_proto.s() == "VALID") {
                lp.set("pad_mode", "VALID");
            }
        }
        else if(attribute_name == "dilations")
        {
            CV_Assert(attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3);
            lp.set("dilation", parse(attribute_proto.ints()));
        }
        else if (attribute_proto.has_i())
        {
            ::google::protobuf::int64 src = attribute_proto.i();
            if (src < std::numeric_limits<int32_t>::min() || src > std::numeric_limits<int32_t>::max())
                CV_Error(Error::StsOutOfRange, "Input is out of OpenCV 32S range");
            else
                lp.set(attribute_name, saturate_cast<int32_t>(src));
        }
        else if (attribute_proto.has_f())
        {
            lp.set(attribute_name, attribute_proto.f());
        }
        else if (attribute_proto.has_s())
        {
            lp.set(attribute_name, attribute_proto.s());
        }
        else if (attribute_proto.floats_size() > 0)
        {
            lp.set(attribute_name, DictValue::arrayReal(
                attribute_proto.floats().data(), attribute_proto.floats_size()));
        }
        else if (attribute_proto.ints_size() > 0)
        {
            lp.set(attribute_name, parse(attribute_proto.ints()));
        }
        else if (attribute_proto.has_t())
        {
            opencv_onnx::TensorProto tensor = attribute_proto.t();
            Mat blob = getMatFromTensor(tensor);
            lp.blobs.push_back(blob);
        }
        else if (attribute_proto.has_g())
        {
            CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: 'Graph' is not supported", attribute_name.c_str()));
        }
        else if (attribute_proto.graphs_size() > 0)
        {
            CV_Error(Error::StsNotImplemented,
                    cv::format("DNN/ONNX/Attribute[%s]: 'Graphs' (%d) in attributes is not supported",
                            attribute_name.c_str(), attribute_proto.graphs_size())
            );
        }
        else if (attribute_proto.strings_size() > 0)
        {
            std::string msg = cv::format("DNN/ONNX/Attribute[%s]: 'Strings' (%d) are not supported",
                    attribute_name.c_str(), attribute_proto.strings_size());
            CV_LOG_ERROR(NULL, msg);
            for (int i = 0; i < attribute_proto.strings_size(); i++)
            {
                CV_LOG_ERROR(NULL, "    Attribute[" << attribute_name << "].string(" << i << ") = '" << attribute_proto.strings(i) << "'");
            }
            CV_Error(Error::StsNotImplemented, msg);
        }
        else if (attribute_proto.tensors_size() > 0)
        {
            CV_Error(Error::StsNotImplemented,
                    cv::format("DNN/ONNX/Attribute[%s]: 'Tensors' (%d) in attributes are not supported",
                            attribute_name.c_str(), attribute_proto.tensors_size())
            );
309 310
        }
        else
        {
            CV_Error(Error::StsNotImplemented, cv::format("DNN/ONNX/Attribute[%s]: unsupported attribute format", attribute_name.c_str()));
        }
    }
    return lp;
}

Mat ONNXImporter::getBlob(const opencv_onnx::NodeProto& node_proto, int index)
{
    CV_Assert(index < node_proto.input_size());
    const std::string& input_name = node_proto.input(index);
    return getBlob(input_name);
}

Mat ONNXImporter::getBlob(const std::string& input_name)
{
    std::map<std::string, Mat>::const_iterator constBlob = constBlobs.find(input_name);
    if (constBlob == constBlobs.end())
    {
        CV_Error(Error::StsBadArg, std::string("Blob ") + input_name + " not found in const blobs");
    }
    return constBlob->second;
}

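// Adds the layer to the target network, connects every input that is produced by an
// already registered layer, and records the inferred output shapes in outShapes.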
void ONNXImporter::addLayer(LayerParams& layerParams,
                            const opencv_onnx::NodeProto& node_proto)
{
    int id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams);
    for (int i = 0; i < node_proto.output_size(); ++i)
    {
        layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(id, i)));
    }

    std::vector<MatShape> layerInpShapes, layerOutShapes, layerInternalShapes;
    int inpNum = 0;
    for (int j = 0; j < node_proto.input_size(); j++)
    {
        const std::string& input_name = node_proto.input(j);
        IterLayerId_t layerId = layer_id.find(input_name);
        if (layerId != layer_id.end()) {
            dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum);
            ++inpNum;
            // Collect input shapes.
            IterShape_t shapeIt = outShapes.find(input_name);
            CV_Assert(shapeIt != outShapes.end());
            layerInpShapes.push_back(shapeIt->second);
        }
    }
    // Compute shape of output blob for this layer.
    Ptr<Layer> layer = dstNet.getLayer(id);  // FIXIT: avoid instantiation of layers during the import stage
    layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes);
    for (int i = 0; i < node_proto.output_size() && i < (int)layerOutShapes.size(); ++i)
    {
        outShapes[node_proto.output(i)] = layerOutShapes[i];
    }
}

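// Registers a constant blob (graph initializer or import-time folded result) together with its shape.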
void ONNXImporter::addConstant(const std::string& name, const Mat& blob)
{
    constBlobs.insert(std::make_pair(name, blob));
    outShapes.insert(std::make_pair(name, shape(blob)));
}

void ONNXImporter::populateNet()
{
    CV_Assert(model_proto.has_graph());
    graph_proto = model_proto.graph();

    std::string framework_version;
    if (model_proto.has_producer_name())
        framework_name = model_proto.producer_name();
    if (model_proto.has_producer_version())
        framework_version = model_proto.producer_version();

    CV_LOG_INFO(NULL, "DNN/ONNX: loading ONNX"
            << (model_proto.has_ir_version() ? cv::format(" v%d", (int)model_proto.ir_version()) : cv::String())
            << " model produced by '" << framework_name << "'"
            << (framework_version.empty() ? cv::String() : cv::format(":%s", framework_version.c_str()))
            << ". Number of nodes = " << graph_proto.node_size()
            << ", inputs = " << graph_proto.input_size()
            << ", outputs = " << graph_proto.output_size()
            );

    simplifySubgraphs(graph_proto);

    const int layersSize = graph_proto.node_size();
    CV_LOG_DEBUG(NULL, "DNN/ONNX: graph simplified to " << layersSize << " nodes");

    constBlobs = getGraphTensors(graph_proto);
    // Register the shapes of all graph inputs: this covers both constant blobs and the network's actual inputs.
    for (int i = 0; i < graph_proto.input_size(); ++i)
    {
        const opencv_onnx::ValueInfoProto& valueInfoProto = graph_proto.input(i);
        CV_Assert(valueInfoProto.has_name());
        CV_Assert(valueInfoProto.has_type());
        opencv_onnx::TypeProto typeProto = valueInfoProto.type();
        CV_Assert(typeProto.has_tensor_type());
        opencv_onnx::TypeProto::Tensor tensor = typeProto.tensor_type();
        CV_Assert(tensor.has_shape());
        opencv_onnx::TensorShapeProto tensorShape = tensor.shape();

        MatShape inpShape(tensorShape.dim_size());
        for (int j = 0; j < inpShape.size(); ++j)
        {
            inpShape[j] = tensorShape.dim(j).dim_value();
        }
        if (!inpShape.empty())
        {
            inpShape[0] = std::max(inpShape[0], 1); // It's OK to have undetermined batch size
        }
        outShapes[valueInfoProto.name()] = inpShape;
    }

    // Collect the real network inputs (excluding const blobs) and register each one
    // in layer_id with layer id 0 and its index among the inputs as the output id.
    std::vector<String> netInputs;
    for (int j = 0; j < graph_proto.input_size(); j++)
    {
        const std::string& name = graph_proto.input(j).name();
        if (constBlobs.find(name) == constBlobs.end()) {
            netInputs.push_back(name);
            layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1)));
        }
    }
    dstNet.setInputsNames(netInputs);

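    // Import the graph nodes one by one.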
    for(int li = 0; li < layersSize; li++)
    {
        const opencv_onnx::NodeProto& node_proto = graph_proto.node(li);
        handleNode(node_proto);
    }

    CV_LOG_DEBUG(NULL, "DNN/ONNX: import completed!");
}

void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_)
{
    opencv_onnx::NodeProto node_proto = node_proto_;  // TODO FIXIT

    CV_Assert(node_proto.output_size() >= 1);
    std::string name = node_proto.output(0);
    std::string layer_type = node_proto.op_type();
    CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
            << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
    );

    try
    {
        // FIXIT not all cases can be repacked into "LayerParams". Importer should handle such cases directly for each "layer_type"
        LayerParams layerParams = getLayerParams(node_proto);

        layerParams.name = name;
        layerParams.type = layer_type;

        if (layer_type == "MaxPool")
        {
            layerParams.type = "Pooling";
            layerParams.set("pool", "MAX");
            layerParams.set("ceil_mode", layerParams.has("pad_mode"));
        }
        else if (layer_type == "AveragePool")
        {
            layerParams.type = "Pooling";
            layerParams.set("pool", "AVE");
            layerParams.set("ceil_mode", layerParams.has("pad_mode"));
            layerParams.set("ave_pool_padded_area", framework_name == "pytorch");
        }
        else if (layer_type == "GlobalAveragePool" || layer_type == "GlobalMaxPool" ||
                layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")
        {
            CV_Assert(node_proto.input_size() == 1);
            layerParams.type = "Pooling";
            String pool;
            if (layer_type == "GlobalMaxPool" || layer_type == "ReduceMax")
                pool = "MAX";
            else if (layer_type == "ReduceSum")
                pool = "SUM";
            else
                pool = "AVE";
            layerParams.set("pool", pool);
            layerParams.set("global_pooling", !layerParams.has("axes"));
            if (layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax"))
            {
                MatShape inpShape = outShapes[node_proto.input(0)];
                DictValue axes = layerParams.get("axes");
                bool keepdims = layerParams.get<int>("keepdims");
                MatShape targetShape = inpShape;
                for (int i = 0; i < axes.size(); i++) {
                    int axis = clamp(axes.get<int>(i), inpShape.size());
                    if (keepdims) {
                        targetShape[axis] = 1;
                    } else {
                        targetShape.erase(targetShape.begin() + axis);
                    }
                }

                if (inpShape.size() == 3 && axes.size() <= 2)
                {
                    int axis = clamp(axes.get<int>(0), inpShape.size());
                    CV_CheckNE(axis, 0, "");

                    LayerParams reshapeLp;
                    reshapeLp.name = layerParams.name + "/reshape";
                    reshapeLp.type = "Reshape";
                    CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
                    reshapeLp.set("axis", 0);
                    reshapeLp.set("num_axes", 1);
                    int newShape[] = {1, -1};
                    reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 2));

                    opencv_onnx::NodeProto proto;
                    proto.add_input(node_proto.input(0));
                    proto.add_output(reshapeLp.name);
                    addLayer(reshapeLp, proto);

                    LayerParams avgLp;
                    avgLp.name = layerParams.name + "/avg";
                    avgLp.type = "Pooling";
                    CV_Assert(layer_id.find(avgLp.name) == layer_id.end());
                    avgLp.set("pool", pool);
                    if (axes.size() == 2)
                    {
                        CV_CheckEQ(clamp(axes.get<int>(0), inpShape.size()), 1, "Unsupported mode");
                        CV_CheckEQ(clamp(axes.get<int>(1), inpShape.size()), 2, "Unsupported mode");
                        avgLp.set("global_pooling", true);
                    }
                    else
                    {
                        avgLp.set(axis == 2 ? "global_pooling_w" : "global_pooling_h", true);
                        avgLp.set(axis == 2 ? "kernel_h" : "kernel_w", 1);
                    }

                    node_proto.set_input(0, reshapeLp.name);
                    node_proto.set_output(0, avgLp.name);
                    addLayer(avgLp, node_proto);
                }
                else
                {
                    if (inpShape.size() != 4 && inpShape.size() != 5)
                        CV_Error(Error::StsNotImplemented, "Unsupported input shape of " + layer_type + " operation.");

                    CV_Assert(axes.size() <= inpShape.size() - 2);
                    std::vector<int> kernel_size(inpShape.size() - 2, 1);
                    for (int i = 0; i < axes.size(); i++) {
                        int axis = clamp(axes.get<int>(i), inpShape.size());
                        CV_Assert_N(axis >= 2 + i, axis < inpShape.size());
                        kernel_size[axis - 2] = inpShape[axis];
                    }
                    LayerParams poolLp = layerParams;
                    poolLp.name = layerParams.name + "/avg";
                    CV_Assert(layer_id.find(poolLp.name) == layer_id.end());
                    poolLp.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size()));

                    node_proto.set_output(0, poolLp.name);
                    addLayer(poolLp, node_proto);
                }

                layerParams.type = "Reshape";
                layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size()));

                node_proto.set_input(0, node_proto.output(0));
                node_proto.set_output(0, layerParams.name);
            }
            else if (!layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax"))
            {
                CV_CheckEQ(layerParams.get<int>("keepdims"), 0, "layer only supports keepdims = false");
                LayerParams reshapeLp;
                reshapeLp.name = layerParams.name + "/reshape";
                reshapeLp.type = "Reshape";
                CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
                int newShape[] = {1, 1, 1, -1};
                reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 4));

                opencv_onnx::NodeProto proto;
                proto.add_input(node_proto.input(0));
                proto.add_output(reshapeLp.name);
                addLayer(reshapeLp, proto);

                LayerParams poolLp = layerParams;
                poolLp.name = layerParams.name + "/pool";
                CV_Assert(layer_id.find(poolLp.name) == layer_id.end());

                node_proto.set_input(0, reshapeLp.name);
                node_proto.set_output(0, poolLp.name);
                addLayer(poolLp, node_proto);

                layerParams.type = "Reshape";
                int targetShape[] = {1};
                layerParams.set("dim", DictValue::arrayInt(&targetShape[0], 1));

                node_proto.set_input(0, node_proto.output(0));
                node_proto.set_output(0, layerParams.name);
            }
        }
        else if (layer_type == "Slice")
        {
            int axis = 0;
            std::vector<int> begin;
            std::vector<int> end;
            int inp_size = node_proto.input_size();

            if (inp_size == 1)
            {
                if (layerParams.has("steps"))
                {
                    DictValue steps = layerParams.get("steps");
                    for (int i = 0; i < steps.size(); ++i)
                    {
                        if (steps.get<int>(i) != 1)
                            CV_Error(Error::StsNotImplemented,
                                "Slice layer only supports steps = 1");
                    }
                }
                if (layerParams.has("axes")) {
                    DictValue axes = layerParams.get("axes");
                    for (int i = 1; i < axes.size(); ++i) {
                        CV_Assert(axes.get<int>(i - 1) == axes.get<int>(i) - 1);
                    }
                    axis = axes.get<int>(0);
                }

                DictValue starts = layerParams.get("starts");
                DictValue ends = layerParams.get("ends");
                CV_Assert(starts.size() == ends.size());

                if (axis > 0) {
                    begin.resize(axis, 0);
                    end.resize(axis, -1);
                }
                for (int i = 0; i < starts.size(); ++i)
                {
                    begin.push_back(starts.get<int>(i));
                    int finish = ends.get<int>(i);
                    end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim
                }
            } else {
                CV_Assert(inp_size >= 3);
                for (int i = 1; i < inp_size; i++) {
                    CV_Assert(constBlobs.find(node_proto.input(i)) != constBlobs.end());
                }
                Mat start_blob = getBlob(node_proto, 1);
                Mat end_blob   = getBlob(node_proto, 2);
                CV_Assert(start_blob.total() == end_blob.total());

                if (inp_size > 3) {
                    Mat axes_blob = getBlob(node_proto, 3);
                    const int* axes = (int*)axes_blob.data;
                    for (int i = 1; i < axes_blob.total(); ++i) {
                        CV_Assert(axes[i - 1] == axes[i] - 1);
                    }
                    axis = axes[0];
                }

                const int* starts = start_blob.ptr<int>();
                const int* ends   = end_blob.ptr<int>();
                if (axis > 0) {
                    begin.resize(axis, 0);
                    end.resize(axis, -1);
                }
                std::copy(starts, starts + start_blob.total(), std::back_inserter(begin));
                for (int i = 0; i < end_blob.total(); ++i)
                {
                    int finish = ends[i];
                    end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim
                }

                if (inp_size == 5) {
                    CV_Assert(constBlobs.find(node_proto.input(4)) != constBlobs.end());
                    Mat step_blob = getBlob(node_proto, 4);

                    // Some models use Slice with a negative step to reverse a tensor.
                    // Handle that pattern only for 2D constant inputs.
                    if (constBlobs.find(node_proto.input(0)) != constBlobs.end() &&
                        axis == 0 &&
                        start_blob.at<int>(0) == -1 && step_blob.at<int>(0) == -1 &&
                        end_blob.at<int>(0) == std::numeric_limits<int32_t>::min())
                    {
                        Mat inp = getBlob(node_proto, 0);
                        if (inp.dims == 2)
                        {
                            Mat flipped;
                            flip(inp, flipped, 0);
                            addConstant(layerParams.name, flipped);
                            return;
                        }
                    }
                    CV_CheckEQ(countNonZero(step_blob != 1), 0, "Slice layer only supports steps = 1");
                }
            }
            layerParams.set("begin", DictValue::arrayInt(&begin[0], begin.size()));
            layerParams.set("end", DictValue::arrayInt(&end[0], end.size()));
            layerParams.set("axis", axis);

            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
            {
                Mat inp = getBlob(node_proto, 0);
                std::vector<Mat> inputs, sliced;
                inputs.push_back(inp);
                runLayer(layerParams, inputs, sliced);
                CV_Assert(sliced.size() == 1);
                addConstant(layerParams.name, sliced[0]);
                return;
            }
        }
        else if (layer_type == "Split")
        {
            if (layerParams.has("split"))
            {
                DictValue splits = layerParams.get("split");
                const int numSplits = splits.size();
                CV_Assert(numSplits > 1);

                std::vector<int> slicePoints(numSplits - 1, splits.get<int>(0));
                for (int i = 1; i < splits.size() - 1; ++i)
                {
                    slicePoints[i] = slicePoints[i - 1] + splits.get<int>(i - 1);
                }
                layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size()));
            }
            else
            {
                layerParams.set("num_split", node_proto.output_size());
            }
            layerParams.type = "Slice";
        }
        else if (layer_type == "Add" || layer_type == "Sum" || layer_type == "Sub")
        {
            bool isSub = layer_type == "Sub";
            CV_CheckEQ(node_proto.input_size(), 2, "");
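            // Three cases: both inputs constant (fold the result now), exactly one constant
            // operand (Power / Eltwise with a Const layer / Scale), or two runtime tensors
            // (Eltwise for equal shapes, Scale otherwise).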
            bool is_const_0 = layer_id.find(node_proto.input(0)) == layer_id.end();
            bool is_const_1 = layer_id.find(node_proto.input(1)) == layer_id.end();
            if (is_const_0 && is_const_1)
            {
                Mat blob_0 = getBlob(node_proto, 0);
                Mat blob_1 = getBlob(node_proto, 1);
                CV_Assert(blob_0.size == blob_1.size);
                Mat output = isSub ? (blob_0 - blob_1) : (blob_0 + blob_1);
                addConstant(layerParams.name, output);
                return;
            }
            else if (is_const_0 || is_const_1)
            {
                int const_blob_id = is_const_0 ? 0 : 1;
                Mat blob = getBlob(node_proto, const_blob_id);
                int blob_total = blob.total();
                if (blob_total == 1) {
                    layerParams.type = "Power";
                    layerParams.set("shift", (isSub ? -1 : 1) * blob.at<float>(0));
                }
                else {
                    MatShape inpShape = outShapes[node_proto.input(1 - const_blob_id)];
                    if (shape(blob) == inpShape)
                    {
                        LayerParams constParams;
                        constParams.name = layerParams.name + "/const";
                        constParams.type = "Const";
                        constParams.blobs.push_back((isSub ? -1 : 1) * blob);
                        int id = dstNet.addLayer(constParams.name, constParams.type, constParams);
                        layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0)));
                        outShapes[constParams.name] = shape(blob);

                        layerParams.type = "Eltwise";
                        node_proto.set_input(const_blob_id, constParams.name);
                    }
                    else
                    {
                        layerParams.type = "Scale";
                        layerParams.set("bias_term", true);
                        int axis = 1;
                        for (int i = 0; i < graph_proto.initializer_size(); i++)
                        {
                            opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i);
                            if (tensor_proto.name() == node_proto.input(const_blob_id))
                            {
                                axis = inpShape.size() - tensor_proto.dims_size();
                                break;
                            }
                        }
                        layerParams.set("axis", axis);
                        blob = blob.reshape(1, 1);
                        layerParams.blobs.push_back((isSub ? -1 : 1) * blob);
                    }
                }
            }
            else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)])
            {
                layerParams.type = "Eltwise";
                if (isSub)
                {
                    static float subCoeffs[] = {1.f, -1.f};
                    layerParams.set("coeff", DictValue::arrayReal<float*>(subCoeffs, 2));
                }
            }
            else
            {
                if (isSub)
                {
                    LayerParams powerParams;
                    powerParams.name = layerParams.name + "/neg";
                    powerParams.type = "Power";
                    powerParams.set("scale", -1);

                    //Create Power layer
                    int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams);
                    //Connect to input
                    IterLayerId_t layerId = layer_id.find(node_proto.input(1));
                    CV_Assert(layerId != layer_id.end());
                    dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
                    //Add shape
                    layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0)));
                    outShapes[powerParams.name] = outShapes[node_proto.input(1)];

                    //Replace input to Power
                    node_proto.set_input(1, powerParams.name);
                }
                layerParams.type = "Scale";
                layerParams.set("bias_term", true);
            }
        }
        else if (layer_type == "Pow")
        {
            if (layer_id.find(node_proto.input(1)) != layer_id.end())
                CV_Error(Error::StsNotImplemented, "Unsupported Pow op with variable power");

            Mat blob = getBlob(node_proto, 1);
            if (blob.total() != 1)
                CV_Error(Error::StsNotImplemented, "Pow op supports only scalar power");

            blob.convertTo(blob, CV_32F);
            layerParams.type = "Power";
            layerParams.set("power", blob.at<float>(0));
        }
        else if (layer_type == "Max")
        {
            layerParams.type = "Eltwise";
            layerParams.set("operation", "max");
        }
        else if (layer_type == "Neg")
        {
            layerParams.type = "Power";
            layerParams.set("scale", -1);
        }
        else if (layer_type == "Constant")
        {
            CV_Assert(node_proto.input_size() == 0);
            CV_Assert(layerParams.blobs.size() == 1);
            addConstant(layerParams.name, layerParams.blobs[0]);
            return;
        }
        else if (layer_type == "LSTM")
        {
            LayerParams lstmParams = layerParams;
            lstmParams.name += "/lstm";

            // https://pytorch.org/docs/stable/nn.html#lstm
            CV_Assert(node_proto.input_size() == 7);
            Mat Wx = getBlob(node_proto, 1);
            Mat Wh = getBlob(node_proto, 2);
            Mat b = getBlob(node_proto, 3);
            CV_CheckEQ(countNonZero(getBlob(node_proto, 5)), 0, "Unsupported non zero initial_h");
            CV_CheckEQ(countNonZero(getBlob(node_proto, 6)), 0, "Unsupported non zero initial_c");
            b = b.reshape(1, b.size[0]);

            const int numHidden = lstmParams.get<int>("hidden_size");
            const int numDirs = Wx.size[0];  // Is 1 for forward only and 2 for bidirectional LSTM.
            const int numFeatures = Wx.size[2];
            Mat bx = b.colRange(0, b.cols / 2);
            Mat bh = b.colRange(b.cols / 2, b.cols);
            b = bx + bh;

            // IFGO->IGFO
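            // Swap the second and third blocks of rows (gates F and G) in Wx, Wh and the bias
            // for every direction so the gate order matches OpenCV's LSTM layer.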
            for (int k = 0; k < numDirs; ++k)
            {
                float* WxData = Wx.ptr<float>(k);
                float* WhData = Wh.ptr<float>(k);
                float* biasData = b.ptr<float>(k);
                for (int j = 0; j < numHidden; ++j)
                {
                    for (int i = 0; i < numFeatures; ++i)
                    {
                        std::swap(WxData[(numHidden + j) * numFeatures + i],
                                  WxData[(numHidden * 2 + j) * numFeatures + i]);
                    }
                    for (int i = 0; i < numHidden; ++i)
                    {
                        std::swap(WhData[(numHidden + j) * numHidden + i],
                                  WhData[(numHidden * 2 + j) * numHidden + i]);
                    }
                    std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]);
                }
            }
            Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]);
            Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]);

            lstmParams.blobs.resize(3);
            lstmParams.blobs[0] = Wh;
            lstmParams.blobs[1] = Wx;
            lstmParams.blobs[2] = b;
            lstmParams.set("bidirectional", lstmParams.get<String>("direction", "") == "bidirectional");

            node_proto.set_output(0, lstmParams.name);  // set different name so output shapes will be registered on that name
            addLayer(lstmParams, node_proto);

            MatShape lstmShape = outShapes[node_proto.output(0)];

            // Insert a dummy dimension of size 1 to match the ONNX LSTM output layout.
            lstmShape.insert(lstmShape.begin() + 1, 1);

            layerParams.type = "Reshape";
            layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size()));
            node_proto.set_input(0, lstmParams.name);  // redirect input to LSTM
            node_proto.set_output(0, layerParams.name);  // keep origin LSTM's name
        }
        else if (layer_type == "ImageScaler")
        {
            const float scale = layerParams.has("scale") ? layerParams.get<float>("scale") : 1.0f;
            layerParams.erase("scale");

            if (layerParams.has("bias"))
            {
                layerParams.type = "Scale";
                layerParams.blobs.push_back(
                    Mat(Size(1,  layerParams.get("bias").size()), CV_32FC1, scale));

                layerParams.set("bias_term", true);
                Mat bias(1, layerParams.get("bias").size(), CV_32FC1);
                for (int j = 0; j < bias.total(); j++) {
                    bias.at<float>(0, j) = layerParams.get("bias").getRealValue(j);
                }
                layerParams.blobs.push_back(bias);
                layerParams.erase("bias");
            }
            else {
                layerParams.set("scale", scale);
                layerParams.type = "Power";
            }
        }
        else if (layer_type == "Clip")
        {
            layerParams.type = "ReLU6";
            replaceLayerParam(layerParams, "min", "min_value");
            replaceLayerParam(layerParams, "max", "max_value");

        }
        else if (layer_type == "LeakyRelu")
        {
            layerParams.type = "ReLU";
            replaceLayerParam(layerParams, "alpha", "negative_slope");
        }
        else if (layer_type == "Relu")
        {
            layerParams.type = "ReLU";
        }
        else if (layer_type == "Elu")
        {
            layerParams.type = "ELU";
        }
        else if (layer_type == "Tanh")
        {
            layerParams.type = "TanH";
        }
        else if (layer_type == "PRelu")
        {
            layerParams.type = "PReLU";
            layerParams.blobs.push_back(getBlob(node_proto, 1));
        }
        else if (layer_type == "LRN")
        {
            replaceLayerParam(layerParams, "size", "local_size");
        }
        else if (layer_type == "InstanceNormalization")
        {
            if (node_proto.input_size() != 3)
                CV_Error(Error::StsNotImplemented,
                         "Expected input, scale, bias");

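            // InstanceNormalization is decomposed into an MVN layer (per-instance zero mean,
            // unit variance) followed by a BatchNorm layer that applies the scale and bias.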
            layerParams.blobs.resize(4);
            layerParams.blobs[2] = getBlob(node_proto, 1);  // weightData
            layerParams.blobs[3] = getBlob(node_proto, 2);  // biasData
            layerParams.set("has_bias", true);
            layerParams.set("has_weight", true);

            // Get number of channels in input
            int size = layerParams.blobs[2].total();
            layerParams.blobs[0] = Mat::zeros(size, 1, CV_32F); // mean
            layerParams.blobs[1] = Mat::ones(size, 1, CV_32F); // std

            LayerParams mvnParams;
            mvnParams.name = layerParams.name + "/MVN";
            mvnParams.type = "MVN";
            mvnParams.set("eps", layerParams.get<float>("epsilon"));
            layerParams.erase("epsilon");

            //Create MVN layer
            int id = dstNet.addLayer(mvnParams.name, mvnParams.type, mvnParams);
            //Connect to input
            IterLayerId_t layerId = layer_id.find(node_proto.input(0));
            CV_Assert(layerId != layer_id.end());
            dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
            //Add shape
            layer_id.insert(std::make_pair(mvnParams.name, LayerInfo(id, 0)));
            outShapes[mvnParams.name] = outShapes[node_proto.input(0)];

            //Replace Batch Norm's input to MVN
            node_proto.set_input(0, mvnParams.name);
            layerParams.type = "BatchNorm";
        }
        else if (layer_type == "BatchNormalization")
        {
            if (node_proto.input_size() != 5)
                CV_Error(Error::StsNotImplemented,
                         "Expected input, scale, bias, mean and var");

            layerParams.type = "BatchNorm";
            replaceLayerParam(layerParams, "epsilon", "eps");
            replaceLayerParam(layerParams, "spatial", "use_global_stats");

            Mat meanData = getBlob(node_proto, 3);
            Mat stdData =  getBlob(node_proto, 4);

            layerParams.blobs.push_back(meanData);
            layerParams.blobs.push_back(stdData);

            if (!node_proto.input(1).empty()) {
                layerParams.set("has_weight", true);
                layerParams.blobs.push_back(getBlob(node_proto, 1));  // weightData
            } else {
                layerParams.set("has_weight", false);
            }

            if (!node_proto.input(2).empty()) {
                layerParams.set("has_bias", true);
                layerParams.blobs.push_back(getBlob(node_proto, 2)); // biasData
            } else {
                layerParams.set("has_bias", false);
            }
        }
        else if (layer_type == "Gemm")
        {
            CV_Assert(node_proto.input_size() >= 2);
            layerParams.type = "InnerProduct";
            Mat weights = getBlob(node_proto, 1);
            int ind_num_out = 0;
            if (layerParams.has("transB") && !layerParams.get<int>("transB")) {
                transpose(weights, weights);
                ind_num_out = 1;
            }
            layerParams.blobs.push_back(weights);

            if (node_proto.input_size() == 3) {
1061
                Mat bias = getBlob(node_proto, 2);
1062 1063
                layerParams.blobs.push_back(bias);
            }
            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
            {
                Mat inputBuf = getBlob(node_proto, 0);

                LayerParams constParams;
                constParams.name = node_proto.input(0);
                constParams.type = "Const";
                constParams.blobs.push_back(inputBuf);

                opencv_onnx::NodeProto proto;
                proto.add_output(constParams.name);
                addLayer(constParams, proto);
            }

            layerParams.set("num_output", layerParams.blobs[0].size[ind_num_out]);
            layerParams.set("bias_term", node_proto.input_size() == 3);
        }
        else if (layer_type == "MatMul")
        {
            CV_Assert(node_proto.input_size() == 2);
            layerParams.type = "InnerProduct";
            layerParams.set("bias_term", false);
            CV_Assert(constBlobs.find(node_proto.input(0)) == constBlobs.end());
            int firstInpDims = outShapes[node_proto.input(0)].size();
            int secondInpDims;

            if (constBlobs.find(node_proto.input(1)) != constBlobs.end())
            {
                Mat blob = getBlob(node_proto, 1);
                secondInpDims = blob.dims;
                layerParams.blobs.push_back(blob.t());
                layerParams.set("num_output", layerParams.blobs[0].size[0]);
            } else {
                secondInpDims = outShapes[node_proto.input(1)].size();
            }
            layerParams.set("axis", firstInpDims - secondInpDims + 1);
        }
        else if (layer_type == "Mul" || layer_type == "Div")
        {
            CV_Assert(node_proto.input_size() == 2);
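            // Mirrors the Add/Sub handling: fold when both inputs are constant, use Power or
            // Scale for a single constant operand, and Eltwise (equal shapes) or Scale (broadcast)
            // when both are runtime tensors.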

            bool isDiv = layer_type == "Div";
            int constId = -1;
            bool haveVariables = false;
            for (int i = 0; i < 2; ++i)
            {
                if (constBlobs.find(node_proto.input(i)) != constBlobs.end())
                    constId = i;
                else
                    haveVariables = true;
            }
            if (constId != -1 && haveVariables)
            {
                Mat blob = getBlob(node_proto, constId);
                blob = blob.reshape(1, 1);
                if (blob.total() == 1) {
                    float coeff = isDiv ? 1.0 / blob.at<float>(0) : blob.at<float>(0);
                    layerParams.set("scale", coeff);
                    layerParams.type = "Power";
                }
                else {
                    if (isDiv)
                        divide(1.0, blob, blob);
                    layerParams.blobs.push_back(blob);
                    layerParams.type = "Scale";
                }
            }
            else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)])
            {
                layerParams.type = "Eltwise";
                layerParams.set("operation", isDiv ? "div" : "prod");
            }
            else
            {
                // The Scale layer allocates its output using the first input's shape, so put the larger tensor first.
                if (total(outShapes[node_proto.input(0)]) < total(outShapes[node_proto.input(1)]))
                {
                    opencv_onnx::NodeProto proto;
                    proto.add_input(node_proto.input(1));
                    proto.add_input(node_proto.input(0));
                    proto.add_output(layerParams.name);
                    node_proto = proto;
                }

D
                {
                    LayerParams powerParams;
                    powerParams.name = layerParams.name + "/inv";
                    powerParams.type = "Power";
                    powerParams.set("power", -1);

                    //Create Power layer
                    int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams);
                    //Connect to input
                    IterLayerId_t layerId = layer_id.find(node_proto.input(1));
                    CV_Assert(layerId != layer_id.end());
                    dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0);
                    //Add shape
                    layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0)));
                    outShapes[powerParams.name] = outShapes[node_proto.input(1)];

                    //Replace input to Power
                    node_proto.set_input(1, powerParams.name);
                }
                layerParams.type = "Scale";
            }

            if (!haveVariables)
            {
                Mat inp0 = getBlob(node_proto, 0);
                Mat inp1 = getBlob(node_proto, 1);
                if (inp0.size != inp1.size && inp1.total() != 1)
                    CV_Error(Error::StsNotImplemented, "Constant multiply with different shapes");

                Mat out = isDiv ? inp0 / inp1 : inp0.mul(inp1);
                out = out.reshape(1, inp0.dims, inp0.size);
                out.dims = inp0.dims;  // to workaround dims == 1
                addConstant(layerParams.name, out);
                return;
            }
        }
        else if (layer_type == "Conv")
        {
            CV_Assert(node_proto.input_size() >= 2);
            layerParams.type = "Convolution";
            for (int j = 1; j < node_proto.input_size(); j++) {
                if (constBlobs.find(node_proto.input(j)) != constBlobs.end())
                {
                    layerParams.blobs.push_back(getBlob(node_proto, j));
                }
            }
            int outCn = layerParams.blobs.empty() ? outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0];
            layerParams.set("num_output", outCn);
        }
        else if (layer_type == "ConvTranspose")
        {
            CV_Assert(node_proto.input_size() >= 2);
            layerParams.type = "Deconvolution";
            for (int j = 1; j < node_proto.input_size(); j++) {
                layerParams.blobs.push_back(getBlob(node_proto, j));
            }
            layerParams.set("num_output", layerParams.blobs[0].size[1] * layerParams.get<int>("group", 1));
            layerParams.set("bias_term", node_proto.input_size() == 3);

            if (!layerParams.has("kernel_size"))
                CV_Error(Error::StsNotImplemented,
                         "Required attribute 'kernel_size' is not present.");

            if (layerParams.has("output_shape"))
            {
                const DictValue& outShape = layerParams.get("output_shape");
                DictValue strides = layerParams.get("stride");
                DictValue kernel = layerParams.get("kernel_size");

                String padMode;
                std::vector<int> adjust_pads;
                if (layerParams.has("pad_mode"))
                {
                    padMode = toUpperCase(layerParams.get<String>("pad_mode"));
                    if (padMode != "SAME" && padMode != "VALID")
                        CV_Error(Error::StsError, "Unsupported padding mode " + padMode);

                    for (int i = 0; i < strides.size(); i++)
                    {
                        int sz = outShape.get<int>(2 + i);
                        int stride = strides.get<int>(i);
                        adjust_pads.push_back(padMode == "SAME"? (sz - 1) % stride :
                                                                 (sz - kernel.get<int>(i)) % stride);
                    }
                    layerParams.set("adj", DictValue::arrayInt(&adjust_pads[0], adjust_pads.size()));
                }
            }
            else if (layerParams.has("output_padding"))
            {
                replaceLayerParam(layerParams, "output_padding", "adj");
            }
        }
        else if (layer_type == "Transpose")
        {
            layerParams.type = "Permute";
            replaceLayerParam(layerParams, "perm", "order");

            CV_Assert(node_proto.input_size() == 1);
            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
            {
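                // Constant input: run the Permute layer immediately and store the transposed tensor as a constant.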
                std::vector<Mat> inputs(1, getBlob(node_proto, 0)), transposed;
                runLayer(layerParams, inputs, transposed);
                CV_Assert(transposed.size() == 1);
                addConstant(layerParams.name, transposed[0]);
                return;
            }
        }
        else if (layer_type == "Squeeze")
        {
            CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes"));
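            // Squeeze only the listed axes that actually have extent 1; every other dimension is preserved.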
            DictValue axes_dict = layerParams.get("axes");
            MatShape inpShape = outShapes[node_proto.input(0)];

            std::vector<bool> maskedAxes(inpShape.size(), false);
            for (int i = 0; i < axes_dict.size(); ++i)
            {
                int axis = axes_dict.getIntValue(i);
                CV_CheckLE(axis, static_cast<int>(inpShape.size()), "Squeeze axis");
                maskedAxes[axis] = inpShape[axis] == 1;
            }
            MatShape outShape;
            for (int i = 0; i < inpShape.size(); ++i)
            {
                if (!maskedAxes[i])
                    outShape.push_back(inpShape[i]);
            }
            if (outShape.size() != inpShape.size())
            {
                layerParams.type = "Reshape";
                layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
            }
            else
                layerParams.type = "Identity";

            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
            {
                Mat inp = getBlob(node_proto, 0);
                Mat out = inp.reshape(1, outShape);
                out.dims = outShape.size();  // to workaround dims == 1
                addConstant(layerParams.name, out);
                return;
            }
        }
        else if (layer_type == "Flatten")
        {
            CV_CheckEQ(node_proto.input_size(), 1, "");
            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
            {
                Mat input = getBlob(node_proto, 0);
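                // Flatten a constant input at import time: keep the first 'axis' dimensions and collapse the rest into a single trailing dimension.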
                int axis = clamp(layerParams.get<int>("axis", 1), input.dims);

                std::vector<int> out_size(&input.size[0], &input.size[0] + axis);
                out_size.push_back(input.total(axis));
                Mat output = input.reshape(1, out_size);
                addConstant(layerParams.name, output);
                return;
            }
        }
        else if (layer_type == "Unsqueeze")
        {
            CV_Assert(node_proto.input_size() == 1);
            DictValue axes = layerParams.get("axes");
            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
            {
                // Constant input.
                Mat input = getBlob(node_proto, 0);

                std::vector<int> dims;
                for (int j = 0; j < input.dims; j++) {
                    dims.push_back(input.size[j]);
                }
                CV_Assert(axes.getIntValue(axes.size()-1) <= dims.size());
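                // Insert a singleton dimension at each axis listed in the 'axes' attribute.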
                for (int j = 0; j < axes.size(); j++) {
                    dims.insert(dims.begin() + axes.getIntValue(j), 1);
                }

                Mat out = input.reshape(0, dims);
                addConstant(layerParams.name, out);
                return;
            }

            // Variable input.
            if (axes.size() != 1)
                CV_Error(Error::StsNotImplemented, "Multidimensional unsqueeze");

            MatShape inpShape = outShapes[node_proto.input(0)];
            int axis = axes.getIntValue(0);
            CV_Assert(0 <= axis && axis <= inpShape.size());
            std::vector<int> outShape = inpShape;
            outShape.insert(outShape.begin() + axis, 1);
            layerParams.type = "Reshape";
            layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size()));
        }
        else if (layer_type == "Expand")
        {
            CV_CheckEQ(node_proto.input_size(), 2, "");
            const std::string& input0 = node_proto.input(0);
            const std::string& input1 = node_proto.input(1);
            Mat newShapeMat = getBlob(input1);
            MatShape targetShape(newShapeMat.ptr<int>(), newShapeMat.ptr<int>() + newShapeMat.total());

            MatShape inpShape;
            bool haveVariables = constBlobs.find(input0) == constBlobs.end();
            if (haveVariables)
            {
                IterShape_t shapeIt = outShapes.find(input0);
                CV_Assert(shapeIt != outShapes.end());
                inpShape = shapeIt->second;
            }
            else
            {
                inpShape = shape(getBlob(input0));
            }

            String srcName = input0;
            // Unsqueeze and repeat along new axis
            if (targetShape.size() == inpShape.size() + 1)
            {
                for (int i = 0; i < targetShape.size(); i++)
                {
                    if (targetShape[i] == -1 && i < inpShape.size())
                        targetShape[i] = inpShape[i];
                    else if (i < inpShape.size() && targetShape[i] != inpShape[i])
                        inpShape.insert(inpShape.begin() + i, 1);
                }
                if (haveVariables)
                {
                    LayerParams reshapeLp;
                    reshapeLp.name = layerParams.name + "/reshape";
                    reshapeLp.type = "Reshape";
                    CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end());
                    reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));

                    opencv_onnx::NodeProto proto;
                    proto.add_input(node_proto.input(0));
                    proto.add_output(reshapeLp.name);
                    addLayer(reshapeLp, proto);
                    srcName = reshapeLp.name;
                }
            }
            CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims");
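            // Collect the axes where the input extent is 1 but the target differs; only size-1 axes can be broadcast.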

            std::vector<int> broadcast_axes;
            for (int i = 0; i < targetShape.size(); i++)
            {
                if (targetShape[i] != inpShape[i])
                {
                    if (inpShape[i] == 1)
                        broadcast_axes.push_back(i);
                    else
                        CV_Error(Error::StsError, format("Could not be broadcast by axis: %d", i));
                }
            }

            if (!haveVariables)
            {
                if (broadcast_axes.size() != 1)
                    CV_Error(Error::StsNotImplemented, "Expand op doesn't support multiple axes for constant input");

                Mat input = getBlob(node_proto, 0);
                input = input.reshape(0, total(inpShape, 0, broadcast_axes[0]));
                Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]);
                output = output.reshape(0, targetShape);
                addConstant(layerParams.name, output);
                return;
            }

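            // Two adjacent trailing broadcast axes: multiply an all-ones constant of the target shape by the input through a Scale layer, which performs the broadcast.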
            if (broadcast_axes.size() == 2 &&
                broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1)
            {
                LayerParams constParams;
                constParams.name = layerParams.name + "/const";
                CV_Assert(layer_id.find(constParams.name) == layer_id.end());
                constParams.type = "Const";

                Mat inp = Mat::ones(newShapeMat.total(), newShapeMat.ptr<int>(), CV_32F);
                constParams.blobs.push_back(inp);

                opencv_onnx::NodeProto proto;
                proto.add_output(constParams.name);
                addLayer(constParams, proto);

                layerParams.type = "Scale";
                layerParams.set("bias_term", false);
                node_proto.set_input(0, constParams.name);
                node_proto.set_input(1, srcName);
            }
            else if (broadcast_axes.size() == 1 && broadcast_axes[0] <= 1)
            {
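                // One leading broadcast axis: create targetShape[axis] Identity copies of the input and concatenate them along that axis.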
                String base_name = layerParams.name + "/copy_";
                std::vector<std::string> input_names;
                for (int j = 0; j < targetShape[broadcast_axes[0]]; j++)
                {
                    std::ostringstream ss;
                    ss << j;
                    LayerParams copyLP;
                    copyLP.name = base_name + ss.str();
                    copyLP.type = "Identity";
                    CV_Assert(layer_id.find(copyLP.name) == layer_id.end());
                    input_names.push_back(copyLP.name);

                    node_proto.set_input(0, srcName);
                    node_proto.set_output(0, copyLP.name);
                    addLayer(copyLP, node_proto);
                }
                node_proto.clear_input();
                for (int i = 0; i < input_names.size(); i++)
                {
                    node_proto.add_input(input_names[i]);
                }
                layerParams.set("axis", broadcast_axes[0]);
                layerParams.type = "Concat";
                node_proto.set_output(0, layerParams.name);
            }
            else
                CV_Error(Error::StsNotImplemented, "Unsupported Expand op");
        }
        else if (layer_type == "Reshape")
        {
            CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape"));

            if (node_proto.input_size() == 2) {
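                // The target shape is provided as a constant second input (an int32 tensor).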
                Mat blob = getBlob(node_proto, 1);
                CV_Assert(blob.type() == CV_32SC1);

                layerParams.set("dim", DictValue::arrayInt<int*>(
                            blob.ptr<int>(), blob.total() ));

                if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
                    std::vector<Mat> inputs(1, getBlob(node_proto, 0)), outputs;
                    runLayer(layerParams, inputs, outputs);
                    addConstant(layerParams.name, outputs[0]);
                    return;
                }
            }
            else {
                DictValue shape = layerParams.get("shape");
                std::vector<int> dim;
                for (int j = 0; j < shape.size(); j++) {
                    dim.push_back(shape.getIntValue(j));
                }

                if (layer_id.find(node_proto.input(0)) == layer_id.end()) {
                    Mat input = getBlob(node_proto, 0);
                    Mat out = input.reshape(0, dim);
                    addConstant(layerParams.name, out);
                    return;
                }
                replaceLayerParam(layerParams, "shape", "dim");
            }
        }
        else if (layer_type == "Pad")
        {
            layerParams.type = "Padding";
            replaceLayerParam(layerParams, "mode", "type");
            if (node_proto.input_size() == 3 || node_proto.input_size() == 2)
            {
                // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN.
                // We need to shuffle it to begin0, end0, begin1, end1, ...
                Mat paddings = getBlob(node_proto, 1).reshape(1, 2);
                paddings = paddings.t();
                layerParams.set("paddings", DictValue::arrayInt(paddings.ptr<int>(), paddings.total()));

                if (node_proto.input_size() == 3)
                {
                    Mat value = getBlob(node_proto, 2);
                    layerParams.set("value", value.at<float>(0));
                }
            }
        }
        else if (layer_type == "Shape")
        {
            CV_Assert(node_proto.input_size() == 1);
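            // Shape is resolved at import time: the inferred input shape is emitted as a 1-D CV_32S constant.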
            IterShape_t shapeIt = outShapes.find(node_proto.input(0));
            CV_Assert(shapeIt != outShapes.end());
            const MatShape& inpShape = shapeIt->second;

            Mat shapeMat(inpShape.size(), 1, CV_32S);
            for (int j = 0; j < inpShape.size(); ++j)
                shapeMat.at<int>(j) = inpShape[j];
            shapeMat.dims = 1;

            addConstant(layerParams.name, shapeMat);
            return;
        }
        else if (layer_type == "Cast")
        {
            if (constBlobs.find(node_proto.input(0)) != constBlobs.end())
            {
                Mat blob = getBlob(node_proto, 0);
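                // Translate the ONNX 'to' type into an OpenCV depth; signed integer types collapse to CV_32S and float16 payloads are kept as raw CV_16S data.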
                int type;
                switch (layerParams.get<int>("to"))
                {
                    case opencv_onnx::TensorProto_DataType_FLOAT:   type = CV_32F; break;
                    case opencv_onnx::TensorProto_DataType_UINT8:   type = CV_8U; break;
                    case opencv_onnx::TensorProto_DataType_UINT16:  type = CV_16U; break;
                    case opencv_onnx::TensorProto_DataType_FLOAT16: type = CV_16S; break;
                    case opencv_onnx::TensorProto_DataType_INT8:
                    case opencv_onnx::TensorProto_DataType_INT16:
                    case opencv_onnx::TensorProto_DataType_INT32:
                    case opencv_onnx::TensorProto_DataType_INT64:   type = CV_32S; break;
                    default: type = blob.type();
                }
                Mat dst;
                blob.convertTo(dst, type);
                dst.dims = blob.dims;
                addConstant(layerParams.name, dst);
                return;
            }
            else
                layerParams.type = "Identity";
        }
        else if (layer_type == "ConstantOfShape" || layer_type == "ConstantFill")
        {
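            // The fill value comes from the parsed 'value' tensor attribute (stored in blobs) or defaults to 0; the first input supplies the output shape.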
            int depth = CV_32F;
            float fill_value;
            if (!layerParams.blobs.empty())
            {
                CV_Assert(!layerParams.has("value"));
                depth = layerParams.blobs[0].depth();
                Mat floats;
                layerParams.blobs[0].convertTo(floats, CV_32F);
                fill_value = floats.at<float>(0, 0);
            }
            else
                fill_value = layerParams.get("value", 0);

            MatShape inpShape = getBlob(node_proto, 0);
            for (int i = 0; i < inpShape.size(); i++)
                CV_CheckGT(inpShape[i], 0, "");
            Mat tensor(inpShape.size(), &inpShape[0], depth, Scalar(fill_value));
            addConstant(layerParams.name, tensor);
            return;
        }
        else if (layer_type == "Gather")
        {
            CV_Assert(node_proto.input_size() == 2);
            Mat indexMat = getBlob(node_proto, 1);
            CV_Assert_N(indexMat.type() == CV_32S, indexMat.total() == 1);
            int index = indexMat.at<int>(0);
            int axis = layerParams.get<int>("axis", 0);

            if ((constBlobs.find(node_proto.input(0)) != constBlobs.end()))
            {
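                // Constant input with a single index: slice a one-element Range along 'axis' at import time.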
                Mat input = getBlob(node_proto, 0);
                Mat out;
                std::vector<cv::Range> ranges(input.dims, Range::all());
                ranges[axis] = Range(index, index + 1);

                out = input(ranges);
                MatShape outShape = shape(out);
                if (outShape.size() > 1)
                {
                    outShape.erase(outShape.begin() + axis);
                    out.reshape(0, outShape);
                } else {
                    out.dims = 1;
                }
                addConstant(layerParams.name, out);
                return;
            }
            else
            {
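                // Runtime input: express Gather as a Slice over [index, index + 1) along 'axis', followed by a Reshape that removes the sliced axis for multi-dimensional inputs.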
                IterShape_t shapeIt = outShapes.find(node_proto.input(0));
                CV_Assert(shapeIt != outShapes.end());
                MatShape inpShape = shapeIt->second;

                LayerParams sliceLp;
                sliceLp.type = "Slice";
                sliceLp.name = inpShape.size() > 1 ? layerParams.name + "/slice" : layerParams.name;
                std::vector<int> begin(inpShape.size(), 0);
                std::vector<int> end(inpShape.size(), -1);
                begin[axis] = index;
                end[axis] = index + 1;

                cv::dnn::DictValue paramBegin = cv::dnn::DictValue::arrayInt(begin.data(), begin.size());
                cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end.data(), end.size());
                sliceLp.set("begin", paramBegin);
                sliceLp.set("end", paramEnd);

                if (inpShape.size() > 1)
                {
                    opencv_onnx::NodeProto proto;
                    proto.add_input(node_proto.input(0));
                    proto.add_output(sliceLp.name);
                    addLayer(sliceLp, proto);

                    inpShape.erase(inpShape.begin() + axis);
                    layerParams.type = "Reshape";
                    layerParams.set("axis", 0);
                    layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size()));
                    node_proto.set_input(0, sliceLp.name);
                }
                else
                {
                    layerParams = sliceLp;
                }
            }
        }
        else if (layer_type == "Concat")
        {
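            // If every input is already a constant, run the concatenation now and register the result as a constant.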
            bool hasVariableInps = false;
            for (int i = 0; i < node_proto.input_size(); ++i)
            {
                if (layer_id.find(node_proto.input(i)) != layer_id.end())
                {
                    hasVariableInps = true;
                    break;
                }
            }

            if (!hasVariableInps)
            {
                std::vector<Mat> inputs(node_proto.input_size()), concatenated;
                for (size_t i = 0; i < inputs.size(); ++i)
                {
                    inputs[i] = getBlob(node_proto, i);
                }
                runLayer(layerParams, inputs, concatenated);

                CV_Assert(concatenated.size() == 1);
                addConstant(layerParams.name, concatenated[0]);
                return;
            }
        }
        else if (layer_type == "Resize")
        {
            for (int i = 1; i < node_proto.input_size(); i++)
                CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end());

            String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
            CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");

            layerParams.set("align_corners", interp_mode == "align_corners");
            Mat shapes = getBlob(node_proto, node_proto.input_size() - 1);
            CV_CheckEQ(shapes.size[0], 4, "");
            CV_CheckEQ(shapes.size[1], 1, "");
            CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, "");
            if (shapes.depth() == CV_32F)
                shapes.convertTo(shapes, CV_32S);
            int height = shapes.at<int>(2);
            int width  = shapes.at<int>(3);
            if (node_proto.input_size() == 3)
            {
                IterShape_t shapeIt = outShapes.find(node_proto.input(0));
                CV_Assert(shapeIt != outShapes.end());
                MatShape scales = shapeIt->second;
                height *= scales[2];
                width  *= scales[3];
            }
            layerParams.set("width", width);
            layerParams.set("height", height);

            if (layerParams.get<String>("mode") == "linear") {
                layerParams.set("mode", interp_mode == "pytorch_half_pixel" ?
                                        "opencv_linear" : "bilinear");
            }
            replaceLayerParam(layerParams, "mode", "interpolation");
        }
        else if (layer_type == "Upsample")
        {
            //fused from Resize Subgraph
            if (layerParams.has("coordinate_transformation_mode"))
            {
                String interp_mode = layerParams.get<String>("coordinate_transformation_mode");
                CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn");

                layerParams.set("align_corners", interp_mode == "align_corners");
                if (layerParams.get<String>("mode") == "linear")
                {
                    layerParams.set("mode", interp_mode == "pytorch_half_pixel" ?
                                            "opencv_linear" : "bilinear");
                }
            }
            if (layerParams.get<String>("mode") == "linear" && framework_name == "pytorch")
                layerParams.set("mode", "opencv_linear");

            layerParams.type = "Resize";
            if (layerParams.has("scales"))
            {
                // Pytorch layer
                DictValue scales = layerParams.get("scales");
                CV_Assert(scales.size() == 4);
                layerParams.set("zoom_factor_y", scales.getIntValue(2));
                layerParams.set("zoom_factor_x", scales.getIntValue(3));
            }
            else if (layerParams.has("height_scale") && layerParams.has("width_scale"))
            {
                // Caffe2 layer
                replaceLayerParam(layerParams, "height_scale", "zoom_factor_y");
                replaceLayerParam(layerParams, "width_scale", "zoom_factor_x");
            }
            else
            {
                // scales as input
                Mat scales = getBlob(node_proto, 1);
                CV_Assert(scales.total() == 4);
                layerParams.set("zoom_factor_y", scales.at<float>(2));
                layerParams.set("zoom_factor_x", scales.at<float>(3));
            }
            replaceLayerParam(layerParams, "mode", "interpolation");
        }
        else if (layer_type == "SoftMax" || layer_type == "LogSoftmax")
        {
            layerParams.type = "Softmax";
            layerParams.set("log_softmax", layer_type == "LogSoftmax");
        }
        else if (layer_type == "DetectionOutput")
        {
            CV_CheckEQ(node_proto.input_size(), 3, "");
            if (constBlobs.find(node_proto.input(2)) != constBlobs.end())
            {
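                // Wrap the constant priors blob in a Const layer so DetectionOutput can consume it as a regular input.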
                Mat priors = getBlob(node_proto, 2);

                LayerParams constParams;
                constParams.name = layerParams.name + "/priors";
                constParams.type = "Const";
                constParams.blobs.push_back(priors);

                opencv_onnx::NodeProto priorsProto;
                priorsProto.add_output(constParams.name);
                addLayer(constParams, priorsProto);

                node_proto.set_input(2, constParams.name);
            }
        }
        else
        {
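            // Generic fallback: keep the layer type as-is and attach any inputs that are not outputs of other layers as constant blobs.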
            for (int j = 0; j < node_proto.input_size(); j++) {
                if (layer_id.find(node_proto.input(j)) == layer_id.end())
                    layerParams.blobs.push_back(getBlob(node_proto, j));
            }
        }
        addLayer(layerParams, node_proto);
    }
    catch (const cv::Exception& e)
    {
        CV_LOG_ERROR(NULL, "DNN/ONNX: ERROR during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: "
                << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str())
        );
        for (int i = 0; i < node_proto.input_size(); i++)
        {
            CV_LOG_INFO(NULL, "    Input[" << i << "] = '" << node_proto.input(i) << "'");
        }
        for (int i = 0; i < node_proto.output_size(); i++)
        {
            CV_LOG_INFO(NULL, "    Output[" << i << "] = '" << node_proto.output(i) << "'");
        }
        CV_Error(Error::StsError, cv::format("Node [%s]:(%s) parse error: %s", layer_type.c_str(), name.c_str(), e.what()));
    }
}

Net readNetFromONNX(const String& onnxFile)
{
    Net net;
    ONNXImporter onnxImporter(net, onnxFile.c_str());
    return net;
}

Net readNetFromONNX(const char* buffer, size_t sizeBuffer)
{
    Net net;
    ONNXImporter onnxImporter(net, buffer, sizeBuffer);
    return net;
}

Net readNetFromONNX(const std::vector<uchar>& buffer)
{
    return readNetFromONNX(reinterpret_cast<const char*>(buffer.data()), buffer.size());
}

Mat readTensorFromONNX(const String& path)
{
    std::fstream input(path.c_str(), std::ios::in | std::ios::binary);
    if (!input)
    {
        CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", path.c_str()));
    }

    opencv_onnx::TensorProto tensor_proto = opencv_onnx::TensorProto();
    if (!tensor_proto.ParseFromIstream(&input))
    {
        CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX data: %s", path.c_str()));
    }
    Mat mat = getMatFromTensor(tensor_proto);
    releaseONNXTensor(tensor_proto);
    return mat;
}

CV__DNN_EXPERIMENTAL_NS_END
}} // namespace

#endif