diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index 084e4ac6daa560b9c4779d609804e986c8410918..39c2309394742425003e6ccbddf3b21d6e8bb674 100644
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@@ -510,2051 +510,2274 @@ protected:
 private:
     void addPermuteLayer(const int* order, const std::string& permName, Pin& inpId);
+
+    typedef void (TFImporter::*TFImporterNodeParser)(tensorflow::GraphDef&, const tensorflow::NodeDef&, LayerParams&);
+    typedef std::map<std::string, TFImporterNodeParser> DispatchMap;
+
+    const DispatchMap dispatch;
+    static const DispatchMap buildDispatchMap();
+
+    void parseConvolution        (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseBias               (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseMatMul             (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseReshape            (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseFlatten            (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseTranspose          (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseConstant           (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseLrn                (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseConcat             (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseMaxPool            (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseAvgPool            (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseMaxPoolGrad        (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parsePlaceholder        (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseSplit              (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseSlice              (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseStridedSlice       (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseMul                (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseFusedBatchNorm     (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseConv2DBackpropInput(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseBlockLSTM          (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseResize             (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseL2Normalize        (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parsePriorBox           (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseSoftmax            (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseCropAndResize      (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parseMean               (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams);
+    void parsePack
(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseClipByValue (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseLeakyRelu (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseActivation (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + + void parseCustomLayer (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); }; -TFImporter::TFImporter(Net& net, const char *model, const char *config) - : dstNet(net) +const TFImporter::DispatchMap TFImporter::buildDispatchMap() { - if (model && model[0]) - { - CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from file: " << model); - ReadTFNetParamsFromBinaryFileOrDie(model, &netBin); - } - if (config && config[0]) - { - CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from file: " << config); - ReadTFNetParamsFromTextFileOrDie(config, &netTxt); - } - - populateNet(); + static DispatchMap dispatch; + dispatch["Conv2D"] = dispatch["SpaceToBatchND"] = dispatch["DepthwiseConv2dNative"] = + dispatch["Pad"] = dispatch["MirrorPad"] = dispatch["Conv3D"] = &TFImporter::parseConvolution; + dispatch["BiasAdd"] = dispatch["Add"] = dispatch["AddV2"] = dispatch["Sub"] = dispatch["AddN"] = &TFImporter::parseBias; + dispatch["MatMul"] = &TFImporter::parseMatMul; + dispatch["Reshape"] = &TFImporter::parseReshape; + dispatch["Flatten"] = dispatch["Squeeze"] = &TFImporter::parseFlatten; + dispatch["Transpose"] = &TFImporter::parseTranspose; + dispatch["Const"] = &TFImporter::parseConstant; + dispatch["LRN"] = &TFImporter::parseLrn; + dispatch["Concat"] = dispatch["ConcatV2"] = &TFImporter::parseConcat; + dispatch["MaxPool"] = dispatch["MaxPool3D"] = &TFImporter::parseMaxPool; + dispatch["AvgPool"] = dispatch["AvgPool3D"] = &TFImporter::parseAvgPool; + dispatch["MaxPoolGrad"] = &TFImporter::parseMaxPoolGrad; + dispatch["Placeholder"] = &TFImporter::parsePlaceholder; + dispatch["Split"] = &TFImporter::parseSplit; + dispatch["Slice"] = &TFImporter::parseSlice; + dispatch["StridedSlice"] = &TFImporter::parseStridedSlice; + dispatch["Mul"] = dispatch["RealDiv"] = &TFImporter::parseMul; + dispatch["FusedBatchNorm"] = dispatch["FusedBatchNormV3"] = &TFImporter::parseFusedBatchNorm; + dispatch["Conv2DBackpropInput"] = &TFImporter::parseConv2DBackpropInput; + dispatch["BlockLSTM"] = &TFImporter::parseBlockLSTM; + dispatch["ResizeNearestNeighbor"] = dispatch["ResizeBilinear"] = dispatch["FusedResizeAndPadConv2D"] = &TFImporter::parseResize; + dispatch["L2Normalize"] = &TFImporter::parseL2Normalize; + dispatch["PriorBox"] = &TFImporter::parsePriorBox; + dispatch["Softmax"] = &TFImporter::parseSoftmax; + dispatch["CropAndResize"] = &TFImporter::parseCropAndResize; + dispatch["Mean"] = dispatch["Sum"] = &TFImporter::parseMean; + dispatch["Pack"] = &TFImporter::parsePack; + dispatch["ClipByValue"] = &TFImporter::parseClipByValue; + dispatch["LeakyRelu"] = &TFImporter::parseLeakyRelu; + dispatch["Abs"] = dispatch["Tanh"] = dispatch["Sigmoid"] = dispatch["Relu"] = + dispatch["Elu"] = dispatch["Exp"] = dispatch["Identity"] = dispatch["Relu6"] = &TFImporter::parseActivation; + + return dispatch; } -TFImporter::TFImporter( - Net& net, - const char *dataModel, size_t lenModel, - const char *dataConfig, size_t lenConfig -) - : dstNet(net) +void TFImporter::parseConvolution(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer_, LayerParams& 
layerParams) { - if (dataModel != NULL && lenModel > 0) - { - CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from memory (" << lenModel << " bytes)"); - ReadTFNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBin); - } - if (dataConfig != NULL && lenConfig > 0) + tensorflow::NodeDef layer = layer_; + std::string name = layer.name(); + std::string type = layer.op(); + int num_inputs = layer.input_size(); + + CV_CheckGT(num_inputs, 0, ""); + // The first node of dilated convolution subgraph. + // Extract input node, dilation rate and paddings. + std::string input = layer.input(0); + StrIntVector next_layers; + if (type == "SpaceToBatchND" || type == "Pad") { - CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from memory (" << lenConfig << " bytes)"); - ReadTFNetParamsFromTextBufferOrDie(dataConfig, lenConfig, &netTxt); + next_layers = getNextLayers(net, name, "Conv2D"); + if (next_layers.empty()) + next_layers = getNextLayers(net, name, "DepthwiseConv2dNative"); } - populateNet(); -} - -void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob) -{ - MatShape shape; - blobShapeFromTensor(tensor, shape); - int dims = (int)shape.size(); - - // TODO: other blob types - CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT || - tensor.dtype() == tensorflow::DT_HALF); - CV_Assert(dims == 4 || dims == 5); - int out_c, input_c, depth, height, width; - if (dims == 4) + if (type == "SpaceToBatchND") { - // REORDER kernel HWIO to OIHW - swap(shape[0], shape[2]); // IWHO - swap(shape[1], shape[3]); // IOHW - swap(shape[0], shape[1]); // OIHW - depth = 1; height = shape[2]; width = shape[3]; + // op: "SpaceToBatchND" + // input: "input" + // input: "SpaceToBatchND/block_shape" + // input: "SpaceToBatchND/paddings" + CV_CheckEQ(num_inputs, 3, ""); + + DictValue dilation = parseDims(getConstBlob(layer, value_id, 1)); + CV_Assert(dilation.size() == 2); + layerParams.set("dilation_h", dilation.get(0)); + layerParams.set("dilation_w", dilation.get(1)); + + Mat paddings; + parseTensor(getConstBlob(layer, value_id, 2), paddings); + + // paddings is a 2x2 matrix: [[top, bot], [left, right]] + layerParams.set("pad_h", paddings.at(0)); + layerParams.set("pad_w", paddings.at(2)); + + CV_Assert(next_layers.size() == 1); + layers_to_ignore.insert(next_layers[0].first); + + // FIXIT don't override, rewrite this code + layer = net.node(next_layers[0].second); + name = layer.name(); + type = layer.op(); + num_inputs = layer.input_size(); + CV_LOG_DEBUG(NULL, "DNN/TF: switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs"); } - else + else if (type == "Pad" || type == "MirrorPad") { - // REORDER kernel DHWIO to OIDHW - swap(shape[0], shape[4]); // OHWID - swap(shape[1], shape[3]); // OIWHD - swap(shape[2], shape[4]); // OIDHW - depth = shape[2]; height = shape[3]; width = shape[4]; - } - out_c = shape[0]; input_c = shape[1]; + Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1)); + CV_Assert(paddings.type() == CV_32SC1); + if (paddings.total() == 8) + { + // Perhaps, we have NHWC padding dimensions order. 
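+            // (TF "Pad" supplies an Nx2 tensor of (before, after) pairs, one row per axis;
+            //  for a 4-D NHWC input that is 8 values, which are reordered to NCHW below.)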
+ // N H W C + // 0 1 2 3 4 5 6 7 + std::swap(paddings.at(2), paddings.at(6)); + std::swap(paddings.at(3), paddings.at(7)); + // N C W H + // 0 1 2 3 4 5 6 7 + std::swap(paddings.at(4), paddings.at(6)); + std::swap(paddings.at(5), paddings.at(7)); + // N C H W + // 0 1 2 3 4 5 6 7 + } - dstBlob.create(shape, CV_32F); + if (next_layers.empty() || paddings.total() != 8 || + paddings.at(4) != paddings.at(5) || + paddings.at(6) != paddings.at(7) || type == "MirrorPad") + { + // Just a single padding layer. + layerParams.set("paddings", DictValue::arrayInt((int*)paddings.data, paddings.total())); + if (type == "MirrorPad") + layerParams.set("type", "reflect"); - Mat tensorContent = getTensorContent(tensor, /*no copy*/false); - int size = tensorContent.total(); - CV_Assert(size == (int)dstBlob.total()); + int id = dstNet.addLayer(name, "Padding", layerParams); + layer_id[name] = id; - float *dstData = dstBlob.ptr(); - const float *data = reinterpret_cast(tensorContent.data); + connect(layer_id, dstNet, parsePin(input), id, 0); + return; + } + else + { + // Merge with subsequent convolutional layer. + CV_Assert(next_layers.size() == 1); - int total = out_c * input_c * depth * height * width; - for (int i_oc = 0; i_oc < out_c; i_oc++) { - for (int i_ic = 0; i_ic < input_c; i_ic++) { - for (int i_d = 0; i_d < depth; i_d++) { - for (int i_h = 0; i_h < height; i_h++) { - for (int i_w = 0; i_w < width; i_w++) { - int dst_i = input_c * depth * height * width * i_oc + - depth * height * width * i_ic + height * width * i_d + width * i_h + i_w; - int src_i = out_c * input_c * width * height * i_d + - out_c * input_c * width * i_h + out_c * input_c * i_w + out_c * i_ic + i_oc; - CV_Assert(dst_i < total); - CV_Assert(src_i < total); - dstData[dst_i] = data[src_i]; - } - } - } + layerParams.set("pad_h", paddings.at(4)); + layerParams.set("pad_w", paddings.at(6)); + + layers_to_ignore.insert(next_layers[0].first); + + // FIXIT don't override, rewrite this code + layer = net.node(next_layers[0].second); + name = layer.name(); + type = layer.op(); + num_inputs = layer.input_size(); + CV_LOG_DEBUG(NULL, "DNN/TF: switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs"); } } -} -void TFImporter::connect(const std::map& layers_name_id_map, Net& network, const Pin& outPin, - const int input_layer_id, const int input_blob_id) -{ - std::map::const_iterator it = layers_name_id_map.find(outPin.name); - if (it == layers_name_id_map.end()) - CV_Error(Error::StsError, "Input layer not found: " + outPin.name); + // For the object detection networks, TensorFlow Object Detection API + // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax) + // order. We can manage it at DetectionOutput layer parsing predictions + // or shuffle last convolution's weights. 
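+    // When this attribute is set, adjacent pairs of bias values and output channels are
+    // swapped below, so (ymin, xmin, ymax, xmax) predictions become (xmin, ymin, xmax, ymax).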
+ bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") && + getLayerAttr(layer, "loc_pred_transposed").b(); - std::vector::iterator inpNameIt = std::find(netInputsNames.begin(), netInputsNames.end(), outPin.name); - int blobIndex; - if (inpNameIt == netInputsNames.end()) - blobIndex = outPin.blobIndex; - else - blobIndex = inpNameIt - netInputsNames.begin(); - network.connect(it->second, blobIndex, input_layer_id, input_blob_id); -} + layerParams.set("bias_term", false); + layerParams.blobs.resize(1); -void TFImporter::connectToAllBlobs(const std::map& layer_id, Net& network, const Pin& outPin, - const int input_layer_id, const int input_blobs_count) -{ - for (int input_blob_id = 0; input_blob_id < input_blobs_count; input_blob_id++) - connect(layer_id, network, outPin, input_layer_id, input_blob_id); -} + next_layers = getNextLayers(net, name, "BiasAdd"); + if (next_layers.size() == 1) { + layerParams.set("bias_term", true); + layerParams.blobs.resize(2); -const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDef &layer, std::map const_layers, - int input_blob_index, int* actual_inp_blob_idx) { - if (input_blob_index == -1) { - for(int i = 0; i < layer.input_size(); i++) { - Pin input = parsePin(layer.input(i)); - if (const_layers.find(input.name) != const_layers.end()) { - if (input_blob_index != -1) - CV_Error(Error::StsError, "More than one input is Const op"); + int weights_layer_index = next_layers[0].second; - input_blob_index = i; + blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]); + ExcludeLayer(net, weights_layer_index, 0, false); + layers_to_ignore.insert(next_layers[0].first); + + // Shuffle bias from yxYX to xyXY. + if (locPredTransposed) + { + const int numWeights = layerParams.blobs[1].total(); + float* biasData = reinterpret_cast(layerParams.blobs[1].data); + CV_Assert(numWeights % 4 == 0); + for (int i = 0; i < numWeights; i += 2) + { + std::swap(biasData[i], biasData[i + 1]); } } } - if (input_blob_index == -1) - CV_Error(Error::StsError, "Const input blob for weights not found"); - - Pin kernel_inp = parsePin(layer.input(input_blob_index)); - if (const_layers.find(kernel_inp.name) == const_layers.end()) - CV_Error(Error::StsError, "Input [" + layer.input(input_blob_index) + - "] for node [" + layer.name() + "] not found"); - if (kernel_inp.blobIndex != 0) - CV_Error(Error::StsError, "Unsupported kernel input"); - - if(actual_inp_blob_idx) { - *actual_inp_blob_idx = input_blob_index; - } - - int nodeIdx = const_layers.at(kernel_inp.name); - if (nodeIdx < netBin.node_size() && netBin.node(nodeIdx).name() == kernel_inp.name) + int kernelTensorInpId = -1; + const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernelTensorInpId); + const String kernelTensorName = layer.input(kernelTensorInpId); + std::map::iterator sharedWeightsIt = sharedWeights.find(kernelTensorName); + if (sharedWeightsIt == sharedWeights.end()) { - return netBin.node(nodeIdx).attr().at("value").tensor(); + kernelFromTensor(kernelTensor, layerParams.blobs[0]); + releaseTensor(const_cast(&kernelTensor)); + + int* kshape = layerParams.blobs[0].size.p; + const int outCh = kshape[0]; + const int inCh = kshape[1]; + const int height = kshape[2]; + const int width = kshape[3]; + if (type == "DepthwiseConv2dNative") + { + CV_Assert(!locPredTransposed); + const int chMultiplier = kshape[0]; + + Mat copy = layerParams.blobs[0].clone(); + float* src = (float*)copy.data; + float* dst = 
(float*)layerParams.blobs[0].data; + for (int i = 0; i < chMultiplier; ++i) + for (int j = 0; j < inCh; ++j) + for (int s = 0; s < height * width; ++s) + { + int src_i = (i * inCh + j) * height * width + s; + int dst_i = (j * chMultiplier + i) * height* width + s; + dst[dst_i] = src[src_i]; + } + // TODO Use reshape instead + kshape[0] = inCh * chMultiplier; + kshape[1] = 1; + size_t* kstep = layerParams.blobs[0].step.p; + kstep[0] = kstep[1]; // fix steps too + } + + // Shuffle output channels from yxYX to xyXY. + if (locPredTransposed) + { + const int slice = height * width * inCh; + for (int i = 0; i < outCh; i += 2) + { + cv::Mat src(1, slice, CV_32F, layerParams.blobs[0].ptr(i)); + cv::Mat dst(1, slice, CV_32F, layerParams.blobs[0].ptr(i + 1)); + std::swap_ranges(src.begin(), src.end(), dst.begin()); + } + } + sharedWeights[kernelTensorName] = layerParams.blobs[0]; } else { - CV_Assert_N(nodeIdx < netTxt.node_size(), - netTxt.node(nodeIdx).name() == kernel_inp.name); - return netTxt.node(nodeIdx).attr().at("value").tensor(); + layerParams.blobs[0] = sharedWeightsIt->second; } -} + Mat weights = layerParams.blobs[0]; + layerParams.set("kernel_size", DictValue::arrayInt(&weights.size[2], weights.dims - 2)); -static void addConstNodes(tensorflow::GraphDef& net, std::map& const_layers, - std::set& layers_to_ignore) -{ - CV_LOG_DEBUG(NULL, "DNN/TF: addConstNodes(): handling " << net.node_size() << " nodes..."); - for (int li = 0; li < net.node_size(); li++) - { - const tensorflow::NodeDef &layer = net.node(li); - String name = layer.name(); - String type = layer.op(); + layerParams.set("num_output", layerParams.blobs[0].size[0]); - //CV_LOG_DEBUG(NULL, "DNN/TF: layer_id=" << li << " - '" << name << "' @ " << type); + setStrides(layerParams, layer); + if (!layerParams.has("pad_w") && !layerParams.has("pad_h")) + setPadding(layerParams, layer); - try - { - if (type == "Dequantize") - { - // Example of Dequantize node: - // name: "conv2d_1/bias" - // op: "Dequantize" - // input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8) - // input: "conv2d_1/bias_quantized_min" - // input: "conv2d_1/bias_quantized_max" - // attr { key: "T" value { type: DT_QUINT8 } } (quantized type) - // attr { key: "mode" value { s: "MIN_FIRST" } } (quantization technique) - CV_CheckEQ(layer.input_size(), 3, "Dequantize: 3 inputs is supported only"); - for (int i = 0; i < 3; ++i) - CV_Assert(const_layers.find(layer.input(i)) != const_layers.end()); - CV_Assert(hasLayerAttr(layer, "mode") && - getLayerAttr(layer, "mode").s() == "MIN_FIRST"); + // The final node of dilated convolution subgraph. 
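+    // TensorFlow expresses a dilated convolution as the chain
+    //   SpaceToBatchND -> Conv2D -> BatchToSpaceND.
+    // The SpaceToBatchND node was already folded in above (dilations and paddings),
+    // so the trailing BatchToSpaceND is excluded and the whole chain becomes one Convolution layer.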
+ next_layers = getNextLayers(net, name, "BatchToSpaceND"); + if (!next_layers.empty()) + { + CV_Assert(next_layers.size() == 1); + ExcludeLayer(net, next_layers[0].second, 0, false); + layers_to_ignore.insert(next_layers[0].first); + } - int tensorId = const_layers[layer.input(0)]; - int minId = const_layers[layer.input(1)]; - int maxId = const_layers[layer.input(2)]; + int id = dstNet.addLayer(name, "Convolution", layerParams); + layer_id[name] = id; - tensorflow::TensorProto* tensor = net.mutable_node(tensorId) - ->mutable_attr()->at("value") - .mutable_tensor(); - CV_CheckEQ((int)tensor->dtype(), (int)tensorflow::DT_QUINT8, ""); + // one input only + connect(layer_id, dstNet, parsePin(input), id, 0); - Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor()); - Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor()); - CV_CheckEQ(qMin.total(), (size_t)1, ""); - CV_CheckTypeEQ(qMin.type(), CV_32FC1, ""); - CV_CheckEQ(qMax.total(), (size_t)1, ""); - CV_CheckTypeEQ(qMax.type(), CV_32FC1, ""); - Mat content = getTensorContent(*tensor); + if (getDataLayout(name, data_layouts) == DATA_LAYOUT_UNKNOWN) + data_layouts[name] = DATA_LAYOUT_NHWC; +} - float minVal = qMin.at(0); - float rangeScale = (qMax.at(0) - minVal) / 255; - CV_Assert(rangeScale >= 0); - content.convertTo(content, CV_32FC1, rangeScale, - rangeScale * cvRound(minVal / rangeScale)); +void TFImporter::parseBias(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const std::string& type = layer.op(); + const int num_inputs = layer.input_size(); - tensor->set_dtype(tensorflow::DT_FLOAT); - tensor->set_tensor_content(content.data, content.total() * content.elemSize1()); + CV_CheckGT(num_inputs, 0, ""); + bool haveConst = false; + for(int ii = 0; !haveConst && ii < num_inputs; ++ii) + { + Pin input = parsePin(layer.input(ii)); + haveConst = value_id.find(input.name) != value_id.end(); + } + CV_Assert(!haveConst || num_inputs == 2); - net.mutable_node(tensorId)->set_name(name); - CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second); - layers_to_ignore.insert(name); - continue; - } - else if (type != "Const") - continue; // only Const parameters are supported + if (haveConst) + { + Mat values = getTensorContent(getConstBlob(layer, value_id)); + CV_Assert(values.type() == CV_32FC1); + if (type == "Sub") + values *= -1.0f; - if (layer.attr().find("value") != layer.attr().end()) - { - CV_Assert(const_layers.insert(std::make_pair(name, li)).second); - } - layers_to_ignore.insert(name); + int id; + if (values.total() == 1) // is a scalar. + { + layerParams.set("shift", values.at(0)); + id = dstNet.addLayer(name, "Power", layerParams); } - catch (const std::exception& e) + else // is a vector { - CV_LOG_ERROR(NULL, "DNN/TF: Can't handle node='" << name << "'. 
Exception: " << e.what()); - throw; + layerParams.blobs.resize(1, values); + id = dstNet.addLayer(name, "Shift", layerParams); + } + layer_id[name] = id; + + // one input only + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + } + else + { + layerParams.set("operation", "sum"); + if (type == "Sub") + { + static float subCoeffs[] = {1.f, -1.f}; + layerParams.set("coeff", DictValue::arrayReal(subCoeffs, 2)); + } + + int id = dstNet.addLayer(name, "Eltwise", layerParams); + layer_id[name] = id; + + for (int ii = 0; ii < num_inputs; ii++) + { + Pin inp = parsePin(layer.input(ii)); + if (layer_id.find(inp.name) == layer_id.end()) + CV_Error(Error::StsError, "Input layer not found: " + inp.name); + connect(layer_id, dstNet, inp, id, ii); } } - CV_LOG_DEBUG(NULL, "DNN/TF: layers_to_ignore.size() = " << layers_to_ignore.size()); } -// If all inputs of specific layer have the same data layout we can say that -// this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise. -DataLayout TFImporter::predictOutputDataLayout(const tensorflow::NodeDef& layer) +void TFImporter::parseMatMul(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { - DataLayout layout = getDataLayout(layer); - if (layout != DATA_LAYOUT_UNKNOWN) + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckEQ(num_inputs, 2, ""); + + // For the object detection networks, TensorFlow Object Detection API + // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax) + // order. We can manage it at DetectionOutput layer parsing predictions + // or shuffle last Faster-RCNN's matmul weights. + bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") && + getLayerAttr(layer, "loc_pred_transposed").b(); + + layerParams.set("bias_term", false); + layerParams.blobs.resize(1); + + StrIntVector next_layers = getNextLayers(net, name, "BiasAdd"); // FIXIT Use layers fusion instead + if (next_layers.empty()) { - CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from attrs)"); - return layout; + next_layers = getNextLayers(net, name, "Add"); } + if (next_layers.size() == 1) { + layerParams.set("bias_term", true); + layerParams.blobs.resize(2); - // Determine layout by layer's inputs - for (int i = 0, n = layer.input_size(); i < n; ++i) - { - std::map::const_iterator it = data_layouts.find(getNodeName(layer.input(i))); - if (it != data_layouts.end()) + int weights_layer_index = next_layers[0].second; + blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]); + ExcludeLayer(net, weights_layer_index, 0, false); + layers_to_ignore.insert(next_layers[0].first); + + if (locPredTransposed) { - if (layout != DATA_LAYOUT_UNKNOWN) + const int numWeights = layerParams.blobs[1].total(); + float* biasData = reinterpret_cast(layerParams.blobs[1].data); + CV_Assert(numWeights % 4 == 0); + for (int i = 0; i < numWeights; i += 2) { - if (it->second != layout && it->second != DATA_LAYOUT_UNKNOWN) - return DATA_LAYOUT_UNKNOWN; + std::swap(biasData[i], biasData[i + 1]); } - else - layout = it->second; } } - if (layout != DATA_LAYOUT_UNKNOWN) - { - CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from inputs)"); - return layout; - } - - // Determine layout by layer's consumers recursively. 
- std::map::const_iterator it = data_layouts.find(layer.name()); - CV_Assert(it != data_layouts.end()); - return it->second; -} - -void TFImporter::populateNet() -{ - CV_Assert(netBin.ByteSize() || netTxt.ByteSize()); - - CV_LOG_INFO(NULL, "DNN/TF: parsing model" - << (netBin.has_versions() ? cv::format(" produced by TF v%d (min_consumer=%d)", (int)netBin.versions().producer(), (int)netBin.versions().min_consumer()) : cv::String(" (N/A version info)")) - << ". Number of nodes = " << netBin.node_size() - ); - - if (netTxt.ByteSize()) + int kernel_blob_index = -1; + const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernel_blob_index); + const String kernelTensorName = layer.input(kernel_blob_index); + std::map::iterator sharedWeightsIt = sharedWeights.find(kernelTensorName); + if (sharedWeightsIt == sharedWeights.end()) { - CV_LOG_INFO(NULL, "DNN/TF: parsing config" - << (netTxt.has_versions() ? cv::format(" produced by TF v%d (min_consumer=%d)", (int)netTxt.versions().producer(), (int)netTxt.versions().min_consumer()) : cv::String(" (N/A version info)")) - << ". Number of nodes = " << netTxt.node_size() - ); - - RemoveIdentityOps(netBin); - CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes"); - RemoveIdentityOps(netTxt); - CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(config) => " << netTxt.node_size() << " nodes"); - - sortByExecutionOrder(netTxt); - CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(config) => " << netTxt.node_size() << " nodes"); + blobFromTensor(kernelTensor, layerParams.blobs[0]); + releaseTensor(const_cast(&kernelTensor)); + sharedWeights[kernelTensorName] = layerParams.blobs[0]; } else { - removePhaseSwitches(netBin); - CV_LOG_DEBUG(NULL, "DNN/TF: removePhaseSwitches(model) => " << netBin.node_size() << " nodes"); + layerParams.blobs[0] = sharedWeightsIt->second; + } - RemoveIdentityOps(netBin); - CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes"); + if (kernel_blob_index == 1) { // In this case output is computed by x*W formula - W should be transposed + Mat data = layerParams.blobs[0].t(); + layerParams.blobs[0] = data.clone(); + } - simplifySubgraphs(netBin); - CV_LOG_DEBUG(NULL, "DNN/TF: simplifySubgraphs(model) => " << netBin.node_size() << " nodes"); - sortByExecutionOrder(netBin); - CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(model) => " << netBin.node_size() << " nodes"); + layerParams.set("num_output", layerParams.blobs[0].size[0]); + if (locPredTransposed) + { + CV_Assert(layerParams.blobs[0].dims == 2); + for (int i = 0; i < layerParams.blobs[0].size[0]; i += 2) + { + cv::Mat src = layerParams.blobs[0].row(i); + cv::Mat dst = layerParams.blobs[0].row(i + 1); + std::swap_ranges(src.begin(), src.end(), dst.begin()); + } } - tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin; + int id = dstNet.addLayer(name, "InnerProduct", layerParams); + layer_id[name] = id; - int layersSize = net.node_size(); + // one input only + int input_blob_index = kernel_blob_index == 0 ? 1 : 0; + connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0); + data_layouts[name] = DATA_LAYOUT_PLANAR; +} - // Pre-fill data layouts where they are set explicitly. 
- // Assuming that nodes are in topological order - for (int i = layersSize - 1; i >= 0; --i) +void TFImporter::parseReshape(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckGT(num_inputs, 0, ""); + Pin inpId = parsePin(layer.input(0)); + DataLayout inpLayout = getDataLayout(layer.input(0), data_layouts); + // There are two possible implementations: reshape an input using + // predefined sizes or use a second input blob as a source of new shape. + if (value_id.find(layer.input(1)) != value_id.end()) { - const tensorflow::NodeDef& layer = net.node(i); - std::string name = layer.name(); - - CV_LOG_DEBUG(NULL, "DNN/TF: node(" << i << " - '" << name << "') propagating layout..."); - - try + Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1)); + int newShapeSize = newShape.total(); + bool hasSwap = false; + if (newShapeSize == 4 && hasAllOnes(newShape, 0, 2)) { - DataLayout layout = getDataLayout(layer); - std::map::iterator it = data_layouts.find(name); - if (it != data_layouts.end()) + // NHWC->NCHW + std::swap(*newShape.ptr(0, 2), *newShape.ptr(0, 3)); + std::swap(*newShape.ptr(0, 1), *newShape.ptr(0, 2)); + hasSwap = true; + } + if (inpLayout == DATA_LAYOUT_NHWC) + { + if (newShapeSize >= 2 || newShape.at(1) == 1) { - if (layout != DATA_LAYOUT_UNKNOWN) + int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. + addPermuteLayer(order, name + "/nhwc", inpId); + if (newShapeSize < 4) { - if (it->second == DATA_LAYOUT_UNKNOWN) - it->second = layout; - else if (it->second != layout) - { - it->second = DATA_LAYOUT_UNKNOWN; - layout = DATA_LAYOUT_UNKNOWN; - } + inpLayout = DATA_LAYOUT_NCHW; } else - layout = it->second; - } - else - data_layouts[name] = layout; - - // Specify input layers to have the same data layout. - for (int j = 0; j < layer.input_size(); ++j) - { - name = getNodeName(layer.input(j)); - it = data_layouts.find(name); - if (it != data_layouts.end()) { - if (layout != DATA_LAYOUT_UNKNOWN) - { - if (it->second == DATA_LAYOUT_UNKNOWN) - it->second = layout; - else if (it->second != layout) - it->second = DATA_LAYOUT_UNKNOWN; - } + inpLayout = DATA_LAYOUT_NHWC; } - else - data_layouts[name] = layout; } } - catch (const std::exception& e) - { - CV_LOG_ERROR(NULL, "DNN/TF: Can't propagate layout for node='" << name << "'. Exception: " << e.what()); - throw; - } - } - - addConstNodes(netBin, value_id, layers_to_ignore); - addConstNodes(netTxt, value_id, layers_to_ignore); + layerParams.set("dim", DictValue::arrayInt(newShape.ptr(), newShapeSize)); + int id = dstNet.addLayer(name, "Reshape", layerParams); + layer_id[name] = id; - for (int li = 0; li < layersSize; li++) - { - const tensorflow::NodeDef& layer = net.node(li); + // one input only + connect(layer_id, dstNet, inpId, id, 0); + inpId = Pin(name); - const std::string name = layer.name(); - const std::string type = layer.op(); - const int ninputs = layer.input_size(); - CV_LOG_DEBUG(NULL, "DNN/TF: (" << li << "/" << layersSize << ") Parse layer " << name << " @ " << type << " with " << ninputs << " inputs"); + if ((inpLayout == DATA_LAYOUT_NHWC || inpLayout == DATA_LAYOUT_UNKNOWN || inpLayout == DATA_LAYOUT_PLANAR) && + newShapeSize == 4 && !hasSwap) + { + int order[] = {0, 3, 1, 2}; // Transform back to OpenCV's NCHW. + addPermuteLayer(order, name + "/nchw", inpId); + inpLayout = DATA_LAYOUT_NCHW; + } - parseNode(layer); + data_layouts[name] = newShapeSize == 2 ? 
DATA_LAYOUT_PLANAR : inpLayout; } - - for (size_t i = 0; i < netInputsNames.size(); i++) + else { - CV_LOG_DEBUG(NULL, "DNN/TF: Model input: " << i << " - '" << netInputsNames[i] << "'"); - CV_Assert(!netInputsNames[i].empty()); + int id = dstNet.addLayer(name, "Reshape", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, inpId, id, 0); + connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); + data_layouts[name] = inpLayout; } - dstNet.setInputsNames(netInputsNames); - CV_LOG_DEBUG(NULL, "DNN/TF: ===================== Import completed ====================="); } -void TFImporter::addPermuteLayer(const int* order, const std::string& permName, Pin& inpId) +void TFImporter::parseFlatten(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { - LayerParams permLP; - permLP.set("order", DictValue::arrayInt(order, 4)); - CV_Assert(layer_id.find(permName) == layer_id.end()); - int permId = dstNet.addLayer(permName, "Permute", permLP); - layer_id[permName] = permId; - connect(layer_id, dstNet, inpId, permId, 0); - inpId = Pin(permName); + const std::string& name = layer.name(); + const std::string& type = layer.op(); + const int num_inputs = layer.input_size(); + + CV_CheckGT(num_inputs, 0, ""); + Pin inpId = parsePin(layer.input(0)); + int inpLayout = getDataLayout(layer.input(0), data_layouts); + if (type == "Squeeze") + { + CV_Assert(hasLayerAttr(layer, "squeeze_dims")); + const tensorflow::AttrValue& dims = getLayerAttr(layer, "squeeze_dims"); + std::vector dimsVector(dims.list().i_size()); + for (int i = 0; i < dimsVector.size(); ++i) + dimsVector[i] = dims.list().i(i); + + // Flatten layer can squeeze dimensions range into one. + std::sort(dimsVector.begin(), dimsVector.end()); + for (int i = 1; i < dimsVector.size(); ++i) + { + if (dimsVector[i] != dimsVector[i - 1] + 1) + CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration"); + } + int start = dimsVector.front() - 1, end = dimsVector.back(); + if (start == -1 && end == 0) // squeeze 0th dimension + { + start = 0; + end = 1; + } + layerParams.set("axis", start); + layerParams.set("end_axis", end); + } + if (inpLayout == DATA_LAYOUT_NHWC) + { + LayerParams permLP; + int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. + permLP.set("order", DictValue::arrayInt(order, 4)); + + std::string permName = name + "/nchw"; + CV_Assert(layer_id.find(permName) == layer_id.end()); + int permId = dstNet.addLayer(permName, "Permute", permLP); + layer_id[permName] = permId; + connect(layer_id, dstNet, inpId, permId, 0); + inpId = Pin(permName); + } + int id = dstNet.addLayer(name, "Flatten", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, inpId, id, 0); + data_layouts[name] = DATA_LAYOUT_PLANAR; } -void TFImporter::parseNode(const tensorflow::NodeDef& layer_) +void TFImporter::parseTranspose(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { - tensorflow::NodeDef layer = layer_; - - tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? 
netTxt : netBin; - - /*const*/ std::string name = layer.name(); - /*const*/ std::string type = layer.op(); - /*const*/ int num_inputs = layer.input_size(); - - try + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckGT(num_inputs, 0, ""); + Mat perm = getTensorContent(getConstBlob(layer, value_id, 1)); + CV_Assert(perm.type() == CV_32SC1); + int* permData = (int*)perm.data; + if (perm.total() == 4) { - LayerParams layerParams; - - if (layers_to_ignore.find(name) != layers_to_ignore.end()) + // Only NHWC <-> NCHW permutations are allowed. OpenCV is always + // keep NCHW layout this way. + int inpLayout = getDataLayout(layer.input(0), data_layouts); + std::string type = "Identity"; + if (inpLayout == DATA_LAYOUT_NHWC) { - CV_LOG_DEBUG(NULL, "DNN/TF: ignored"); - return; + if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2) + { + // in TensorFlow: NHWC->NCHW + // in OpenCV: NCHW->NCHW + data_layouts[name] = DATA_LAYOUT_NCHW; + } + else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3) + { + // in TensorFlow: NHWC->NHWC + // in OpenCV: NCHW->NCHW + data_layouts[name] = DATA_LAYOUT_NHWC; + } + else if (permData[0] == 0 && permData[1] == 3 && permData[2] == 2 && permData[3] == 1) + { + // in TensorFlow: NHWC->NCWH + // in OpenCV: NCHW->NCWH + int permData[] = {0, 1, 3, 2}; + layerParams.set("order", DictValue::arrayInt(permData, perm.total())); + data_layouts[name] = DATA_LAYOUT_NCHW; // we keep track NCHW because channels position only matters + type = "Permute"; + } + else + CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed."); } - - DataLayout predictedLayout = predictOutputDataLayout(layer); - data_layouts[name] = predictedLayout; - - if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad" || type == "MirrorPad" || type == "Conv3D") + else if (inpLayout == DATA_LAYOUT_NCHW) { - CV_CheckGT(num_inputs, 0, ""); - // The first node of dilated convolution subgraph. - // Extract input node, dilation rate and paddings. 
- std::string input = layer.input(0); - StrIntVector next_layers; - if (type == "SpaceToBatchND" || type == "Pad") + if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1) { - next_layers = getNextLayers(net, name, "Conv2D"); - if (next_layers.empty()) - next_layers = getNextLayers(net, name, "DepthwiseConv2dNative"); + // in TensorFlow: NCHW->NHWC + // in OpenCV: NCHW->NCHW + data_layouts[name] = DATA_LAYOUT_NHWC; } - - if (type == "SpaceToBatchND") + else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3) { - // op: "SpaceToBatchND" - // input: "input" - // input: "SpaceToBatchND/block_shape" - // input: "SpaceToBatchND/paddings" - CV_CheckEQ(num_inputs, 3, ""); + // in TensorFlow: NCHW->NCHW + // in OpenCV: NCHW->NCHW + data_layouts[name] = DATA_LAYOUT_NCHW; + } + else + CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed."); + } + int id = dstNet.addLayer(name, type, layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + } + else + { + layerParams.set("order", DictValue::arrayInt(permData, perm.total())); - DictValue dilation = parseDims(getConstBlob(layer, value_id, 1)); - CV_Assert(dilation.size() == 2); - layerParams.set("dilation_h", dilation.get(0)); - layerParams.set("dilation_w", dilation.get(1)); + int id = dstNet.addLayer(name, "Permute", layerParams); + layer_id[name] = id; - Mat paddings; - parseTensor(getConstBlob(layer, value_id, 2), paddings); + // one input only + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + data_layouts[name] = DATA_LAYOUT_UNKNOWN; + } +} - // paddings is a 2x2 matrix: [[top, bot], [left, right]] - layerParams.set("pad_h", paddings.at(0)); - layerParams.set("pad_w", paddings.at(2)); +void TFImporter::parseConstant(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ +} - CV_Assert(next_layers.size() == 1); - layers_to_ignore.insert(next_layers[0].first); +void TFImporter::parseLrn(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - // FIXIT don't override, rewrite this code - layer = net.node(next_layers[0].second); - name = layer.name(); - type = layer.op(); - num_inputs = layer.input_size(); - CV_LOG_DEBUG(NULL, "DNN/TF: switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs"); - } - else if (type == "Pad" || type == "MirrorPad") - { - Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1)); - CV_Assert(paddings.type() == CV_32SC1); - if (paddings.total() == 8) - { - // Perhaps, we have NHWC padding dimensions order. 
- // N H W C - // 0 1 2 3 4 5 6 7 - std::swap(paddings.at(2), paddings.at(6)); - std::swap(paddings.at(3), paddings.at(7)); - // N C W H - // 0 1 2 3 4 5 6 7 - std::swap(paddings.at(4), paddings.at(6)); - std::swap(paddings.at(5), paddings.at(7)); - // N C H W - // 0 1 2 3 4 5 6 7 - } + CV_CheckGT(num_inputs, 0, ""); + if(hasLayerAttr(layer, "alpha")) { + layerParams.set("alpha", getLayerAttr(layer, "alpha").f()); + } + if(hasLayerAttr(layer, "beta")) { + layerParams.set("beta", getLayerAttr(layer, "beta").f()); + } + if(hasLayerAttr(layer, "depth_radius")) { + int radius = (int)getLayerAttr(layer, "depth_radius").i(); + layerParams.set("local_size", 2*radius + 1); + } + if(hasLayerAttr(layer, "bias")) { + layerParams.set("bias", getLayerAttr(layer, "bias").f()); + } + layerParams.set("norm_by_size", false); - if (next_layers.empty() || paddings.total() != 8 || - paddings.at(4) != paddings.at(5) || - paddings.at(6) != paddings.at(7) || type == "MirrorPad") - { - // Just a single padding layer. - layerParams.set("paddings", DictValue::arrayInt((int*)paddings.data, paddings.total())); - if (type == "MirrorPad") - layerParams.set("type", "reflect"); + int id = dstNet.addLayer(name, "LRN", layerParams); + layer_id[name] = id; - int id = dstNet.addLayer(name, "Padding", layerParams); - layer_id[name] = id; + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); +} - connect(layer_id, dstNet, parsePin(input), id, 0); - return; - } - else - { - // Merge with subsequent convolutional layer. - CV_Assert(next_layers.size() == 1); +void TFImporter::parseConcat(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const std::string& type = layer.op(); + const int num_inputs = layer.input_size(); - layerParams.set("pad_h", paddings.at(4)); - layerParams.set("pad_w", paddings.at(6)); + CV_CheckGT(num_inputs, 0, ""); + int axisId = (type == "Concat" ? 0 : num_inputs - 1); + int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0); - layers_to_ignore.insert(next_layers[0].first); + if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + axis = toNCHW(axis); + else if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NDHWC) + axis = toNCDHW(axis); + layerParams.set("axis", axis); - // FIXIT don't override, rewrite this code - layer = net.node(next_layers[0].second); - name = layer.name(); - type = layer.op(); - num_inputs = layer.input_size(); - CV_LOG_DEBUG(NULL, "DNN/TF: switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs"); - } - } + // input(0) or input(n-1) is concat_dim + int from = (type == "Concat" ? 1 : 0); + int to = (type == "Concat" ? num_inputs : num_inputs - 1); - // For the object detection networks, TensorFlow Object Detection API - // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax) - // order. We can manage it at DetectionOutput layer parsing predictions - // or shuffle last convolution's weights. - bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") && - getLayerAttr(layer, "loc_pred_transposed").b(); + for (int ii = from; ii < to; ii++) + { + Pin inp = parsePin(layer.input(ii)); + if (layer_id.find(inp.name) == layer_id.end()) + { + // There are constant inputs. 
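+            // A constant input has no producing layer yet, so wrap its tensor in a
+            // Const layer here; it is then connected to the Concat like any other input below.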
+ LayerParams lp; + lp.name = inp.name; + lp.type = "Const"; + lp.blobs.resize(1); + blobFromTensor(getConstBlob(layer, value_id, ii), lp.blobs.back()); + CV_Assert_N(!lp.blobs[0].empty(), lp.blobs[0].type() == CV_32F); + + int constInpId = dstNet.addLayer(lp.name, lp.type, lp); + layer_id[lp.name] = constInpId; + } + } - layerParams.set("bias_term", false); - layerParams.blobs.resize(1); + int id = dstNet.addLayer(name, "Concat", layerParams); + layer_id[name] = id; - next_layers = getNextLayers(net, name, "BiasAdd"); - if (next_layers.size() == 1) { - layerParams.set("bias_term", true); - layerParams.blobs.resize(2); + for (int ii = from; ii < to; ii++) + { + Pin inp = parsePin(layer.input(ii)); + if (layer_id.find(inp.name) == layer_id.end()) + CV_Error(Error::StsError, "Input layer not found: " + inp.name); + connect(layer_id, dstNet, inp, id, ii - from); + } +} - int weights_layer_index = next_layers[0].second; +void TFImporter::parseMaxPool(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]); - ExcludeLayer(net, weights_layer_index, 0, false); - layers_to_ignore.insert(next_layers[0].first); + CV_CheckGT(num_inputs, 0, ""); + layerParams.set("pool", "max"); - // Shuffle bias from yxYX to xyXY. - if (locPredTransposed) - { - const int numWeights = layerParams.blobs[1].total(); - float* biasData = reinterpret_cast(layerParams.blobs[1].data); - CV_Assert(numWeights % 4 == 0); - for (int i = 0; i < numWeights; i += 2) - { - std::swap(biasData[i], biasData[i + 1]); - } - } - } + setKSize(layerParams, layer); + setStrides(layerParams, layer); + setPadding(layerParams, layer); + // Test_TensorFlow_nets.EAST_text_detection/1, NGRAPH/CPU + layerParams.set("ceil_mode", false); - int kernelTensorInpId = -1; - const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernelTensorInpId); - const String kernelTensorName = layer.input(kernelTensorInpId); - std::map::iterator sharedWeightsIt = sharedWeights.find(kernelTensorName); - if (sharedWeightsIt == sharedWeights.end()) - { - kernelFromTensor(kernelTensor, layerParams.blobs[0]); - releaseTensor(const_cast(&kernelTensor)); - - int* kshape = layerParams.blobs[0].size.p; - const int outCh = kshape[0]; - const int inCh = kshape[1]; - const int height = kshape[2]; - const int width = kshape[3]; - if (type == "DepthwiseConv2dNative") - { - CV_Assert(!locPredTransposed); - const int chMultiplier = kshape[0]; - - Mat copy = layerParams.blobs[0].clone(); - float* src = (float*)copy.data; - float* dst = (float*)layerParams.blobs[0].data; - for (int i = 0; i < chMultiplier; ++i) - for (int j = 0; j < inCh; ++j) - for (int s = 0; s < height * width; ++s) - { - int src_i = (i * inCh + j) * height * width + s; - int dst_i = (j * chMultiplier + i) * height* width + s; - dst[dst_i] = src[src_i]; - } - // TODO Use reshape instead - kshape[0] = inCh * chMultiplier; - kshape[1] = 1; - size_t* kstep = layerParams.blobs[0].step.p; - kstep[0] = kstep[1]; // fix steps too - } + int id = dstNet.addLayer(name, "Pooling", layerParams); + layer_id[name] = id; - // Shuffle output channels from yxYX to xyXY. 
- if (locPredTransposed) - { - const int slice = height * width * inCh; - for (int i = 0; i < outCh; i += 2) - { - cv::Mat src(1, slice, CV_32F, layerParams.blobs[0].ptr(i)); - cv::Mat dst(1, slice, CV_32F, layerParams.blobs[0].ptr(i + 1)); - std::swap_ranges(src.begin(), src.end(), dst.begin()); - } - } - sharedWeights[kernelTensorName] = layerParams.blobs[0]; - } - else - { - layerParams.blobs[0] = sharedWeightsIt->second; - } - Mat weights = layerParams.blobs[0]; - layerParams.set("kernel_size", DictValue::arrayInt(&weights.size[2], weights.dims - 2)); + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); +} - layerParams.set("num_output", layerParams.blobs[0].size[0]); +void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - setStrides(layerParams, layer); - if (!layerParams.has("pad_w") && !layerParams.has("pad_h")) - setPadding(layerParams, layer); + CV_CheckGT(num_inputs, 0, ""); + layerParams.set("pool", "ave"); + layerParams.set("ave_pool_padded_area", false); + setKSize(layerParams, layer); + setStrides(layerParams, layer); + setPadding(layerParams, layer); - // The final node of dilated convolution subgraph. - next_layers = getNextLayers(net, name, "BatchToSpaceND"); - if (!next_layers.empty()) - { - CV_Assert(next_layers.size() == 1); - ExcludeLayer(net, next_layers[0].second, 0, false); - layers_to_ignore.insert(next_layers[0].first); - } + int id = dstNet.addLayer(name, "Pooling", layerParams); + layer_id[name] = id; - int id = dstNet.addLayer(name, "Convolution", layerParams); - layer_id[name] = id; + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); +} - // one input only - connect(layer_id, dstNet, parsePin(input), id, 0); +void TFImporter::parseMaxPoolGrad(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + CV_CheckEQ(num_inputs, 3, ""); - if (getDataLayout(name, data_layouts) == DATA_LAYOUT_UNKNOWN) - data_layouts[name] = DATA_LAYOUT_NHWC; - } - else if (type == "BiasAdd" || type == "Add" || type == "AddV2" || type == "Sub" || type=="AddN") - { - CV_CheckGT(num_inputs, 0, ""); - bool haveConst = false; - for(int ii = 0; !haveConst && ii < num_inputs; ++ii) - { - Pin input = parsePin(layer.input(ii)); - haveConst = value_id.find(input.name) != value_id.end(); - } - CV_Assert(!haveConst || num_inputs == 2); + layerParams.set("pool_k_h", 0); + layerParams.set("pool_k_w", 0); + layerParams.set("pool_stride_h", 0); + layerParams.set("pool_stride_w", 0); + layerParams.set("pool_pad_h", 0); + layerParams.set("pool_pad_w", 0); - if (haveConst) - { - Mat values = getTensorContent(getConstBlob(layer, value_id)); - CV_Assert(values.type() == CV_32FC1); - if (type == "Sub") - values *= -1.0f; + int id = dstNet.addLayer(name, "MaxUnpool", layerParams); + layer_id[name] = id; - int id; - if (values.total() == 1) // is a scalar. 
- { - layerParams.set("shift", values.at(0)); - id = dstNet.addLayer(name, "Power", layerParams); - } - else // is a vector - { - layerParams.blobs.resize(1, values); - id = dstNet.addLayer(name, "Shift", layerParams); - } - layer_id[name] = id; + connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0); + connect(layer_id, dstNet, parsePin(layer.input(1) + ":1"), id, 1); + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 2); +} - // one input only - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - } - else - { - layerParams.set("operation", "sum"); - if (type == "Sub") - { - static float subCoeffs[] = {1.f, -1.f}; - layerParams.set("coeff", DictValue::arrayReal(subCoeffs, 2)); - } +void TFImporter::parsePlaceholder(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); - int id = dstNet.addLayer(name, "Eltwise", layerParams); - layer_id[name] = id; + DataLayout predictedLayout = data_layouts[name]; - for (int ii = 0; ii < num_inputs; ii++) - { - Pin inp = parsePin(layer.input(ii)); - if (layer_id.find(inp.name) == layer_id.end()) - CV_Error(Error::StsError, "Input layer not found: " + inp.name); - connect(layer_id, dstNet, inp, id, ii); - } - } + if (!hasLayerAttr(layer, "dtype") || + getLayerAttr(layer, "dtype").type() != tensorflow::DT_BOOL) // If input is not a train/test flag. + { + netInputsNames.push_back(name); + layer_id[name] = 0; + } + tensorflow::TensorShapeProto shape; + if (hasLayerAttr(layer, "shape")) + shape = getLayerAttr(layer, "shape").shape(); + else if (hasLayerAttr(layer, "_output_shapes")) + { + tensorflow::AttrValue_ListValue list = getLayerAttr(layer, "_output_shapes").list(); + if (list.shape_size()) + shape = list.shape()[0]; + } + if (shape.dim_size()) + { + MatShape dims(shape.dim_size()); + for (int i = 0; i < dims.size(); ++i) + dims[i] = shape.dim(i).size(); + if (dims.size() == 4 && predictedLayout == DATA_LAYOUT_NHWC) + { + std::swap(dims[1], dims[3]); // NHWC->NCWH + std::swap(dims[2], dims[3]); // NCWH->NCHW + if (dims[0] == -1) // It's OK to have undetermined batch size + dims[0] = 1; } - else if (type == "MatMul") + bool hasNeg = false; + for (int i = 0; i < dims.size() && !hasNeg; ++i) { - CV_CheckEQ(num_inputs, 2, ""); + hasNeg = dims[i] < 0; + } + if (!hasNeg) + netInputShapes.push_back(dims); + } +} - // For the object detection networks, TensorFlow Object Detection API - // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax) - // order. We can manage it at DetectionOutput layer parsing predictions - // or shuffle last Faster-RCNN's matmul weights. 
- bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") && - getLayerAttr(layer, "loc_pred_transposed").b(); +void TFImporter::parseSplit(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // TODO: determining axis index remapping by input dimensions order of input blob + // TODO: slicing input may be Const op + // TODO: slicing kernels for convolutions - in current implementation it is impossible + // TODO: add parsing num of slices parameter + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckEQ(num_inputs, 2, ""); + // num_split + // 1st blob is dims tensor + int axis = getConstBlob(layer, value_id, 0).int_val().Get(0); + if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + axis = toNCHW(axis); + layerParams.set("axis", axis); + + if (hasLayerAttr(layer, "num_split")) + layerParams.set("num_split", getLayerAttr(layer, "num_split").i()); + + int id = dstNet.addLayer(name, "Slice", layerParams); + layer_id[name] = id; + + // one input only + connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0); +} - layerParams.set("bias_term", false); - layerParams.blobs.resize(1); +void TFImporter::parseSlice(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // op: "Slice" + // input: "input_node" + // input: "Slice/begin" + // input: "Slice/size" + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckEQ(num_inputs, 3, ""); + Mat begins = getTensorContent(getConstBlob(layer, value_id, 1)); + Mat sizes = getTensorContent(getConstBlob(layer, value_id, 2)); + CV_Assert_N(!begins.empty(), !sizes.empty()); + CV_CheckTypeEQ(begins.type(), CV_32SC1, ""); + CV_CheckTypeEQ(sizes.type(), CV_32SC1, ""); + + if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + { + // Swap NHWC parameters' order to NCHW. 
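+        // e.g. begin = {b, y, x, c} (NHWC indices 0..3) becomes {b, c, y, x}:
+        // swap(2,3) gives {b, y, c, x}, then swap(1,2) gives {b, c, y, x}.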
+ std::swap(*begins.ptr(0, 2), *begins.ptr(0, 3)); + std::swap(*begins.ptr(0, 1), *begins.ptr(0, 2)); + std::swap(*sizes.ptr(0, 2), *sizes.ptr(0, 3)); + std::swap(*sizes.ptr(0, 1), *sizes.ptr(0, 2)); + } + layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total())); + layerParams.set("size", DictValue::arrayInt((int*)sizes.data, sizes.total())); - StrIntVector next_layers = getNextLayers(net, name, "BiasAdd"); // FIXIT Use layers fusion instead - if (next_layers.empty()) - { - next_layers = getNextLayers(net, name, "Add"); - } - if (next_layers.size() == 1) { - layerParams.set("bias_term", true); - layerParams.blobs.resize(2); + int id = dstNet.addLayer(name, "Slice", layerParams); + layer_id[name] = id; - int weights_layer_index = next_layers[0].second; - blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]); - ExcludeLayer(net, weights_layer_index, 0, false); - layers_to_ignore.insert(next_layers[0].first); + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); +} - if (locPredTransposed) - { - const int numWeights = layerParams.blobs[1].total(); - float* biasData = reinterpret_cast(layerParams.blobs[1].data); - CV_Assert(numWeights % 4 == 0); - for (int i = 0; i < numWeights; i += 2) - { - std::swap(biasData[i], biasData[i + 1]); - } - } - } +void TFImporter::parseStridedSlice(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckEQ(num_inputs, 4, ""); + Mat begins = getTensorContent(getConstBlob(layer, value_id, 1)); + Mat ends = getTensorContent(getConstBlob(layer, value_id, 2)); + Mat strides = getTensorContent(getConstBlob(layer, value_id, 3)); + CV_CheckTypeEQ(begins.type(), CV_32SC1, ""); + CV_CheckTypeEQ(ends.type(), CV_32SC1, ""); + CV_CheckTypeEQ(strides.type(), CV_32SC1, ""); + const int num = begins.total(); + CV_Assert_N(num == ends.total(), num == strides.total()); + + int end_mask = getLayerAttr(layer, "end_mask").i(); + for (int i = 0; i < num; ++i) + { + if (ends.at(i) < 0) + ends.at(i) -= 1; + if (end_mask & (1 << i)) + ends.at(i) = -1; + if (strides.at(i) != 1) + CV_Error(Error::StsNotImplemented, + format("StridedSlice with stride %d", strides.at(i))); + } + if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + { + // Swap NHWC parameters' order to NCHW. 
+ std::swap(begins.at(2), begins.at(3)); + std::swap(begins.at(1), begins.at(2)); + std::swap(ends.at(2), ends.at(3)); + std::swap(ends.at(1), ends.at(2)); + } + layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total())); + layerParams.set("end", DictValue::arrayInt((int*)ends.data, ends.total())); - int kernel_blob_index = -1; - const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernel_blob_index); - const String kernelTensorName = layer.input(kernel_blob_index); - std::map::iterator sharedWeightsIt = sharedWeights.find(kernelTensorName); - if (sharedWeightsIt == sharedWeights.end()) - { - blobFromTensor(kernelTensor, layerParams.blobs[0]); - releaseTensor(const_cast(&kernelTensor)); - sharedWeights[kernelTensorName] = layerParams.blobs[0]; - } - else - { - layerParams.blobs[0] = sharedWeightsIt->second; - } + int id = dstNet.addLayer(name, "Slice", layerParams); + layer_id[name] = id; - if (kernel_blob_index == 1) { // In this case output is computed by x*W formula - W should be transposed - Mat data = layerParams.blobs[0].t(); - layerParams.blobs[0] = data.clone(); - } + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); +} - layerParams.set("num_output", layerParams.blobs[0].size[0]); - if (locPredTransposed) - { - CV_Assert(layerParams.blobs[0].dims == 2); - for (int i = 0; i < layerParams.blobs[0].size[0]; i += 2) - { - cv::Mat src = layerParams.blobs[0].row(i); - cv::Mat dst = layerParams.blobs[0].row(i + 1); - std::swap_ranges(src.begin(), src.end(), dst.begin()); - } - } +void TFImporter::parseMul(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const std::string& type = layer.op(); + const int num_inputs = layer.input_size(); - int id = dstNet.addLayer(name, "InnerProduct", layerParams); - layer_id[name] = id; + CV_CheckGT(num_inputs, 0, ""); + int constId = -1; + for(int ii = 0; ii < num_inputs; ++ii) + { + Pin input = parsePin(layer.input(ii)); + if (value_id.find(input.name) != value_id.end()) + { + constId = ii; + break; + } + } + CV_Assert((constId != -1) || (num_inputs == 2)); - // one input only - int input_blob_index = kernel_blob_index == 0 ? 1 : 0; - connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0); - data_layouts[name] = DATA_LAYOUT_PLANAR; + if (constId != -1) + { + // Multiplication by constant. + CV_CheckEQ(num_inputs, 2, ""); + Mat scaleMat = getTensorContent(getConstBlob(layer, value_id)); + CV_Assert(scaleMat.type() == CV_32FC1); + if (type == "RealDiv") + { + if (constId == 0) + CV_Error(Error::StsNotImplemented, "Division of constant over variable"); + scaleMat = 1.0f / scaleMat; } - else if (type == "Reshape") + + int id; + if (scaleMat.total() == 1) // is a scalar. { - CV_CheckGT(num_inputs, 0, ""); - Pin inpId = parsePin(layer.input(0)); - DataLayout inpLayout = getDataLayout(layer.input(0), data_layouts); - // There are two possible implementations: reshape an input using - // predefined sizes or use a second input blob as a source of new shape. 
- if (value_id.find(layer.input(1)) != value_id.end()) + // Try to match with a LeakyRelu: + // node { + // name: "LeakyRelu/mul" + // op: "Mul" + // input: "LeakyRelu/alpha" + // input: "input" + // } + // node { + // name: "LeakyRelu/Maximum" + // op: "Maximum" + // input: "LeakyRelu/mul" + // input: "input" + // } + StrIntVector next_layers = getNextLayers(net, name, "Maximum"); + if (!next_layers.empty()) { - Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1)); - int newShapeSize = newShape.total(); - bool hasSwap = false; - if (newShapeSize == 4 && hasAllOnes(newShape, 0, 2)) - { - // NHWC->NCHW - std::swap(*newShape.ptr(0, 2), *newShape.ptr(0, 3)); - std::swap(*newShape.ptr(0, 1), *newShape.ptr(0, 2)); - hasSwap = true; - } - if (inpLayout == DATA_LAYOUT_NHWC) - { - if (newShapeSize >= 2 || newShape.at(1) == 1) - { - int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. - addPermuteLayer(order, name + "/nhwc", inpId); - if (newShapeSize < 4) - { - inpLayout = DATA_LAYOUT_NCHW; - } - else - { - inpLayout = DATA_LAYOUT_NHWC; - } - } - } - layerParams.set("dim", DictValue::arrayInt(newShape.ptr(), newShapeSize)); + int maximumLayerIdx = next_layers[0].second; - int id = dstNet.addLayer(name, "Reshape", layerParams); - layer_id[name] = id; + CV_Assert(net.node(maximumLayerIdx).input_size() == 2); - // one input only - connect(layer_id, dstNet, inpId, id, 0); - inpId = Pin(name); + // The input from the Mul layer can also be at index 1. + int mulInputIdx = (net.node(maximumLayerIdx).input(0) == name) ? 0 : 1; - if ((inpLayout == DATA_LAYOUT_NHWC || inpLayout == DATA_LAYOUT_UNKNOWN || inpLayout == DATA_LAYOUT_PLANAR) && - newShapeSize == 4 && !hasSwap) - { - int order[] = {0, 3, 1, 2}; // Transform back to OpenCV's NCHW. - addPermuteLayer(order, name + "/nchw", inpId); - inpLayout = DATA_LAYOUT_NCHW; - } + ExcludeLayer(net, maximumLayerIdx, mulInputIdx, false); + layers_to_ignore.insert(next_layers[0].first); - data_layouts[name] = newShapeSize == 2 ? DATA_LAYOUT_PLANAR : inpLayout; + layerParams.set("negative_slope", scaleMat.at(0)); + id = dstNet.addLayer(name, "ReLU", layerParams); } else { - int id = dstNet.addLayer(name, "Reshape", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, inpId, id, 0); - connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); - data_layouts[name] = inpLayout; + // Just a multiplication. + layerParams.set("scale", scaleMat.at(0)); + id = dstNet.addLayer(name, "Power", layerParams); } } - else if (type == "Flatten" || type == "Squeeze") + else // is a vector { - CV_CheckGT(num_inputs, 0, ""); - Pin inpId = parsePin(layer.input(0)); - int inpLayout = getDataLayout(layer.input(0), data_layouts); - if (type == "Squeeze") - { - CV_Assert(hasLayerAttr(layer, "squeeze_dims")); - const tensorflow::AttrValue& dims = getLayerAttr(layer, "squeeze_dims"); - std::vector dimsVector(dims.list().i_size()); - for (int i = 0; i < dimsVector.size(); ++i) - dimsVector[i] = dims.list().i(i); - - // Flatten layer can squeeze dimensions range into one. 
- std::sort(dimsVector.begin(), dimsVector.end()); - for (int i = 1; i < dimsVector.size(); ++i) - { - if (dimsVector[i] != dimsVector[i - 1] + 1) - CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration"); - } - int start = dimsVector.front() - 1, end = dimsVector.back(); - if (start == -1 && end == 0) // squeeze 0th dimension - { - start = 0; - end = 1; - } - layerParams.set("axis", start); - layerParams.set("end_axis", end); - } - if (inpLayout == DATA_LAYOUT_NHWC) + layerParams.blobs.resize(1, scaleMat); + + StrIntVector next_layers = getNextLayers(net, name, "Add"); + if (!next_layers.empty()) { - LayerParams permLP; - int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. - permLP.set("order", DictValue::arrayInt(order, 4)); + layerParams.set("bias_term", true); + layerParams.blobs.resize(2); - std::string permName = name + "/nchw"; - CV_Assert(layer_id.find(permName) == layer_id.end()); - int permId = dstNet.addLayer(permName, "Permute", permLP); - layer_id[permName] = permId; - connect(layer_id, dstNet, inpId, permId, 0); - inpId = Pin(permName); + int weights_layer_index = next_layers[0].second; + blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs.back()); + ExcludeLayer(net, weights_layer_index, 0, false); + layers_to_ignore.insert(next_layers[0].first); } - int id = dstNet.addLayer(name, "Flatten", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, inpId, id, 0); - data_layouts[name] = DATA_LAYOUT_PLANAR; + + if (hasLayerAttr(layer, "axis")) + layerParams.set("axis", getLayerAttr(layer, "axis").i()); + + id = dstNet.addLayer(name, "Scale", layerParams); } - else if (type == "Transpose") + layer_id[name] = id; + + Pin inp0 = parsePin(layer.input(0)); + if (layer_id.find(inp0.name) != layer_id.end()) + // First operand is a constant. + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + else + connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0); + } + else + { + // Check if all the inputs have the same shape. + bool equalInpShapes = true; + bool isShapeOnes = false; + MatShape outShape0; + for (int ii = 0; ii < num_inputs && !netInputShapes.empty(); ii++) { - CV_CheckGT(num_inputs, 0, ""); - Mat perm = getTensorContent(getConstBlob(layer, value_id, 1)); - CV_Assert(perm.type() == CV_32SC1); - int* permData = (int*)perm.data; - if (perm.total() == 4) + Pin pin = parsePin(layer.input(ii)); + int inpId = layer_id.find(pin.name)->second; + + // Get input shape + MatShape outShape; + std::vector inpShapes, outShapes; + dstNet.getLayerShapes(netInputShapes, inpId, inpShapes, outShapes); + CV_CheckGT(static_cast(outShapes.size()), pin.blobIndex, ""); + outShape = outShapes[pin.blobIndex]; + + if (ii == 0) { - // Only NHWC <-> NCHW permutations are allowed. OpenCV is always - // keep NCHW layout this way. 
- int inpLayout = getDataLayout(layer.input(0), data_layouts); - std::string type = "Identity"; - if (inpLayout == DATA_LAYOUT_NHWC) - { - if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2) - { - // in TensorFlow: NHWC->NCHW - // in OpenCV: NCHW->NCHW - data_layouts[name] = DATA_LAYOUT_NCHW; - } - else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3) - { - // in TensorFlow: NHWC->NHWC - // in OpenCV: NCHW->NCHW - data_layouts[name] = DATA_LAYOUT_NHWC; - } - else if (permData[0] == 0 && permData[1] == 3 && permData[2] == 2 && permData[3] == 1) - { - // in TensorFlow: NHWC->NCWH - // in OpenCV: NCHW->NCWH - int permData[] = {0, 1, 3, 2}; - layerParams.set("order", DictValue::arrayInt(permData, perm.total())); - data_layouts[name] = DATA_LAYOUT_NCHW; // we keep track NCHW because channels position only matters - type = "Permute"; - } - else - CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed."); - } - else if (inpLayout == DATA_LAYOUT_NCHW) - { - if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1) - { - // in TensorFlow: NCHW->NHWC - // in OpenCV: NCHW->NCHW - data_layouts[name] = DATA_LAYOUT_NHWC; - } - else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3) - { - // in TensorFlow: NCHW->NCHW - // in OpenCV: NCHW->NCHW - data_layouts[name] = DATA_LAYOUT_NCHW; - } - else - CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed."); - } - int id = dstNet.addLayer(name, type, layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + outShape0 = outShape; } - else + else if (outShape != outShape0) { - layerParams.set("order", DictValue::arrayInt(permData, perm.total())); - - int id = dstNet.addLayer(name, "Permute", layerParams); - layer_id[name] = id; - - // one input only - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - data_layouts[name] = DATA_LAYOUT_UNKNOWN; + equalInpShapes = false; + isShapeOnes = isAllOnes(outShape, 2, outShape.size()) || + isAllOnes(outShape0, 2, outShape0.size()); + break; } } - else if (type == "Const") + + int id; + if (equalInpShapes || netInputShapes.empty() || (!equalInpShapes && isShapeOnes)) { + layerParams.set("operation", type == "RealDiv" ? "div" : "prod"); + id = dstNet.addLayer(name, "Eltwise", layerParams); } - else if (type == "LRN") + else { - CV_CheckGT(num_inputs, 0, ""); - if(hasLayerAttr(layer, "alpha")) { - layerParams.set("alpha", getLayerAttr(layer, "alpha").f()); - } - if(hasLayerAttr(layer, "beta")) { - layerParams.set("beta", getLayerAttr(layer, "beta").f()); - } - if(hasLayerAttr(layer, "depth_radius")) { - int radius = (int)getLayerAttr(layer, "depth_radius").i(); - layerParams.set("local_size", 2*radius + 1); - } - if(hasLayerAttr(layer, "bias")) { - layerParams.set("bias", getLayerAttr(layer, "bias").f()); - } - layerParams.set("norm_by_size", false); + if (type == "RealDiv") + CV_Error(Error::StsNotImplemented, "Division of non equal tensors"); + id = dstNet.addLayer(name, "Scale", layerParams); + } - int id = dstNet.addLayer(name, "LRN", layerParams); - layer_id[name] = id; + layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); - } - else if (type == "Concat" || type == "ConcatV2") + for (int ii = 0; ii < num_inputs; ii++) { - CV_CheckGT(num_inputs, 0, ""); - int axisId = (type == "Concat" ? 
0 : num_inputs - 1); - int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0); + Pin inp = parsePin(layer.input(ii)); + if (layer_id.find(inp.name) == layer_id.end()) + CV_Error(Error::StsError, "Input layer not found: " + inp.name); + connect(layer_id, dstNet, inp, id, ii); + } + } +} - if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) - axis = toNCHW(axis); - else if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NDHWC) - axis = toNCDHW(axis); - layerParams.set("axis", axis); +void TFImporter::parseFusedBatchNorm(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // op: "FusedBatchNorm" + // input: "input" + // input: "BatchNorm/gamma" + // input: "BatchNorm/beta" + // input: "BatchNorm/moving_mean" + // input: "BatchNorm/moving_variance" - // input(0) or input(n-1) is concat_dim - int from = (type == "Concat" ? 1 : 0); - int to = (type == "Concat" ? num_inputs : num_inputs - 1); + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - for (int ii = from; ii < to; ii++) - { - Pin inp = parsePin(layer.input(ii)); - if (layer_id.find(inp.name) == layer_id.end()) - { - // There are constant inputs. - LayerParams lp; - lp.name = inp.name; - lp.type = "Const"; - lp.blobs.resize(1); - blobFromTensor(getConstBlob(layer, value_id, ii), lp.blobs.back()); - CV_Assert_N(!lp.blobs[0].empty(), lp.blobs[0].type() == CV_32F); - - int constInpId = dstNet.addLayer(lp.name, lp.type, lp); - layer_id[lp.name] = constInpId; - } - } + CV_CheckEQ(num_inputs, 5, "Expected gamma, beta, mean and std"); + Pin inpId = parsePin(layer.input(0)); - int id = dstNet.addLayer(name, "Concat", layerParams); - layer_id[name] = id; + bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b(); - for (int ii = from; ii < to; ii++) - { - Pin inp = parsePin(layer.input(ii)); - if (layer_id.find(inp.name) == layer_id.end()) - CV_Error(Error::StsError, "Input layer not found: " + inp.name); - connect(layer_id, dstNet, inp, id, ii - from); - } - } - else if (type == "MaxPool" || type == "MaxPool3D") - { - CV_CheckGT(num_inputs, 0, ""); - layerParams.set("pool", "max"); + layerParams.blobs.resize(2); - setKSize(layerParams, layer); - setStrides(layerParams, layer); - setPadding(layerParams, layer); - // Test_TensorFlow_nets.EAST_text_detection/1, NGRAPH/CPU - layerParams.set("ceil_mode", false); + const tensorflow::TensorProto& gammaTensor = getConstBlob(layer, value_id, 1); + if (!gammaTensor.tensor_content().empty()) + { + layerParams.blobs.resize(layerParams.blobs.size() + 1); + layerParams.set("has_weight", true); + blobFromTensor(gammaTensor, layerParams.blobs.back()); + } + else + layerParams.set("has_weight", false); - int id = dstNet.addLayer(name, "Pooling", layerParams); - layer_id[name] = id; + const tensorflow::TensorProto& betaTensor = getConstBlob(layer, value_id, 2); + if (!betaTensor.tensor_content().empty()) + { + layerParams.blobs.resize(layerParams.blobs.size() + 1); + layerParams.set("has_bias", true); + blobFromTensor(betaTensor, layerParams.blobs.back()); + } + else + layerParams.set("has_bias", false); - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); - } - else if (type == "AvgPool" || type == "AvgPool3D") - { - CV_CheckGT(num_inputs, 0, ""); - layerParams.set("pool", "ave"); - layerParams.set("ave_pool_padded_area", false); - setKSize(layerParams, layer); - setStrides(layerParams, layer); - setPadding(layerParams, layer); + Mat mean, std; + if 
(isTraining) + { + if (layerParams.blobs.size() == 2) + CV_Error(Error::StsNotImplemented, "Cannot determine number " + "of parameters for batch normalization layer."); + mean = Mat::zeros(1, layerParams.blobs[2].total(), CV_32F); + std = Mat::ones(1, layerParams.blobs[2].total(), CV_32F); + + // Add an extra layer: Mean-Variance normalization + LayerParams mvnParams; + std::string mvnName = name + "/MVN"; + CV_Assert(layer_id.find(mvnName) == layer_id.end()); + int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams); + layer_id[mvnName] = mvnId; + connect(layer_id, dstNet, inpId, mvnId, 0); + inpId = Pin(mvnName); + } + else + { + blobFromTensor(getConstBlob(layer, value_id, 3), mean); + blobFromTensor(getConstBlob(layer, value_id, 4), std); + } + layerParams.blobs[0] = mean; + layerParams.blobs[1] = std; - int id = dstNet.addLayer(name, "Pooling", layerParams); - layer_id[name] = id; + if (hasLayerAttr(layer, "epsilon")) + layerParams.set("eps", getLayerAttr(layer, "epsilon").f()); - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); - } - else if (type == "MaxPoolGrad") - { - CV_CheckEQ(num_inputs, 3, ""); + int id = dstNet.addLayer(name, "BatchNorm", layerParams); + layer_id[name] = id; - layerParams.set("pool_k_h", 0); - layerParams.set("pool_k_w", 0); - layerParams.set("pool_stride_h", 0); - layerParams.set("pool_stride_w", 0); - layerParams.set("pool_pad_h", 0); - layerParams.set("pool_pad_w", 0); + // one input only + connect(layer_id, dstNet, inpId, id, 0); +} - int id = dstNet.addLayer(name, "MaxUnpool", layerParams); - layer_id[name] = id; +void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // op: "Conv2DBackpropInput" + // input: "conv2d_transpose/output_shape" + // input: "weights" + // input: "input" - connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0); - connect(layer_id, dstNet, parsePin(layer.input(1) + ":1"), id, 1); - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 2); - } - else if (type == "Placeholder") - { - if (!hasLayerAttr(layer, "dtype") || - getLayerAttr(layer, "dtype").type() != tensorflow::DT_BOOL) // If input is not a train/test flag. 
- { - netInputsNames.push_back(name); - layer_id[name] = 0; - } - tensorflow::TensorShapeProto shape; - if (hasLayerAttr(layer, "shape")) - shape = getLayerAttr(layer, "shape").shape(); - else if (hasLayerAttr(layer, "_output_shapes")) - { - tensorflow::AttrValue_ListValue list = getLayerAttr(layer, "_output_shapes").list(); - if (list.shape_size()) - shape = list.shape()[0]; - } - if (shape.dim_size()) - { - MatShape dims(shape.dim_size()); - for (int i = 0; i < dims.size(); ++i) - dims[i] = shape.dim(i).size(); - if (dims.size() == 4 && predictedLayout == DATA_LAYOUT_NHWC) - { - std::swap(dims[1], dims[3]); // NHWC->NCWH - std::swap(dims[2], dims[3]); // NCWH->NCHW - if (dims[0] == -1) // It's OK to have undetermined batch size - dims[0] = 1; - } - bool hasNeg = false; - for (int i = 0; i < dims.size() && !hasNeg; ++i) - { - hasNeg = dims[i] < 0; - } - if (!hasNeg) - netInputShapes.push_back(dims); - } - } - else if (type == "Split") { - // TODO: determining axis index remapping by input dimensions order of input blob - // TODO: slicing input may be Const op - // TODO: slicing kernels for convolutions - in current implementation it is impossible - // TODO: add parsing num of slices parameter - CV_CheckEQ(num_inputs, 2, ""); - // num_split - // 1st blob is dims tensor - int axis = getConstBlob(layer, value_id, 0).int_val().Get(0); - if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) - axis = toNCHW(axis); - layerParams.set("axis", axis); - - if (hasLayerAttr(layer, "num_split")) - layerParams.set("num_split", getLayerAttr(layer, "num_split").i()); - - int id = dstNet.addLayer(name, "Slice", layerParams); - layer_id[name] = id; + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - // one input only - connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0); - } - else if (type == "Slice") - { - // op: "Slice" - // input: "input_node" - // input: "Slice/begin" - // input: "Slice/size" - CV_CheckEQ(num_inputs, 3, ""); - Mat begins = getTensorContent(getConstBlob(layer, value_id, 1)); - Mat sizes = getTensorContent(getConstBlob(layer, value_id, 2)); - CV_Assert_N(!begins.empty(), !sizes.empty()); - CV_CheckTypeEQ(begins.type(), CV_32SC1, ""); - CV_CheckTypeEQ(sizes.type(), CV_32SC1, ""); - - if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) - { - // Swap NHWC parameters' order to NCHW. 
- std::swap(*begins.ptr(0, 2), *begins.ptr(0, 3)); - std::swap(*begins.ptr(0, 1), *begins.ptr(0, 2)); - std::swap(*sizes.ptr(0, 2), *sizes.ptr(0, 3)); - std::swap(*sizes.ptr(0, 1), *sizes.ptr(0, 2)); - } - layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total())); - layerParams.set("size", DictValue::arrayInt((int*)sizes.data, sizes.total())); + CV_CheckEQ(num_inputs, 3, "Expected output shape, weights and input nodes"); - int id = dstNet.addLayer(name, "Slice", layerParams); - layer_id[name] = id; + layerParams.set("bias_term", false); + layerParams.blobs.resize(1); - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - } - else if (type == "StridedSlice") + StrIntVector next_layers = getNextLayers(net, name, "BiasAdd"); + if (next_layers.size() == 1) + { + layerParams.set("bias_term", true); + layerParams.blobs.resize(2); + + int weights_layer_index = next_layers[0].second; + + blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]); + ExcludeLayer(net, weights_layer_index, 0, false); + layers_to_ignore.insert(next_layers[0].first); + } + + kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]); + + const int* kshape = layerParams.blobs[0].size.p; + const int kernelH = kshape[2]; + const int kernelW = kshape[3]; + layerParams.set("kernel_h", kernelH); + layerParams.set("kernel_w", kernelW); + layerParams.set("num_output", kshape[1]); + + setStrides(layerParams, layer); + setPadding(layerParams, layer); + + // For convolution layer, output shape computes as + // o = 1 + (i - k + 2*p) / s + // i - input size, o - output size, k - kernel size, p - pad, s - stride + // In TensorFlow, p == 0 is padMode == 'VALID' or p == (k - 1) / 2 + // considering that k is odd. + // SAME: o = 1 + (i - 1) / s + // VALID: o = 1 + i / s + // Deconvolution's layer output shape computes as + // SAME: o = 1 + (i - 1)*s + // VALID: o = (i - 1)*s + // If output_shape differs from formulas above then adjust padding is applied. 
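+ // e.g. with stride 2 and SAME padding, a requested output height of 8 gives adj_h = (8 - 1) % 2 = 1, while an output height of 7 gives adj_h = 0.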
+ + const int strideY = layerParams.get("stride_h"); + const int strideX = layerParams.get("stride_w"); + Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0)); + const int outH = outShape.at(1); + const int outW = outShape.at(2); + if (layerParams.get("pad_mode") == "SAME") + { + layerParams.set("adj_w", (outW - 1) % strideX); + layerParams.set("adj_h", (outH - 1) % strideY); + } + else if (layerParams.get("pad_mode") == "VALID") + { + layerParams.set("adj_w", (outW - kernelW) % strideX); + layerParams.set("adj_h", (outH - kernelH) % strideY); + } + int id = dstNet.addLayer(name, "Deconvolution", layerParams); + layer_id[name] = id; + + // one input only + connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0); +} + +void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // op: "BlockLSTM" + // input: "lstm_block_wrapper/ToInt64/x" (ignore, number of time stamps) + // input: "input" + // input: "lstm_block_wrapper/zeros" (ignore) + // input: "lstm_block_wrapper/zeros" (ignore) + // input: "lstm_block_wrapper/kernel" + // input: "lstm_block_wrapper/w_i_diag" + // input: "lstm_block_wrapper/w_f_diag" + // input: "lstm_block_wrapper/w_o_diag" + // input: "lstm_block_wrapper/bias" + + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckEQ(num_inputs, 9, "Unexpected number of input nodes"); + + if (hasLayerAttr(layer, "forget_bias")) + layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f()); + + if (hasLayerAttr(layer, "forget_bias")) + { + float cellClip = getLayerAttr(layer, "cell_clip").f(); + // Cell clip disabled if it's negative. + if (cellClip >= 0) { - CV_CheckEQ(num_inputs, 4, ""); - Mat begins = getTensorContent(getConstBlob(layer, value_id, 1)); - Mat ends = getTensorContent(getConstBlob(layer, value_id, 2)); - Mat strides = getTensorContent(getConstBlob(layer, value_id, 3)); - CV_CheckTypeEQ(begins.type(), CV_32SC1, ""); - CV_CheckTypeEQ(ends.type(), CV_32SC1, ""); - CV_CheckTypeEQ(strides.type(), CV_32SC1, ""); - const int num = begins.total(); - CV_Assert_N(num == ends.total(), num == strides.total()); - - int end_mask = getLayerAttr(layer, "end_mask").i(); - for (int i = 0; i < num; ++i) - { - if (ends.at(i) < 0) - ends.at(i) -= 1; - if (end_mask & (1 << i)) - ends.at(i) = -1; - if (strides.at(i) != 1) - CV_Error(Error::StsNotImplemented, - format("StridedSlice with stride %d", strides.at(i))); - } - if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) - { - // Swap NHWC parameters' order to NCHW. 
- std::swap(begins.at(2), begins.at(3)); - std::swap(begins.at(1), begins.at(2)); - std::swap(ends.at(2), ends.at(3)); - std::swap(ends.at(1), ends.at(2)); - } - layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total())); - layerParams.set("end", DictValue::arrayInt((int*)ends.data, ends.total())); + layerParams.set("use_cell_clip", true); + layerParams.set("cell_clip", cellClip); + } + } - int id = dstNet.addLayer(name, "Slice", layerParams); - layer_id[name] = id; + Mat W, Wh, Wx, b; + blobFromTensor(getConstBlob(layer, value_id, 4), W); + blobFromTensor(getConstBlob(layer, value_id, 8), b); + const int outSize = W.cols / 4; - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + // IGFO->IFOG + float* weightData = (float*)W.data; + for (int i = 0; i < W.rows; ++i) + for (int j = 0; j < outSize; ++j) + { + std::swap(weightData[i * W.cols + 1 * outSize + j], + weightData[i * W.cols + 2 * outSize + j]); + std::swap(weightData[i * W.cols + 2 * outSize + j], + weightData[i * W.cols + 3 * outSize + j]); } - else if (type == "Mul" || type == "RealDiv") + Wx = W.rowRange(0, W.rows - outSize).t(); + Wh = W.rowRange(W.rows - outSize, W.rows).t(); + + layerParams.blobs.resize(3); + layerParams.blobs[0] = Wh; + layerParams.blobs[1] = Wx; + layerParams.blobs[2] = b; + + if (hasLayerAttr(layer, "use_peephole")) + { + bool usePeephole = getLayerAttr(layer, "use_peephole").b(); + if (usePeephole) { - CV_CheckGT(num_inputs, 0, ""); - int constId = -1; - for(int ii = 0; ii < num_inputs; ++ii) + layerParams.set("use_peephole", true); + layerParams.blobs.resize(6); + for (int i = 0; i < 3; ++i) { - Pin input = parsePin(layer.input(ii)); - if (value_id.find(input.name) != value_id.end()) - { - constId = ii; - break; - } + Mat w; + blobFromTensor(getConstBlob(layer, value_id, 5 + i), w); + w = w.reshape(1, w.total()); // Single column. + w = Mat::diag(w); // Make a diagonal matrix. + layerParams.blobs[3 + i] = w; } - CV_Assert((constId != -1) || (num_inputs == 2)); + } + } - if (constId != -1) - { - // Multiplication by constant. - CV_CheckEQ(num_inputs, 2, ""); - Mat scaleMat = getTensorContent(getConstBlob(layer, value_id)); - CV_Assert(scaleMat.type() == CV_32FC1); - if (type == "RealDiv") - { - if (constId == 0) - CV_Error(Error::StsNotImplemented, "Division of constant over variable"); - scaleMat = 1.0f / scaleMat; - } + int id = dstNet.addLayer(name, "LSTM", layerParams); + layer_id[name] = id; - int id; - if (scaleMat.total() == 1) // is a scalar. - { - // Try to match with a LeakyRelu: - // node { - // name: "LeakyRelu/mul" - // op: "Mul" - // input: "LeakyRelu/alpha" - // input: "input" - // } - // node { - // name: "LeakyRelu/Maximum" - // op: "Maximum" - // input: "LeakyRelu/mul" - // input: "input" - // } - StrIntVector next_layers = getNextLayers(net, name, "Maximum"); - if (!next_layers.empty()) - { - int maximumLayerIdx = next_layers[0].second; + // one input only + connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0); + data_layouts[name] = DATA_LAYOUT_UNKNOWN; +} - CV_Assert(net.node(maximumLayerIdx).input_size() == 2); +void TFImporter::parseResize(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer_, LayerParams& layerParams) +{ + tensorflow::NodeDef layer = layer_; + std::string name = layer.name(); + const std::string& type = layer.op(); + int num_inputs = layer.input_size(); - // The input from the Mul layer can also be at index 1. - int mulInputIdx = (net.node(maximumLayerIdx).input(0) == name) ? 
0 : 1; + CV_CheckGT(num_inputs, 0, ""); + std::string convWeights = ""; + if (type == "FusedResizeAndPadConv2D") + { + // input: "mul_1" + // input: "decoder/ResizeBilinear/size" + // input: "decoder/decoder_conv0/Conv2D_dummy_paddings" + // input: "decoder/decoder_conv0/weights" + CV_CheckEQ(num_inputs, 4, "Number of input for FusedResizeAndPadConv2D"); - ExcludeLayer(net, maximumLayerIdx, mulInputIdx, false); - layers_to_ignore.insert(next_layers[0].first); + Mat paddings = getTensorContent(getConstBlob(layer, value_id, 2)); + CV_CheckEQ(countNonZero(paddings), 0, "Unsupported mode"); - layerParams.set("negative_slope", scaleMat.at(0)); - id = dstNet.addLayer(name, "ReLU", layerParams); - } - else - { - // Just a multiplication. - layerParams.set("scale", scaleMat.at(0)); - id = dstNet.addLayer(name, "Power", layerParams); - } - } - else // is a vector - { - layerParams.blobs.resize(1, scaleMat); - - StrIntVector next_layers = getNextLayers(net, name, "Add"); - if (!next_layers.empty()) - { - layerParams.set("bias_term", true); - layerParams.blobs.resize(2); - - int weights_layer_index = next_layers[0].second; - blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs.back()); - ExcludeLayer(net, weights_layer_index, 0, false); - layers_to_ignore.insert(next_layers[0].first); - } + convWeights = layer.input(3); + layer.mutable_input()->DeleteSubrange(2, 2); // FIXIT do NOT modify input model + num_inputs = layer.input_size(); + name = name + "/resize"; - if (hasLayerAttr(layer, "axis")) - layerParams.set("axis", getLayerAttr(layer, "axis").i()); + if (hasLayerAttr(layer, "resize_align_corners")) + { + // FIXIT do NOT modify input model + layer.mutable_attr()->insert( + ::google::protobuf::MapPair("align_corners", + getLayerAttr(layer, "resize_align_corners"))); + } + } + if (num_inputs == 2) + { + Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1)); + CV_CheckTypeEQ(outSize.type(), CV_32SC1, ""); CV_CheckEQ(outSize.total(), (size_t)2, ""); + layerParams.set("height", outSize.at(0, 0)); + layerParams.set("width", outSize.at(0, 1)); + } + else if (num_inputs == 3) + { + Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1)); + Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2)); + factorHeight.convertTo(factorHeight, CV_32F); + factorWidth.convertTo(factorWidth, CV_32F); + layerParams.set("zoom_factor_x", factorWidth.at(0)); + layerParams.set("zoom_factor_y", factorHeight.at(0)); + } + else + CV_Check(num_inputs, num_inputs == 2 || num_inputs == 3, ""); - id = dstNet.addLayer(name, "Scale", layerParams); - } - layer_id[name] = id; + if (type == "ResizeNearestNeighbor") + layerParams.set("interpolation", "nearest"); + else + layerParams.set("interpolation", "bilinear"); - Pin inp0 = parsePin(layer.input(0)); - if (layer_id.find(inp0.name) != layer_id.end()) - // First operand is a constant. - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - else - connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0); - } - else - { - // Check if all the inputs have the same shape. 
- bool equalInpShapes = true; - bool isShapeOnes = false; - MatShape outShape0; - for (int ii = 0; ii < num_inputs && !netInputShapes.empty(); ii++) - { - Pin pin = parsePin(layer.input(ii)); - int inpId = layer_id.find(pin.name)->second; + if (hasLayerAttr(layer, "align_corners")) + layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b()); - // Get input shape - MatShape outShape; - std::vector inpShapes, outShapes; - dstNet.getLayerShapes(netInputShapes, inpId, inpShapes, outShapes); - CV_CheckGT(static_cast(outShapes.size()), pin.blobIndex, ""); - outShape = outShapes[pin.blobIndex]; + if (hasLayerAttr(layer, "half_pixel_centers")) + layerParams.set("half_pixel_centers", getLayerAttr(layer, "half_pixel_centers").b()); - if (ii == 0) - { - outShape0 = outShape; - } - else if (outShape != outShape0) - { - equalInpShapes = false; - isShapeOnes = isAllOnes(outShape, 2, outShape.size()) || - isAllOnes(outShape0, 2, outShape0.size()); - break; - } - } + int id = dstNet.addLayer(name, "Resize", layerParams); + layer_id[name] = id; - int id; - if (equalInpShapes || netInputShapes.empty() || (!equalInpShapes && isShapeOnes)) - { - layerParams.set("operation", type == "RealDiv" ? "div" : "prod"); - id = dstNet.addLayer(name, "Eltwise", layerParams); - } - else - { - if (type == "RealDiv") - CV_Error(Error::StsNotImplemented, "Division of non equal tensors"); - id = dstNet.addLayer(name, "Scale", layerParams); - } + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - layer_id[name] = id; + // Step back to add convolution + if (type == "FusedResizeAndPadConv2D") + { + tensorflow::NodeDef conv = layer_; + conv.clear_input(); + conv.add_input(name); + conv.add_input(convWeights); + conv.set_op("Conv2D"); + parseNode(conv); + } +} - for (int ii = 0; ii < num_inputs; ii++) - { - Pin inp = parsePin(layer.input(ii)); - if (layer_id.find(inp.name) == layer_id.end()) - CV_Error(Error::StsError, "Input layer not found: " + inp.name); - connect(layer_id, dstNet, inp, id, ii); - } - } - } - else if (type == "FusedBatchNorm" || type == "FusedBatchNormV3") - { - // op: "FusedBatchNorm" - // input: "input" - // input: "BatchNorm/gamma" - // input: "BatchNorm/beta" - // input: "BatchNorm/moving_mean" - // input: "BatchNorm/moving_variance" - CV_CheckEQ(num_inputs, 5, "Expected gamma, beta, mean and std"); - Pin inpId = parsePin(layer.input(0)); +void TFImporter::parseL2Normalize(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // op: "L2Normalize" + // input: "input" + // input: "reduction_indices" (axis) - bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b(); + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - layerParams.blobs.resize(2); + CV_CheckEQ(num_inputs, 2, ""); + Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1)); + CV_Assert(reductionIndices.type() == CV_32SC1); - const tensorflow::TensorProto& gammaTensor = getConstBlob(layer, value_id, 1); - if (!gammaTensor.tensor_content().empty()) - { - layerParams.blobs.resize(layerParams.blobs.size() + 1); - layerParams.set("has_weight", true); - blobFromTensor(gammaTensor, layerParams.blobs.back()); - } - else - layerParams.set("has_weight", false); - - const tensorflow::TensorProto& betaTensor = getConstBlob(layer, value_id, 2); - if (!betaTensor.tensor_content().empty()) - { - layerParams.blobs.resize(layerParams.blobs.size() + 1); - layerParams.set("has_bias", true); - 
blobFromTensor(betaTensor, layerParams.blobs.back()); - } - else - layerParams.set("has_bias", false); - - Mat mean, std; - if (isTraining) - { - if (layerParams.blobs.size() == 2) - CV_Error(Error::StsNotImplemented, "Cannot determine number " - "of parameters for batch normalization layer."); - mean = Mat::zeros(1, layerParams.blobs[2].total(), CV_32F); - std = Mat::ones(1, layerParams.blobs[2].total(), CV_32F); - - // Add an extra layer: Mean-Variance normalization - LayerParams mvnParams; - std::string mvnName = name + "/MVN"; - CV_Assert(layer_id.find(mvnName) == layer_id.end()); - int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams); - layer_id[mvnName] = mvnId; - connect(layer_id, dstNet, inpId, mvnId, 0); - inpId = Pin(mvnName); - } - else - { - blobFromTensor(getConstBlob(layer, value_id, 3), mean); - blobFromTensor(getConstBlob(layer, value_id, 4), std); - } - layerParams.blobs[0] = mean; - layerParams.blobs[1] = std; + const int numAxes = reductionIndices.total(); + if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + for (int i = 0; i < numAxes; ++i) + reductionIndices.at(i) = toNCHW(reductionIndices.at(i)); - if (hasLayerAttr(layer, "epsilon")) - layerParams.set("eps", getLayerAttr(layer, "epsilon").f()); + cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING); + for (int i = 1; i < numAxes; ++i) + { + CV_Assert(reductionIndices.at(i) == reductionIndices.at(i - 1) + 1); + // Axes have the same sign. + CV_Assert(reductionIndices.at(i) * reductionIndices.at(i - 1) >= 0); + } + layerParams.set("start_axis", reductionIndices.at(0)); + layerParams.set("end_axis", reductionIndices.at(numAxes - 1)); - int id = dstNet.addLayer(name, "BatchNorm", layerParams); - layer_id[name] = id; + int id = dstNet.addLayer(name, "Normalize", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); +} - // one input only - connect(layer_id, dstNet, inpId, id, 0); - } - else if (type == "Conv2DBackpropInput") +void TFImporter::parsePriorBox(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckEQ(num_inputs, 2, ""); + if (hasLayerAttr(layer, "min_size")) + layerParams.set("min_size", getLayerAttr(layer, "min_size").i()); + if (hasLayerAttr(layer, "max_size")) + layerParams.set("max_size", getLayerAttr(layer, "max_size").i()); + if (hasLayerAttr(layer, "flip")) + layerParams.set("flip", getLayerAttr(layer, "flip").b()); + if (hasLayerAttr(layer, "clip")) + layerParams.set("clip", getLayerAttr(layer, "clip").b()); + if (hasLayerAttr(layer, "offset")) + layerParams.set("offset", getLayerAttr(layer, "offset").f()); + if (hasLayerAttr(layer, "step")) + layerParams.set("step", getLayerAttr(layer, "step").f()); + + const std::string paramNames[] = {"variance", "aspect_ratio", "scales", + "width", "height"}; + for (int i = 0; i < 5; ++i) + { + if (hasLayerAttr(layer, paramNames[i])) { - // op: "Conv2DBackpropInput" - // input: "conv2d_transpose/output_shape" - // input: "weights" - // input: "input" - CV_CheckEQ(num_inputs, 3, "Expected output shape, weights and input nodes"); + Mat values = getTensorContent(getLayerAttr(layer, paramNames[i]).tensor()); + layerParams.set(paramNames[i], + DictValue::arrayReal((float*)values.data, values.total())); + } + } + int id = dstNet.addLayer(name, "PriorBox", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + connect(layer_id, 
dstNet, parsePin(layer.input(1)), id, 1); + data_layouts[name] = DATA_LAYOUT_UNKNOWN; +} - layerParams.set("bias_term", false); - layerParams.blobs.resize(1); +void TFImporter::parseSoftmax(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - StrIntVector next_layers = getNextLayers(net, name, "BiasAdd"); - if (next_layers.size() == 1) - { - layerParams.set("bias_term", true); - layerParams.blobs.resize(2); + CV_CheckGT(num_inputs, 0, ""); + if (hasLayerAttr(layer, "axis")) + layerParams.set("axis", getLayerAttr(layer, "axis").i()); - int weights_layer_index = next_layers[0].second; + int id = dstNet.addLayer(name, "Softmax", layerParams); + layer_id[name] = id; + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); +} - blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]); - ExcludeLayer(net, weights_layer_index, 0, false); - layers_to_ignore.insert(next_layers[0].first); - } +void TFImporter::parseCropAndResize(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // op: "CropAndResize" + // input: "input" + // input: "boxes" + // input: "sizes" - kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]); - - const int* kshape = layerParams.blobs[0].size.p; - const int kernelH = kshape[2]; - const int kernelW = kshape[3]; - layerParams.set("kernel_h", kernelH); - layerParams.set("kernel_w", kernelW); - layerParams.set("num_output", kshape[1]); - - setStrides(layerParams, layer); - setPadding(layerParams, layer); - - // For convolution layer, output shape computes as - // o = 1 + (i - k + 2*p) / s - // i - input size, o - output size, k - kernel size, p - pad, s - stride - // In TensorFlow, p == 0 is padMode == 'VALID' or p == (k - 1) / 2 - // considering that k is odd. - // SAME: o = 1 + (i - 1) / s - // VALID: o = 1 + i / s - // Deconvolution's layer output shape computes as - // SAME: o = 1 + (i - 1)*s - // VALID: o = (i - 1)*s - // If output_shape differs from formulas above then adjust padding is applied. 
- - const int strideY = layerParams.get("stride_h"); - const int strideX = layerParams.get("stride_w"); - Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0)); - const int outH = outShape.at(1); - const int outW = outShape.at(2); - if (layerParams.get("pad_mode") == "SAME") - { - layerParams.set("adj_w", (outW - 1) % strideX); - layerParams.set("adj_h", (outH - 1) % strideY); - } - else if (layerParams.get("pad_mode") == "VALID") - { - layerParams.set("adj_w", (outW - kernelW) % strideX); - layerParams.set("adj_h", (outH - kernelH) % strideY); - } - int id = dstNet.addLayer(name, "Deconvolution", layerParams); - layer_id[name] = id; + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + CV_CheckEQ(num_inputs, 3, ""); - // one input only - connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0); - } - else if (type == "BlockLSTM") - { - // op: "BlockLSTM" - // input: "lstm_block_wrapper/ToInt64/x" (ignore, number of time stamps) - // input: "input" - // input: "lstm_block_wrapper/zeros" (ignore) - // input: "lstm_block_wrapper/zeros" (ignore) - // input: "lstm_block_wrapper/kernel" - // input: "lstm_block_wrapper/w_i_diag" - // input: "lstm_block_wrapper/w_f_diag" - // input: "lstm_block_wrapper/w_o_diag" - // input: "lstm_block_wrapper/bias" - CV_CheckEQ(num_inputs, 9, "Unexpected number of input nodes"); - - if (hasLayerAttr(layer, "forget_bias")) - layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f()); - - if (hasLayerAttr(layer, "forget_bias")) - { - float cellClip = getLayerAttr(layer, "cell_clip").f(); - // Cell clip disabled if it's negative. - if (cellClip >= 0) - { - layerParams.set("use_cell_clip", true); - layerParams.set("cell_clip", cellClip); - } - } + Mat cropSize = getTensorContent(getConstBlob(layer, value_id, 2)); + CV_CheckTypeEQ(cropSize.type(), CV_32SC1, ""); CV_CheckEQ(cropSize.total(), (size_t)2, ""); - Mat W, Wh, Wx, b; - blobFromTensor(getConstBlob(layer, value_id, 4), W); - blobFromTensor(getConstBlob(layer, value_id, 8), b); - const int outSize = W.cols / 4; + layerParams.set("height", cropSize.at(0)); + layerParams.set("width", cropSize.at(1)); - // IGFO->IFOG - float* weightData = (float*)W.data; - for (int i = 0; i < W.rows; ++i) - for (int j = 0; j < outSize; ++j) - { - std::swap(weightData[i * W.cols + 1 * outSize + j], - weightData[i * W.cols + 2 * outSize + j]); - std::swap(weightData[i * W.cols + 2 * outSize + j], - weightData[i * W.cols + 3 * outSize + j]); - } - Wx = W.rowRange(0, W.rows - outSize).t(); - Wh = W.rowRange(W.rows - outSize, W.rows).t(); + int id = dstNet.addLayer(name, "CropAndResize", layerParams); + layer_id[name] = id; - layerParams.blobs.resize(3); - layerParams.blobs[0] = Wh; - layerParams.blobs[1] = Wx; - layerParams.blobs[2] = b; + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); +} - if (hasLayerAttr(layer, "use_peephole")) - { - bool usePeephole = getLayerAttr(layer, "use_peephole").b(); - if (usePeephole) - { - layerParams.set("use_peephole", true); - layerParams.blobs.resize(6); - for (int i = 0; i < 3; ++i) - { - Mat w; - blobFromTensor(getConstBlob(layer, value_id, 5 + i), w); - w = w.reshape(1, w.total()); // Single column. - w = Mat::diag(w); // Make a diagonal matrix. 
- layerParams.blobs[3 + i] = w; - } - } - } +void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // Computes the mean of elements across dimensions of a tensor. + // If keepdims is false (default) reduces input_tensor along the dimensions given in axis, + // else the reduced dimensions are retained with length 1. + // if indices = [1, 2] in NHWC layout we use global pooling: NxCxHxW --Pooling--> NxCx1x1 + // if keepdims is false we use Flatten after Pooling: out_shape = NxC + // if indices = [0] we use a global pooling by indices. + // To return correct shape, we use Reshape after Pooling. To determine input shape use Slice for input, + // if keepdims is false we use Flatten after Slice. + // Example: input_shape = NxCxHxW + // determine out shape: NxCxHxW --Slice--> 1xCxHxW + // out_shape = 1xCxHxW if keepDims else (1xCxHxW --Flatten--> CxHxW) + // global pool: NxCxHxW --Flatten--> Nx(C*H*W) --Reshape--> 1x1xNx(C*H*W) --Pooling--> 1x1x1x(C*H*W) --Reshape--> out_shape + + const std::string& name = layer.name(); + const std::string& type = layer.op(); + const int num_inputs = layer.input_size(); + + CV_CheckGT(num_inputs, 0, ""); + + Mat indices = getTensorContent(getConstBlob(layer, value_id, 1)); + CV_Assert(indices.type() == CV_32SC1); + + // There are two attributes, "keepdims" and a deprecated "keep_dims". + bool keepDims = false; + if (hasLayerAttr(layer, "keepdims")) + keepDims = getLayerAttr(layer, "keepdims").b(); + else if (hasLayerAttr(layer, "keep_dims")) + keepDims = getLayerAttr(layer, "keep_dims").b(); + + if (indices.total() == 1 && indices.at(0) == 0) + { + LayerParams flattenLp; + std::string flattenName = name + "/flatten"; + CV_Assert(layer_id.find(flattenName) == layer_id.end()); + int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp); + layer_id[flattenName] = flattenId; + connect(layer_id, dstNet, parsePin(layer.input(0)), flattenId, 0); + + LayerParams reshapeLp; + std::string reshapeName = name + "/reshape"; + CV_Assert(layer_id.find(reshapeName) == layer_id.end()); + reshapeLp.set("axis", 0); + reshapeLp.set("num_axes", 1); + int newShape[] = {1, 1, -1}; + reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 3)); + + int reshapeId = dstNet.addLayer(reshapeName, "Reshape", reshapeLp); + layer_id[reshapeName] = reshapeId; + connect(layer_id, dstNet, Pin(flattenName), reshapeId, 0); + + LayerParams avgLp; + std::string avgName = name + "/avg"; + CV_Assert(layer_id.find(avgName) == layer_id.end()); + avgLp.set("pool", type == "Mean" ? 
"ave" : "sum"); + // pooling kernel H x 1 + avgLp.set("global_pooling_h", true); + avgLp.set("kernel_w", 1); + int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); + layer_id[avgName] = avgId; + connect(layer_id, dstNet, Pin(reshapeName), avgId, 0); + + LayerParams sliceLp; + std::string layerShapeName = name + "/slice"; + CV_Assert(layer_id.find(layerShapeName) == layer_id.end()); + sliceLp.set("axis", 0); + int begin[] = {0}; + int size[] = {1}; + sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1)); + sliceLp.set("size", DictValue::arrayInt(&size[0], 1)); + int sliceId = dstNet.addLayer(layerShapeName, "Slice", sliceLp); + layer_id[layerShapeName] = sliceId; + connect(layer_id, dstNet, Pin(layer.input(0)), sliceId, 0); + + if (!keepDims) + { + LayerParams squeezeLp; + std::string squeezeName = name + "/squeeze"; + CV_Assert(layer_id.find(squeezeName) == layer_id.end()); + squeezeLp.set("axis", 0); + squeezeLp.set("end_axis", 1); + int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); + layer_id[squeezeName] = squeezeId; + connect(layer_id, dstNet, Pin(layerShapeName), squeezeId, 0); + layerShapeName = squeezeName; + } - int id = dstNet.addLayer(name, "LSTM", layerParams); + int id = dstNet.addLayer(name, "Reshape", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, Pin(avgName), id, 0); + connect(layer_id, dstNet, Pin(layerShapeName), id, 1); + } else if (indices.total() == 1) { + int axis = toNCHW(indices.at(0)); + if (axis == 2 || axis == 3) + { + layerParams.set("pool", type == "Mean" ? "ave" : "sum"); + layerParams.set(axis == 2 ? "kernel_w" : "kernel_h", 1); + layerParams.set(axis == 2 ? "global_pooling_h" : "global_pooling_w", true); + int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - // one input only - connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0); - data_layouts[name] = DATA_LAYOUT_UNKNOWN; + if (!keepDims) + { + // To keep correct order after squeeze dims we first need to change layout from NCHW to NHWC + LayerParams permLP; + int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. + std::string permName = name + "/nchw"; + Pin inpId = Pin(name); + addPermuteLayer(order, permName, inpId); + + LayerParams squeezeLp; + std::string squeezeName = name + "/squeeze"; + CV_Assert(layer_id.find(squeezeName) == layer_id.end()); + squeezeLp.set("axis", indices.at(0)); + squeezeLp.set("end_axis", indices.at(0) + 1); + int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); + layer_id[squeezeName] = squeezeId; + connect(layer_id, dstNet, Pin(permName), squeezeId, 0); + } } - else if (type == "ResizeNearestNeighbor" || type == "ResizeBilinear" || type == "FusedResizeAndPadConv2D") + else if (axis == 1) { - CV_CheckGT(num_inputs, 0, ""); - std::string convWeights = ""; - if (type == "FusedResizeAndPadConv2D") - { - // input: "mul_1" - // input: "decoder/ResizeBilinear/size" - // input: "decoder/decoder_conv0/Conv2D_dummy_paddings" - // input: "decoder/decoder_conv0/weights" - CV_CheckEQ(num_inputs, 4, "Number of input for FusedResizeAndPadConv2D"); - - Mat paddings = getTensorContent(getConstBlob(layer, value_id, 2)); - CV_CheckEQ(countNonZero(paddings), 0, "Unsupported mode"); + int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. 
+ Pin inpId = parsePin(layer.input(0)); + addPermuteLayer(order, name + "/nhwc", inpId); - convWeights = layer.input(3); - layer.mutable_input()->DeleteSubrange(2, 2); // FIXIT do NOT modify input model - num_inputs = layer.input_size(); - name = name + "/resize"; + layerParams.set("pool", type == "Mean" ? "ave" : "sum"); + layerParams.set("kernel_h", 1); + layerParams.set("global_pooling_w", true); + int id = dstNet.addLayer(name, "Pooling", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, inpId, id, 0); - if (hasLayerAttr(layer, "resize_align_corners")) - { - // FIXIT do NOT modify input model - layer.mutable_attr()->insert( - ::google::protobuf::MapPair("align_corners", - getLayerAttr(layer, "resize_align_corners"))); - } - } - if (num_inputs == 2) - { - Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1)); - CV_CheckTypeEQ(outSize.type(), CV_32SC1, ""); CV_CheckEQ(outSize.total(), (size_t)2, ""); - layerParams.set("height", outSize.at(0, 0)); - layerParams.set("width", outSize.at(0, 1)); - } - else if (num_inputs == 3) + if (!keepDims) { - Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1)); - Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2)); - factorHeight.convertTo(factorHeight, CV_32F); - factorWidth.convertTo(factorWidth, CV_32F); - layerParams.set("zoom_factor_x", factorWidth.at(0)); - layerParams.set("zoom_factor_y", factorHeight.at(0)); + LayerParams squeezeLp; + std::string squeezeName = name + "/squeeze"; + CV_Assert(layer_id.find(squeezeName) == layer_id.end()); + int channel_id = 3; // TF NHWC layout + squeezeLp.set("axis", channel_id - 1); + squeezeLp.set("end_axis", channel_id); + int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); + layer_id[squeezeName] = squeezeId; + connect(layer_id, dstNet, Pin(name), squeezeId, 0); } else - CV_Check(num_inputs, num_inputs == 2 || num_inputs == 3, ""); - - if (type == "ResizeNearestNeighbor") - layerParams.set("interpolation", "nearest"); - else - layerParams.set("interpolation", "bilinear"); - - if (hasLayerAttr(layer, "align_corners")) - layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b()); - - if (hasLayerAttr(layer, "half_pixel_centers")) - layerParams.set("half_pixel_centers", getLayerAttr(layer, "half_pixel_centers").b()); - - int id = dstNet.addLayer(name, "Resize", layerParams); - layer_id[name] = id; - - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - - // Step back to add convolution - if (type == "FusedResizeAndPadConv2D") { - tensorflow::NodeDef conv = layer_; - conv.clear_input(); - conv.add_input(name); - conv.add_input(convWeights); - conv.set_op("Conv2D"); - parseNode(conv); + int order[] = {0, 3, 1, 2}; // From NHWC to OpenCV's NCHW. + Pin inpId = parsePin(name); + addPermuteLayer(order, name + "/nchw", inpId); } } - else if (type == "L2Normalize") - { - // op: "L2Normalize" - // input: "input" - // input: "reduction_indices" (axis) - CV_CheckEQ(num_inputs, 2, ""); - Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1)); - CV_Assert(reductionIndices.type() == CV_32SC1); - - const int numAxes = reductionIndices.total(); - if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) - for (int i = 0; i < numAxes; ++i) - reductionIndices.at(i) = toNCHW(reductionIndices.at(i)); - - cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING); - for (int i = 1; i < numAxes; ++i) - { - CV_Assert(reductionIndices.at(i) == reductionIndices.at(i - 1) + 1); - // Axes have the same sign. 
- CV_Assert(reductionIndices.at(i) * reductionIndices.at(i - 1) >= 0); - } - layerParams.set("start_axis", reductionIndices.at(0)); - layerParams.set("end_axis", reductionIndices.at(numAxes - 1)); + } else { + if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) + CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean or reduce_sum operation."); - int id = dstNet.addLayer(name, "Normalize", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - } - else if (type == "PriorBox") + layerParams.set("pool", type == "Mean" ? "ave" : "sum"); + layerParams.set("global_pooling", true); + int id = dstNet.addLayer(name, "Pooling", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + + if (!keepDims) { - CV_CheckEQ(num_inputs, 2, ""); - if (hasLayerAttr(layer, "min_size")) - layerParams.set("min_size", getLayerAttr(layer, "min_size").i()); - if (hasLayerAttr(layer, "max_size")) - layerParams.set("max_size", getLayerAttr(layer, "max_size").i()); - if (hasLayerAttr(layer, "flip")) - layerParams.set("flip", getLayerAttr(layer, "flip").b()); - if (hasLayerAttr(layer, "clip")) - layerParams.set("clip", getLayerAttr(layer, "clip").b()); - if (hasLayerAttr(layer, "offset")) - layerParams.set("offset", getLayerAttr(layer, "offset").f()); - if (hasLayerAttr(layer, "step")) - layerParams.set("step", getLayerAttr(layer, "step").f()); - - const std::string paramNames[] = {"variance", "aspect_ratio", "scales", - "width", "height"}; - for (int i = 0; i < 5; ++i) - { - if (hasLayerAttr(layer, paramNames[i])) - { - Mat values = getTensorContent(getLayerAttr(layer, paramNames[i]).tensor()); - layerParams.set(paramNames[i], - DictValue::arrayReal((float*)values.data, values.total())); - } - } - int id = dstNet.addLayer(name, "PriorBox", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); - data_layouts[name] = DATA_LAYOUT_UNKNOWN; + LayerParams flattenLp; + std::string flattenName = name + "/flatten"; + CV_Assert(layer_id.find(flattenName) == layer_id.end()); + int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp); + layer_id[flattenName] = flattenId; + connect(layer_id, dstNet, Pin(name), flattenId, 0); } - else if (type == "Softmax") - { - CV_CheckGT(num_inputs, 0, ""); - if (hasLayerAttr(layer, "axis")) - layerParams.set("axis", getLayerAttr(layer, "axis").i()); + } +} - int id = dstNet.addLayer(name, "Softmax", layerParams); - layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); - } - else if (type == "CropAndResize") - { - // op: "CropAndResize" - // input: "input" - // input: "boxes" - // input: "sizes" - CV_CheckEQ(num_inputs, 3, ""); +void TFImporter::parsePack(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // op: tf.stack(list of tensors, axis=0) + // Join a list of inputs along a new axis. + // The "axis" specifies the index of the new axis in the dimensions of the output. + // Example: given a list with "N" tensors of shape (C, H, W): + // if axis == 0 then the output tensor will have the shape (N, C, H, W), + // if axis == 1 then the output tensor will have the shape (C, N, H, W). 
+
+    const std::string& name = layer.name();
+    const int num_inputs = layer.input_size();
+
+    CV_CheckGT(num_inputs, 0, "");
+    CV_Assert(hasLayerAttr(layer, "axis"));
+    int dim = (int)getLayerAttr(layer, "axis").i();
+    if (dim != 0)
+        CV_Error(Error::StsNotImplemented, "Unsupported mode of pack operation.");
+
+    CV_Assert(hasLayerAttr(layer, "N"));
+    int num = (int)getLayerAttr(layer, "N").i();
+    CV_CheckEQ(num_inputs, num, "");
+    std::string base_name = name + "/reshape_";
+    std::vector<int> reshape_ids;
+    for (int i = 0; i < num; i++) {
+        std::ostringstream ss;
+        ss << i;
+        std::string reshape_name = base_name + ss.str();
+        LayerParams reshapeLP;
+        reshapeLP.set("axis", dim);
+        reshapeLP.set("num_axes", 1);
+        int outShape[] = {1, -1};
+        reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], 2));
+        int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP);
+        layer_id[reshape_name] = id;
+        reshape_ids.push_back(id);
+        connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0);
+    }
-            Mat cropSize = getTensorContent(getConstBlob(layer, value_id, 2));
-            CV_CheckTypeEQ(cropSize.type(), CV_32SC1, ""); CV_CheckEQ(cropSize.total(), (size_t)2, "");
+    layerParams.set("axis", dim);
+    int id = dstNet.addLayer(name, "Concat", layerParams);
+    layer_id[name] = id;
-            layerParams.set("height", cropSize.at<int>(0));
-            layerParams.set("width", cropSize.at<int>(1));
+    for (int li = 0; li < num; li++)
+        dstNet.connect(reshape_ids[li], 0, id, li);
+}
-            int id = dstNet.addLayer(name, "CropAndResize", layerParams);
-            layer_id[name] = id;
+void TFImporter::parseClipByValue(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
+{
+    // op: "ClipByValue"
+    // input: "input"
+    // input: "min"
+    // input: "max"
-            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
-            connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1);
-        }
-        else if (type == "Mean" || type == "Sum")
-        {
-            // Computes the mean of elements across dimensions of a tensor.
-            // If keepdims is false (default) reduces input_tensor along the dimensions given in axis,
-            // else the reduced dimensions are retained with length 1.
-            // if indices = [1, 2] in NHWC layout we use global pooling: NxCxHxW --Pooling--> NxCx1x1
-            // if keepdims is false we use Flatten after Pooling: out_shape = NxC
-            // if indices = [0] we use a global pooling by indices.
-            // To return correct shape, we use Reshape after Pooling. To determine input shape use Slice for input,
-            // if keepdims is false we use Flatten after Slice.
-            // Example: input_shape = NxCxHxW
-            // determine out shape: NxCxHxW --Slice--> 1xCxHxW
-            // out_shape = 1xCxHxW if keepDims else (1xCxHxW --Flatten--> CxHxW)
-            // global pool: NxCxHxW --Flatten--> Nx(C*H*W) --Reshape--> 1x1xNx(C*H*W) --Pooling--> 1x1x1x(C*H*W) --Reshape--> out_shape
-            CV_CheckGT(num_inputs, 0, "");
-
-            Mat indices = getTensorContent(getConstBlob(layer, value_id, 1));
-            CV_Assert(indices.type() == CV_32SC1);
-
-            // There are two attributes, "keepdims" and a deprecated "keep_dims".
- bool keepDims = false; - if (hasLayerAttr(layer, "keepdims")) - keepDims = getLayerAttr(layer, "keepdims").b(); - else if (hasLayerAttr(layer, "keep_dims")) - keepDims = getLayerAttr(layer, "keep_dims").b(); - - if (indices.total() == 1 && indices.at(0) == 0) - { - LayerParams flattenLp; - std::string flattenName = name + "/flatten"; - CV_Assert(layer_id.find(flattenName) == layer_id.end()); - int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp); - layer_id[flattenName] = flattenId; - connect(layer_id, dstNet, parsePin(layer.input(0)), flattenId, 0); - - LayerParams reshapeLp; - std::string reshapeName = name + "/reshape"; - CV_Assert(layer_id.find(reshapeName) == layer_id.end()); - reshapeLp.set("axis", 0); - reshapeLp.set("num_axes", 1); - int newShape[] = {1, 1, -1}; - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 3)); - - int reshapeId = dstNet.addLayer(reshapeName, "Reshape", reshapeLp); - layer_id[reshapeName] = reshapeId; - connect(layer_id, dstNet, Pin(flattenName), reshapeId, 0); - - LayerParams avgLp; - std::string avgName = name + "/avg"; - CV_Assert(layer_id.find(avgName) == layer_id.end()); - avgLp.set("pool", type == "Mean" ? "ave" : "sum"); - // pooling kernel H x 1 - avgLp.set("global_pooling_h", true); - avgLp.set("kernel_w", 1); - int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); - layer_id[avgName] = avgId; - connect(layer_id, dstNet, Pin(reshapeName), avgId, 0); - - LayerParams sliceLp; - std::string layerShapeName = name + "/slice"; - CV_Assert(layer_id.find(layerShapeName) == layer_id.end()); - sliceLp.set("axis", 0); - int begin[] = {0}; - int size[] = {1}; - sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1)); - sliceLp.set("size", DictValue::arrayInt(&size[0], 1)); - int sliceId = dstNet.addLayer(layerShapeName, "Slice", sliceLp); - layer_id[layerShapeName] = sliceId; - connect(layer_id, dstNet, Pin(layer.input(0)), sliceId, 0); - - if (!keepDims) - { - LayerParams squeezeLp; - std::string squeezeName = name + "/squeeze"; - CV_Assert(layer_id.find(squeezeName) == layer_id.end()); - squeezeLp.set("axis", 0); - squeezeLp.set("end_axis", 1); - int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); - layer_id[squeezeName] = squeezeId; - connect(layer_id, dstNet, Pin(layerShapeName), squeezeId, 0); - layerShapeName = squeezeName; - } + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - int id = dstNet.addLayer(name, "Reshape", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, Pin(avgName), id, 0); - connect(layer_id, dstNet, Pin(layerShapeName), id, 1); - } else if (indices.total() == 1) { - int axis = toNCHW(indices.at(0)); - if (axis == 2 || axis == 3) - { - layerParams.set("pool", type == "Mean" ? "ave" : "sum"); - layerParams.set(axis == 2 ? "kernel_w" : "kernel_h", 1); - layerParams.set(axis == 2 ? "global_pooling_h" : "global_pooling_w", true); - int id = dstNet.addLayer(name, "Pooling", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - - if (!keepDims) - { - // To keep correct order after squeeze dims we first need to change layout from NCHW to NHWC - LayerParams permLP; - int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. 
- std::string permName = name + "/nchw"; - Pin inpId = Pin(name); - addPermuteLayer(order, permName, inpId); - - LayerParams squeezeLp; - std::string squeezeName = name + "/squeeze"; - CV_Assert(layer_id.find(squeezeName) == layer_id.end()); - squeezeLp.set("axis", indices.at(0)); - squeezeLp.set("end_axis", indices.at(0) + 1); - int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); - layer_id[squeezeName] = squeezeId; - connect(layer_id, dstNet, Pin(permName), squeezeId, 0); - } - } - else if (axis == 1) - { - int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. - Pin inpId = parsePin(layer.input(0)); - addPermuteLayer(order, name + "/nhwc", inpId); - - layerParams.set("pool", type == "Mean" ? "ave" : "sum"); - layerParams.set("kernel_h", 1); - layerParams.set("global_pooling_w", true); - int id = dstNet.addLayer(name, "Pooling", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, inpId, id, 0); - - if (!keepDims) - { - LayerParams squeezeLp; - std::string squeezeName = name + "/squeeze"; - CV_Assert(layer_id.find(squeezeName) == layer_id.end()); - int channel_id = 3; // TF NHWC layout - squeezeLp.set("axis", channel_id - 1); - squeezeLp.set("end_axis", channel_id); - int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); - layer_id[squeezeName] = squeezeId; - connect(layer_id, dstNet, Pin(name), squeezeId, 0); - } - else - { - int order[] = {0, 3, 1, 2}; // From NHWC to OpenCV's NCHW. - Pin inpId = parsePin(name); - addPermuteLayer(order, name + "/nchw", inpId); - } - } - } else { - if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) - CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean or reduce_sum operation."); + CV_CheckEQ(num_inputs, 3, ""); - layerParams.set("pool", type == "Mean" ? "ave" : "sum"); - layerParams.set("global_pooling", true); - int id = dstNet.addLayer(name, "Pooling", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + Mat minValue = getTensorContent(getConstBlob(layer, value_id, 1)); + Mat maxValue = getTensorContent(getConstBlob(layer, value_id, 2)); + CV_CheckEQ(minValue.total(), (size_t)1, ""); CV_CheckTypeEQ(minValue.type(), CV_32FC1, ""); + CV_CheckEQ(maxValue.total(), (size_t)1, ""); CV_CheckTypeEQ(maxValue.type(), CV_32FC1, ""); - if (!keepDims) - { - LayerParams flattenLp; - std::string flattenName = name + "/flatten"; - CV_Assert(layer_id.find(flattenName) == layer_id.end()); - int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp); - layer_id[flattenName] = flattenId; - connect(layer_id, dstNet, Pin(name), flattenId, 0); - } - } - } - else if (type == "Pack") - { - // op: tf.stack(list of tensors, axis=0) - // Join a list of inputs along a new axis. - // The "axis" specifies the index of the new axis in the dimensions of the output. - // Example: given a list with "N" tensors of shape (C, H, W): - // if axis == 0 then the output tensor will have the shape (N, C, H, W), - // if axis == 1 then the output tensor will have the shape (C, N, H, W). 
-            CV_CheckGT(num_inputs, 0, "");
-            CV_Assert(hasLayerAttr(layer, "axis"));
-            int dim = (int)getLayerAttr(layer, "axis").i();
-            if (dim != 0)
-                CV_Error(Error::StsNotImplemented, "Unsupported mode of pack operation.");
-
-            CV_Assert(hasLayerAttr(layer, "N"));
-            int num = (int)getLayerAttr(layer, "N").i();
-            CV_CheckEQ(num_inputs, num, "");
-            std::string base_name = name + "/reshape_";
-            std::vector<int> reshape_ids;
-            for (int i = 0; i < num; i++) {
-                std::ostringstream ss;
-                ss << i;
-                std::string reshape_name = base_name + ss.str();
-                LayerParams reshapeLP;
-                reshapeLP.set("axis", dim);
-                reshapeLP.set("num_axes", 1);
-                int outShape[] = {1, -1};
-                reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], 2));
-                int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP);
-                layer_id[reshape_name] = id;
-                reshape_ids.push_back(id);
-                connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0);
-            }
+    layerParams.set("min_value", minValue.at<float>(0));
+    layerParams.set("max_value", maxValue.at<float>(0));
-            layerParams.set("axis", dim);
-            int id = dstNet.addLayer(name, "Concat", layerParams);
-            layer_id[name] = id;
+    int id = dstNet.addLayer(name, "ReLU6", layerParams);
+    layer_id[name] = id;
-            for (int li = 0; li < num; li++)
-                dstNet.connect(reshape_ids[li], 0, id, li);
-        }
-        else if (type == "ClipByValue")
-        {
-            // op: "ClipByValue"
-            // input: "input"
-            // input: "mix"
-            // input: "max"
-            CV_CheckEQ(num_inputs, 3, "");
+    connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
+}
-            Mat minValue = getTensorContent(getConstBlob(layer, value_id, 1));
-            Mat maxValue = getTensorContent(getConstBlob(layer, value_id, 2));
-            CV_CheckEQ(minValue.total(), (size_t)1, ""); CV_CheckTypeEQ(minValue.type(), CV_32FC1, "");
-            CV_CheckEQ(maxValue.total(), (size_t)1, ""); CV_CheckTypeEQ(maxValue.type(), CV_32FC1, "");
+void TFImporter::parseLeakyRelu(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
+{
+    const std::string& name = layer.name();
+    const int num_inputs = layer.input_size();
-            layerParams.set("min_value", minValue.at<float>(0));
-            layerParams.set("max_value", maxValue.at<float>(0));
+    CV_CheckGT(num_inputs, 0, "");
+    CV_Assert(hasLayerAttr(layer, "alpha"));
+    layerParams.set("negative_slope", getLayerAttr(layer, "alpha").f());
-            int id = dstNet.addLayer(name, "ReLU6", layerParams);
-            layer_id[name] = id;
+    int id = dstNet.addLayer(name, "ReLU", layerParams);
+    layer_id[name] = id;
+    connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
+}
-            connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
-        }
-        else if (type == "LeakyRelu")
-        {
-            CV_CheckGT(num_inputs, 0, "");
-            CV_Assert(hasLayerAttr(layer, "alpha"));
-            layerParams.set("negative_slope", getLayerAttr(layer, "alpha").f());
+void TFImporter::parseActivation(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
+{
+    const std::string& name = layer.name();
+    const std::string& type = layer.op();
+    const int num_inputs = layer.input_size();
+
+    CV_CheckGT(num_inputs, 0, "");
+    std::string dnnType = type;
+    if (type == "Abs") dnnType = "AbsVal";
+    else if (type == "Tanh") dnnType = "TanH";
+    else if (type == "Relu") dnnType = "ReLU";
+    else if (type == "Relu6") dnnType = "ReLU6";
+    else if (type == "Elu") dnnType = "ELU";
+
+    int id = dstNet.addLayer(name, dnnType, layerParams);
+    layer_id[name] = id;
+    connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
+}
-            int id = dstNet.addLayer(name, "ReLU", layerParams);
-            layer_id[name] = id;
-            connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
-        }
-        else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" ||
-                 type == "Relu" || type == "Elu" || type == "Exp" ||
-                 type == "Identity" || type == "Relu6")
+void TFImporter::parseCustomLayer(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams)
+{
+    // The importer does not know how to map this TensorFlow operation onto an OpenCV layer.
+    // However, we create a layer with the same type and rely on the user having defined a custom layer.
+
+    const std::string& name = layer.name();
+    const std::string& type = layer.op();
+    const int num_inputs = layer.input_size();
+
+    // All the attributes are added to LayerParams.
+    google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
+    for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
+         ai != attr.end(); ++ai)
+    {
+        if (ai->second.value_case() == tensorflow::AttrValue::kS)  // string
+            layerParams.set(ai->first, ai->second.s());
+        if (ai->second.value_case() == tensorflow::AttrValue::kI)  // int64
+            layerParams.set(ai->first, ai->second.i());
+        if (ai->second.value_case() == tensorflow::AttrValue::kF)  // float
+            layerParams.set(ai->first, ai->second.f());
+        if (ai->second.value_case() == tensorflow::AttrValue::kB)  // bool
+            layerParams.set(ai->first, ai->second.b());
+    }
+
+    // All the Const input nodes are added to layer's blobs.
+    std::vector<std::string> inputsNames;
+    for (int i = 0; i < num_inputs; ++i)
+    {
+        // Check if input is a Const node.
+        if (value_id.find(layer.input(i)) != value_id.end())
         {
-            CV_CheckGT(num_inputs, 0, "");
-            std::string dnnType = type;
-            if (type == "Abs") dnnType = "AbsVal";
-            else if (type == "Tanh") dnnType = "TanH";
-            else if (type == "Relu") dnnType = "ReLU";
-            else if (type == "Relu6") dnnType = "ReLU6";
-            else if (type == "Elu") dnnType = "ELU";
-
-            int id = dstNet.addLayer(name, dnnType, layerParams);
-            layer_id[name] = id;
-            connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs);
+            Mat blob = getTensorContent(getConstBlob(layer, value_id, i));
+            layerParams.blobs.push_back(blob);
         }
         else
-        {
-            // Importer does not know how to map this TensorFlow's operation onto OpenCV's layer.
-            // However we create a layer with the same type and rely that user defined a custom layer.
+            inputsNames.push_back(layer.input(i));
+    }
+    int id = dstNet.addLayer(name, type, layerParams);
+    layer_id[name] = id;
-            // All the attributes are added to LayerParams.
-            google::protobuf::Map<std::string, tensorflow::AttrValue> attr = layer.attr();
-            for (google::protobuf::Map<std::string, tensorflow::AttrValue>::const_iterator ai = attr.begin();
-                 ai != attr.end(); ++ai)
-            {
-                if (ai->second.value_case() == tensorflow::AttrValue::kS)  // string
-                    layerParams.set(ai->first, ai->second.s());
-                if (ai->second.value_case() == tensorflow::AttrValue::kI)  // int64
-                    layerParams.set(ai->first, ai->second.i());
-                if (ai->second.value_case() == tensorflow::AttrValue::kF)  // float
-                    layerParams.set(ai->first, ai->second.f());
-                if (ai->second.value_case() == tensorflow::AttrValue::kB)  // bool
-                    layerParams.set(ai->first, ai->second.b());
-            }
+    for (int i = 0; i < inputsNames.size(); ++i)
+    {
+        connect(layer_id, dstNet, parsePin(inputsNames[i]), id, i);
+    }
+}
-            // All the Const input nodes are added to layer's blobs.
-            std::vector<std::string> inputsNames;
-            for (int i = 0; i < num_inputs; ++i)
-            {
-                // Check if input is a Const node.
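// Illustrative sketch, not part of tf_importer.cpp or of this patch: the custom-layer fallback in
// parseCustomLayer() above can only resolve an unknown TF op if the application has registered a
// cv::dnn layer whose type string matches the op name. The snippet below assumes a hypothetical op
// "MyCustomOp" and uses the public custom-layer API (cv::dnn::Layer and CV_DNN_REGISTER_LAYER_CLASS
// from <opencv2/dnn/layer.details.hpp>); the shape and forward logic are placeholders only.

#include <opencv2/dnn.hpp>
#include <opencv2/dnn/layer.details.hpp>

class MyCustomOpLayer CV_FINAL : public cv::dnn::Layer
{
public:
    MyCustomOpLayer(const cv::dnn::LayerParams& params) : Layer(params) {}

    // Report output shapes; here the single output simply mirrors the first input.
    bool getMemoryShapes(const std::vector<cv::dnn::MatShape>& inputs, const int /*requiredOutputs*/,
                         std::vector<cv::dnn::MatShape>& outputs,
                         std::vector<cv::dnn::MatShape>& /*internals*/) const CV_OVERRIDE
    {
        outputs.assign(1, inputs[0]);
        return false;
    }

    // Placeholder forward pass: copy the first input to the output.
    void forward(cv::InputArrayOfArrays inputs_arr, cv::OutputArrayOfArrays outputs_arr,
                 cv::OutputArrayOfArrays /*internals_arr*/) CV_OVERRIDE
    {
        std::vector<cv::Mat> inputs, outputs;
        inputs_arr.getMatVector(inputs);
        outputs_arr.getMatVector(outputs);
        inputs[0].copyTo(outputs[0]);
    }
};

// Call once from application code before cv::dnn::readNetFromTensorflow(), so that
// dstNet.addLayer(name, type, layerParams) in parseCustomLayer() can instantiate "MyCustomOp".
static void registerCustomTFLayers()
{
    CV_DNN_REGISTER_LAYER_CLASS(MyCustomOp, MyCustomOpLayer);
}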
- if (value_id.find(layer.input(i)) != value_id.end()) - { - Mat blob = getTensorContent(getConstBlob(layer, value_id, i)); - layerParams.blobs.push_back(blob); - } - else - inputsNames.push_back(layer.input(i)); - } - int id = dstNet.addLayer(name, type, layerParams); - layer_id[name] = id; +TFImporter::TFImporter(Net& net, const char *model, const char *config) + : dstNet(net), dispatch(buildDispatchMap()) +{ + if (model && model[0]) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from file: " << model); + ReadTFNetParamsFromBinaryFileOrDie(model, &netBin); + } + if (config && config[0]) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from file: " << config); + ReadTFNetParamsFromTextFileOrDie(config, &netTxt); + } - for (int i = 0; i < inputsNames.size(); ++i) - { - connect(layer_id, dstNet, parsePin(inputsNames[i]), id, i); - } + populateNet(); +} + +TFImporter::TFImporter( + Net& net, + const char *dataModel, size_t lenModel, + const char *dataConfig, size_t lenConfig +) + : dstNet(net), dispatch(buildDispatchMap()) +{ + if (dataModel != NULL && lenModel > 0) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from memory (" << lenModel << " bytes)"); + ReadTFNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBin); + } + if (dataConfig != NULL && lenConfig > 0) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from memory (" << lenConfig << " bytes)"); + ReadTFNetParamsFromTextBufferOrDie(dataConfig, lenConfig, &netTxt); + } + populateNet(); +} + +void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob) +{ + MatShape shape; + blobShapeFromTensor(tensor, shape); + int dims = (int)shape.size(); + + // TODO: other blob types + CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT || + tensor.dtype() == tensorflow::DT_HALF); + CV_Assert(dims == 4 || dims == 5); + + int out_c, input_c, depth, height, width; + if (dims == 4) + { + // REORDER kernel HWIO to OIHW + swap(shape[0], shape[2]); // IWHO + swap(shape[1], shape[3]); // IOHW + swap(shape[0], shape[1]); // OIHW + depth = 1; height = shape[2]; width = shape[3]; + } + else + { + // REORDER kernel DHWIO to OIDHW + swap(shape[0], shape[4]); // OHWID + swap(shape[1], shape[3]); // OIWHD + swap(shape[2], shape[4]); // OIDHW + depth = shape[2]; height = shape[3]; width = shape[4]; + } + out_c = shape[0]; input_c = shape[1]; + + dstBlob.create(shape, CV_32F); + + Mat tensorContent = getTensorContent(tensor, /*no copy*/false); + int size = tensorContent.total(); + CV_Assert(size == (int)dstBlob.total()); + + float *dstData = dstBlob.ptr(); + const float *data = reinterpret_cast(tensorContent.data); + + int total = out_c * input_c * depth * height * width; + for (int i_oc = 0; i_oc < out_c; i_oc++) { + for (int i_ic = 0; i_ic < input_c; i_ic++) { + for (int i_d = 0; i_d < depth; i_d++) { + for (int i_h = 0; i_h < height; i_h++) { + for (int i_w = 0; i_w < width; i_w++) { + int dst_i = input_c * depth * height * width * i_oc + + depth * height * width * i_ic + height * width * i_d + width * i_h + i_w; + int src_i = out_c * input_c * width * height * i_d + + out_c * input_c * width * i_h + out_c * input_c * i_w + out_c * i_ic + i_oc; + CV_Assert(dst_i < total); + CV_Assert(src_i < total); + dstData[dst_i] = data[src_i]; + } + } + } + } + } +} + +void TFImporter::connect(const std::map& layers_name_id_map, Net& network, const Pin& outPin, + const int input_layer_id, const int input_blob_id) +{ + std::map::const_iterator it = layers_name_id_map.find(outPin.name); + if 
(it == layers_name_id_map.end()) + CV_Error(Error::StsError, "Input layer not found: " + outPin.name); + + std::vector::iterator inpNameIt = std::find(netInputsNames.begin(), netInputsNames.end(), outPin.name); + int blobIndex; + if (inpNameIt == netInputsNames.end()) + blobIndex = outPin.blobIndex; + else + blobIndex = inpNameIt - netInputsNames.begin(); + network.connect(it->second, blobIndex, input_layer_id, input_blob_id); +} + +void TFImporter::connectToAllBlobs(const std::map& layer_id, Net& network, const Pin& outPin, + const int input_layer_id, const int input_blobs_count) +{ + for (int input_blob_id = 0; input_blob_id < input_blobs_count; input_blob_id++) + connect(layer_id, network, outPin, input_layer_id, input_blob_id); +} + +const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDef &layer, std::map const_layers, + int input_blob_index, int* actual_inp_blob_idx) { + if (input_blob_index == -1) { + for(int i = 0; i < layer.input_size(); i++) { + Pin input = parsePin(layer.input(i)); + if (const_layers.find(input.name) != const_layers.end()) { + if (input_blob_index != -1) + CV_Error(Error::StsError, "More than one input is Const op"); + + input_blob_index = i; + } + } + } + + if (input_blob_index == -1) + CV_Error(Error::StsError, "Const input blob for weights not found"); + + Pin kernel_inp = parsePin(layer.input(input_blob_index)); + if (const_layers.find(kernel_inp.name) == const_layers.end()) + CV_Error(Error::StsError, "Input [" + layer.input(input_blob_index) + + "] for node [" + layer.name() + "] not found"); + if (kernel_inp.blobIndex != 0) + CV_Error(Error::StsError, "Unsupported kernel input"); + + if(actual_inp_blob_idx) { + *actual_inp_blob_idx = input_blob_index; + } + + int nodeIdx = const_layers.at(kernel_inp.name); + if (nodeIdx < netBin.node_size() && netBin.node(nodeIdx).name() == kernel_inp.name) + { + return netBin.node(nodeIdx).attr().at("value").tensor(); + } + else + { + CV_Assert_N(nodeIdx < netTxt.node_size(), + netTxt.node(nodeIdx).name() == kernel_inp.name); + return netTxt.node(nodeIdx).attr().at("value").tensor(); + } +} + +static void addConstNodes(tensorflow::GraphDef& net, std::map& const_layers, + std::set& layers_to_ignore) +{ + CV_LOG_DEBUG(NULL, "DNN/TF: addConstNodes(): handling " << net.node_size() << " nodes..."); + for (int li = 0; li < net.node_size(); li++) + { + const tensorflow::NodeDef &layer = net.node(li); + String name = layer.name(); + String type = layer.op(); + + //CV_LOG_DEBUG(NULL, "DNN/TF: layer_id=" << li << " - '" << name << "' @ " << type); + + try + { + if (type == "Dequantize") + { + // Example of Dequantize node: + // name: "conv2d_1/bias" + // op: "Dequantize" + // input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8) + // input: "conv2d_1/bias_quantized_min" + // input: "conv2d_1/bias_quantized_max" + // attr { key: "T" value { type: DT_QUINT8 } } (quantized type) + // attr { key: "mode" value { s: "MIN_FIRST" } } (quantization technique) + CV_CheckEQ(layer.input_size(), 3, "Dequantize: 3 inputs is supported only"); + for (int i = 0; i < 3; ++i) + CV_Assert(const_layers.find(layer.input(i)) != const_layers.end()); + CV_Assert(hasLayerAttr(layer, "mode") && + getLayerAttr(layer, "mode").s() == "MIN_FIRST"); + + int tensorId = const_layers[layer.input(0)]; + int minId = const_layers[layer.input(1)]; + int maxId = const_layers[layer.input(2)]; + + tensorflow::TensorProto* tensor = net.mutable_node(tensorId) + ->mutable_attr()->at("value") + .mutable_tensor(); + 
CV_CheckEQ((int)tensor->dtype(), (int)tensorflow::DT_QUINT8, ""); + + Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor()); + Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor()); + CV_CheckEQ(qMin.total(), (size_t)1, ""); + CV_CheckTypeEQ(qMin.type(), CV_32FC1, ""); + CV_CheckEQ(qMax.total(), (size_t)1, ""); + CV_CheckTypeEQ(qMax.type(), CV_32FC1, ""); + + Mat content = getTensorContent(*tensor); + + float minVal = qMin.at(0); + float rangeScale = (qMax.at(0) - minVal) / 255; + CV_Assert(rangeScale >= 0); + content.convertTo(content, CV_32FC1, rangeScale, + rangeScale * cvRound(minVal / rangeScale)); + + tensor->set_dtype(tensorflow::DT_FLOAT); + tensor->set_tensor_content(content.data, content.total() * content.elemSize1()); + + net.mutable_node(tensorId)->set_name(name); + CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second); + layers_to_ignore.insert(name); + continue; + } + else if (type != "Const") + continue; // only Const parameters are supported + + if (layer.attr().find("value") != layer.attr().end()) + { + CV_Assert(const_layers.insert(std::make_pair(name, li)).second); + } + layers_to_ignore.insert(name); + } + catch (const std::exception& e) + { + CV_LOG_ERROR(NULL, "DNN/TF: Can't handle node='" << name << "'. Exception: " << e.what()); + throw; + } + } + CV_LOG_DEBUG(NULL, "DNN/TF: layers_to_ignore.size() = " << layers_to_ignore.size()); +} + +// If all inputs of specific layer have the same data layout we can say that +// this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise. +DataLayout TFImporter::predictOutputDataLayout(const tensorflow::NodeDef& layer) +{ + DataLayout layout = getDataLayout(layer); + if (layout != DATA_LAYOUT_UNKNOWN) + { + CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from attrs)"); + return layout; + } + + // Determine layout by layer's inputs + for (int i = 0, n = layer.input_size(); i < n; ++i) + { + std::map::const_iterator it = data_layouts.find(getNodeName(layer.input(i))); + if (it != data_layouts.end()) + { + if (layout != DATA_LAYOUT_UNKNOWN) + { + if (it->second != layout && it->second != DATA_LAYOUT_UNKNOWN) + return DATA_LAYOUT_UNKNOWN; + } + else + layout = it->second; + } + } + + if (layout != DATA_LAYOUT_UNKNOWN) + { + CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from inputs)"); + return layout; + } + + // Determine layout by layer's consumers recursively. + std::map::const_iterator it = data_layouts.find(layer.name()); + CV_Assert(it != data_layouts.end()); + return it->second; +} + +void TFImporter::populateNet() +{ + CV_Assert(netBin.ByteSize() || netTxt.ByteSize()); + + CV_LOG_INFO(NULL, "DNN/TF: parsing model" + << (netBin.has_versions() ? cv::format(" produced by TF v%d (min_consumer=%d)", (int)netBin.versions().producer(), (int)netBin.versions().min_consumer()) : cv::String(" (N/A version info)")) + << ". Number of nodes = " << netBin.node_size() + ); + + if (netTxt.ByteSize()) + { + CV_LOG_INFO(NULL, "DNN/TF: parsing config" + << (netTxt.has_versions() ? cv::format(" produced by TF v%d (min_consumer=%d)", (int)netTxt.versions().producer(), (int)netTxt.versions().min_consumer()) : cv::String(" (N/A version info)")) + << ". 
Number of nodes = " << netTxt.node_size() + ); + + RemoveIdentityOps(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes"); + RemoveIdentityOps(netTxt); + CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(config) => " << netTxt.node_size() << " nodes"); + + sortByExecutionOrder(netTxt); + CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(config) => " << netTxt.node_size() << " nodes"); + } + else + { + removePhaseSwitches(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: removePhaseSwitches(model) => " << netBin.node_size() << " nodes"); + + RemoveIdentityOps(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes"); + + simplifySubgraphs(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: simplifySubgraphs(model) => " << netBin.node_size() << " nodes"); + sortByExecutionOrder(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(model) => " << netBin.node_size() << " nodes"); + } + + tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin; + + int layersSize = net.node_size(); + + // Pre-fill data layouts where they are set explicitly. + // Assuming that nodes are in topological order + for (int i = layersSize - 1; i >= 0; --i) + { + const tensorflow::NodeDef& layer = net.node(i); + std::string name = layer.name(); + + CV_LOG_DEBUG(NULL, "DNN/TF: node(" << i << " - '" << name << "') propagating layout..."); + + try + { + DataLayout layout = getDataLayout(layer); + std::map::iterator it = data_layouts.find(name); + if (it != data_layouts.end()) + { + if (layout != DATA_LAYOUT_UNKNOWN) + { + if (it->second == DATA_LAYOUT_UNKNOWN) + it->second = layout; + else if (it->second != layout) + { + it->second = DATA_LAYOUT_UNKNOWN; + layout = DATA_LAYOUT_UNKNOWN; + } + } + else + layout = it->second; + } + else + data_layouts[name] = layout; + + // Specify input layers to have the same data layout. + for (int j = 0; j < layer.input_size(); ++j) + { + name = getNodeName(layer.input(j)); + it = data_layouts.find(name); + if (it != data_layouts.end()) + { + if (layout != DATA_LAYOUT_UNKNOWN) + { + if (it->second == DATA_LAYOUT_UNKNOWN) + it->second = layout; + else if (it->second != layout) + it->second = DATA_LAYOUT_UNKNOWN; + } + } + else + data_layouts[name] = layout; + } + } + catch (const std::exception& e) + { + CV_LOG_ERROR(NULL, "DNN/TF: Can't propagate layout for node='" << name << "'. 
Exception: " << e.what()); + throw; + } + } + + addConstNodes(netBin, value_id, layers_to_ignore); + addConstNodes(netTxt, value_id, layers_to_ignore); + + + for (int li = 0; li < layersSize; li++) + { + const tensorflow::NodeDef& layer = net.node(li); + + const std::string name = layer.name(); + const std::string type = layer.op(); + const int ninputs = layer.input_size(); + CV_LOG_DEBUG(NULL, "DNN/TF: (" << li << "/" << layersSize << ") Parse layer " << name << " @ " << type << " with " << ninputs << " inputs"); + + parseNode(layer); + } + + for (size_t i = 0; i < netInputsNames.size(); i++) + { + CV_LOG_DEBUG(NULL, "DNN/TF: Model input: " << i << " - '" << netInputsNames[i] << "'"); + CV_Assert(!netInputsNames[i].empty()); + } + dstNet.setInputsNames(netInputsNames); + CV_LOG_DEBUG(NULL, "DNN/TF: ===================== Import completed ====================="); +} + +void TFImporter::addPermuteLayer(const int* order, const std::string& permName, Pin& inpId) +{ + LayerParams permLP; + permLP.set("order", DictValue::arrayInt(order, 4)); + CV_Assert(layer_id.find(permName) == layer_id.end()); + int permId = dstNet.addLayer(permName, "Permute", permLP); + layer_id[permName] = permId; + connect(layer_id, dstNet, inpId, permId, 0); + inpId = Pin(permName); +} + +void TFImporter::parseNode(const tensorflow::NodeDef& layer) +{ + tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin; + + const std::string& name = layer.name(); + const std::string& type = layer.op(); + + try + { + LayerParams layerParams; + + if (layers_to_ignore.find(name) != layers_to_ignore.end()) + { + CV_LOG_DEBUG(NULL, "DNN/TF: ignored"); + return; + } + + DataLayout predictedLayout = predictOutputDataLayout(layer); + data_layouts[name] = predictedLayout; + + DispatchMap::const_iterator iter = dispatch.find(type); + if (iter != dispatch.end()) + { + ((*this).*(iter->second))(net, layer, layerParams); + } + else + { + parseCustomLayer(net, layer, layerParams); } } catch (const std::exception& e)