diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 4bf829cf2082fbe41a2f42f428962885ebec7bf3..8ed50ed815d63a5ca6540f601509a03912d1e5d8 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -81,7 +81,7 @@ public: Size outSize = Size(outputs[0].size[3], outputs[0].size[2]); getConvPoolPaddings(Size(input.size[3], input.size[2]), outSize, - kernel, stride, padMode, pad); + kernel, stride, padMode, dilation, pad); } bool hasBias() const @@ -183,7 +183,7 @@ public: } else { - getConvPoolOutParams(Size(inpW, inpH), kernel, stride, padMode, out); + getConvPoolOutParams(Size(inpW, inpH), kernel, stride, padMode, dilation, out); } int ngroups = inpCn / blobs[0].size[1]; diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp index d11739a8fbadf5dd92cf5a468e71f8d34aa1cedc..8552a4a0ccc43fa0f990ea865bf56e78213c6c87 100644 --- a/modules/dnn/src/layers/layers_common.cpp +++ b/modules/dnn/src/layers/layers_common.cpp @@ -167,12 +167,12 @@ void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &ke // we pad more on the right and bottom than on the top and left. 
void getConvPoolOutParams(const Size& inp, const Size &kernel, const Size &stride, const String &padMode, - Size& out) + const Size &dilation, Size& out) { if (padMode == "VALID") { - out.height = (inp.height - kernel.height + stride.height) / stride.height; - out.width = (inp.width- kernel.width + stride.width) / stride.width; + out.height = (inp.height - (dilation.height * (kernel.height - 1) + 1) + stride.height) / stride.height; + out.width = (inp.width - (dilation.width * (kernel.width - 1) + 1) + stride.width) / stride.width; } else if (padMode == "SAME") { @@ -187,7 +187,7 @@ void getConvPoolOutParams(const Size& inp, const Size &kernel, void getConvPoolPaddings(const Size& inp, const Size& out, const Size &kernel, const Size &stride, - const String &padMode, Size &pad) + const String &padMode, const Size &dilation, Size &pad) { if (padMode == "VALID") { @@ -195,8 +195,8 @@ void getConvPoolPaddings(const Size& inp, const Size& out, } else if (padMode == "SAME") { - int Ph = std::max(0, (out.height - 1) * stride.height + kernel.height - inp.height); - int Pw = std::max(0, (out.width - 1) * stride.width + kernel.width - inp.width); + int Ph = std::max(0, (out.height - 1) * stride.height + (dilation.height * (kernel.height - 1) + 1) - inp.height); + int Pw = std::max(0, (out.width - 1) * stride.width + (dilation.width * (kernel.width - 1) + 1) - inp.width); // For odd values of total padding, add more padding at the 'right' // side of the given dimension. 
pad = cv::Size(Pw / 2, Ph / 2); diff --git a/modules/dnn/src/layers/layers_common.hpp b/modules/dnn/src/layers/layers_common.hpp index c206f9faf18ee8c5421dee0194d5e276e5d96332..46170e9109ba450ff602103bb7d7fee05e5658f1 100644 --- a/modules/dnn/src/layers/layers_common.hpp +++ b/modules/dnn/src/layers/layers_common.hpp @@ -64,11 +64,11 @@ void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernel void getConvPoolOutParams(const Size& inp, const Size &kernel, const Size &stride, const String &padMode, - Size& out); + const Size &dilation, Size& out); void getConvPoolPaddings(const Size& inp, const Size& out, const Size &kernel, const Size &stride, - const String &padMode, Size &pad); + const String &padMode, const Size &dilation, Size &pad); } } diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index ce99552b825fe5cd65f8fc2c265ae5a0f16cb73c..11b320d17a6a2e7c7f00815dc919fbece0bd4325 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -93,7 +93,7 @@ public: kernel = inp; } - getConvPoolPaddings(inp, out, kernel, stride, padMode, pad); + getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad); } virtual bool supportBackend(int backendId) @@ -592,8 +592,7 @@ public: } else { - getConvPoolOutParams(in, kernel, stride, - padMode, out); + getConvPoolOutParams(in, kernel, stride, padMode, Size(1, 1), out); } outputs.resize(type == MAX ? 
2 * inputs.size() : inputs.size()); diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index de278423718549331f72edc4e7c0363a5ccebeeb..d7fd0767557127134b194010bfea27677e028dda 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -88,6 +88,8 @@ static Mat getTensorContent(const tensorflow::TensorProto &tensor) return Mat(1, content.size() / sizeof(float), CV_32FC1, (void*)content.c_str()).clone(); case tensorflow::DT_DOUBLE: return Mat(1, content.size() / sizeof(double), CV_64FC1, (void*)content.c_str()).clone(); + case tensorflow::DT_INT32: + return Mat(1, content.size() / sizeof(int32_t), CV_32SC1, (void*)content.c_str()).clone(); case tensorflow::DT_HALF: { Mat halfs; @@ -563,7 +565,7 @@ void TFImporter::populateNet(Net dstNet) for (int li = 0; li < layersSize; li++) { - const tensorflow::NodeDef &layer = net.node(li); + tensorflow::NodeDef layer = net.node(li); String name = layer.name(); String type = layer.op(); LayerParams layerParams; @@ -571,8 +573,38 @@ void TFImporter::populateNet(Net dstNet) if(layers_to_ignore.find(li) != layers_to_ignore.end()) continue; - if (type == "Conv2D") + if (type == "Conv2D" || type == "SpaceToBatchND") { + // The first node of dilated convolution subgraph. + // Extract input node, dilation rate and paddings. 
+ std::string input = layer.input(0); + if (type == "SpaceToBatchND") + { + // op: "SpaceToBatchND" + // input: "input" + // input: "SpaceToBatchND/block_shape" + // input: "SpaceToBatchND/paddings" + CV_Assert(layer.input_size() == 3); + + DictValue dilation = parseDims(getConstBlob(layer, value_id, 1)); + CV_Assert(dilation.size() == 2 && dilation.get<int>(0) == dilation.get<int>(1)); + layerParams.set("dilation", dilation.get<int>(0)); + + Mat paddings; + parseTensor<int>(getConstBlob(layer, value_id, 2), paddings); + + // paddings is a 2x2 matrix: [[top, bot], [left, right]] + layerParams.set("pad_h", paddings.at<float>(0)); + layerParams.set("pad_w", paddings.at<float>(2)); + + StrIntVector next_layers = getNextLayers(net, name, "Conv2D"); + CV_Assert(next_layers.size() == 1); + layer = net.node(next_layers[0].second); + layers_to_ignore[next_layers[0].second] = next_layers[0].first; + name = layer.name(); + type = layer.op(); + } + layerParams.set("bias_term", false); layerParams.blobs.resize(1); @@ -597,11 +629,21 @@ void TFImporter::populateNet(Net dstNet) setStrides(layerParams, layer); setPadding(layerParams, layer); + // The final node of dilated convolution subgraph. + next_layers = getNextLayers(net, name, "BatchToSpaceND"); + if (!next_layers.empty()) + { + layerParams.set("pad_mode", ""); // We use padding values. 
+ CV_Assert(next_layers.size() == 1); + ExcludeLayer(net, next_layers[0].second, 0, false); + layers_to_ignore[next_layers[0].second] = next_layers[0].first; + } + int id = dstNet.addLayer(name, "Convolution", layerParams); layer_id[name] = id; // one input only - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + connect(layer_id, dstNet, parsePin(input), id, 0); } else if (type == "BiasAdd" || type == "Add") { diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 4d2c64aa4357e22c9d1e3916044dc2b1e420a9ae..6d5df73d3c618dc71a314b06df8fd37dc3f5675a 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -96,6 +96,8 @@ static void runTensorFlowNet(const std::string& prefix, TEST(Test_TensorFlow, single_conv) { runTensorFlowNet("single_conv"); + runTensorFlowNet("atrous_conv2d_valid"); + runTensorFlowNet("atrous_conv2d_same"); } TEST(Test_TensorFlow, padding)