From d695208727c8bfcf3e1524a30ebe7a4d4ca24845 Mon Sep 17 00:00:00 2001
From: Liubov Batanina
Date: Mon, 3 Aug 2020 21:02:49 +0300
Subject: [PATCH] Merge pull request #17967 from l-bat:non_const_weights_for_conv

* Supported convolution with non-const weights

* Fix opencl blobs

* Update tests
---
 modules/dnn/src/layers/convolution_layer.cpp | 166 +++++++++++++------
 modules/dnn/src/onnx/onnx_importer.cpp       |   9 +-
 modules/dnn/test/test_onnx_importer.cpp      |  56 +++++++
 3 files changed, 178 insertions(+), 53 deletions(-)

diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 17fadd93ec..b6532f23d3 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -106,18 +106,19 @@ public:
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);
 
-        CV_Assert(inputs.size() > 0);
+        CV_Assert((inputs.size() > outputs.size() && blobs.empty()) ||
+                  (!inputs.empty() && (blobs.size() == 1 || blobs.size() == 2)));
+        MatSize weightShape = blobs.empty() ? inputs[1].size : blobs[0].size;
 
-        CV_Assert(blobs.size() == 1 || blobs.size() == 2);
         CV_Assert(inputs[0].dims == outputs[0].dims);
-        CV_Assert(blobs[0].dims == kernel_size.size() + 2);
+        CV_Assert(weightShape.dims() == kernel_size.size() + 2);
         for (int i = 0; i < kernel_size.size(); i++) {
-            CV_Assert(blobs[0].size[i + 2] == kernel_size[i]);
+            CV_Assert(weightShape[i + 2] == kernel_size[i]);
         }
 
         const Mat &input = inputs[0];
         CV_Assert((input.dims == 4 || input.dims == 5) && (input.type() == CV_32F || input.type() == CV_16S));
-        for (size_t i = 0; i < inputs.size(); i++)
+        for (size_t i = 0; i < outputs.size(); i++)
         {
             CV_Assert(inputs[i].type() == input.type());
             CV_Assert((inputs[i].dims == 4 || inputs[i].dims == 5) && inputs[i].size[1] == input.size[1]);
@@ -245,6 +246,7 @@ public:
 
     MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE
     {
+        CV_Assert(!blobs.empty());
         int dims = inpShape.size();
         int inpD = dims == 5 ? inpShape[2] : 1;
         int inpH = inpShape[dims - 2];
@@ -262,12 +264,14 @@ public:
         {
             if (kernel_size.size() == 3)
                 return preferableTarget == DNN_TARGET_CPU;
+            if ((backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || preferableTarget != DNN_TARGET_MYRIAD) && blobs.empty())
+                return false;
             return (preferableTarget != DNN_TARGET_MYRIAD || dilation.width == dilation.height);
         }
         else
 #endif
             return (kernel_size.size() == 3 && preferableTarget == DNN_TARGET_CPU && backendId == DNN_BACKEND_OPENCV) ||
-                   (kernel_size.size() == 2 && (backendId == DNN_BACKEND_OPENCV || backendId == DNN_BACKEND_HALIDE));
+                   (kernel_size.size() == 2 && (backendId == DNN_BACKEND_OPENCV || (backendId == DNN_BACKEND_HALIDE && !blobs.empty())));
     }
 
     bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -275,16 +279,16 @@ public:
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const CV_OVERRIDE
     {
-        CV_Assert(blobs.size() != 0);
-        CV_Assert(!hasBias() || blobs[1].total() == (size_t)blobs[0].size[0]);
-        CV_Assert(inputs.size() == (size_t)1);
+        CV_Assert(!blobs.empty() || inputs.size() > 1);
+        const int* weightShape = blobs.empty() ? &inputs[1][0] : blobs[0].size.p;
+        CV_Assert(!hasBias() || blobs[1].total() == (size_t)weightShape[0]);
 
         internals.clear();
 
         CV_Assert(inputs.size() != 0);
         std::vector<int> inpShape(inputs[0].begin() + 2, inputs[0].end());
 
-        int outCn = blobs[0].size[0];
+        int outCn = weightShape[0];
         std::vector<int> outShape;
         outShape.push_back(inputs[0][0]);
         outShape.push_back(outCn);
@@ -300,10 +304,10 @@ public:
             getConvPoolOutParams(inpShape, kernel_size, strides, padMode, dilations, outShape);
         }
 
-        int ngroups = inpCn / blobs[0].size[1];
-        if (ngroups == 0 || ngroups * blobs[0].size[1] != inpCn)
+        int ngroups = inpCn / weightShape[1];
+        if (ngroups == 0 || ngroups * weightShape[1] != inpCn)
             CV_Error(Error::StsError, format("Number of input channels should "
-                     "be multiple of %d but got %d", blobs[0].size[1], inpCn));
+                     "be multiple of %d but got %d", weightShape[1], inpCn));
         CV_Assert(ngroups > 0 && inpCn % ngroups == 0 && outCn % ngroups == 0);
 
         outputs.resize(1, outShape);
@@ -315,15 +319,15 @@ public:
     {
         BaseConvolutionLayerImpl::finalize(inputs_arr, outputs_arr);
 
-        CV_Assert(!blobs.empty());
-        const int outCn = blobs[0].size[0];
+        std::vector<Mat> inputs;
+        inputs_arr.getMatVector(inputs);
         // prepare weightsMat where each row is aligned and has enough zero padding on the right to
         // use vectorized (i.e. with intrinsics) loops without tail processing
-        Mat wm = blobs[0].reshape(1, outCn);
+        Mat wm = blobs.empty() ? inputs[1].reshape(1, numOutput) : blobs[0].reshape(1, numOutput);
         if( wm.step1() % VEC_ALIGN != 0 )
         {
             int newcols = (int)alignSize(wm.step1(), VEC_ALIGN);
-            Mat wm_buffer = Mat(outCn, newcols, wm.type());
+            Mat wm_buffer = Mat(numOutput, newcols, wm.type());
             Mat wm_padding = wm_buffer.colRange(wm.cols, newcols);
             wm_padding.setTo(Scalar::all(0.));
             Mat wm_aligned = wm_buffer.colRange(0, wm.cols);
@@ -331,18 +335,18 @@ public:
             wm = wm_aligned;
         }
         weightsMat = wm;
-        weightsMultipliers.assign(outCn, 1.0);
+        weightsMultipliers.assign(numOutput, 1.0);
 
-        Mat biasMat = hasBias() ? blobs[1].reshape(1, outCn) : Mat();
-        biasvec.resize(outCn+2);
+        Mat biasMat = hasBias() ? blobs[1].reshape(1, numOutput) : Mat();
+        biasvec.resize(numOutput+2);
         if( biasMat.empty() )
         {
-            for(int i = 0; i < outCn; i++ )
+            for(int i = 0; i < numOutput; i++ )
                 biasvec[i] = 0.f;
         }
         else
         {
-            for(int i = 0; i < outCn; i++ )
+            for(int i = 0; i < numOutput; i++ )
                 biasvec[i] = biasMat.at<float>(i);
         }
 #ifdef HAVE_OPENCL
@@ -352,7 +356,7 @@ public:
 
     bool setActivation(const Ptr<ActivationLayer>& layer) CV_OVERRIDE
     {
-        if (!activ.empty() && !layer.empty())
+        if ((!activ.empty() && !layer.empty()) || blobs.empty())
             return false;
 
         activ = layer;
@@ -537,37 +541,48 @@ public:
     virtual Ptr<BackendNode> initNgraph(const std::vector<Ptr<BackendWrapper> > &inputs,
                                         const std::vector<Ptr<BackendNode> >& nodes) CV_OVERRIDE
     {
-        CV_Assert_N(inputs.size() == 1, nodes.size() == 1);
+        CV_Assert_N(inputs.size() >= 1, nodes.size() >= 1);
         auto& ieInpNode = nodes[0].dynamicCast<InfEngineNgraphNode>()->node;
         std::vector<size_t> dims = ieInpNode->get_shape();
         CV_Assert(dims.size() == 4 || dims.size() == 5);
+        std::shared_ptr<ngraph::Node> ieWeights = nodes.size() > 1 ? nodes[1].dynamicCast<InfEngineNgraphNode>()->node : nullptr;
         const int inpCn = dims[1];
-        const int outCn = blobs[0].size[0];
-        const int inpGroupCn = blobs[0].size[1];
+        const int inpGroupCn = nodes.size() > 1 ? ieWeights->get_shape()[1] : blobs[0].size[1];
         const int group = inpCn / inpGroupCn;
 
-        std::vector<size_t> kernel_shape = getShape<size_t>(blobs[0]);
+        std::vector<size_t> kernel_shape;
         if (group != 1)
         {
-            kernel_shape[0] /= group;
-            kernel_shape.insert(kernel_shape.begin(), group);
+            kernel_shape.push_back(group);
         }
+        kernel_shape.push_back(numOutput / group);
+        kernel_shape.push_back(inpCn / group);
+        std::copy(kernel_size.begin(), kernel_size.end(), back_inserter(kernel_shape));
 
-        auto ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, blobs[0].data);
-        if (fusedWeights)
+        if (nodes.size() == 1)
         {
-            if (weightsMat.isContinuous())
-            {
-                ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, weightsMat.data);
-            }
-            else
+            ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, blobs[0].data);
+            if (fusedWeights)
             {
-                Mat newWeights;
-                Mat cvWeights = weightsMat.colRange(0, blobs[0].total() / outCn);
-                cvWeights.copyTo(newWeights);
-                ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, newWeights.data);
+                if (weightsMat.isContinuous())
+                {
+                    ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, weightsMat.data);
+                }
+                else
+                {
+                    Mat newWeights;
+                    Mat cvWeights = weightsMat.colRange(0, blobs[0].total() / numOutput);
+                    cvWeights.copyTo(newWeights);
+                    ieWeights = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, kernel_shape, newWeights.data);
+                }
             }
         }
+        else
+        {
+            auto shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
+                                                                ngraph::Shape{kernel_shape.size()}, kernel_shape.data());
+            ieWeights = std::make_shared<ngraph::op::v1::Reshape>(ieWeights, shape, true);
+        }
 
         ngraph::op::PadType pad_type = ngraph::op::PadType::EXPLICIT;
         if (!padMode.empty())
@@ -592,11 +607,21 @@ public:
                                                           pad_type);
         }
 
-        if (hasBias() || fusedBias)
+        if (hasBias() || fusedBias || nodes.size() == 3)
         {
             std::vector<size_t> shape(conv_node->get_shape().size(), 1);
-            shape[1] = outCn;
-            auto bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), biasvec.data());
+            shape[1] = conv_node->get_shape()[1];
+            std::shared_ptr<ngraph::Node> bias;
+            if (nodes.size() == 3)
+            {
+                auto bias_shape = std::make_shared<ngraph::op::Constant>(ngraph::element::i64,
+                                                                         ngraph::Shape{shape.size()}, shape.data());
+                bias = std::make_shared<ngraph::op::v1::Reshape>(nodes[2].dynamicCast<InfEngineNgraphNode>()->node, bias_shape, true);
+            }
+            else
+            {
+                bias = std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape(shape), biasvec.data());
+            }
             auto conv_bias = std::make_shared<ngraph::op::v1::Add>(conv_node, bias, ngraph::op::AutoBroadcastType::NUMPY);
             return Ptr<BackendNode>(new InfEngineNgraphNode(conv_bias));
         }
@@ -1103,6 +1128,26 @@ public:
             for (int i = 0; i < inputs.size(); ++i)
                 CV_Assert(inputs[i].u != outputs[0].u);
 
+            if (blobs.empty())
+            {
+                size_t n = inputs.size() - 1;
+                umat_blobs.resize(n);
+                for (size_t i = 0; i < n; i++)
+                {
+                    if (use_half)
+                    {
+                        Mat matFP32;
+                        convertFp16(inputs[i + 1], matFP32);
+                        matFP32.copyTo(umat_blobs[i]);
+                    }
+                    else
+                    {
+                        inputs[i + 1].copyTo(umat_blobs[i]);
+                    }
+                }
+                inputs.resize(1);
+            }
+
             if (umat_blobs.empty())
             {
                 size_t n = blobs.size();
@@ -1113,7 +1158,7 @@ public:
                 }
             }
 
-            if (convolutionOp.empty())
+            if (convolutionOp.empty() || blobs.empty())
             {
                 OCL4DNNConvConfig config;
                 config.in_shape = shape(inputs[0]);
@@ -1123,7 +1168,7 @@ public:
                 config.stride = stride;
                 config.dilation = dilation;
                 config.group = inputs[0].size[1] / umat_blobs[0].size[1];
-                config.bias_term = (hasBias()) ? true : false;
+                config.bias_term = umat_blobs.size() == 2;
                 config.use_half = use_half;
 
                 convolutionOp = Ptr<OCL4DNNConvSpatial<float> >(new OCL4DNNConvSpatial<float>(config));
@@ -1250,16 +1295,37 @@ public:
         inputs_arr.getMatVector(inputs);
         outputs_arr.getMatVector(outputs);
 
+        int outCn = blobs.empty() ? inputs[1].size[0] : blobs[0].size[0];
+        // Need to align non-const blobs
+        if (blobs.empty())
+        {
+            Mat wm = inputs[1].reshape(1, outCn);
+            if( wm.step1() % VEC_ALIGN != 0 )
+            {
+                wm.copyTo(weightsMat);
+                if (inputs.size() > 2)
+                {
+                    Mat biasMat = inputs[2].reshape(1, outCn);
+                    biasMat.col(0).copyTo(biasvec);
+                    biasvec.resize(outCn + 2);
+                }
+                else
+                {
+                    biasvec.resize(outCn + 2, 0);
+                }
+            }
+        }
+
         /*printf("conv %s: input (%d x %d x %d x %d), kernel (%d x %d), pad (%d x %d), stride (%d x %d), dilation (%d x %d)\n",
                name.c_str(), inputs[0].size[0], inputs[0].size[1], inputs[0].size[2], inputs[0].size[3],
                kernel.width, kernel.height, pad.width, pad.height,
                stride.width, stride.height, dilation.width, dilation.height);*/
-        CV_Assert_N(inputs.size() == (size_t)1, inputs[0].size[1] % blobs[0].size[1] == 0,
+        int inpGroupCn = blobs.empty() ? inputs[1].size[1] : blobs[0].size[1];
+        CV_Assert_N(inputs.size() >= (size_t)1, inputs[0].size[1] % inpGroupCn == 0,
                     outputs.size() == 1, inputs[0].data != outputs[0].data);
 
-        int ngroups = inputs[0].size[1]/blobs[0].size[1];
+        int ngroups = inputs[0].size[1] / inpGroupCn;
         CV_Assert(outputs[0].size[1] % ngroups == 0);
-        int outCn = blobs[0].size[0];
 
         reluslope.clear();
         if( activ )
@@ -1328,11 +1394,11 @@ public:
     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
                            const std::vector<MatShape> &outputs) const CV_OVERRIDE
     {
-        CV_Assert(inputs.size() == outputs.size());
+        CV_Assert(inputs.size() == outputs.size() || inputs.size() == outputs.size() + blobs.size());
 
         int64 flops = 0;
         int karea = std::accumulate(kernel_size.begin(), kernel_size.end(), 1, std::multiplies<size_t>());
-        for (int i = 0; i < inputs.size(); i++)
+        for (int i = 0; i < outputs.size(); i++)
         {
             flops += total(outputs[i])*(CV_BIG_INT(2)*karea*inputs[i][1] + 1);
         }
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 220cae813e..407dcdc570 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -1003,10 +1003,13 @@ void ONNXImporter::populateNet(Net dstNet)
             CV_Assert(node_proto.input_size() >= 2);
             layerParams.type = "Convolution";
             for (int j = 1; j < node_proto.input_size(); j++) {
-                layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));
+                if (constBlobs.find(node_proto.input(j)) != constBlobs.end())
+                {
+                    layerParams.blobs.push_back(getBlob(node_proto, constBlobs, j));
+                }
             }
-            layerParams.set("num_output", layerParams.blobs[0].size[0]);
-            layerParams.set("bias_term", node_proto.input_size() == 3);
+            int outCn = layerParams.blobs.empty() ? outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0];
+            layerParams.set("num_output", outCn);
         }
         else if (layer_type == "ConvTranspose")
         {
diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index e932bc6919..6a9e68dbc5 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -111,6 +111,62 @@ TEST_P(Test_ONNX_layers, Convolution)
     testONNXModels("convolution");
 }
 
+TEST_P(Test_ONNX_layers, Convolution_variable_weight)
+{
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
+         backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+
+    String basename = "conv_variable_w";
+    Net net = readNetFromONNX(_tf("models/" + basename + ".onnx"));
+    ASSERT_FALSE(net.empty());
+
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);
+
+    for (int i = 0; i < 2; i++)
+    {
+        Mat input = blobFromNPY(_tf("data/input_" + basename + format("_%d", i) + "_0.npy"));
+        Mat weights = blobFromNPY(_tf("data/input_" + basename + format("_%d", i) + "_1.npy"));
+        Mat ref = blobFromNPY(_tf("data/output_" + basename + format("_%d", i) + ".npy"));
+
+        net.setInput(input, "0");
+        net.setInput(weights, "1");
+
+        Mat out = net.forward();
+        normAssert(ref, out, "", default_l1, default_lInf);
+    }
+}
+
+TEST_P(Test_ONNX_layers, Convolution_variable_weight_bias)
+{
+    if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH ||
+         backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) && target == DNN_TARGET_MYRIAD)
+        applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH);
+
+    String basename = "conv_variable_wb";
+    Net net = readNetFromONNX(_tf("models/" + basename + ".onnx"));
+    ASSERT_FALSE(net.empty());
+
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);
+
+    for (int i = 0; i < 2; i++)
+    {
+        Mat input = blobFromNPY(_tf("data/input_" + basename + format("_%d", i) + "_0.npy"));
+        Mat weights = blobFromNPY(_tf("data/input_" + basename + format("_%d", i) + "_1.npy"));
+        Mat bias = blobFromNPY(_tf("data/input_" + basename + format("_%d", i) + "_2.npy"));
+        Mat ref = blobFromNPY(_tf("data/output_" + basename + format("_%d", i) + ".npy"));
+
+        net.setInput(input, "0");
+        net.setInput(weights, "1");
+        net.setInput(bias, "bias");
+
+        Mat out = net.forward();
+        normAssert(ref, out, "", default_l1, default_lInf);
+    }
+}
+
 TEST_P(Test_ONNX_layers, Gather)
 {
     if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD)
--
GitLab
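
A note on usage: with this patch applied, a Conv node whose weight tensor comes from another network input is imported with empty layer blobs, and the weights (plus an optional bias as a third input) are bound at run time through Net::setInput like any other tensor, exactly as the two tests above do. Below is a minimal stand-alone sketch of that call pattern; the model file name, the input names "0" and "1", and the tensor shapes are illustrative assumptions, not artifacts shipped with this patch.

    #include <opencv2/dnn.hpp>

    int main()
    {
        using namespace cv;
        using namespace cv::dnn;

        // Hypothetical ONNX model whose Conv node takes its weights from a
        // graph input named "1" instead of a constant initializer.
        Net net = readNetFromONNX("conv_variable_w.onnx");

        // NCHW data tensor and OIHW weight tensor; these shapes are made up
        // for illustration and must match whatever model is actually loaded.
        int inpShape[] = {1, 3, 5, 5};
        int wShape[]   = {2, 3, 3, 3};
        Mat input(4, inpShape, CV_32F, Scalar(1));
        Mat weights(4, wShape, CV_32F, Scalar(0.5));

        // Weights are fed exactly like data and may change between forwards.
        net.setInput(input, "0");
        net.setInput(weights, "1");

        Mat out = net.forward();  // 1 x 2 x 3 x 3 for the shapes above
        return 0;
    }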