diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 2517499cbf0a6e8009dd3cfe24a14b3970908510..32505014682b41c14e2ba8798c32b51bd360d8d9 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -824,15 +824,9 @@ public:
         for (int i = 0; i < inputs.size(); ++i)
             CV_Assert(inputs[i].u != outputs[0].u);
 
-        int inpH = inputs[0].size[2];
-        int inpW = inputs[0].size[3];
-        int out_h = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1;
-        int out_w = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1;
-        if (out_h != outputs[0].size[2] || out_w != outputs[0].size[3])
+        if (padMode == "SAME")
             return false;
 
-        int group = inputs[0].size[1] / umat_blobs[0].size[1];
-
         if (convolutionOp.empty())
         {
             OCL4DNNConvConfig config;
@@ -842,7 +836,7 @@ public:
             config.pad = pad;
             config.stride = stride;
             config.dilation = dilation;
-            config.group = group;
+            config.group = inputs[0].size[1] / umat_blobs[0].size[1];
             config.bias_term = (hasBias()) ? true : false;
 
             convolutionOp = Ptr<OCL4DNNConvSpatial<float> >(new OCL4DNNConvSpatial<float>(config));
diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp
index 43445890f0329df0fa4e80ddedbf08806deb0426..54b8c145c58fe6bc771a39649f403bf075029936 100644
--- a/modules/dnn/src/layers/normalize_bbox_layer.cpp
+++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp
@@ -105,6 +105,18 @@ public:
                 float norm = pow(absSum, 1.0f / pnorm);
                 multiply(src, 1.0f / norm, dst);
             }
+            else
+            {
+                Mat norm;
+                reduce(buffer, norm, 0, REDUCE_SUM);
+                norm += epsilon;
+
+                // compute inverted norm to call multiply() instead of divide()
+                cv::pow(norm, -1.0f / pnorm, norm);
+
+                repeat(norm, channels, 1, buffer);
+                multiply(src, buffer, dst);
+            }
 
             if (!blobs.empty())
             {
diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
index 2f5f0412af2e91206f1ddf37da21928b3b5ab7e1..10952edd94bdb91b05fd5baffd35da7fd220b046 100644
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@@ -145,6 +145,9 @@ public:
         inps.getUMatVector(inputs);
         outs.getUMatVector(outputs);
 
+        if (type == AVE && padMode == "SAME")
+            return false;
+
         if (poolOp.empty())
         {
             OCL4DNNPoolConfig config;
diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp
index 89ed5d0cbbbab8541dcb307cd6f2bb9086b71167..2f6c7804cbb6f13d3f552f965cda6a45d05245bd 100644
--- a/modules/dnn/test/test_backends.cpp
+++ b/modules/dnn/test/test_backends.cpp
@@ -222,9 +222,7 @@ TEST_P(DNNTestNetwork, OpenFace)
 
 TEST_P(DNNTestNetwork, opencv_face_detector)
 {
-    if (backend == DNN_BACKEND_HALIDE ||
-        backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL)
-        throw SkipTestException("");
+    if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
     Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
     Mat inp = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
     processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt",
@@ -233,9 +231,7 @@ TEST_P(DNNTestNetwork, opencv_face_detector)
 
 TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
 {
-    if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL ||
-        backend == DNN_BACKEND_HALIDE)
-        throw SkipTestException("");
+    if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
     Mat sample = imread(findDataFile("dnn/street.png", false));
     Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
     processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt",
diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp
index b1d1ef8ac3dc2bc7925e2fa06971bc858139773c..8dc8b7ff1af81eb1749a453ead6311058822372b 100644
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@@ -456,16 +456,21 @@ TEST(Test_Caffe, multiple_inputs)
     normAssert(out, first_image + second_image);
 }
 
-typedef testing::TestWithParam<std::string> opencv_face_detector;
+CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL)
+typedef testing::TestWithParam<tuple<std::string, DNNTarget> > opencv_face_detector;
 TEST_P(opencv_face_detector, Accuracy)
 {
     std::string proto = findDataFile("dnn/opencv_face_detector.prototxt", false);
-    std::string model = findDataFile(GetParam(), false);
+    std::string model = findDataFile(get<0>(GetParam()), false);
+    dnn::Target targetId = (dnn::Target)(int)get<1>(GetParam());
 
     Net net = readNetFromCaffe(proto, model);
 
     Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
     Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(targetId);
+
     net.setInput(blob);
     // Output has shape 1x1xNx7 where N - number of detections.
     // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
@@ -479,11 +484,13 @@
                                     0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
     normAssert(out.reshape(1, out.total() / 7).rowRange(0, 6).colRange(2, 7), ref);
 }
-INSTANTIATE_TEST_CASE_P(Test_Caffe, opencv_face_detector, Values(
-    "dnn/opencv_face_detector.caffemodel",
-    "dnn/opencv_face_detector_fp16.caffemodel"
-));
-
+INSTANTIATE_TEST_CASE_P(Test_Caffe, opencv_face_detector,
+    Combine(
+        Values("dnn/opencv_face_detector.caffemodel",
+               "dnn/opencv_face_detector_fp16.caffemodel"),
+        Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL)
+    )
+);
 
 TEST(Test_Caffe, FasterRCNN_and_RFCN)
 {
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index a2b6df6e936804ea7335310dbe5a8a2d43858e0c..15b293a53f446f862ca547f149da156cb209685d 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -317,11 +317,44 @@ OCL_TEST(Test_TensorFlow, MobileNet_SSD)
     std::vector<Mat> output;
     net.forward(output, outNames);
 
-    normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1), "", 1e-5, 1.5e-4);
+    normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1));
     normAssert(target[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 3e-4);
     normAssert(target[2].reshape(1, 1), output[2].reshape(1, 1), "", 4e-5, 1e-2);
 }
 
+OCL_TEST(Test_TensorFlow, Inception_v2_SSD)
+{
+    std::string proto = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pbtxt", false);
+    std::string model = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pb", false);
+
+    Net net = readNetFromTensorflow(model, proto);
+    Mat img = imread(findDataFile("dnn/street.png", false));
+    Mat blob = blobFromImage(img, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false);
+
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(DNN_TARGET_OPENCL);
+
+    net.setInput(blob);
+    // Output has shape 1x1xNx7, where N is the number of detections.
+    // Every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
+    Mat out = net.forward();
+    out = out.reshape(1, out.total() / 7);
+
+    Mat detections;
+    for (int i = 0; i < out.rows; ++i)
+    {
+        if (out.at<float>(i, 2) > 0.5)
+            detections.push_back(out.row(i).colRange(1, 7));
+    }
+
+    Mat ref = (Mat_<float>(5, 6) << 1, 0.90176028, 0.19872092, 0.36311883, 0.26461923, 0.63498729,
+                                    3, 0.93569964, 0.64865261, 0.45906419, 0.80675775, 0.65708131,
+                                    3, 0.75838411, 0.44668293, 0.45907149, 0.49459291, 0.52197015,
+                                    10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527,
+                                    10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384);
+    normAssert(detections, ref);
+}
+
 TEST(Test_TensorFlow, lstm)
 {
     runTensorFlowNet("lstm", DNN_TARGET_CPU, true);