diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 2517499cbf0a6e8009dd3cfe24a14b3970908510..32505014682b41c14e2ba8798c32b51bd360d8d9 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -824,15 +824,9 @@ public:
         for (int i = 0; i < inputs.size(); ++i)
             CV_Assert(inputs[i].u != outputs[0].u);
 
-        int inpH = inputs[0].size[2];
-        int inpW = inputs[0].size[3];
-        int out_h = (inpH + 2 * pad.height - (dilation.height * (kernel.height - 1) + 1)) / stride.height + 1;
-        int out_w = (inpW + 2 * pad.width - (dilation.width * (kernel.width - 1) + 1)) / stride.width + 1;
-        if (out_h != outputs[0].size[2] || out_w != outputs[0].size[3])
+        if (padMode == "SAME")
             return false;
 
-        int group = inputs[0].size[1] / umat_blobs[0].size[1];
-
         if (convolutionOp.empty())
         {
             OCL4DNNConvConfig config;
@@ -842,7 +836,7 @@ public:
             config.pad = pad;
             config.stride = stride;
             config.dilation = dilation;
-            config.group = group;
+            config.group = inputs[0].size[1] / umat_blobs[0].size[1];
             config.bias_term = (hasBias()) ? true : false;
 
             convolutionOp = Ptr<OCL4DNNConvSpatial<float> >(new OCL4DNNConvSpatial<float>(config));
diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp
index 43445890f0329df0fa4e80ddedbf08806deb0426..54b8c145c58fe6bc771a39649f403bf075029936 100644
--- a/modules/dnn/src/layers/normalize_bbox_layer.cpp
+++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp
@@ -105,6 +105,18 @@ public:
                 float norm = pow(absSum, 1.0f / pnorm);
                 multiply(src, 1.0f / norm, dst);
             }
+            else
+            {
+                Mat norm;
+                reduce(buffer, norm, 0, REDUCE_SUM);
+                norm += epsilon;
+
+                // compute inverted norm to call multiply() instead of divide()
+                cv::pow(norm, -1.0f / pnorm, norm);
+
+                repeat(norm, channels, 1, buffer);
+                multiply(src, buffer, dst);
+            }
 
             if (!blobs.empty())
             {
diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
index 2f5f0412af2e91206f1ddf37da21928b3b5ab7e1..10952edd94bdb91b05fd5baffd35da7fd220b046 100644
--- a/modules/dnn/src/layers/pooling_layer.cpp
+++ b/modules/dnn/src/layers/pooling_layer.cpp
@@ -145,6 +145,9 @@ public:
         inps.getUMatVector(inputs);
         outs.getUMatVector(outputs);
 
+        if (type == AVE && padMode == "SAME")
+            return false;
+
         if (poolOp.empty())
         {
             OCL4DNNPoolConfig config;
diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp
index 89ed5d0cbbbab8541dcb307cd6f2bb9086b71167..2f6c7804cbb6f13d3f552f965cda6a45d05245bd 100644
--- a/modules/dnn/test/test_backends.cpp
+++ b/modules/dnn/test/test_backends.cpp
@@ -222,9 +222,7 @@ TEST_P(DNNTestNetwork, OpenFace)
 
 TEST_P(DNNTestNetwork, opencv_face_detector)
 {
-    if (backend == DNN_BACKEND_HALIDE ||
-        backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL)
-        throw SkipTestException("");
+    if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
     Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
     Mat inp = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
     processNet("dnn/opencv_face_detector.caffemodel", "dnn/opencv_face_detector.prototxt",
@@ -233,9 +231,7 @@ TEST_P(DNNTestNetwork, opencv_face_detector)
 
 TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
 {
-    if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL ||
-        backend == DNN_BACKEND_HALIDE)
-        throw SkipTestException("");
+    if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
     Mat sample = imread(findDataFile("dnn/street.png", false));
     Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
     processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt",
diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp
index b1d1ef8ac3dc2bc7925e2fa06971bc858139773c..8dc8b7ff1af81eb1749a453ead6311058822372b 100644
--- a/modules/dnn/test/test_caffe_importer.cpp
+++ b/modules/dnn/test/test_caffe_importer.cpp
@@ -456,16 +456,21 @@ TEST(Test_Caffe, multiple_inputs)
     normAssert(out, first_image + second_image);
 }
 
-typedef testing::TestWithParam<std::string> opencv_face_detector;
+CV_ENUM(DNNTarget, DNN_TARGET_CPU, DNN_TARGET_OPENCL)
+typedef testing::TestWithParam<tuple<std::string, DNNTarget> > opencv_face_detector;
 TEST_P(opencv_face_detector, Accuracy)
 {
     std::string proto = findDataFile("dnn/opencv_face_detector.prototxt", false);
-    std::string model = findDataFile(GetParam(), false);
+    std::string model = findDataFile(get<0>(GetParam()), false);
+    dnn::Target targetId = (dnn::Target)(int)get<1>(GetParam());
 
     Net net = readNetFromCaffe(proto, model);
 
     Mat img = imread(findDataFile("gpu/lbpcascade/er.png", false));
     Mat blob = blobFromImage(img, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false);
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(targetId);
+
     net.setInput(blob);
     // Output has shape 1x1xNx7 where N - number of detections.
     // An every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
@@ -479,11 +484,13 @@
                                     0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801);
     normAssert(out.reshape(1, out.total() / 7).rowRange(0, 6).colRange(2, 7), ref);
 }
-INSTANTIATE_TEST_CASE_P(Test_Caffe, opencv_face_detector, Values(
-    "dnn/opencv_face_detector.caffemodel",
-    "dnn/opencv_face_detector_fp16.caffemodel"
-));
-
+INSTANTIATE_TEST_CASE_P(Test_Caffe, opencv_face_detector,
+    Combine(
+        Values("dnn/opencv_face_detector.caffemodel",
+               "dnn/opencv_face_detector_fp16.caffemodel"),
+        Values(DNN_TARGET_CPU, DNN_TARGET_OPENCL)
+    )
+);
 
 TEST(Test_Caffe, FasterRCNN_and_RFCN)
 {
diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp
index a2b6df6e936804ea7335310dbe5a8a2d43858e0c..15b293a53f446f862ca547f149da156cb209685d 100644
--- a/modules/dnn/test/test_tf_importer.cpp
+++ b/modules/dnn/test/test_tf_importer.cpp
@@ -317,11 +317,44 @@ OCL_TEST(Test_TensorFlow, MobileNet_SSD)
     std::vector<Mat> output;
     net.forward(output, outNames);
 
-    normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1), "", 1e-5, 1.5e-4);
+    normAssert(target[0].reshape(1, 1), output[0].reshape(1, 1));
     normAssert(target[1].reshape(1, 1), output[1].reshape(1, 1), "", 1e-5, 3e-4);
     normAssert(target[2].reshape(1, 1), output[2].reshape(1, 1), "", 4e-5, 1e-2);
 }
 
+OCL_TEST(Test_TensorFlow, Inception_v2_SSD)
+{
+    std::string proto = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pbtxt", false);
+    std::string model = findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pb", false);
+
+    Net net = readNetFromTensorflow(model, proto);
+    Mat img = imread(findDataFile("dnn/street.png", false));
+    Mat blob = blobFromImage(img, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), true, false);
+
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(DNN_TARGET_OPENCL);
+
+    net.setInput(blob);
+    // Output has shape 1x1xNx7, where N is the number of detections.
+    // Every detection is a vector of values [id, classId, confidence, left, top, right, bottom]
+    Mat out = net.forward();
+    out = out.reshape(1, out.total() / 7);
+
+    Mat detections;
+    for (int i = 0; i < out.rows; ++i)
+    {
+        if (out.at<float>(i, 2) > 0.5)
+            detections.push_back(out.row(i).colRange(1, 7));
+    }
+
+    Mat ref = (Mat_<float>(5, 6) << 1, 0.90176028, 0.19872092, 0.36311883, 0.26461923, 0.63498729,
+                                    3, 0.93569964, 0.64865261, 0.45906419, 0.80675775, 0.65708131,
+                                    3, 0.75838411, 0.44668293, 0.45907149, 0.49459291, 0.52197015,
+                                    10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527,
+                                    10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384);
+    normAssert(detections, ref);
+}
+
 TEST(Test_TensorFlow, lstm)
 {
     runTensorFlowNet("lstm", DNN_TARGET_CPU, true);