diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp
index 27aefd9b48932963e63c277476289160f4955634..9452e4e85ea846fe493cc2cebe8facf505ac782b 100644
--- a/modules/dnn/src/layers/scale_layer.cpp
+++ b/modules/dnn/src/layers/scale_layer.cpp
@@ -15,6 +15,7 @@ Implementation of Scale layer.
 #include "../op_inf_engine.hpp"
 #include "../ie_ngraph.hpp"
+#include <opencv2/imgproc.hpp>
 #include <opencv2/dnn/shape_utils.hpp>
 
 namespace cv
 {
@@ -324,7 +325,7 @@ public:
                          std::vector<MatShape> &internals) const CV_OVERRIDE
     {
         CV_Assert_N(inputs.size() == 1, blobs.size() == 3);
-        CV_Assert_N(blobs[0].total() == 1, blobs[1].total() == total(inputs[0], 1),
+        CV_Assert_N(blobs[0].total() == 1,
                     blobs[2].total() == inputs[0][1]);
 
         outputs.assign(1, inputs[0]);
@@ -347,15 +348,20 @@ public:
         float* outData = outputs[0].ptr<float>();
 
         Mat data_mean_cpu = blobs[1].clone();
+        Mat mean_resize = Mat(inputs[0].size[3], inputs[0].size[2], CV_32FC3);
+        Mat mean_3d = Mat(data_mean_cpu.size[3], data_mean_cpu.size[2], CV_32FC3, data_mean_cpu.ptr(0));
+        resize(mean_3d, mean_resize, Size(inputs[0].size[3], inputs[0].size[2]));
+        int new_size[] = {1, mean_resize.channels(), mean_resize.cols, mean_resize.rows};
+        Mat data_mean_cpu_resize = mean_resize.reshape(1, *new_size);
         Mat data_mean_per_channel_cpu = blobs[2].clone();
 
-        const int numWeights = data_mean_cpu.total();
+        const int numWeights = data_mean_cpu_resize.total();
         CV_Assert(numWeights != 0);
 
         ++num_iter;
         if (num_iter <= recompute_mean)
         {
-            data_mean_cpu *= (num_iter - 1);
+            data_mean_cpu_resize *= (num_iter - 1);
             const int batch = inputs[0].size[0];
             float alpha = 1.0 / batch;
@@ -364,15 +370,15 @@ public:
                 Mat inpSlice(1, numWeights, CV_32F, inpData);
                 inpSlice = alpha * inpSlice;
 
-                add(data_mean_cpu.reshape(1, 1), inpSlice, data_mean_cpu.reshape(1, 1));
+                add(data_mean_cpu_resize.reshape(1, 1), inpSlice, data_mean_cpu_resize.reshape(1, 1));
                 inpData += numWeights;
             }
-            data_mean_cpu *= (1.0 / num_iter);
+            data_mean_cpu_resize *= (1.0 / num_iter);
 
-            int newsize[] = {blobs[1].size[1], (int)blobs[1].total(2)};
-            reduce(data_mean_cpu.reshape(1, 2, &newsize[0]), data_mean_per_channel_cpu, 1, REDUCE_SUM, CV_32F);
+            int newsize[] = {inputs[0].size[1], (int)inputs[0].total(2)};
+            reduce(data_mean_cpu_resize.reshape(1, 2, &newsize[0]), data_mean_per_channel_cpu, 1, REDUCE_SUM, CV_32F);
 
-            int area = blobs[1].total(2);
+            int area = inputs[0].total(2);
             data_mean_per_channel_cpu *= (1.0 / area);
         }
@@ -387,7 +393,7 @@ public:
                 Mat inpSlice(1, numWeights, CV_32F, inpData);
                 Mat outSlice(1, numWeights, CV_32F, outData);
 
-                add(inpSlice, (-1) * data_mean_cpu, outSlice);
+                add(inpSlice, (-1) * data_mean_cpu_resize, outSlice);
                 inpData += numWeights;
                 outData += numWeights;
             }
diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp
index 648e0aaa16b4e63fac5a12597585a21387ebe5a3..d61f319f54d424db74fd9269cee3e728f0fd8d27 100644
--- a/modules/dnn/test/test_layers.cpp
+++ b/modules/dnn/test/test_layers.cpp
@@ -646,6 +646,8 @@ TEST_P(Test_Caffe_layers, DataAugmentation)
     if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16)
         applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16);
     testLayerUsingCaffeModels("data_augmentation", true, false);
+    testLayerUsingCaffeModels("data_augmentation_2x1", true, false);
+    testLayerUsingCaffeModels("data_augmentation_8x6", true, false);
 }
 
 TEST_P(Test_Caffe_layers, Resample)
diff --git a/samples/dnn/optical_flow.py b/samples/dnn/optical_flow.py
index 5d0d831cf3205582a2617a1276177ee29d7f398e..da2a5808f2d5cc9f71852513e8ee78bfac2d5c0c 100644
--- a/samples/dnn/optical_flow.py
+++ b/samples/dnn/optical_flow.py
@@ -5,7 +5,7 @@ Original paper: https://arxiv.org/abs/1612.01925.
 Original repo: https://github.com/lmb-freiburg/flownet2.
 
 Download the converted .caffemodel model from https://drive.google.com/open?id=16qvE9VNmU39NttpZwZs81Ga8VYQJDaWZ
-and .prototxt from https://drive.google.com/open?id=19bo6SWU2p8ZKvjXqMKiCPdK8mghwDy9b.
+and .prototxt from https://drive.google.com/file/d/1RyNIUsan1ZOh2hpYIH36A-jofAvJlT6a/view?usp=sharing.
 Otherwise download original model from https://lmb.informatik.uni-freiburg.de/resources/binaries/flownet2/flownet2-models.tar.gz,
 convert .h5 model to .caffemodel and modify original .prototxt using .prototxt from link above.
 '''
@@ -18,7 +18,7 @@ import cv2 as cv
 
 class OpticalFlow(object):
     def __init__(self, proto, model, height, width):
-        self.net = cv.dnn.readNet(proto, model)
+        self.net = cv.dnn.readNetFromCaffe(proto, model)
         self.net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
         self.height = height
         self.width = width
@@ -62,9 +62,9 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Use this script to calculate optical flow using FlowNetv2',
                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('-input', '-i', required=True, help='Path to input video file. Skip this argument to capture frames from a camera.')
-    parser.add_argument('--height', default=320, help='Input height')
-    parser.add_argument('--width', default=448, help='Input width')
-    parser.add_argument('--proto', '-p', default='FlowNet2_deploy.prototxt', help='Path to prototxt.')
+    parser.add_argument('--height', default=320, type=int, help='Input height')
+    parser.add_argument('--width', default=448, type=int, help='Input width')
+    parser.add_argument('--proto', '-p', default='FlowNet2_deploy_anysize.prototxt', help='Path to prototxt.')
     parser.add_argument('--model', '-m', default='FlowNet2_weights.caffemodel', help='Path to caffemodel.')
     args, _ = parser.parse_known_args()
@@ -75,7 +75,25 @@ if __name__ == '__main__':
     cv.namedWindow(winName, cv.WINDOW_NORMAL)
     cap = cv.VideoCapture(args.input if args.input else 0)
     hasFrame, first_frame = cap.read()
-    opt_flow = OpticalFlow(args.proto, args.model, args.height, args.width)
+
+    divisor = 64.
+    var = {}
+    var['ADAPTED_WIDTH'] = int(np.ceil(args.width/divisor) * divisor)
+    var['ADAPTED_HEIGHT'] = int(np.ceil(args.height/divisor) * divisor)
+    var['SCALE_WIDTH'] = args.width / float(var['ADAPTED_WIDTH'])
+    var['SCALE_HEIGHT'] = args.height / float(var['ADAPTED_HEIGHT'])
+
+    config = ''
+    proto = open(args.proto).readlines()
+    for line in proto:
+        for key, value in var.items():
+            tag = "$%s$" % key
+            line = line.replace(tag, str(value))
+        config += line
+
+    caffemodel = open(args.model, 'rb').read()
+
+    opt_flow = OpticalFlow(bytearray(config.encode()), caffemodel, var['ADAPTED_HEIGHT'], var['ADAPTED_WIDTH'])
     while cv.waitKey(1) < 0:
         hasFrame, second_frame = cap.read()
         if not hasFrame:
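Below is a minimal, standalone sketch (not part of the patch) of the size-adaptation scheme the updated samples/dnn/optical_flow.py relies on: FlowNet2 works on spatial dimensions that are multiples of 64, so the requested size is rounded up, the $ADAPTED_WIDTH$/$ADAPTED_HEIGHT$/$SCALE_WIDTH$/$SCALE_HEIGHT$ placeholders in the FlowNet2_deploy_anysize.prototxt template are substituted, and the patched prototxt is passed to cv.dnn.readNetFromCaffe as an in-memory buffer. The helper name load_flownet2 is illustrative only.

# Sketch of the prototxt templating used by the any-size FlowNet2 sample.
import numpy as np
import cv2 as cv

def load_flownet2(proto_path, model_path, height, width, divisor=64.0):
    # FlowNet2 downsamples by a factor of 64, so round the requested size up
    # to the next multiple of 64 and keep the scale back to the original size.
    adapted_w = int(np.ceil(width / divisor) * divisor)
    adapted_h = int(np.ceil(height / divisor) * divisor)
    repl = {'ADAPTED_WIDTH': adapted_w,
            'ADAPTED_HEIGHT': adapted_h,
            'SCALE_WIDTH': width / float(adapted_w),
            'SCALE_HEIGHT': height / float(adapted_h)}
    # Fill the $...$ placeholders of the "anysize" deploy prototxt template.
    config = open(proto_path).read()
    for key, value in repl.items():
        config = config.replace('$%s$' % key, str(value))
    weights = open(model_path, 'rb').read()
    # readNetFromCaffe also accepts in-memory buffers, so the patched prototxt
    # never needs to be written back to disk.
    net = cv.dnn.readNetFromCaffe(bytearray(config.encode()), weights)
    return net, adapted_h, adapted_w

# Example use, mirroring the sample's defaults:
# net, h, w = load_flownet2('FlowNet2_deploy_anysize.prototxt',
#                           'FlowNet2_weights.caffemodel', 320, 448)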