diff --git a/samples/dnn/tf_text_graph_common.py b/samples/dnn/tf_text_graph_common.py index bf04c421745db034dc9e45300a77ad7505760df8..a6444207806bad877fdbce93be9968efaf70480c 100644 --- a/samples/dnn/tf_text_graph_common.py +++ b/samples/dnn/tf_text_graph_common.py @@ -289,7 +289,7 @@ def removeUnusedNodesAndAttrs(to_remove, graph_def): op = graph_def.node[i].op name = graph_def.node[i].name - if op == 'Const' or to_remove(name, op): + if to_remove(name, op): if op != 'Const': removedNodes.append(name) diff --git a/samples/dnn/tf_text_graph_faster_rcnn.py b/samples/dnn/tf_text_graph_faster_rcnn.py index 13a9c29ec08ee96ff4b205a3c4f851a90af61d1e..e3d0ad0127f6235caf4865778205925af03120d1 100644 --- a/samples/dnn/tf_text_graph_faster_rcnn.py +++ b/samples/dnn/tf_text_graph_faster_rcnn.py @@ -49,7 +49,7 @@ def createFasterRCNNGraph(modelPath, configPath, outputPath): removeIdentity(graph_def) def to_remove(name, op): - return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) or \ + return op == 'Const' or name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) or \ (name.startswith('CropAndResize') and op != 'CropAndResize') removeUnusedNodesAndAttrs(to_remove, graph_def) diff --git a/samples/dnn/tf_text_graph_mask_rcnn.py b/samples/dnn/tf_text_graph_mask_rcnn.py index aaefe456ad1355d8aa6216acc2e63a90c723e0ad..c8803088f981cbe8a1dfd33627af331d65bfee4c 100644 --- a/samples/dnn/tf_text_graph_mask_rcnn.py +++ b/samples/dnn/tf_text_graph_mask_rcnn.py @@ -55,7 +55,7 @@ graph_def = parseTextGraph(args.output) removeIdentity(graph_def) def to_remove(name, op): - return name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) or \ + return op == 'Const' or name.startswith(scopesToIgnore) or not name.startswith(scopesToKeep) or \ (name.startswith('CropAndResize') and op != 'CropAndResize') removeUnusedNodesAndAttrs(to_remove, graph_def) diff --git a/samples/dnn/tf_text_graph_ssd.py b/samples/dnn/tf_text_graph_ssd.py index 5017dba7a74bd17738b942b08074b10cf9894396..1576380646e5ee59be3332a69efa4e49c75d02b5 100644 --- a/samples/dnn/tf_text_graph_ssd.py +++ b/samples/dnn/tf_text_graph_ssd.py @@ -10,14 +10,60 @@ # Then you can import it with a binary frozen graph (.pb) using readNetFromTensorflow() function. # See details and examples on the following wiki page: https://github.com/opencv/opencv/wiki/TensorFlow-Object-Detection-API import argparse +import re from math import sqrt from tf_text_graph_common import * +class SSDAnchorGenerator: + def __init__(self, min_scale, max_scale, num_layers, aspect_ratios, + reduce_boxes_in_lowest_layer, image_width, image_height): + self.min_scale = min_scale + self.aspect_ratios = aspect_ratios + self.reduce_boxes_in_lowest_layer = reduce_boxes_in_lowest_layer + self.image_width = image_width + self.image_height = image_height + self.scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1) + for i in range(num_layers)] + [1.0] + + def get(self, layer_id): + if layer_id == 0 and self.reduce_boxes_in_lowest_layer: + widths = [0.1, self.min_scale * sqrt(2.0), self.min_scale * sqrt(0.5)] + heights = [0.1, self.min_scale / sqrt(2.0), self.min_scale / sqrt(0.5)] + else: + widths = [self.scales[layer_id] * sqrt(ar) for ar in self.aspect_ratios] + heights = [self.scales[layer_id] / sqrt(ar) for ar in self.aspect_ratios] + + widths += [sqrt(self.scales[layer_id] * self.scales[layer_id + 1])] + heights += [sqrt(self.scales[layer_id] * self.scales[layer_id + 1])] + widths = [w * self.image_width for w in widths] + heights = [h * self.image_height for h in heights] + return widths, heights + + +class MultiscaleAnchorGenerator: + def __init__(self, min_level, aspect_ratios, scales_per_octave, anchor_scale): + self.min_level = min_level + self.aspect_ratios = aspect_ratios + self.anchor_scale = anchor_scale + self.scales = [2**(float(s) / scales_per_octave) for s in range(scales_per_octave)] + + def get(self, layer_id): + widths = [] + heights = [] + for a in self.aspect_ratios: + for s in self.scales: + base_anchor_size = 2**(self.min_level + layer_id) * self.anchor_scale + ar = sqrt(a) + heights.append(base_anchor_size * s / ar) + widths.append(base_anchor_size * s * ar) + return widths, heights + + def createSSDGraph(modelPath, configPath, outputPath): # Nodes that should be kept. - keepOps = ['Conv2D', 'BiasAdd', 'Add', 'Relu6', 'Placeholder', 'FusedBatchNorm', + keepOps = ['Conv2D', 'BiasAdd', 'Add', 'Relu', 'Relu6', 'Placeholder', 'FusedBatchNorm', 'DepthwiseConv2dNative', 'ConcatV2', 'Mul', 'MaxPool', 'AvgPool', 'Identity', - 'Sub'] + 'Sub', 'ResizeNearestNeighbor', 'Pad'] # Node with which prefixes should be removed prefixesToRemove = ('MultipleGridAnchorGenerator/', 'Postprocessor/', 'Preprocessor/map') @@ -27,26 +73,50 @@ def createSSDGraph(modelPath, configPath, outputPath): config = config['model'][0]['ssd'][0] num_classes = int(config['num_classes'][0]) - ssd_anchor_generator = config['anchor_generator'][0]['ssd_anchor_generator'][0] - min_scale = float(ssd_anchor_generator['min_scale'][0]) - max_scale = float(ssd_anchor_generator['max_scale'][0]) - num_layers = int(ssd_anchor_generator['num_layers'][0]) - aspect_ratios = [float(ar) for ar in ssd_anchor_generator['aspect_ratios']] - reduce_boxes_in_lowest_layer = True - if 'reduce_boxes_in_lowest_layer' in ssd_anchor_generator: - reduce_boxes_in_lowest_layer = ssd_anchor_generator['reduce_boxes_in_lowest_layer'][0] == 'true' - fixed_shape_resizer = config['image_resizer'][0]['fixed_shape_resizer'][0] image_width = int(fixed_shape_resizer['width'][0]) image_height = int(fixed_shape_resizer['height'][0]) box_predictor = 'convolutional' if 'convolutional_box_predictor' in config['box_predictor'][0] else 'weight_shared_convolutional' + anchor_generator = config['anchor_generator'][0] + if 'ssd_anchor_generator' in anchor_generator: + ssd_anchor_generator = anchor_generator['ssd_anchor_generator'][0] + min_scale = float(ssd_anchor_generator['min_scale'][0]) + max_scale = float(ssd_anchor_generator['max_scale'][0]) + num_layers = int(ssd_anchor_generator['num_layers'][0]) + aspect_ratios = [float(ar) for ar in ssd_anchor_generator['aspect_ratios']] + reduce_boxes_in_lowest_layer = True + if 'reduce_boxes_in_lowest_layer' in ssd_anchor_generator: + reduce_boxes_in_lowest_layer = ssd_anchor_generator['reduce_boxes_in_lowest_layer'][0] == 'true' + priors_generator = SSDAnchorGenerator(min_scale, max_scale, num_layers, + aspect_ratios, reduce_boxes_in_lowest_layer, + image_width, image_height) + + + print('Scale: [%f-%f]' % (min_scale, max_scale)) + print('Aspect ratios: %s' % str(aspect_ratios)) + print('Reduce boxes in the lowest layer: %s' % str(reduce_boxes_in_lowest_layer)) + elif 'multiscale_anchor_generator' in anchor_generator: + multiscale_anchor_generator = anchor_generator['multiscale_anchor_generator'][0] + min_level = int(multiscale_anchor_generator['min_level'][0]) + max_level = int(multiscale_anchor_generator['max_level'][0]) + anchor_scale = float(multiscale_anchor_generator['anchor_scale'][0]) + aspect_ratios = [float(ar) for ar in multiscale_anchor_generator['aspect_ratios']] + scales_per_octave = int(multiscale_anchor_generator['scales_per_octave'][0]) + num_layers = max_level - min_level + 1 + priors_generator = MultiscaleAnchorGenerator(min_level, aspect_ratios, + scales_per_octave, anchor_scale) + print('Levels: [%d-%d]' % (min_level, max_level)) + print('Anchor scale: %f' % anchor_scale) + print('Scales per octave: %d' % scales_per_octave) + print('Aspect ratios: %s' % str(aspect_ratios)) + else: + print('Unknown anchor_generator') + exit(0) + print('Number of classes: %d' % num_classes) print('Number of layers: %d' % num_layers) - print('Scale: [%f-%f]' % (min_scale, max_scale)) - print('Aspect ratios: %s' % str(aspect_ratios)) - print('Reduce boxes in the lowest layer: %s' % str(reduce_boxes_in_lowest_layer)) print('box predictor: %s' % box_predictor) print('Input image size: %dx%d' % (image_width, image_height)) @@ -67,8 +137,8 @@ def createSSDGraph(modelPath, configPath, outputPath): return unconnected - # Detect unfused batch normalization nodes and fuse them. - def fuse_batch_normalization(): + def fuse_nodes(nodesToKeep): + # Detect unfused batch normalization nodes and fuse them. # Add_0 <-- moving_variance, add_y # Rsqrt <-- Add_0 # Mul_0 <-- Rsqrt, gamma @@ -77,9 +147,15 @@ def createSSDGraph(modelPath, configPath, outputPath): # Sub_0 <-- beta, Mul_2 # Add_1 <-- Mul_1, Sub_0 nodesMap = {node.name: node for node in graph_def.node} - subgraph = ['Add', + subgraphBatchNorm = ['Add', ['Mul', 'input', ['Mul', ['Rsqrt', ['Add', 'moving_variance', 'add_y']], 'gamma']], ['Sub', 'beta', ['Mul', 'moving_mean', 'Mul_0']]] + # Detect unfused nearest neighbor resize. + subgraphResizeNN = ['Reshape', + ['Mul', ['Reshape', 'input', ['Pack', 'shape_1', 'shape_2', 'shape_3', 'shape_4', 'shape_5']], + 'ones'], + ['Pack', ['StridedSlice', ['Shape', 'input'], 'stack', 'stack_1', 'stack_2'], + 'out_height', 'out_width', 'out_channels']] def checkSubgraph(node, targetNode, inputs, fusedNodes): op = targetNode[0] if node.op == op and (len(node.input) >= len(targetNode) - 1): @@ -100,7 +176,7 @@ def createSSDGraph(modelPath, configPath, outputPath): for node in graph_def.node: inputs = {} fusedNodes = [] - if checkSubgraph(node, subgraph, inputs, fusedNodes): + if checkSubgraph(node, subgraphBatchNorm, inputs, fusedNodes): name = node.name node.Clear() node.name = name @@ -112,15 +188,41 @@ def createSSDGraph(modelPath, configPath, outputPath): node.input.append(inputs['moving_variance']) node.addAttr('epsilon', 0.001) nodesToRemove += fusedNodes[1:] + + inputs = {} + fusedNodes = [] + if checkSubgraph(node, subgraphResizeNN, inputs, fusedNodes): + name = node.name + node.Clear() + node.name = name + node.op = 'ResizeNearestNeighbor' + node.input.append(inputs['input']) + node.input.append(name + '/output_shape') + + out_height_node = nodesMap[inputs['out_height']] + out_width_node = nodesMap[inputs['out_width']] + out_height = int(out_height_node.attr['value']['tensor'][0]['int_val'][0]) + out_width = int(out_width_node.attr['value']['tensor'][0]['int_val'][0]) + + shapeNode = NodeDef() + shapeNode.name = name + '/output_shape' + shapeNode.op = 'Const' + shapeNode.addAttr('value', [out_height, out_width]) + graph_def.node.insert(graph_def.node.index(node), shapeNode) + nodesToKeep.append(shapeNode.name) + + nodesToRemove += fusedNodes[1:] for node in nodesToRemove: graph_def.node.remove(node) - fuse_batch_normalization() + nodesToKeep = [] + fuse_nodes(nodesToKeep) removeIdentity(graph_def) def to_remove(name, op): - return (not op in keepOps) or name.startswith(prefixesToRemove) + return (not name in nodesToKeep) and \ + (op == 'Const' or (not op in keepOps) or name.startswith(prefixesToRemove)) removeUnusedNodesAndAttrs(to_remove, graph_def) @@ -169,19 +271,15 @@ def createSSDGraph(modelPath, configPath, outputPath): graph_def.node.extend([flatten]) addConcatNode('%s/concat' % label, concatInputs, 'concat/axis_flatten') - idx = 0 + num_matched_layers = 0 for node in graph_def.node: - if node.name == ('BoxPredictor_%d/BoxEncodingPredictor/Conv2D' % idx) or \ - node.name == ('WeightSharedConvolutionalBoxPredictor_%d/BoxPredictor/Conv2D' % idx) or \ - node.name == 'WeightSharedConvolutionalBoxPredictor/BoxPredictor/Conv2D': + if re.match('BoxPredictor_\d/BoxEncodingPredictor/Conv2D', node.name) or \ + re.match('WeightSharedConvolutionalBoxPredictor(_\d)*/BoxPredictor/Conv2D', node.name): node.addAttr('loc_pred_transposed', True) - idx += 1 - assert(idx == num_layers) + num_matched_layers += 1 + assert(num_matched_layers == num_layers) # Add layers that generate anchors (bounding boxes proposals). - scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1) - for i in range(num_layers)] + [1.0] - priorBoxes = [] for i in range(num_layers): priorBox = NodeDef() @@ -199,17 +297,8 @@ def createSSDGraph(modelPath, configPath, outputPath): priorBox.addAttr('flip', False) priorBox.addAttr('clip', False) - if i == 0 and reduce_boxes_in_lowest_layer: - widths = [0.1, min_scale * sqrt(2.0), min_scale * sqrt(0.5)] - heights = [0.1, min_scale / sqrt(2.0), min_scale / sqrt(0.5)] - else: - widths = [scales[i] * sqrt(ar) for ar in aspect_ratios] - heights = [scales[i] / sqrt(ar) for ar in aspect_ratios] + widths, heights = priors_generator.get(i) - widths += [sqrt(scales[i] * scales[i + 1])] - heights += [sqrt(scales[i] * scales[i + 1])] - widths = [w * image_width for w in widths] - heights = [h * image_height for h in heights] priorBox.addAttr('width', widths) priorBox.addAttr('height', heights) priorBox.addAttr('variance', [0.1, 0.1, 0.2, 0.2]) @@ -217,6 +306,7 @@ def createSSDGraph(modelPath, configPath, outputPath): graph_def.node.extend([priorBox]) priorBoxes.append(priorBox.name) + # Compare this layer's output with Postprocessor/Reshape addConcatNode('PriorBox/concat', priorBoxes, 'concat/axis_flatten') # Sigmoid for classes predictions and DetectionOutput layer