# graph.py

from google.protobuf import text_format

from .caffe import get_caffe_resolver
from .errors import KaffeError, print_stderr
from .layers import LayerAdapter, LayerType, NodeKind, NodeDispatch
from .shapes import TensorShape


class Node(object):
    '''
    A single vertex of the model graph.

    name: Identifier of the node (used as the lookup key by the graph).
    kind: The node's kind, as supplied by the caller.
    layer: Optional raw layer. When truthy, it is wrapped in a LayerAdapter.
    '''

    def __init__(self, name, kind, layer=None):
        self.name = name
        self.kind = kind
        # Only wrap the layer when one was actually supplied (truthiness test).
        if layer:
            self.layer = LayerAdapter(layer, kind)
        else:
            self.layer = None
        # Connectivity; maintained by add_parent / add_child.
        self.parents = []
        self.children = []
        # Filled in by other code; nothing in this class writes to them.
        self.data = None
        self.output_shape = None
        self.metadata = {}

    def add_parent(self, parent_node):
        '''Register parent_node as a parent and mirror the link on its side.'''
        assert parent_node not in self.parents
        self.parents.append(parent_node)
        siblings = parent_node.children
        if self not in siblings:
            siblings.append(self)

    def add_child(self, child_node):
        '''Register child_node as a child and mirror the link on its side.'''
        assert child_node not in self.children
        self.children.append(child_node)
        co_parents = child_node.parents
        if self not in co_parents:
            co_parents.append(self)

    def get_only_parent(self):
        '''
        Return the sole parent of this node.

        Raises KaffeError when the node does not have exactly one parent.
        '''
        parent_count = len(self.parents)
        if parent_count == 1:
            return self.parents[0]
        raise KaffeError('Node (%s) expected to have 1 parent. Found %s.' %
                         (self, parent_count))

    @property
    def parameters(self):
        '''The wrapped layer's parameters, or None when there is no layer.'''
        return self.layer.parameters if self.layer is not None else None

    def __str__(self):
        return '[%s] %s' % (self.kind, self.name)

    def __repr__(self):
        return '%s (0x%x)' % (self.name, id(self))


class Graph(object):
    '''
    A named collection of nodes with lookup by node name.

    nodes: Optional initial list of nodes (each must expose a `name`).
    name: Optional name of the graph.
    '''

    def __init__(self, nodes=None, name=None):
        self.nodes = nodes or []
        # Name -> node lookup table, kept in sync by add_node.
        self.node_lut = {node.name: node for node in self.nodes}
        self.name = name

    def add_node(self, node):
        '''Append a node and register it under its name.'''
        self.nodes.append(node)
        self.node_lut[node.name] = node

    def get_node(self, name):
        '''Return the node registered under name. Raises KaffeError if absent.'''
        try:
            return self.node_lut[name]
        except KeyError:
            raise KaffeError('Layer not found: %s' % name)

    def get_input_nodes(self):
        '''Return the nodes that have no parents.'''
        return [node for node in self.nodes if len(node.parents) == 0]

    def get_output_nodes(self):
        '''Return the nodes that have no children.'''
        return [node for node in self.nodes if len(node.children) == 0]

    def topologically_sorted(self):
        '''
        Return the nodes ordered so that every node precedes its children.

        Uses a depth-first traversal along the `children` links.
        Raises KaffeError if a cycle is encountered.
        '''
        # Nodes are collected in post-order and reversed once at the end. This
        # yields the same ordering as prepending each finished node, without
        # the repeated front-insertion cost.
        finished = []
        pending = list(self.nodes)
        temp_marked = set()
        perm_marked = set()

        def visit(node):
            if node in temp_marked:
                # The node is already on the current DFS path: a cycle.
                raise KaffeError('Graph is not a DAG.')
            if node in perm_marked:
                return
            temp_marked.add(node)
            for child in node.children:
                visit(child)
            perm_marked.add(node)
            temp_marked.remove(node)
            finished.append(node)

        while pending:
            visit(pending.pop())
        finished.reverse()
        return finished

    def compute_output_shapes(self):
        '''Set output_shape on every node, visiting nodes in topological order.'''
        sorted_nodes = self.topologically_sorted()
        for node in sorted_nodes:
            node.output_shape = TensorShape(
                *NodeKind.compute_output_shape(node))

    def replaced(self, new_nodes):
        '''Return a new Graph with the same name but the given nodes.'''
        return Graph(nodes=new_nodes, name=self.name)

    def transformed(self, transformers):
        '''
        Apply each transformer (a callable taking and returning a Graph) in turn.

        Returns the final graph. Raises KaffeError if a transformer returns None.
        '''
        graph = self
        for transformer in transformers:
            graph = transformer(graph)
            if graph is None:
                raise KaffeError('Transformer failed: {}'.format(transformer))
            assert isinstance(graph, Graph)
        return graph

    def __contains__(self, key):
        '''Return True if a node is registered under the name `key`.'''
        return key in self.node_lut

    def __str__(self):
        '''Render a table with one row per node, in topological order.'''
        row_fmt = '{:<20} {:<30} {:>20} {:>20}'
        rows = [row_fmt.format('Type', 'Name', 'Param', 'Output'), '-' * 94]
        for node in self.topologically_sorted():
            # If the node has learned parameters, display the first one's shape.
            # In case of convolutions, this corresponds to the weights.
            data_shape = node.data[0].shape if node.data else '--'
            # Show the '--' placeholder itself when no shape is available,
            # rather than splitting it into a tuple of characters.
            if node.output_shape:
                out_shape = tuple(node.output_shape)
            else:
                out_shape = '--'
            # Shapes are converted to strings explicitly: tuples do not accept
            # an alignment format spec on Python 3.
            rows.append(row_fmt.format(node.kind, node.name, str(data_shape),
                                       str(out_shape)))
        return '\n'.join(rows)


class GraphBuilder(object):
    '''Constructs a model graph from a Caffe protocol buffer definition.'''

    def __init__(self, def_path, phase='test'):
        '''
        def_path: Path to the model definition (.prototxt)
        phase: Either 'test' or 'train'. Used for filtering phase-specific nodes.

        The definition file is read and parsed immediately (see load).
        '''
        self.def_path = def_path
        self.phase = phase
        self.load()

    def load(self):
        '''Load the layer definitions from the prototxt into self.params.'''
        self.params = get_caffe_resolver().NetParameter()
        with open(self.def_path, 'rb') as def_file:
            text_format.Merge(def_file.read(), self.params)

    def filter_layers(self, layers):
        '''
        Filter out layers based on the current phase.

        A layer without include/exclude rules belongs to the current phase.
        The first include rule assigns the layer to that rule's phase; the
        first exclude rule assigns it to the opposite phase and takes
        precedence over include. Dropout layers are dropped in the test phase.

        Returns the list of retained layers, in their original order.
        '''
        phase_map = {0: 'train', 1: 'test'}
        filtered_layer_names = set()
        filtered_layers = []
        for layer in layers:
            phase = self.phase
            if len(layer.include):
                phase = phase_map[layer.include[0].phase]
            if len(layer.exclude):
                # Read the phase from the exclude rule itself. Consulting
                # layer.include here would fail for exclude-only layers.
                phase = phase_map[1 - layer.exclude[0].phase]
            exclude = (phase != self.phase)
            # Dropout layers appear in a fair number of Caffe
            # test-time networks. These are just ignored. We'll
            # filter them out here.
            if (not exclude) and (phase == 'test'):
                exclude = (layer.type == LayerType.Dropout)
            if not exclude:
                filtered_layers.append(layer)
                # Guard against dupes.
                assert layer.name not in filtered_layer_names
                filtered_layer_names.add(layer.name)
        return filtered_layers

    def make_node(self, layer):
        '''
        Create a graph node for the given layer.

        Raises KaffeError if the layer type cannot be mapped to a node kind.
        '''
        kind = NodeKind.map_raw_kind(layer.type)
        if kind is None:
            raise KaffeError('Unknown layer type encountered: %s' % layer.type)
        # The node is registered under the layer's name attribute. Other layers
        # refer to their inputs by "top" (output) names; build() takes care of
        # resolving those references to the right node.
        return Node(layer.name, kind, layer=layer)

    def make_input_nodes(self):
        '''
        Create data input nodes.

        This method is for old-style inputs, where the input specification
        was not treated as a first-class layer in the prototext.
        Newer models use the "Input layer" type.

        Every input node receives the same output shape, taken from input_dim
        or, if that is empty, from the first input_shape entry.
        Raises KaffeError if inputs are declared without any dimensions.
        '''
        nodes = [Node(name, NodeKind.Data) for name in self.params.input]
        if len(nodes):
            # Materialize the dimensions as a list: a lazy map object is always
            # truthy and can only be consumed once, which would break both the
            # emptiness check and the per-node assignment below.
            input_dim = [int(dim) for dim in self.params.input_dim]
            if not input_dim:
                if len(self.params.input_shape) > 0:
                    input_dim = [int(dim)
                                 for dim in self.params.input_shape[0].dim]
                else:
                    raise KaffeError('Dimensions for input not specified.')
            for node in nodes:
                node.output_shape = tuple(input_dim)
        return nodes

    def build(self):
        '''
        Builds the graph from the Caffe layer definitions.

        Returns the connected Graph after its output shapes have been computed.
        Raises KaffeError if a layer declares more than one top, or if a
        referenced input cannot be resolved to a node.
        '''
        # Get the layers
        layers = self.params.layers or self.params.layer
        # Filter out phase-excluded layers
        layers = self.filter_layers(layers)
        # Get any separately-specified input layers
        nodes = self.make_input_nodes()
        nodes += [self.make_node(layer) for layer in layers]
        # Initialize the graph
        graph = Graph(nodes=nodes, name=self.params.name)
        # Connect the nodes
        #
        # A note on layers and outputs:
        # In Caffe, each layer can produce multiple outputs ("tops") from a set of inputs
        # ("bottoms"). The bottoms refer to other layers' tops. The top can rewrite a bottom
        # (in case of in-place operations). Note that the layer's name is not used for establishing
        # any connectivity. It's only used for data association. By convention, a layer with a
        # single top will often use the same name (although this is not required).
        #
        # The current implementation only supports single-output nodes (note that a node can still
        # have multiple children, since multiple child nodes can refer to the single top's name).
        node_outputs = {}
        for layer in layers:
            node = graph.get_node(layer.name)
            for input_name in layer.bottom:
                assert input_name != layer.name
                parent_node = node_outputs.get(input_name)
                # Fall back to a lookup by name when the top has not been
                # re-routed, or when it has been re-routed to this very node.
                if (parent_node is None) or (parent_node == node):
                    parent_node = graph.get_node(input_name)
                node.add_parent(parent_node)
            if len(layer.top) > 1:
                raise KaffeError('Multiple top nodes are not supported.')
            for output_name in layer.top:
                if output_name == layer.name:
                    # Output is named the same as the node. No further action required.
                    continue
                # There are two possibilities here:
                #
                # Case 1: output_name refers to another node in the graph.
                # This is an "in-place operation" that overwrites an existing node.
                # This would create a cycle in the graph. We'll undo the in-placing
                # by substituting this node wherever the overwritten node is referenced.
                #
                # Case 2: output_name violates the convention layer.name == output_name.
                # Since we are working in the single-output regime, we can rename it to
                # match the layer name.
                #
                # For both cases, future references to this top are re-routed to this node.
                node_outputs[output_name] = node

        graph.compute_output_shapes()
        return graph


class NodeMapper(NodeDispatch):
    '''
    Maps the non-input nodes of a graph, grouped into linear chains.

    Subclasses must implement commit() to consume the mapped chains.
    '''

    def __init__(self, graph):
        self.graph = graph

    def map(self):
        '''
        Map every non-input node of the graph and commit the result.

        Nodes are visited in topological order and grouped into chains: a node
        with exactly one parent extends the first chain currently ending in
        that parent; any other node opens a new chain.

        Returns whatever commit() returns for the list of mapped chains.
        '''
        ordered = self.graph.topologically_sorted()
        # Remove input nodes - we'll handle them separately.
        inputs = self.graph.get_input_nodes()
        ordered = [candidate for candidate in ordered
                   if candidate not in inputs]
        # Decompose DAG into chains.
        chains = []
        for node in ordered:
            target = None
            if len(node.parents) == 1:
                parent = node.get_only_parent()
                # First existing chain whose tail is this node's parent, if any.
                target = next(
                    (chain for chain in chains if chain[-1] == parent), None)
            if target is None:
                # Start a new chain for this node.
                target = []
                chains.append(target)
            target.append(node)
        # Map each chain.
        return self.commit([self.map_chain(chain) for chain in chains])

    def map_chain(self, chain):
        '''Map each node of the chain, preserving order.'''
        mapped = []
        for node in chain:
            mapped.append(self.map_node(node))
        return mapped

    def map_node(self, node):
        '''
        Map a single node using the 'map' handler for its kind.

        The handler is obtained through get_handler, which is not defined in
        this class (presumably inherited from NodeDispatch). The originating
        node is attached to the result as its `node` attribute.
        '''
        handler = self.get_handler(node.kind, 'map')
        result = handler(node)
        assert result is not None
        result.node = node
        return result

    def commit(self, mapped_chains):
        '''Consume the mapped chains. Must be overridden by subclasses.'''
        raise NotImplementedError('Must be implemented by subclass.')