diff --git a/fluid/DeepASR/tools/profile.py b/fluid/DeepASR/tools/profile.py index cb0227c33a25b1c38977f8485237f13d0351c36f..9d0b47694273345357726ad51062d01ff01b120d 100644 --- a/fluid/DeepASR/tools/profile.py +++ b/fluid/DeepASR/tools/profile.py @@ -169,7 +169,8 @@ def profile(args): outs = exe.run(fluid.default_main_program(), feed={"feature": feature_t, "label": label_t}, - fetch_list=[avg_cost, accuracy], + fetch_list=[avg_cost, accuracy] + if args.print_train_acc else [], return_numpy=False) if args.print_train_acc: diff --git a/fluid/DeepASR/train.py b/fluid/DeepASR/train.py index 9856dad7d56b47bf14c32a7d0ca0ec10b8ecf88f..b5d2239e94dd5ddcd79d0245e4b980d6cf5bfbf4 100644 --- a/fluid/DeepASR/train.py +++ b/fluid/DeepASR/train.py @@ -216,16 +216,17 @@ def train(args): label_t.set(labels, place) label_t.set_lod([lod]) - cost, acc = exe.run(fluid.default_main_program(), - feed={"feature": feature_t, - "label": label_t}, - fetch_list=[avg_cost, accuracy], - return_numpy=False) + to_print = batch_id > 0 and (batch_id % args.print_per_batches == 0) + outs = exe.run(fluid.default_main_program(), + feed={"feature": feature_t, + "label": label_t}, + fetch_list=[avg_cost, accuracy] if to_print else [], + return_numpy=False) - if batch_id > 0 and (batch_id % args.print_per_batches == 0): + if to_print: print("\nBatch %d, train cost: %f, train acc: %f" % - (batch_id, lodtensor_to_ndarray(cost)[0], - lodtensor_to_ndarray(acc)[0])) + (batch_id, lodtensor_to_ndarray(outs[0])[0], + lodtensor_to_ndarray(outs[1])[0])) # save the latest checkpoint if args.checkpoints != '': model_path = os.path.join(args.checkpoints, diff --git a/fluid/image_classification/caffe2fluid/README.md b/fluid/image_classification/caffe2fluid/README.md new file mode 100644 index 0000000000000000000000000000000000000000..279b4c6e57a785736a1c75928de8d45f4e4e956e --- /dev/null +++ b/fluid/image_classification/caffe2fluid/README.md @@ -0,0 +1,25 @@ +### Caffe2Fluid +This tool is used to convert a Caffe model 
def fatal_error(msg):
    """Report an unrecoverable error on stderr and terminate the process.

    Args:
        msg: Text handed to ``print_stderr`` before exiting.

    Raises:
        SystemExit: always, with exit status -1.
    """
    print_stderr(msg)
    # Use sys.exit rather than the bare ``exit`` builtin: the latter is
    # injected by the ``site`` module for interactive sessions and is not
    # available when the interpreter runs with -S or is embedded/frozen.
    sys.exit(-1)
def main():
    """Parse the command line, validate it and run the conversion."""
    # (flags, keyword settings) in the order they are registered.
    option_table = (
        (('def_path', ), {
            'help': 'Model definition (.prototxt) path'
        }),
        (('--caffemodel', ), {
            'help': 'Model data (.caffemodel) path'
        }),
        (('--data-output-path', ), {
            'help': 'Converted data output path'
        }),
        (('--code-output-path', ), {
            'help': 'Save generated source to this path'
        }),
        (('-p', '--phase'), {
            'default': 'test',
            'help': 'The phase to convert: test (default) or train'
        }),
    )
    parser = argparse.ArgumentParser()
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    cli = parser.parse_args()
    validate_arguments(cli)
    convert(cli.def_path, cli.caffemodel, cli.data_output_path,
            cli.code_output_path, cli.phase)
class CaffeResolver(object):
    """Locates Caffe's protobuf classes, preferring an installed PyCaffe."""

    def __init__(self):
        self.import_caffe()

    def import_caffe(self):
        """Bind ``caffe``, ``caffepb`` and ``NetParameter`` on this instance."""
        self.caffe = None
        try:
            # PyCaffe is the first choice.
            import caffe as pycaffe
        except ImportError:
            # No PyCaffe: use the standalone protobuf module instead.
            self.caffepb = import_caffepb()
            show_fallback_warning()
        else:
            self.caffe = pycaffe
        if self.caffe:
            # Take the protobuf code from the imported distribution so that
            # Caffe variants with custom layers keep working.
            self.caffepb = self.caffe.proto.caffe_pb2
        self.NetParameter = self.caffepb.NetParameter

    def has_pycaffe(self):
        """Tell whether the PyCaffe import succeeded."""
        return self.caffe is not None
def set_loglevel(level):
    """Select the verbosity used by the module-level logging helpers.

    Args:
        level: Either 'warn' or 'verbose'. ``debug`` only prints when the
            level is 'verbose'.

    Raises:
        ValueError: if ``level`` is not one of the supported names.
    """
    global log_level

    if level not in ('warn', 'verbose'):
        # ValueError is still an Exception, so broad handlers keep working.
        # The one-element tuple keeps the formatting correct even when the
        # offending value is itself a tuple.
        raise ValueError('not supported log level[%s]' % (level, ))

    log_level = level
def topologically_sorted(self):
    '''Return the nodes ordered so that every node precedes its children.

    A depth-first search is started from each entry of ``self.nodes``,
    last entry first, and the nodes are emitted in reverse post-order.

    Raises:
        KaffeError: if a cycle is found (the graph is not a DAG).
    '''
    post_order = []  # children are appended before their ancestors
    in_progress = set()  # nodes on the current DFS path
    finished = set()  # nodes whose subtree is fully explored

    def visit(node):
        if node in in_progress:
            raise KaffeError('Graph is not a DAG.')
        if node in finished:
            return
        in_progress.add(node)
        for child in node.children:
            visit(child)
        finished.add(node)
        in_progress.remove(node)
        post_order.append(node)

    for node in reversed(self.nodes):
        visit(node)
    # Appending and reversing once yields the same order as inserting every
    # node at the front, without the quadratic list shifting.
    post_order.reverse()
    return post_order
def __str__(self):
    '''Render the graph as a text table, one row per node in topological order.

    Columns are the node kind, the node name, the shape of the node's first
    data blob and the node's output shape. '--' is shown where a node has
    no data or no computed output shape.
    '''
    row = '{:<20} {:<30} {:>20} {:>20}'
    lines = [row.format('Type', 'Name', 'Param', 'Output'), '-' * 94]
    for node in self.topologically_sorted():
        # If the node has learned parameters, display the first one's shape.
        # In case of convolutions, this corresponds to the weights.
        data_shape = node.data[0].shape if node.data else '--'
        # Only convert real shapes to a tuple: tuple('--') would be
        # rendered as ('-', '-').
        out_shape = tuple(node.output_shape) if node.output_shape else '--'
        # Tuples reject a width/alignment format spec on Python 3, so the
        # shapes are turned into strings before formatting.
        lines.append(
            row.format(node.kind, node.name, str(data_shape), str(out_shape)))
    return '\n'.join(lines)
def filter_layers(self, layers):
    '''Return the layers that are active in ``self.phase``.

    A layer's first ``include`` rule pins it to that rule's phase and its
    first ``exclude`` rule pins it to the opposite phase. Layers without
    either rule belong to every phase. Dropout layers are also dropped when
    converting for the 'test' phase.

    Args:
        layers: iterable of Caffe layer messages.

    Returns:
        A list with the retained layers, in their original order.
    '''
    phase_map = {0: 'train', 1: 'test'}
    seen_names = set()
    kept_layers = []
    for layer in layers:
        phase = self.phase
        if len(layer.include):
            phase = phase_map[layer.include[0].phase]
        if len(layer.exclude):
            # Read the phase from the exclude rule itself. Reading it from
            # ``include`` picks the wrong phase, or fails with an IndexError
            # when the layer has no include rule at all.
            phase = phase_map[1 - layer.exclude[0].phase]
        exclude = (phase != self.phase)
        # Dropout layers appear in a fair number of Caffe
        # test-time networks. These are just ignored. We'll
        # filter them out here.
        if (not exclude) and (phase == 'test'):
            exclude = (layer.type == LayerType.Dropout)
        if not exclude:
            kept_layers.append(layer)
            # Guard against dupes.
            assert layer.name not in seen_names
            seen_names.add(layer.name)
    return kept_layers
+ ''' + nodes = [Node(name, NodeKind.Data) for name in self.params.input] + if len(nodes): + input_dim = map(int, self.params.input_dim) + if not input_dim: + if len(self.params.input_shape) > 0: + input_dim = map(int, self.params.input_shape[0].dim) + else: + raise KaffeError('Dimensions for input not specified.') + for node in nodes: + node.output_shape = tuple(input_dim) + return nodes + + def build(self): + ''' + Builds the graph from the Caffe layer definitions. + ''' + # Get the layers + layers = self.params.layers or self.params.layer + # Filter out phase-excluded layers + layers = self.filter_layers(layers) + # Get any separately-specified input layers + nodes = self.make_input_nodes() + nodes += [self.make_node(layer) for layer in layers] + # Initialize the graph + graph = Graph(nodes=nodes, name=self.params.name) + # Connect the nodes + # + # A note on layers and outputs: + # In Caffe, each layer can produce multiple outputs ("tops") from a set of inputs + # ("bottoms"). The bottoms refer to other layers' tops. The top can rewrite a bottom + # (in case of in-place operations). Note that the layer's name is not used for establishing + # any connectivity. It's only used for data association. By convention, a layer with a + # single top will often use the same name (although this is not required). + # + # The current implementation only supports single-output nodes (note that a node can still + # have multiple children, since multiple child nodes can refer to the single top's name). 
def map(self):
    '''Map every non-input node and hand the result to ``commit``.

    The topologically sorted nodes are split into chains. A node with
    exactly one parent extends the chain that currently ends in that
    parent; any other node starts a new chain. Each chain is mapped with
    ``map_chain`` and the list of mapped chains is passed to ``commit``.

    Returns:
        Whatever ``commit`` returns.
    '''
    # Remove input nodes - we'll handle them separately.
    input_nodes = set(self.graph.get_input_nodes())
    nodes = [
        node for node in self.graph.topologically_sorted()
        if node not in input_nodes
    ]
    # Decompose DAG into chains. A node belongs to exactly one chain, so the
    # chain ending in a given node can be looked up by that tail node instead
    # of scanning every chain.
    chains = []
    chain_by_tail = {}
    for node in nodes:
        chain = None
        if len(node.parents) == 1:
            # Node is part of an existing chain if its parent is a tail.
            chain = chain_by_tail.pop(node.get_only_parent(), None)
        if chain is None:
            # Start a new chain for this node.
            chain = []
            chains.append(chain)
        chain.append(node)
        chain_by_tail[node] = chain
    # Map each chain.
    mapped_chains = [self.map_chain(chain) for chain in chains]
    return self.commit(mapped_chains)
class NodeDispatch(object):
    '''Looks up per-kind handler methods such as ``map_inner_product``.'''

    @staticmethod
    def get_handler_name(node_kind):
        '''Translate a node kind into the snake_case suffix of its handler.'''
        # A catch-all for things like ReLU and tanh
        if len(node_kind) <= 4:
            return node_kind.lower()
        # Convert from CamelCase to under_scored, in two passes.
        with_breaks = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', node_kind)
        with_breaks = re.sub('([a-z0-9])([A-Z])', r'\1_\2', with_breaks)
        return with_breaks.lower()

    def get_handler(self, node_kind, prefix):
        '''Return the attribute named ``<prefix>_<handler name>``.

        Raises NodeDispatchError when this object has no such attribute.
        '''
        handler_name = '_'.join((prefix, self.get_handler_name(node_kind)))
        try:
            handler = getattr(self, handler_name)
        except AttributeError:
            raise NodeDispatchError(
                'No handler found for node kind: %s (expected: %s)' %
                (node_kind, handler_name))
        return handler
@property
def kernel_parameters(self):
    '''Kernel size, stride and padding of a convolution or pooling layer.

    Each value is resolved with ``get_kernel_value``: the explicit
    per-axis field (for example ``kernel_h``) wins, otherwise the shared
    field (for example ``kernel_size``) is consulted, using index 0 for
    the height and index 1 for the width. Stride defaults to 1 and padding
    to 0; a missing kernel size raises ValueError.

    Returns:
        KernelParameters(kernel_h, kernel_w, stride_h, stride_w, pad_h,
        pad_w).
    '''
    assert self.kind in (NodeKind.Convolution, NodeKind.Pooling)
    params = self.parameters
    k_h = self.get_kernel_value(params.kernel_h, params.kernel_size, 0)
    k_w = self.get_kernel_value(params.kernel_w, params.kernel_size, 1)
    s_h = self.get_kernel_value(
        params.stride_h, params.stride, 0, default=1)
    s_w = self.get_kernel_value(
        params.stride_w, params.stride, 1, default=1)
    p_h = self.get_kernel_value(params.pad_h, params.pad, 0, default=0)
    # The width padding must come from pad_w; reading pad_h here silently
    # applies the vertical padding to both axes.
    p_w = self.get_kernel_value(params.pad_w, params.pad, 1, default=0)
    return KernelParameters(k_h, k_w, s_h, s_w, p_h, p_w)
def load(self, data_path, exe=None, place=None, ignore_missing=False):
    '''Load network weights.

    Args:
        data_path: Either a directory holding a model saved by Fluid, or
            the path to the numpy-serialized network weights (a pickled
            dict of ``{layer name: {parameter name: array}}``).
        exe: Executor to use. Required when ``data_path`` is a directory.
        place: Place the tensors are copied to. When ``exe`` or ``place``
            is missing, a CPU place/executor pair is created once and
            cached on ``self.paddle_env``.
        ignore_missing: If true, serialized weights for missing layers or
            variables are ignored.

    Returns:
        True once loading has finished.

    Raises:
        KeyError: a serialized layer is unknown and ``ignore_missing`` is
            false.
        ValueError: a serialized parameter has no variable in the global
            scope and ``ignore_missing`` is false.
    '''
    fluid = import_fluid()
    # load fluid model directly
    if os.path.isdir(data_path):
        assert (exe is not None), \
            'must provide a executor to load fluid model'
        fluid.io.load_persistables_if_exist(executor=exe, dirname=data_path)
        return True

    # load model from a npy file
    if exe is None or place is None:
        if self.paddle_env is None:
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            self.paddle_env = {'place': place, 'exe': exe}
            # Run the startup program, but keep ``exe`` bound to the
            # executor instead of to the value returned by run().
            exe.run(fluid.default_startup_program())
        else:
            place = self.paddle_env['place']
            exe = self.paddle_env['exe']

    data_dict = np.load(data_path).item()
    for op_name in data_dict:
        if op_name not in self.layers:
            if ignore_missing:
                continue
            raise KeyError('Unknown layer in weights file: %s' % op_name)
        # items() works on both Python 2 and Python 3 dictionaries.
        for param_name, data in data_dict[op_name].items():
            name = '%s_%s' % (op_name, param_name)
            try:
                v = fluid.global_scope().find_var(name)
                # NOTE(review): find_var appears to return None for an
                # unknown name - confirm against the Fluid scope API.
                if v is None:
                    raise ValueError(
                        'Variable not found in global scope: %s' % name)
                w = v.get_tensor()
                w.set(data, place)
            except ValueError:
                if not ignore_missing:
                    raise
    return True
@layer
def conv(self,
         input,
         k_h,
         k_w,
         c_o,
         s_h,
         s_w,
         name,
         relu=True,
         padding=None,
         group=1,
         biased=True):
    '''Append a 2-D convolution to the network.

    Args:
        input: Input variable; its shape is read as (N, C, H, W).
        k_h, k_w: Kernel height and width.
        c_o: Number of output channels (filters).
        s_h, s_w: Vertical and horizontal stride.
        name: Layer name. The parameters are named ``<name>_weights`` and
            ``<name>_biases``.
        relu: Apply a ReLU activation when this is exactly True.
        padding: ``[pad_h, pad_w]``; defaults to ``[0, 0]``.
        group: Number of convolution groups.
        biased: Create a bias term only when true.

    Returns:
        The output variable produced by ``fluid.layers.conv2d``.
    '''
    if padding is None:
        padding = [0, 0]

    # Get the number of channels in the input
    c_i, h_i, w_i = input.shape[1:]

    # Verify that the grouping parameter is valid
    assert c_i % group == 0
    assert c_o % group == 0

    fluid = import_fluid()
    prefix = name + '_'
    # Honor ``biased``: bias-free Caffe convolutions must not get a bias
    # parameter, for which no converted weights would exist.
    # NOTE(review): relies on Fluid treating bias_attr=False as "no bias" -
    # confirm for the Fluid version in use.
    if biased:
        bias_attr = fluid.ParamAttr(name=prefix + "biases")
    else:
        bias_attr = False
    output = fluid.layers.conv2d(
        input=input,
        filter_size=[k_h, k_w],
        num_filters=c_o,
        stride=[s_h, s_w],
        padding=padding,
        groups=group,
        param_attr=fluid.ParamAttr(name=prefix + "weights"),
        bias_attr=bias_attr,
        act="relu" if relu is True else None)
    return output
@layer
def fc(self, input, num_out, name, relu=True, act=None):
    '''Append a fully connected layer with ``num_out`` outputs.

    An explicit ``act`` wins; otherwise 'relu' is used when ``relu`` is
    exactly True and no activation is applied in any other case. The
    parameters are named ``<name>_weights`` and ``<name>_biases``.
    '''
    fluid = import_fluid()

    activation = act
    if activation is None and relu is True:
        activation = 'relu'

    weights_attr = fluid.ParamAttr(name=name + '_' + 'weights')
    biases_attr = fluid.ParamAttr(name=name + '_' + 'biases')
    return fluid.layers.fc(
        name=name,
        input=input,
        size=num_out,
        act=activation,
        param_attr=weights_attr,
        bias_attr=biases_attr)
import NodeKind +from ..transformers import (DataInjector, DataReshaper, NodeRenamer, ReLUFuser, + BatchNormScaleBiasFuser, BatchNormPreprocessor, + ParameterNamer) +from . import network + + +def get_padding_type(kernel_params, input_shape, output_shape): + '''Returns Caffe's explicit padding as [p_h, p_w], or None when both are zero. + kernel_params unpacks to (k_h, k_w, s_h, s_w, p_h, p_w); only p_h and p_w + are read here, and input_shape / output_shape are accepted but unused. + A None result means there is no padding to report for this layer. + Padding is reported when EITHER dimension is padded, so an asymmetric + padding such as (1, 0) is not silently dropped. + ''' + k_h, k_w, s_h, s_w, p_h, p_w = kernel_params + if p_h > 0 or p_w > 0: + return [p_h, p_w] + else: + return None + + +class TensorFlowNode(object): + '''An intermediate representation for TensorFlow operations.''' + + def __init__(self, op, *args, **kwargs): + # A string corresponding to the TensorFlow operation + self.op = op + # Positional arguments for the operation + self.args = args + # Keyword arguments for the operation + self.kwargs = list(kwargs.items()) + # The source Caffe node + self.node = None + + def format(self, arg): + '''Returns a string representation for the given value.''' + return "'%s'" % arg if isinstance(arg, basestring) else str(arg) + + def pair(self, key, value): + '''Returns key=formatted(value).''' + return '%s=%s' % (key, self.format(value)) + + def emit(self): + '''Emits the Python source for this node.''' + # Format positional arguments + args = map(self.format, self.args) + # Format any keyword arguments + if self.kwargs: + args += [self.pair(k, v) for k, v in self.kwargs] + # Set the node name + args.append(self.pair('name', self.node.name)) + args = ', '.join(args) + return '%s(%s)' % (self.op, args) + + +class MaybeActivated(object): + def __init__(self, node, default=True): + self.inject_kwargs = {} + if node.metadata.get('relu', False) != default:
self.inject_kwargs['relu'] = not default + + def __call__(self, *args, **kwargs): + kwargs.update(self.inject_kwargs) + return TensorFlowNode(*args, **kwargs) + + +class TensorFlowMapper(NodeMapper): + def get_kernel_params(self, node): + kernel_params = node.layer.kernel_parameters + input_shape = node.get_only_parent().output_shape + padding = get_padding_type(kernel_params, input_shape, + node.output_shape) + # Only emit the padding if it's not the default value. + padding = {'padding': padding} if padding is not None else {} + return (kernel_params, padding) + + def map_convolution(self, node): + (kernel_params, kwargs) = self.get_kernel_params(node) + h = kernel_params.kernel_h + w = kernel_params.kernel_w + c_o = node.output_shape[1] + c_i = node.parents[0].output_shape[1] + group = node.parameters.group + if group != 1: + kwargs['group'] = group + if not node.parameters.bias_term: + kwargs['biased'] = False + assert kernel_params.kernel_h == h + assert kernel_params.kernel_w == w + return MaybeActivated(node)( + 'conv', kernel_params.kernel_h, kernel_params.kernel_w, c_o, + kernel_params.stride_h, kernel_params.stride_w, **kwargs) + + def map_relu(self, node): + return TensorFlowNode('relu') + + def map_pooling(self, node): + pool_type = node.parameters.pool + if pool_type == 0: + pool_op = 'max_pool' + elif pool_type == 1: + pool_op = 'avg_pool' + else: + # Stochastic pooling, for instance. 
+ raise KaffeError('Unsupported pooling type.') + (kernel_params, padding) = self.get_kernel_params(node) + return TensorFlowNode(pool_op, kernel_params.kernel_h, + kernel_params.kernel_w, kernel_params.stride_h, + kernel_params.stride_w, **padding) + + def map_inner_product(self, node): + #TODO: Axis + assert node.parameters.axis == 1 + #TODO: Unbiased + assert node.parameters.bias_term == True + return MaybeActivated(node)('fc', node.parameters.num_output) + + def map_softmax(self, node): + return TensorFlowNode('softmax') + + def map_lrn(self, node): + params = node.parameters + # The window size must be an odd value. For a window + # size of (2*n+1), TensorFlow defines depth_radius = n. + assert params.local_size % 2 == 1 + # Caffe scales by (alpha/(2*n+1)), whereas TensorFlow + # just scales by alpha (as does Krizhevsky's paper). + # We'll account for that here. + alpha = params.alpha / float(params.local_size) + return TensorFlowNode('lrn', + int(params.local_size / 2), alpha, params.beta) + + def map_concat(self, node): + return TensorFlowNode('concat', node.parameters.axis) + + def map_dropout(self, node): + return TensorFlowNode('dropout', node.parameters.dropout_ratio) + + def map_batch_norm(self, node): + scale_offset = len(node.data) == 4 + kwargs = {} if scale_offset else {'scale_offset': False} + return MaybeActivated( + node, default=False)('batch_normalization', **kwargs) + + def map_eltwise(self, node): + operations = {0: 'multiply', 1: 'add', 2: 'max'} + op_code = node.parameters.operation + try: + return TensorFlowNode(operations[op_code]) + except KeyError: + raise KaffeError('Unknown elementwise operation: {}'.format( + op_code)) + + def commit(self, chains): + return chains + + +class TensorFlowEmitter(object): + def __init__(self, tab=None): + self.tab = tab or ' ' * 4 + self.prefix = '' + self.net_name = '' + + def indent(self): + self.prefix += self.tab + + def outdent(self): + self.prefix = self.prefix[:-len(self.tab)] + + def 
statement(self, s): + return self.prefix + s + '\n' + + def emit_imports(self): + import inspect + codes = [] + codes.append( + '### generated by caffe2fluid, your net is in class "%s" ###\n' % + (self.net_name)) + network_source = inspect.getsource(network) + codes.append(network_source + '\n') + return self.statement('\n'.join(codes)) + + def emit_class_def(self, name): + return self.statement('class %s(Network):' % (name)) + + def emit_setup_def(self): + return self.statement('def setup(self):') + + def emit_convert_def(self, input_nodes): + def data_layer_def(name, shape, dtype=None): + if dtype is None: + dtype = 'float32' + + layer_var = name + '_layer' + shape = [str(s) for s in shape[1:]] + layer_def = '%s = fluid.layers.data(name="%s", shape=[%s], dtype="%s")'\ + % (layer_var, name, ','.join(shape), dtype) + return layer_var, layer_def + + codes = [] + inputs = {} + for n in input_nodes: + name = n.name + layer_var, layer_def = data_layer_def(n.name, n.output_shape) + codes.append(layer_def) + inputs[name] = layer_var + + input_dict = ','.join(['"%s": %s' % (n, l) for n, l in inputs.items()]) + + codes.append('feed_data = {' + input_dict + '}') + codes.append('net = cls(feed_data)') + + codes.append("place = fluid.CPUPlace()") + codes.append("exe = fluid.Executor(place)") + codes.append("exe.run(fluid.default_startup_program())") + codes.append("net.load(data_path=npy_model, exe=exe, place=place)") + codes.append( + "fluid.io.save_persistables(executor=exe, dirname=fluid_path)") + + self.outdent() + func_def = self.statement('@classmethod') + func_def += self.statement('def convert(cls, npy_model, fluid_path):') + self.indent() + func_def += self.statement('import paddle.v2.fluid as fluid') + for l in codes: + func_def += self.statement(l) + return '\n\n' + func_def + + def emit_main_def(self, name): + if name is None: + return '' + + self.prefix = '' + main_def = self.statement('if __name__ == "__main__":') + self.indent() + main_def += 
self.statement("#usage: python xxxnet.py xxx.npy ./model\n") + main_def += self.statement("import sys") + main_def += self.statement("npy_weight = sys.argv[1]") + main_def += self.statement("fluid_model = sys.argv[2]") + main_def += self.statement("%s.convert(npy_weight, fluid_model)" % + (name)) + main_def += self.statement("exit(0)") + return '\n\n' + main_def + + def emit_parents(self, chain): + assert len(chain) + s = 'self.feed(' + sep = ', \n' + self.prefix + (' ' * len(s)) + s += sep.join( + ["'%s'" % parent.name for parent in chain[0].node.parents]) + return self.statement(s + ')') + + def emit_node(self, node): + return self.statement('self.' + node.emit()) + + def emit(self, name, chains, input_nodes=None): + self.net_name = name + s = self.emit_imports() + s += self.emit_class_def(name) + self.indent() + s += self.emit_setup_def() + self.indent() + blocks = [] + for chain in chains: + b = '' + b += self.emit_parents(chain) + for node in chain: + b += self.emit_node(node) + blocks.append(b[:-1]) + s = s + '\n\n'.join(blocks) + s += self.emit_convert_def(input_nodes) + s += self.emit_main_def(name) + return s + + +class Transformer(object): + def __init__(self, def_path, data_path, verbose=True, phase='test'): + self.verbose = verbose + self.phase = phase + self.load(def_path, data_path, phase) + self.params = None + self.source = None + + def load(self, def_path, data_path, phase): + # Build the graph + graph = GraphBuilder(def_path, phase).build() + + if data_path is not None: + # Load and associate learned parameters + graph = DataInjector(def_path, data_path)(graph) + + # Transform the graph + transformers = [ + # Fuse split batch normalization layers + BatchNormScaleBiasFuser(), + + # Fuse ReLUs + # TODO: Move non-linearity application to layer wrapper, allowing + # any arbitrary operation to be optionally activated. 
+ ReLUFuser(allowed_parent_types=[ + NodeKind.Convolution, NodeKind.InnerProduct, NodeKind.BatchNorm + ]), + + # Rename nodes + # Slashes are used for scoping in TensorFlow. Replace slashes + # in node names with underscores. + # (Caffe's GoogLeNet implementation uses slashes) + NodeRenamer(lambda node: node.name.replace('/', '_')) + ] + self.graph = graph.transformed(transformers) + + # Display the graph + if self.verbose: + print_stderr(self.graph) + + def transform_data(self): + if self.params is None: + transformers = [ + # Reshape the parameters to TensorFlow's ordering + DataReshaper({ + # (c_o, c_i, h, w) -> (h, w, c_i, c_o) for TF + NodeKind.Convolution: (0, 1, 2, 3), + + # (c_o, c_i) -> (c_i, c_o) + NodeKind.InnerProduct: (1, 0) + }), + + # Pre-process batch normalization data + BatchNormPreprocessor(), + + # Convert parameters to dictionaries + ParameterNamer(), + ] + self.graph = self.graph.transformed(transformers) + self.params = { + node.name: node.data + for node in self.graph.nodes if node.data + } + return self.params + + def transform_source(self): + if self.source is None: + mapper = TensorFlowMapper(self.graph) + chains = mapper.map() + emitter = TensorFlowEmitter() + input_nodes = self.graph.get_input_nodes() + self.source = emitter.emit(self.graph.name, chains, input_nodes) + return self.source diff --git a/fluid/image_classification/caffe2fluid/kaffe/shapes.py b/fluid/image_classification/caffe2fluid/kaffe/shapes.py new file mode 100644 index 0000000000000000000000000000000000000000..e8124730c66eaecb85f7aff58e08f6dc16668343 --- /dev/null +++ b/fluid/image_classification/caffe2fluid/kaffe/shapes.py @@ -0,0 +1,88 @@ +import math +from collections import namedtuple + +from .errors import KaffeError + +TensorShape = namedtuple('TensorShape', + ['batch_size', 'channels', 'height', 'width']) + + +def get_filter_output_shape(i_h, i_w, params, round_func): + o_h = (i_h + 2 * params.pad_h - params.kernel_h + ) / float(params.stride_h) + 1 + o_w = (i_w 
+ 2 * params.pad_w - params.kernel_w + ) / float(params.stride_w) + 1 + return (int(round_func(o_h)), int(round_func(o_w))) + + +def get_strided_kernel_output_shape(node, round_func): + assert node.layer is not None + input_shape = node.get_only_parent().output_shape + o_h, o_w = get_filter_output_shape(input_shape.height, input_shape.width, + node.layer.kernel_parameters, round_func) + params = node.layer.parameters + has_c_o = hasattr(params, 'num_output') + c = params.num_output if has_c_o else input_shape.channels + return TensorShape(input_shape.batch_size, c, o_h, o_w) + + +def shape_not_implemented(node): + raise NotImplementedError + + +def shape_identity(node): + assert len(node.parents) > 0 + return node.parents[0].output_shape + + +def shape_scalar(node): + return TensorShape(1, 1, 1, 1) + + +def shape_data(node): + if node.output_shape: + # Old-style input specification + return node.output_shape + try: + # New-style input specification + return map(int, node.parameters.shape[0].dim) + except: + # We most likely have a data layer on our hands. The problem is, + # Caffe infers the dimensions of the data from the source (eg: LMDB). + # We want to avoid reading datasets here. Fail for now. + # This can be temporarily fixed by transforming the data layer to + # Caffe's "input" layer (as is usually used in the "deploy" version). + # TODO: Find a better solution for this. 
+ raise KaffeError('Cannot determine dimensions of data layer.\n' + 'See comments in function shape_data for more info.') + + +def shape_mem_data(node): + params = node.parameters + return TensorShape(params.batch_size, params.channels, params.height, + params.width) + + +def shape_concat(node): + axis = node.layer.parameters.axis + output_shape = None + for parent in node.parents: + if output_shape is None: + output_shape = list(parent.output_shape) + else: + output_shape[axis] += parent.output_shape[axis] + return tuple(output_shape) + + +def shape_convolution(node): + return get_strided_kernel_output_shape(node, math.floor) + + +def shape_pool(node): + return get_strided_kernel_output_shape(node, math.ceil) + + +def shape_inner_product(node): + input_shape = node.get_only_parent().output_shape + return TensorShape(input_shape.batch_size, node.layer.parameters.num_output, + 1, 1) diff --git a/fluid/image_classification/caffe2fluid/kaffe/transformers.py b/fluid/image_classification/caffe2fluid/kaffe/transformers.py new file mode 100644 index 0000000000000000000000000000000000000000..9d300ca9c90672c3f3a3dbf7a14e48db6bb48f70 --- /dev/null +++ b/fluid/image_classification/caffe2fluid/kaffe/transformers.py @@ -0,0 +1,303 @@ +''' +A collection of graph transforms. + +A transformer is a callable that accepts a graph and returns a transformed version. +''' +import os +import numpy as np + +from .caffe import get_caffe_resolver, has_pycaffe +from .errors import KaffeError, debug, notice, warn +from .layers import NodeKind + + +class DataInjector(object): + ''' + Associates parameters loaded from a .caffemodel file with their corresponding nodes. 
+ ''' + + def __init__(self, def_path, data_path): + # The .prototxt file defining the graph + self.def_path = def_path + # The .caffemodel file containing the learned parameters + self.data_path = data_path + # Set to true if the fallback protocol-buffer based backend was used + self.did_use_pb = False + # A list containing (layer name, parameters) tuples + self.params = None + # Load the parameters + self.load() + + def load(self): + if has_pycaffe(): + self.load_using_caffe() + else: + self.load_using_pb() + + def load_using_caffe(self): + caffe = get_caffe_resolver().caffe + net = caffe.Net(self.def_path, self.data_path, caffe.TEST) + data = lambda blob: blob.data + self.params = [(k, map(data, v)) for k, v in net.params.items()] + + def load_using_pb(self): + data = get_caffe_resolver().NetParameter() + # Read through a context manager so the model file handle is closed + # as soon as its bytes have been parsed. + with open(self.data_path, 'rb') as model_file: + data.MergeFromString(model_file.read()) + pair = lambda layer: (layer.name, self.normalize_pb_data(layer)) + layers = data.layers or data.layer + self.params = [pair(layer) for layer in layers if layer.blobs] + self.did_use_pb = True + + def normalize_pb_data(self, layer): + transformed = [] + for blob in layer.blobs: + if len(blob.shape.dim): + dims = blob.shape.dim + c_o, c_i, h, w = map(int, [1] * (4 - len(dims)) + list(dims)) + else: + c_o = blob.num + c_i = blob.channels + h = blob.height + w = blob.width + data = np.array(blob.data, dtype=np.float32).reshape(c_o, c_i, h, w) + transformed.append(data) + return transformed + + def adjust_parameters(self, node, data): + if not self.did_use_pb: + return data + # When using the protobuf-backend, each parameter initially has four dimensions. + # In certain cases (like FC layers), we want to eliminate the singleton dimensions. + # This implementation takes care of the common cases. However, it does leave the + # potential for future issues. + # The Caffe-backend does not suffer from this problem. + data = list(data) + squeeze_indices = [1] # Squeeze biases.
+ if node.kind == NodeKind.InnerProduct: + squeeze_indices.append(0) # Squeeze FC. + + for idx in squeeze_indices: + if idx >= len(data): + continue + + shape_old = data[idx].shape + data[idx] = np.squeeze(data[idx]) + shape_new = data[idx].shape + if len(shape_old) != len(shape_new): # log only when the squeeze actually dropped a dimension + debug('squeeze idx:%d, with kind:%s,name:%s' % \ + (idx, node.kind, node.name)) + return data + + def __call__(self, graph): + for layer_name, data in self.params: + if layer_name in graph: + node = graph.get_node(layer_name) + node.data = self.adjust_parameters(node, data) + else: + notice('Ignoring parameters for non-existent layer: %s' % \ + layer_name) + return graph + + +class DataReshaper(object): + def __init__(self, mapping, replace=True): + # A dictionary mapping NodeKind to the transposed order. + self.mapping = mapping + # The node kinds eligible for reshaping + self.reshaped_node_types = self.mapping.keys() + # If true, the reshaped data will replace the old one. + # Otherwise, it's set to the reshaped_data attribute. + self.replace = replace + + def has_spatial_parent(self, node): + try: + parent = node.get_only_parent() + s = parent.output_shape + return s.height > 1 or s.width > 1 + except KaffeError: + return False + + def map(self, node_kind): + try: + return self.mapping[node_kind] + except KeyError: + raise + #raise KaffeError('Ordering not found for node kind: {}'.format(node_kind)) + + def __call__(self, graph): + for node in graph.nodes: + if node.data is None: + continue + if node.kind not in self.reshaped_node_types: + # Check for 2+ dimensional data + if any(len(tensor.shape) > 1 for tensor in node.data): + notice('parmaters not reshaped for node: {}'.format(node)) + continue + transpose_order = self.map(node.kind) + weights = node.data[0] + if (node.kind == NodeKind.InnerProduct + ) and self.has_spatial_parent(node): + # The FC layer connected to the spatial layer needs to be + # re-wired to match the new spatial ordering.
+ in_shape = node.get_only_parent().output_shape + fc_shape = weights.shape + output_channels = fc_shape[0] + weights = weights.reshape((output_channels, -1)) + weights = weights.transpose(transpose_order) + node.reshaped_data = weights + else: + node.reshaped_data = weights.transpose(transpose_order) + + if self.replace: + for node in graph.nodes: + if hasattr(node, 'reshaped_data'): + # Set the weights + node.data[0] = node.reshaped_data + del node.reshaped_data + return graph + + +class SubNodeFuser(object): + ''' + An abstract helper for merging a single-child with its single-parent. + ''' + + def __call__(self, graph): + nodes = graph.nodes + fused_nodes = [] + for node in nodes: + if len(node.parents) != 1: + # We're only fusing nodes with single parents + continue + parent = node.get_only_parent() + if len(parent.children) != 1: + # We can only fuse a node if its parent's + # value isn't used by any other node. + continue + if not self.is_eligible_pair(parent, node): + continue + # Rewrite the fused node's children to its parent. + for child in node.children: + child.parents.remove(node) + parent.add_child(child) + # Disconnect the fused node from the graph. + parent.children.remove(node) + fused_nodes.append(node) + # Let the sub-class merge the fused node in any arbitrary way. + self.merge(parent, node) + transformed_nodes = [node for node in nodes if node not in fused_nodes] + return graph.replaced(transformed_nodes) + + def is_eligible_pair(self, parent, child): + '''Returns true if this parent/child pair is eligible for fusion.''' + raise NotImplementedError('Must be implemented by subclass.') + + def merge(self, parent, child): + '''Merge the child node into the parent.''' + raise NotImplementedError('Must be implemented by subclass') + + +class ReLUFuser(SubNodeFuser): + ''' + Fuses rectified linear units with their parent nodes. + ''' + + def __init__(self, allowed_parent_types=None): + # Fuse ReLUs when the parent node is one of the given types. 
+ # If None, all node types are eligible. + self.allowed_parent_types = allowed_parent_types + + def is_eligible_pair(self, parent, child): + return ((self.allowed_parent_types is None or \ + parent.kind in self.allowed_parent_types) and \ + child.kind == NodeKind.ReLU) + + def merge(self, parent, _): + parent.metadata['relu'] = True + + +class BatchNormScaleBiasFuser(SubNodeFuser): + ''' + The original batch normalization paper includes two learned + parameters: a scaling factor gamma and a bias beta. + Caffe's implementation does not include these two. However, it is commonly + replicated by adding a scaling+bias layer immediately after the batch norm. + + This fuser merges the scaling+bias layer with the batch norm. + ''' + + def is_eligible_pair(self, parent, child): + return (parent.kind == NodeKind.BatchNorm and \ + child.kind == NodeKind.Scale and \ + child.parameters.axis == 1 and \ + child.parameters.bias_term == True) + + def merge(self, parent, child): + parent.scale_bias_node = child + + +class BatchNormPreprocessor(object): + ''' + Prescale batch normalization parameters. + Concatenate gamma (scale) and beta (bias) terms if set. + ''' + + def __call__(self, graph): + for node in graph.nodes: + if node.kind != NodeKind.BatchNorm: + continue + assert node.data is not None + assert len(node.data) == 3 + node.data = [np.squeeze(i) for i in node.data] + mean, variance, scale = node.data + # Prescale the stats + scaling_factor = 1.0 / scale if scale != 0 else 0 + mean *= scaling_factor + variance *= scaling_factor + # Replace with the updated values + node.data = [mean, variance] + if hasattr(node, 'scale_bias_node'): + # Include the scale and bias terms + gamma, beta = node.scale_bias_node.data + node.data += [np.squeeze(i) for i in [gamma, beta]] + return graph + + +class NodeRenamer(object): + ''' + Renames nodes in the graph using a given unary function that + accepts a node and returns its new name.
+ ''' + + def __init__(self, renamer): + self.renamer = renamer + + def __call__(self, graph): + for node in graph.nodes: + node.name = self.renamer(node) + return graph + + +class ParameterNamer(object): + ''' + Convert layer data arrays to a dictionary mapping parameter names to their values. + ''' + + def __call__(self, graph): + for node in graph.nodes: + if node.data is None: + continue + if node.kind in (NodeKind.Convolution, NodeKind.InnerProduct): + names = ('weights', ) + if node.parameters.bias_term: + names += ('biases', ) + elif node.kind == NodeKind.BatchNorm: + names = ('mean', 'variance') + if len(node.data) == 4: + names += ('scale', 'offset') + else: + warn('Unhandled parameters: {}'.format(node.kind)) + continue + assert len(names) == len(node.data) + node.data = dict(zip(names, node.data)) + return graph diff --git a/fluid/image_classification/caffe2fluid/proto/caffe.proto b/fluid/image_classification/caffe2fluid/proto/caffe.proto new file mode 100644 index 0000000000000000000000000000000000000000..18eb5ca6491cbc59297c36854ddbd2a46ebfab9e --- /dev/null +++ b/fluid/image_classification/caffe2fluid/proto/caffe.proto @@ -0,0 +1,1411 @@ +syntax = "proto2"; + +package caffe; + +// Specifies the shape (dimensions) of a Blob. +message BlobShape { repeated int64 dim = 1 [ packed = true ]; } + +message BlobProto { + optional BlobShape shape = 7; + repeated float data = 5 [ packed = true ]; + repeated float diff = 6 [ packed = true ]; + repeated double double_data = 8 [ packed = true ]; + repeated double double_diff = 9 [ packed = true ]; + + // 4D dimensions -- deprecated. Use "shape" instead. + optional int32 num = 1 [ default = 0 ]; + optional int32 channels = 2 [ default = 0 ]; + optional int32 height = 3 [ default = 0 ]; + optional int32 width = 4 [ default = 0 ]; +} + +// The BlobProtoVector is simply a way to pass multiple blobproto instances +// around. 
+message BlobProtoVector { repeated BlobProto blobs = 1; } + +message Datum { + optional int32 channels = 1; + optional int32 height = 2; + optional int32 width = 3; + // the actual image data, in bytes + optional bytes data = 4; + optional int32 label = 5; + // Optionally, the datum could also hold float data. + repeated float float_data = 6; + // If true data contains an encoded image that need to be decoded + optional bool encoded = 7 [ default = false ]; +} + +message FillerParameter { + // The filler type. + optional string type = 1 [ default = 'constant' ]; + optional float value = 2 [ default = 0 ]; // the value in constant filler + optional float min = 3 [ default = 0 ]; // the min value in uniform filler + optional float max = 4 [ default = 1 ]; // the max value in uniform filler + optional float mean = 5 [ default = 0 ]; // the mean value in Gaussian filler + optional float std = 6 [ default = 1 ]; // the std value in Gaussian filler + // The expected number of non-zero output weights for a given input in + // Gaussian filler -- the default -1 means don't perform sparsification. + optional int32 sparse = 7 [ default = -1 ]; + // Normalize the filler variance by fan_in, fan_out, or their average. + // Applies to 'xavier' and 'msra' fillers. + enum VarianceNorm { + FAN_IN = 0; + FAN_OUT = 1; + AVERAGE = 2; + } + optional VarianceNorm variance_norm = 8 [ default = FAN_IN ]; +} + +message NetParameter { + optional string name = 1; // consider giving the network a name + // DEPRECATED. See InputParameter. The input blobs to the network. + repeated string input = 3; + // DEPRECATED. See InputParameter. The shape of the input blobs. + repeated BlobShape input_shape = 8; + + // 4D input dimensions -- deprecated. Use "input_shape" instead. + // If specified, for each input blob there should be four + // values specifying the num, channels, height and width of the input blob. + // Thus, there should be a total of (4 * #input) numbers. 
+ repeated int32 input_dim = 4; + + // Whether the network will force every layer to carry out backward operation. + // If set False, then whether to carry out backward is determined + // automatically according to the net structure and learning rates. + optional bool force_backward = 5 [ default = false ]; + // The current "state" of the network, including the phase, level, and stage. + // Some layers may be included/excluded depending on this state and the states + // specified in the layers' include and exclude fields. + optional NetState state = 6; + + // Print debugging information about results while running Net::Forward, + // Net::Backward, and Net::Update. + optional bool debug_info = 7 [ default = false ]; + + // The layers that make up the net. Each of their configurations, including + // connectivity and behavior, is specified as a LayerParameter. + repeated LayerParameter layer = 100; // ID 100 so layers are printed last. + + // DEPRECATED: use 'layer' instead. + repeated V1LayerParameter layers = 2; +} + +// NOTE +// Update the next available ID when you add a new SolverParameter field. +// +// SolverParameter next available ID: 42 (last added: layer_wise_reduce) +message SolverParameter { + ////////////////////////////////////////////////////////////////////////////// + // Specifying the train and test networks + // + // Exactly one train net must be specified using one of the following fields: + // train_net_param, train_net, net_param, net + // One or more test nets may be specified using any of the following fields: + // test_net_param, test_net, net_param, net + // If more than one test net field is specified (e.g., both net and + // test_net are specified), they will be evaluated in the field order given + // above: (1) test_net_param, (2) test_net, (3) net_param/net. + // A test_iter must be specified for each test_net. + // A test_level and/or a test_stage may also be specified for each test_net. 
+ ////////////////////////////////////////////////////////////////////////////// + + // Proto filename for the train net, possibly combined with one or more + // test nets. + optional string net = 24; + // Inline train net param, possibly combined with one or more test nets. + optional NetParameter net_param = 25; + + optional string train_net = 1; // Proto filename for the train net. + repeated string test_net = 2; // Proto filenames for the test nets. + optional NetParameter train_net_param = 21; // Inline train net params. + repeated NetParameter test_net_param = 22; // Inline test net params. + + // The states for the train/test nets. Must be unspecified or + // specified once per net. + // + // By default, train_state will have phase = TRAIN, + // and all test_state's will have phase = TEST. + // Other defaults are set according to the NetState defaults. + optional NetState train_state = 26; + repeated NetState test_state = 27; + + // The number of iterations for each test net. + repeated int32 test_iter = 3; + + // The number of iterations between two testing phases. + optional int32 test_interval = 4 [ default = 0 ]; + optional bool test_compute_loss = 19 [ default = false ]; + // If true, run an initial test pass before the first iteration, + // ensuring memory availability and printing the starting value of the loss. + optional bool test_initialization = 32 [ default = true ]; + optional float base_lr = 5; // The base learning rate + // the number of iterations between displaying info. If display = 0, no info + // will be displayed. + optional int32 display = 6; + // Display the loss averaged over the last average_loss iterations + optional int32 average_loss = 33 [ default = 1 ]; + optional int32 max_iter = 7; // the maximum number of iterations + // accumulate gradients over `iter_size` x `batch_size` instances + optional int32 iter_size = 36 [ default = 1 ]; + + // The learning rate decay policy. 
The currently implemented learning rate + // policies are as follows: + // - fixed: always return base_lr. + // - step: return base_lr * gamma ^ (floor(iter / step)) + // - exp: return base_lr * gamma ^ iter + // - inv: return base_lr * (1 + gamma * iter) ^ (- power) + // - multistep: similar to step but it allows non uniform steps defined by + // stepvalue + // - poly: the effective learning rate follows a polynomial decay, to be + // zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power) + // - sigmoid: the effective learning rate follows a sigmod decay + // return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize)))) + // + // where base_lr, max_iter, gamma, step, stepvalue and power are defined + // in the solver parameter protocol buffer, and iter is the current iteration. + optional string lr_policy = 8; + optional float gamma = 9; // The parameter to compute the learning rate. + optional float power = 10; // The parameter to compute the learning rate. + optional float momentum = 11; // The momentum value. + optional float weight_decay = 12; // The weight decay. + // regularization types supported: L1 and L2 + // controlled by weight_decay + optional string regularization_type = 29 [ default = "L2" ]; + // the stepsize for learning rate policy "step" + optional int32 stepsize = 13; + // the stepsize for learning rate policy "multistep" + repeated int32 stepvalue = 34; + + // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm, + // whenever their actual L2 norm is larger. + optional float clip_gradients = 35 [ default = -1 ]; + + optional int32 snapshot = 14 [ default = 0 ]; // The snapshot interval + optional string snapshot_prefix = 15; // The prefix for the snapshot. + // whether to snapshot diff in the results or not. Snapshotting diff will help + // debugging but the final protocol buffer size will be much larger. 
+ optional bool snapshot_diff = 16 [ default = false ]; + enum SnapshotFormat { + HDF5 = 0; + BINARYPROTO = 1; + } + optional SnapshotFormat snapshot_format = 37 [ default = BINARYPROTO ]; + // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default. + enum SolverMode { + CPU = 0; + GPU = 1; + } + optional SolverMode solver_mode = 17 [ default = GPU ]; + // the device_id will that be used in GPU mode. Use device_id = 0 in default. + optional int32 device_id = 18 [ default = 0 ]; + // If non-negative, the seed with which the Solver will initialize the Caffe + // random number generator -- useful for reproducible results. Otherwise, + // (and by default) initialize using a seed derived from the system clock. + optional int64 random_seed = 20 [ default = -1 ]; + + // type of the solver + optional string type = 40 [ default = "SGD" ]; + + // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam + optional float delta = 31 [ default = 1e-8 ]; + // parameters for the Adam solver + optional float momentum2 = 39 [ default = 0.999 ]; + + // RMSProp decay value + // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t) + optional float rms_decay = 38 [ default = 0.99 ]; + + // If true, print information about the state of the net that may help with + // debugging learning problems. + optional bool debug_info = 23 [ default = false ]; + + // If false, don't save a snapshot after training finishes. 
+ optional bool snapshot_after_train = 28 [ default = true ]; + + // DEPRECATED: old solver enum types, use string instead + enum SolverType { + SGD = 0; + NESTEROV = 1; + ADAGRAD = 2; + RMSPROP = 3; + ADADELTA = 4; + ADAM = 5; + } + // DEPRECATED: use type instead of solver_type + optional SolverType solver_type = 30 [ default = SGD ]; + + // Overlap compute and communication for data parallel training + optional bool layer_wise_reduce = 41 [ default = true ]; +} + +// A message that stores the solver snapshots +message SolverState { + optional int32 iter = 1; // The current iteration + optional string learned_net = 2; // The file that stores the learned net. + repeated BlobProto history = 3; // The history for sgd solvers + optional int32 current_step = 4 + [ default = 0 ]; // The current step for learning rate +} + +enum Phase { + TRAIN = 0; + TEST = 1; +} + +message NetState { + optional Phase phase = 1 [ default = TEST ]; + optional int32 level = 2 [ default = 0 ]; + repeated string stage = 3; +} + +message NetStateRule { + // Set phase to require the NetState have a particular phase (TRAIN or TEST) + // to meet this rule. + optional Phase phase = 1; + + // Set the minimum and/or maximum levels in which the layer should be used. + // Leave undefined to meet the rule regardless of level. + optional int32 min_level = 2; + optional int32 max_level = 3; + + // Customizable sets of stages to include or exclude. + // The net must have ALL of the specified stages and NONE of the specified + // "not_stage"s to meet the rule. + // (Use multiple NetStateRules to specify conjunctions of stages.) + repeated string stage = 4; + repeated string not_stage = 5; +} + +// Specifies training parameters (multipliers on global learning constants, +// and the name and other settings used for weight sharing). +message ParamSpec { + // The names of the parameter blobs -- useful for sharing parameters among + // layers, but never required otherwise. 
To share a parameter between two + // layers, give it a (non-empty) name. + optional string name = 1; + + // Whether to require shared weights to have the same shape, or just the same + // count -- defaults to STRICT if unspecified. + optional DimCheckMode share_mode = 2; + enum DimCheckMode { + // STRICT (default) requires that num, channels, height, width each match. + STRICT = 0; + // PERMISSIVE requires only the count (num*channels*height*width) to match. + PERMISSIVE = 1; + } + + // The multiplier on the global learning rate for this parameter. + optional float lr_mult = 3 [ default = 1.0 ]; + + // The multiplier on the global weight decay for this parameter. + optional float decay_mult = 4 [ default = 1.0 ]; +} + +// NOTE +// Update the next available ID when you add a new LayerParameter field. +// +// LayerParameter next available layer-specific ID: 147 (last added: +// recurrent_param) +message LayerParameter { + optional string name = 1; // the layer name + optional string type = 2; // the layer type + repeated string bottom = 3; // the name of each bottom blob + repeated string top = 4; // the name of each top blob + + // The train / test phase for computation. + optional Phase phase = 10; + + // The amount of weight to assign each top blob in the objective. + // Each layer assigns a default value, usually of either 0 or 1, + // to each top blob. + repeated float loss_weight = 5; + + // Specifies training parameters (multipliers on global learning constants, + // and the name and other settings used for weight sharing). + repeated ParamSpec param = 6; + + // The blobs containing the numeric parameters of the layer. + repeated BlobProto blobs = 7; + + // Specifies whether to backpropagate to each bottom. If unspecified, + // Caffe will automatically infer whether each input needs backpropagation + // to compute parameter gradients. 
If set to true for some inputs, + // backpropagation to those inputs is forced; if set false for some inputs, + // backpropagation to those inputs is skipped. + // + // The size must be either 0 or equal to the number of bottoms. + repeated bool propagate_down = 11; + + // Rules controlling whether and when a layer is included in the network, + // based on the current NetState. You may specify a non-zero number of rules + // to include OR exclude, but not both. If no include or exclude rules are + // specified, the layer is always included. If the current NetState meets + // ANY (i.e., one or more) of the specified rules, the layer is + // included/excluded. + repeated NetStateRule include = 8; + repeated NetStateRule exclude = 9; + + // Parameters for data pre-processing. + optional TransformationParameter transform_param = 100; + + // Parameters shared by loss layers. + optional LossParameter loss_param = 101; + + // Layer type-specific parameters. + // + // Note: certain layers may have more than one computational engine + // for their implementation. These layers include an Engine type and + // engine parameter for selecting the implementation. + // The default for the engine is set by the ENGINE switch at compile-time. 
+ optional AccuracyParameter accuracy_param = 102; + optional ArgMaxParameter argmax_param = 103; + optional BatchNormParameter batch_norm_param = 139; + optional BiasParameter bias_param = 141; + optional ConcatParameter concat_param = 104; + optional ContrastiveLossParameter contrastive_loss_param = 105; + optional ConvolutionParameter convolution_param = 106; + optional CropParameter crop_param = 144; + optional DataParameter data_param = 107; + optional DropoutParameter dropout_param = 108; + optional DummyDataParameter dummy_data_param = 109; + optional EltwiseParameter eltwise_param = 110; + optional ELUParameter elu_param = 140; + optional EmbedParameter embed_param = 137; + optional ExpParameter exp_param = 111; + optional FlattenParameter flatten_param = 135; + optional HDF5DataParameter hdf5_data_param = 112; + optional HDF5OutputParameter hdf5_output_param = 113; + optional HingeLossParameter hinge_loss_param = 114; + optional ImageDataParameter image_data_param = 115; + optional InfogainLossParameter infogain_loss_param = 116; + optional InnerProductParameter inner_product_param = 117; + optional InputParameter input_param = 143; + optional LogParameter log_param = 134; + optional LRNParameter lrn_param = 118; + optional MemoryDataParameter memory_data_param = 119; + optional MVNParameter mvn_param = 120; + optional ParameterParameter parameter_param = 145; + optional PoolingParameter pooling_param = 121; + optional PowerParameter power_param = 122; + optional PReLUParameter prelu_param = 131; + optional PythonParameter python_param = 130; + optional RecurrentParameter recurrent_param = 146; + optional ReductionParameter reduction_param = 136; + optional ReLUParameter relu_param = 123; + optional ReshapeParameter reshape_param = 133; + optional ScaleParameter scale_param = 142; + optional SigmoidParameter sigmoid_param = 124; + optional SoftmaxParameter softmax_param = 125; + optional SPPParameter spp_param = 132; + optional SliceParameter slice_param = 
126; + optional TanHParameter tanh_param = 127; + optional ThresholdParameter threshold_param = 128; + optional TileParameter tile_param = 138; + optional WindowDataParameter window_data_param = 129; +} + +// Message that stores parameters used to apply transformation +// to the data layer's data +message TransformationParameter { + // For data pre-processing, we can do simple scaling and subtracting the + // data mean, if provided. Note that the mean subtraction is always carried + // out before scaling. + optional float scale = 1 [ default = 1 ]; + // Specify if we want to randomly mirror data. + optional bool mirror = 2 [ default = false ]; + // Specify if we would like to randomly crop an image. + optional uint32 crop_size = 3 [ default = 0 ]; + // mean_file and mean_value cannot be specified at the same time + optional string mean_file = 4; + // if specified can be repeated once (would subtract it from all the channels) + // or can be repeated the same number of times as channels + // (would subtract them from the corresponding channel) + repeated float mean_value = 5; + // Force the decoded image to have 3 color channels. + optional bool force_color = 6 [ default = false ]; + // Force the decoded image to have 1 color channels. + optional bool force_gray = 7 [ default = false ]; +} + +// Message that stores parameters shared by loss layers +message LossParameter { + // If specified, ignore instances with the given label. + optional int32 ignore_label = 1; + // How to normalize the loss for loss layers that aggregate across batches, + // spatial dimensions, or other dimensions. Currently only implemented in + // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers. + enum NormalizationMode { + // Divide by the number of examples in the batch times spatial dimensions. + // Outputs that receive the ignore label will NOT be ignored in computing + // the normalization factor. 
+ FULL = 0; + // Divide by the total number of output locations that do not take the + // ignore_label. If ignore_label is not set, this behaves like FULL. + VALID = 1; + // Divide by the batch size. + BATCH_SIZE = 2; + // Do not normalize the loss. + NONE = 3; + } + // For historical reasons, the default normalization for + // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID. + optional NormalizationMode normalization = 3 [ default = VALID ]; + // Deprecated. Ignored if normalization is specified. If normalization + // is not specified, then setting this to false will be equivalent to + // normalization = BATCH_SIZE to be consistent with previous behavior. + optional bool normalize = 2; +} + +// Messages that store parameters used by individual layer types follow, in +// alphabetical order. + +message AccuracyParameter { + // When computing accuracy, count as correct by comparing the true label to + // the top k scoring classes. By default, only compare to the top scoring + // class (i.e. argmax). + optional uint32 top_k = 1 [ default = 1 ]; + + // The "label" axis of the prediction blob, whose argmax corresponds to the + // predicted label -- may be negative to index from the end (e.g., -1 for the + // last axis). For example, if axis == 1 and the predictions are + // (N x C x H x W), the label blob is expected to contain N*H*W ground truth + // labels with integer values in {0, 1, ..., C-1}. + optional int32 axis = 2 [ default = 1 ]; + + // If specified, ignore instances with the given label. + optional int32 ignore_label = 3; +} + +message ArgMaxParameter { + // If true produce pairs (argmax, maxval) + optional bool out_max_val = 1 [ default = false ]; + optional uint32 top_k = 2 [ default = 1 ]; + // The axis along which to maximise -- may be negative to index from the + // end (e.g., -1 for the last axis). + // By default ArgMaxLayer maximizes over the flattened trailing dimensions + // for each index of the first / num dimension. 
+ optional int32 axis = 3; +} + +message ConcatParameter { + // The axis along which to concatenate -- may be negative to index from the + // end (e.g., -1 for the last axis). Other axes must have the + // same dimension for all the bottom blobs. + // By default, ConcatLayer concatenates blobs along the "channels" axis (1). + optional int32 axis = 2 [ default = 1 ]; + + // DEPRECATED: alias for "axis" -- does not support negative indexing. + optional uint32 concat_dim = 1 [ default = 1 ]; +} + +message BatchNormParameter { + // If false, normalization is performed over the current mini-batch + // and global statistics are accumulated (but not yet used) by a moving + // average. + // If true, those accumulated mean and variance values are used for the + // normalization. + // By default, it is set to false when the network is in the training + // phase and true when the network is in the testing phase. + optional bool use_global_stats = 1; + // What fraction of the moving average remains each iteration? + // Smaller values make the moving average decay faster, giving more + // weight to the recent values. + // Each iteration updates the moving average @f$S_{t-1}@f$ with the + // current mean @f$ Y_t @f$ by + // @f$ S_t = (1-\beta)Y_t + \beta \cdot S_{t-1} @f$, where @f$ \beta @f$ + // is the moving_average_fraction parameter. + optional float moving_average_fraction = 2 [ default = .999 ]; + // Small value to add to the variance estimate so that we don't divide by + // zero. + optional float eps = 3 [ default = 1e-5 ]; +} + +message BiasParameter { + // The first axis of bottom[0] (the first input Blob) along which to apply + // bottom[1] (the second input Blob). May be negative to index from the end + // (e.g., -1 for the last axis). 
+ // + // For example, if bottom[0] is 4D with shape 100x3x40x60, the output + // top[0] will have the same shape, and bottom[1] may have any of the + // following shapes (for the given value of axis): + // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 + // (axis == 1 == -3) 3; 3x40; 3x40x60 + // (axis == 2 == -2) 40; 40x60 + // (axis == 3 == -1) 60 + // Furthermore, bottom[1] may have the empty shape (regardless of the value of + // "axis") -- a scalar bias. + optional int32 axis = 1 [ default = 1 ]; + + // (num_axes is ignored unless just one bottom is given and the bias is + // a learned parameter of the layer. Otherwise, num_axes is determined by the + // number of axes of the second bottom.) + // The number of axes of the input (bottom[0]) covered by the bias + // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. + // Set num_axes := 0, to add a zero-axis Blob: a scalar. + optional int32 num_axes = 2 [ default = 1 ]; + + // (filler is ignored unless just one bottom is given and the bias is + // a learned parameter of the layer.) + // The initialization for the learned bias parameter. + // Default is the zero (0) initialization, resulting in the BiasLayer + // initially performing the identity operation. + optional FillerParameter filler = 3; +} + +message ContrastiveLossParameter { + // margin for dissimilar pair + optional float margin = 1 [ default = 1.0 ]; + // The first implementation of this cost did not exactly match the cost of + // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. + // legacy_version = false (the default) uses (margin - d)^2 as proposed in the + // Hadsell paper. New models should probably use this version. + // legacy_version = true uses (margin - d^2). 
This is kept to support / + // reproduce existing models and results + optional bool legacy_version = 2 [ default = false ]; +} + +message ConvolutionParameter { + optional uint32 num_output = 1; // The number of outputs for the layer + optional bool bias_term = 2 [ default = true ]; // whether to have bias terms + + // Pad, kernel size, and stride are all given as a single value for equal + // dimensions in all spatial dimensions, or once per spatial dimension. + repeated uint32 pad = 3; // The padding size; defaults to 0 + repeated uint32 kernel_size = 4; // The kernel size + repeated uint32 stride = 6; // The stride; defaults to 1 + // Factor used to dilate the kernel, (implicitly) zero-filling the resulting + // holes. (Kernel dilation is sometimes referred to by its use in the + // algorithme à trous from Holschneider et al. 1987.) + repeated uint32 dilation = 18; // The dilation; defaults to 1 + + // For 2D convolution only, the *_h and *_w versions may also be used to + // specify both spatial dimensions. + optional uint32 pad_h = 9 [ default = 0 ]; // The padding height (2D only) + optional uint32 pad_w = 10 [ default = 0 ]; // The padding width (2D only) + optional uint32 kernel_h = 11; // The kernel height (2D only) + optional uint32 kernel_w = 12; // The kernel width (2D only) + optional uint32 stride_h = 13; // The stride height (2D only) + optional uint32 stride_w = 14; // The stride width (2D only) + + optional uint32 group = 5 [ default = 1 ]; // The group size for group conv + + optional FillerParameter weight_filler = 7; // The filler for the weight + optional FillerParameter bias_filler = 8; // The filler for the bias + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 15 [ default = DEFAULT ]; + + // The axis to interpret as "channels" when performing convolution. + // Preceding dimensions are treated as independent inputs; + // succeeding dimensions are treated as "spatial". 
+ // With (N, C, H, W) inputs, and axis == 1 (the default), we perform + // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for + // groups g>1) filters across the spatial axes (H, W) of the input. + // With (N, C, D, H, W) inputs, and axis == 1, we perform + // N independent 3D convolutions, sliding (C/g)-channels + // filters across the spatial axes (D, H, W) of the input. + optional int32 axis = 16 [ default = 1 ]; + + // Whether to force use of the general ND convolution, even if a specific + // implementation for blobs of the appropriate number of spatial dimensions + // is available. (Currently, there is only a 2D-specific convolution + // implementation; for input blobs with num_axes != 2, this option is + // ignored and the ND implementation will be used.) + optional bool force_nd_im2col = 17 [ default = false ]; +} + +message CropParameter { + // To crop, elements of the first bottom are selected to fit the dimensions + // of the second, reference bottom. The crop is configured by + // - the crop `axis` to pick the dimensions for cropping + // - the crop `offset` to set the shift for all/each dimension + // to align the cropped bottom with the reference bottom. + // All dimensions up to but excluding `axis` are preserved, while + // the dimensions including and trailing `axis` are cropped. + // If only one `offset` is set, then all dimensions are offset by this amount. + // Otherwise, the number of offsets must equal the number of cropped axes to + // shift the crop in each dimension accordingly. + // Note: standard dimensions are N,C,H,W so the default is a spatial crop, + // and `axis` may be negative to index from the end (e.g., -1 for the last + // axis). + optional int32 axis = 1 [ default = 2 ]; + repeated uint32 offset = 2; +} + +message DataParameter { + enum DB { + LEVELDB = 0; + LMDB = 1; + } + // Specify the data source. + optional string source = 1; + // Specify the batch size. 
+ optional uint32 batch_size = 4; + // The rand_skip variable is for the data layer to skip a few data points + // to avoid all asynchronous sgd clients to start at the same point. The skip + // point would be set as rand_skip * rand(0,1). Note that rand_skip should not + // be larger than the number of keys in the database. + // DEPRECATED. Each solver accesses a different subset of the database. + optional uint32 rand_skip = 7 [ default = 0 ]; + optional DB backend = 8 [ default = LEVELDB ]; + // DEPRECATED. See TransformationParameter. For data pre-processing, we can do + // simple scaling and subtracting the data mean, if provided. Note that the + // mean subtraction is always carried out before scaling. + optional float scale = 2 [ default = 1 ]; + optional string mean_file = 3; + // DEPRECATED. See TransformationParameter. Specify if we would like to + // randomly + // crop an image. + optional uint32 crop_size = 5 [ default = 0 ]; + // DEPRECATED. See TransformationParameter. Specify if we want to randomly + // mirror + // data. + optional bool mirror = 6 [ default = false ]; + // Force the encoded image to have 3 color channels + optional bool force_encoded_color = 9 [ default = false ]; + // Prefetch queue (Increase if data feeding bandwidth varies, within the + // limit of device memory for GPU training) + optional uint32 prefetch = 10 [ default = 4 ]; +} + +message DropoutParameter { + optional float dropout_ratio = 1 [ default = 0.5 ]; // dropout ratio +} + +// DummyDataLayer fills any number of arbitrarily shaped blobs with random +// (or constant) data generated by "Fillers" (see "message FillerParameter"). +message DummyDataParameter { + // This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or + // N + // shape fields, and 0, 1 or N data_fillers. + // + // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. + // If 1 data_filler is specified, it is applied to all top blobs. 
If N are + // specified, the ith is applied to the ith top blob. + repeated FillerParameter data_filler = 1; + repeated BlobShape shape = 6; + + // 4D dimensions -- deprecated. Use "shape" instead. + repeated uint32 num = 2; + repeated uint32 channels = 3; + repeated uint32 height = 4; + repeated uint32 width = 5; +} + +message EltwiseParameter { + enum EltwiseOp { + PROD = 0; + SUM = 1; + MAX = 2; + } + optional EltwiseOp operation = 1 [ default = SUM ]; // element-wise operation + repeated float coeff = 2; // blob-wise coefficient for SUM operation + + // Whether to use an asymptotically slower (for >2 inputs) but stabler method + // of computing the gradient for the PROD operation. (No effect for SUM op.) + optional bool stable_prod_grad = 3 [ default = true ]; +} + +// Message that stores parameters used by ELULayer +message ELUParameter { + // Described in: + // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate + // Deep Network Learning by Exponential Linear Units (ELUs). arXiv + optional float alpha = 1 [ default = 1 ]; +} + +// Message that stores parameters used by EmbedLayer +message EmbedParameter { + optional uint32 num_output = 1; // The number of outputs for the layer + // The input is given as integers to be interpreted as one-hot + // vector indices with dimension input_dim. Hence input_dim should be + // 1 greater than the maximum possible input value. + optional uint32 input_dim = 2; + + optional bool bias_term = 3 [ default = true ]; // Whether to use a bias term + optional FillerParameter weight_filler = 4; // The filler for the weight + optional FillerParameter bias_filler = 5; // The filler for the bias +} + +// Message that stores parameters used by ExpLayer +message ExpParameter { + // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. + // Or if base is set to the default (-1), base is set to e, + // so y = exp(shift + scale * x). 
+ optional float base = 1 [ default = -1.0 ]; + optional float scale = 2 [ default = 1.0 ]; + optional float shift = 3 [ default = 0.0 ]; +} + +/// Message that stores parameters used by FlattenLayer +message FlattenParameter { + // The first axis to flatten: all preceding axes are retained in the output. + // May be negative to index from the end (e.g., -1 for the last axis). + optional int32 axis = 1 [ default = 1 ]; + + // The last axis to flatten: all following axes are retained in the output. + // May be negative to index from the end (e.g., the default -1 for the last + // axis). + optional int32 end_axis = 2 [ default = -1 ]; +} + +// Message that stores parameters used by HDF5DataLayer +message HDF5DataParameter { + // Specify the data source. + optional string source = 1; + // Specify the batch size. + optional uint32 batch_size = 2; + + // Specify whether to shuffle the data. + // If shuffle == true, the ordering of the HDF5 files is shuffled, + // and the ordering of data within any given HDF5 file is shuffled, + // but data between different files are not interleaved; all of a file's + // data are output (in a random order) before moving onto another file. + optional bool shuffle = 3 [ default = false ]; +} + +message HDF5OutputParameter { optional string file_name = 1; } + +message HingeLossParameter { + enum Norm { + L1 = 1; + L2 = 2; + } + // Specify the Norm to use L1 or L2 + optional Norm norm = 1 [ default = L1 ]; +} + +message ImageDataParameter { + // Specify the data source. + optional string source = 1; + // Specify the batch size. + optional uint32 batch_size = 4 [ default = 1 ]; + // The rand_skip variable is for the data layer to skip a few data points + // to avoid all asynchronous sgd clients to start at the same point. The skip + // point would be set as rand_skip * rand(0,1). Note that rand_skip should not + // be larger than the number of keys in the database. 
+ optional uint32 rand_skip = 7 [ default = 0 ]; + // Whether or not ImageLayer should shuffle the list of files at every epoch. + optional bool shuffle = 8 [ default = false ]; + // It will also resize images if new_height or new_width are not zero. + optional uint32 new_height = 9 [ default = 0 ]; + optional uint32 new_width = 10 [ default = 0 ]; + // Specify if the images are color or gray + optional bool is_color = 11 [ default = true ]; + // DEPRECATED. See TransformationParameter. For data pre-processing, we can do + // simple scaling and subtracting the data mean, if provided. Note that the + // mean subtraction is always carried out before scaling. + optional float scale = 2 [ default = 1 ]; + optional string mean_file = 3; + // DEPRECATED. See TransformationParameter. Specify if we would like to + // randomly + // crop an image. + optional uint32 crop_size = 5 [ default = 0 ]; + // DEPRECATED. See TransformationParameter. Specify if we want to randomly + // mirror + // data. + optional bool mirror = 6 [ default = false ]; + optional string root_folder = 12 [ default = "" ]; +} + +message InfogainLossParameter { + // Specify the infogain matrix source. + optional string source = 1; + optional int32 axis = 2 [ default = 1 ]; // axis of prob +} + +message InnerProductParameter { + optional uint32 num_output = 1; // The number of outputs for the layer + optional bool bias_term = 2 [ default = true ]; // whether to have bias terms + optional FillerParameter weight_filler = 3; // The filler for the weight + optional FillerParameter bias_filler = 4; // The filler for the bias + + // The first axis to be lumped into a single inner product computation; + // all preceding axes are retained in the output. + // May be negative to index from the end (e.g., -1 for the last axis). + optional int32 axis = 5 [ default = 1 ]; + // Specify whether to transpose the weight matrix or not. 
+ // If transpose == true, any operations will be performed on the transpose + // of the weight matrix. The weight matrix itself is not going to be + // transposed + // but rather the transpose flag of operations will be toggled accordingly. + optional bool transpose = 6 [ default = false ]; +} + +message InputParameter { + // This layer produces N >= 1 top blob(s) to be assigned manually. + // Define N shapes to set a shape for each top. + // Define 1 shape to set the same shape for every top. + // Define no shape to defer to reshaping manually. + repeated BlobShape shape = 1; +} + +// Message that stores parameters used by LogLayer +message LogParameter { + // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. + // Or if base is set to the default (-1), base is set to e, + // so y = ln(shift + scale * x) = log_e(shift + scale * x) + optional float base = 1 [ default = -1.0 ]; + optional float scale = 2 [ default = 1.0 ]; + optional float shift = 3 [ default = 0.0 ]; +} + +// Message that stores parameters used by LRNLayer +message LRNParameter { + optional uint32 local_size = 1 [ default = 5 ]; + optional float alpha = 2 [ default = 1. ]; + optional float beta = 3 [ default = 0.75 ]; + enum NormRegion { + ACROSS_CHANNELS = 0; + WITHIN_CHANNEL = 1; + } + optional NormRegion norm_region = 4 [ default = ACROSS_CHANNELS ]; + optional float k = 5 [ default = 1. 
]; + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 6 [ default = DEFAULT ]; +} + +message MemoryDataParameter { + optional uint32 batch_size = 1; + optional uint32 channels = 2; + optional uint32 height = 3; + optional uint32 width = 4; +} + +message MVNParameter { + // This parameter can be set to false to normalize mean only + optional bool normalize_variance = 1 [ default = true ]; + + // This parameter can be set to true to perform DNN-like MVN + optional bool across_channels = 2 [ default = false ]; + + // Epsilon for not dividing by zero while normalizing variance + optional float eps = 3 [ default = 1e-9 ]; +} + +message ParameterParameter { optional BlobShape shape = 1; } + +message PoolingParameter { + enum PoolMethod { + MAX = 0; + AVE = 1; + STOCHASTIC = 2; + } + optional PoolMethod pool = 1 [ default = MAX ]; // The pooling method + // Pad, kernel size, and stride are all given as a single value for equal + // dimensions in height and width or as Y, X pairs. + optional uint32 pad = 4 [ default = 0 ]; // The padding size (equal in Y, X) + optional uint32 pad_h = 9 [ default = 0 ]; // The padding height + optional uint32 pad_w = 10 [ default = 0 ]; // The padding width + optional uint32 kernel_size = 2; // The kernel size (square) + optional uint32 kernel_h = 5; // The kernel height + optional uint32 kernel_w = 6; // The kernel width + optional uint32 stride = 3 [ default = 1 ]; // The stride (equal in Y, X) + optional uint32 stride_h = 7; // The stride height + optional uint32 stride_w = 8; // The stride width + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 11 [ default = DEFAULT ]; + // If global_pooling then it will pool over the size of the bottom by doing + // kernel_h = bottom->height and kernel_w = bottom->width + optional bool global_pooling = 12 [ default = false ]; +} + +message PowerParameter { + // PowerLayer computes outputs y = (shift + scale * x) ^ power. 
+ optional float power = 1 [ default = 1.0 ]; + optional float scale = 2 [ default = 1.0 ]; + optional float shift = 3 [ default = 0.0 ]; +} + +message PythonParameter { + optional string module = 1; + optional string layer = 2; + // This value is set to the attribute `param_str` of the `PythonLayer` object + // in Python before calling the `setup()` method. This could be a number, + // string, dictionary in Python dict format, JSON, etc. You may parse this + // string in `setup` method and use it in `forward` and `backward`. + optional string param_str = 3 [ default = '']; + // DEPRECATED + optional bool share_in_parallel = 4 [ default = false ]; +} + +// Message that stores parameters used by RecurrentLayer +message RecurrentParameter { + // The dimension of the output (and usually hidden state) representation -- + // must be explicitly set to non-zero. + optional uint32 num_output = 1 [ default = 0 ]; + + optional FillerParameter weight_filler = 2; // The filler for the weight + optional FillerParameter bias_filler = 3; // The filler for the bias + + // Whether to enable displaying debug_info in the unrolled recurrent net. + optional bool debug_info = 4 [ default = false ]; + + // Whether to add as additional inputs (bottoms) the initial hidden state + // blobs, and add as additional outputs (tops) the final timestep hidden state + // blobs. The number of additional bottom/top blobs required depends on the + // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs. + optional bool expose_hidden = 5 [ default = false ]; +} + +// Message that stores parameters used by ReductionLayer +message ReductionParameter { + enum ReductionOp { + SUM = 1; + ASUM = 2; + SUMSQ = 3; + MEAN = 4; + } + + optional ReductionOp operation = 1 [ default = SUM ]; // reduction operation + + // The first axis to reduce to a scalar -- may be negative to index from the + // end (e.g., -1 for the last axis). 
+ // (Currently, only reduction along ALL "tail" axes is supported; reduction + // of axis M through N, where N < num_axes - 1, is unsupported.) + // Suppose we have an n-axis bottom Blob with shape: + // (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)). + // If axis == m, the output Blob will have shape + // (d0, d1, d2, ..., d(m-1)), + // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1)) + // times, each including (dm * d(m+1) * ... * d(n-1)) individual data. + // If axis == 0 (the default), the output Blob always has the empty shape + // (count 1), performing reduction across the entire input -- + // often useful for creating new loss functions. + optional int32 axis = 2 [ default = 0 ]; + + optional float coeff = 3 [ default = 1.0 ]; // coefficient for output +} + +// Message that stores parameters used by ReLULayer +message ReLUParameter { + // Allow non-zero slope for negative inputs to speed up optimization + // Described in: + // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities + // improve neural network acoustic models. In ICML Workshop on Deep Learning + // for Audio, Speech, and Language Processing. + optional float negative_slope = 1 [ default = 0 ]; + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 2 [ default = DEFAULT ]; +} + +message ReshapeParameter { + // Specify the output dimensions. If some of the dimensions are set to 0, + // the corresponding dimension from the bottom layer is used (unchanged). + // Exactly one dimension may be set to -1, in which case its value is + // inferred from the count of the bottom blob and the remaining dimensions. + // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: + // + // layer { + // type: "Reshape" bottom: "input" top: "output" + // reshape_param { ... 
} + // } + // + // If "input" is 2D with shape 2 x 8, then the following reshape_param + // specifications are all equivalent, producing a 3D blob "output" with shape + // 2 x 2 x 4: + // + // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } + // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } + // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } + // reshape_param { shape { dim: 0 dim:-1 dim: 4 } } + // + optional BlobShape shape = 1; + + // axis and num_axes control the portion of the bottom blob's shape that are + // replaced by (included in) the reshape. By default (axis == 0 and + // num_axes == -1), the entire bottom blob shape is included in the reshape, + // and hence the shape field must specify the entire output shape. + // + // axis may be non-zero to retain some portion of the beginning of the input + // shape (and may be negative to index from the end; e.g., -1 to begin the + // reshape after the last axis, including nothing in the reshape, + // -2 to include only the last axis, etc.). + // + // For example, suppose "input" is a 2D blob with shape 2 x 8. + // Then the following ReshapeLayer specifications are all equivalent, + // producing a blob "output" with shape 2 x 2 x 4: + // + // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } + // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } + // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } + // + // num_axes specifies the extent of the reshape. + // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on + // input axes in the range [axis, axis+num_axes]. + // num_axes may also be -1, the default, to include all remaining axes + // (starting from axis). + // + // For example, suppose "input" is a 2D blob with shape 2 x 8. + // Then the following ReshapeLayer specifications are equivalent, + // producing a blob "output" with shape 1 x 2 x 8. 
+ // + // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } + // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } + // reshape_param { shape { dim: 1 } num_axes: 0 } + // + // On the other hand, these would produce output blob shape 2 x 1 x 8: + // + // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } + // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } + // + optional int32 axis = 2 [ default = 0 ]; + optional int32 num_axes = 3 [ default = -1 ]; +} + +message ScaleParameter { + // The first axis of bottom[0] (the first input Blob) along which to apply + // bottom[1] (the second input Blob). May be negative to index from the end + // (e.g., -1 for the last axis). + // + // For example, if bottom[0] is 4D with shape 100x3x40x60, the output + // top[0] will have the same shape, and bottom[1] may have any of the + // following shapes (for the given value of axis): + // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 + // (axis == 1 == -3) 3; 3x40; 3x40x60 + // (axis == 2 == -2) 40; 40x60 + // (axis == 3 == -1) 60 + // Furthermore, bottom[1] may have the empty shape (regardless of the value of + // "axis") -- a scalar multiplier. + optional int32 axis = 1 [ default = 1 ]; + + // (num_axes is ignored unless just one bottom is given and the scale is + // a learned parameter of the layer. Otherwise, num_axes is determined by the + // number of axes by the second bottom.) + // The number of axes of the input (bottom[0]) covered by the scale + // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. + // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar. + optional int32 num_axes = 2 [ default = 1 ]; + + // (filler is ignored unless just one bottom is given and the scale is + // a learned parameter of the layer.) + // The initialization for the learned scale parameter. + // Default is the unit (1) initialization, resulting in the ScaleLayer + // initially performing the identity operation. 
+ optional FillerParameter filler = 3; + + // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but + // may be more efficient). Initialized with bias_filler (defaults to 0). + optional bool bias_term = 4 [ default = false ]; + optional FillerParameter bias_filler = 5; +} + +message SigmoidParameter { + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 1 [ default = DEFAULT ]; +} + +message SliceParameter { + // The axis along which to slice -- may be negative to index from the end + // (e.g., -1 for the last axis). + // By default, SliceLayer concatenates blobs along the "channels" axis (1). + optional int32 axis = 3 [ default = 1 ]; + repeated uint32 slice_point = 2; + + // DEPRECATED: alias for "axis" -- does not support negative indexing. + optional uint32 slice_dim = 1 [ default = 1 ]; +} + +// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer +message SoftmaxParameter { + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 1 [ default = DEFAULT ]; + + // The axis along which to perform the softmax -- may be negative to index + // from the end (e.g., -1 for the last axis). + // Any other axes will be evaluated as independent softmaxes. + optional int32 axis = 2 [ default = 1 ]; +} + +message TanHParameter { + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 1 [ default = DEFAULT ]; +} + +// Message that stores parameters used by TileLayer +message TileParameter { + // The index of the axis to tile. + optional int32 axis = 1 [ default = 1 ]; + + // The number of copies (tiles) of the blob to output. + optional int32 tiles = 2; +} + +// Message that stores parameters used by ThresholdLayer +message ThresholdParameter { + optional float threshold = 1 [ default = 0 ]; // Strictly positive values +} + +message WindowDataParameter { + // Specify the data source. 
+ optional string source = 1; + // For data pre-processing, we can do simple scaling and subtracting the + // data mean, if provided. Note that the mean subtraction is always carried + // out before scaling. + optional float scale = 2 [ default = 1 ]; + optional string mean_file = 3; + // Specify the batch size. + optional uint32 batch_size = 4; + // Specify if we would like to randomly crop an image. + optional uint32 crop_size = 5 [ default = 0 ]; + // Specify if we want to randomly mirror data. + optional bool mirror = 6 [ default = false ]; + // Foreground (object) overlap threshold + optional float fg_threshold = 7 [ default = 0.5 ]; + // Background (non-object) overlap threshold + optional float bg_threshold = 8 [ default = 0.5 ]; + // Fraction of batch that should be foreground objects + optional float fg_fraction = 9 [ default = 0.25 ]; + // Amount of contextual padding to add around a window + // (used only by the window_data_layer) + optional uint32 context_pad = 10 [ default = 0 ]; + // Mode for cropping out a detection window + // warp: cropped window is warped to a fixed size and aspect ratio + // square: the tightest square around the window is cropped + optional string crop_mode = 11 [ default = "warp" ]; + // cache_images: will load all images in memory for faster access + optional bool cache_images = 12 [ default = false ]; + // append root_folder to locate images + optional string root_folder = 13 [ default = "" ]; +} + +message SPPParameter { + enum PoolMethod { + MAX = 0; + AVE = 1; + STOCHASTIC = 2; + } + optional uint32 pyramid_height = 1; + optional PoolMethod pool = 2 [ default = MAX ]; // The pooling method + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 6 [ default = DEFAULT ]; +} + +// DEPRECATED: use LayerParameter. 
+message V1LayerParameter { + repeated string bottom = 2; + repeated string top = 3; + optional string name = 4; + repeated NetStateRule include = 32; + repeated NetStateRule exclude = 33; + enum LayerType { + NONE = 0; + ABSVAL = 35; + ACCURACY = 1; + ARGMAX = 30; + BNLL = 2; + CONCAT = 3; + CONTRASTIVE_LOSS = 37; + CONVOLUTION = 4; + DATA = 5; + DECONVOLUTION = 39; + DROPOUT = 6; + DUMMY_DATA = 32; + EUCLIDEAN_LOSS = 7; + ELTWISE = 25; + EXP = 38; + FLATTEN = 8; + HDF5_DATA = 9; + HDF5_OUTPUT = 10; + HINGE_LOSS = 28; + IM2COL = 11; + IMAGE_DATA = 12; + INFOGAIN_LOSS = 13; + INNER_PRODUCT = 14; + LRN = 15; + MEMORY_DATA = 29; + MULTINOMIAL_LOGISTIC_LOSS = 16; + MVN = 34; + POOLING = 17; + POWER = 26; + RELU = 18; + SIGMOID = 19; + SIGMOID_CROSS_ENTROPY_LOSS = 27; + SILENCE = 36; + SOFTMAX = 20; + SOFTMAX_LOSS = 21; + SPLIT = 22; + SLICE = 33; + TANH = 23; + WINDOW_DATA = 24; + THRESHOLD = 31; + } + optional LayerType type = 5; + repeated BlobProto blobs = 6; + repeated string param = 1001; + repeated DimCheckMode blob_share_mode = 1002; + enum DimCheckMode { + STRICT = 0; + PERMISSIVE = 1; + } + repeated float blobs_lr = 7; + repeated float weight_decay = 8; + repeated float loss_weight = 35; + optional AccuracyParameter accuracy_param = 27; + optional ArgMaxParameter argmax_param = 23; + optional ConcatParameter concat_param = 9; + optional ContrastiveLossParameter contrastive_loss_param = 40; + optional ConvolutionParameter convolution_param = 10; + optional DataParameter data_param = 11; + optional DropoutParameter dropout_param = 12; + optional DummyDataParameter dummy_data_param = 26; + optional EltwiseParameter eltwise_param = 24; + optional ExpParameter exp_param = 41; + optional HDF5DataParameter hdf5_data_param = 13; + optional HDF5OutputParameter hdf5_output_param = 14; + optional HingeLossParameter hinge_loss_param = 29; + optional ImageDataParameter image_data_param = 15; + optional InfogainLossParameter infogain_loss_param = 16; + optional 
InnerProductParameter inner_product_param = 17; + optional LRNParameter lrn_param = 18; + optional MemoryDataParameter memory_data_param = 22; + optional MVNParameter mvn_param = 34; + optional PoolingParameter pooling_param = 19; + optional PowerParameter power_param = 21; + optional ReLUParameter relu_param = 30; + optional SigmoidParameter sigmoid_param = 38; + optional SoftmaxParameter softmax_param = 39; + optional SliceParameter slice_param = 31; + optional TanHParameter tanh_param = 37; + optional ThresholdParameter threshold_param = 25; + optional WindowDataParameter window_data_param = 20; + optional TransformationParameter transform_param = 36; + optional LossParameter loss_param = 42; + optional V0LayerParameter layer = 1; +} + +// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters +// in Caffe. We keep this message type around for legacy support. +message V0LayerParameter { + optional string name = 1; // the layer name + optional string type = 2; // the string to specify the layer type + + // Parameters to specify layers with inner products. + optional uint32 num_output = 3; // The number of outputs for the layer + optional bool biasterm = 4 [ default = true ]; // whether to have bias terms + optional FillerParameter weight_filler = 5; // The filler for the weight + optional FillerParameter bias_filler = 6; // The filler for the bias + + optional uint32 pad = 7 [ default = 0 ]; // The padding size + optional uint32 kernelsize = 8; // The kernel size + optional uint32 group = 9 [ default = 1 ]; // The group size for group conv + optional uint32 stride = 10 [ default = 1 ]; // The stride + enum PoolMethod { + MAX = 0; + AVE = 1; + STOCHASTIC = 2; + } + optional PoolMethod pool = 11 [ default = MAX ]; // The pooling method + optional float dropout_ratio = 12 [ default = 0.5 ]; // dropout ratio + + optional uint32 local_size = 13 [ default = 5 ]; // for local response norm + optional float alpha = 14 [ default = 1. 
]; // for local response norm + optional float beta = 15 [ default = 0.75 ]; // for local response norm + optional float k = 22 [ default = 1. ]; + + // For data layers, specify the data source + optional string source = 16; + // For data pre-processing, we can do simple scaling and subtracting the + // data mean, if provided. Note that the mean subtraction is always carried + // out before scaling. + optional float scale = 17 [ default = 1 ]; + optional string meanfile = 18; + // For data layers, specify the batch size. + optional uint32 batchsize = 19; + // For data layers, specify if we would like to randomly crop an image. + optional uint32 cropsize = 20 [ default = 0 ]; + // For data layers, specify if we want to randomly mirror data. + optional bool mirror = 21 [ default = false ]; + + // The blobs containing the numeric parameters of the layer + repeated BlobProto blobs = 50; + // The ratio that is multiplied on the global learning rate. If you want to + // set the learning ratio for one blob, you need to set it for all blobs. + repeated float blobs_lr = 51; + // The weight decay that is multiplied on the global weight decay. + repeated float weight_decay = 52; + + // The rand_skip variable is for the data layer to skip a few data points + // to avoid all asynchronous sgd clients to start at the same point. The skip + // point would be set as rand_skip * rand(0,1). Note that rand_skip should not + // be larger than the number of keys in the database. 
+ optional uint32 rand_skip = 53 [ default = 0 ]; + + // Fields related to detection (det_*) + // foreground (object) overlap threshold + optional float det_fg_threshold = 54 [ default = 0.5 ]; + // background (non-object) overlap threshold + optional float det_bg_threshold = 55 [ default = 0.5 ]; + // Fraction of batch that should be foreground objects + optional float det_fg_fraction = 56 [ default = 0.25 ]; + + // optional bool OBSOLETE_can_clobber = 57 [default = true]; + + // Amount of contextual padding to add around a window + // (used only by the window_data_layer) + optional uint32 det_context_pad = 58 [ default = 0 ]; + + // Mode for cropping out a detection window + // warp: cropped window is warped to a fixed size and aspect ratio + // square: the tightest square around the window is cropped + optional string det_crop_mode = 59 [ default = "warp" ]; + + // For ReshapeLayer, one needs to specify the new dimensions. + optional int32 new_num = 60 [ default = 0 ]; + optional int32 new_channels = 61 [ default = 0 ]; + optional int32 new_height = 62 [ default = 0 ]; + optional int32 new_width = 63 [ default = 0 ]; + + // Whether or not ImageLayer should shuffle the list of files at every epoch. + // It will also resize images if new_height or new_width are not zero. + optional bool shuffle_images = 64 [ default = false ]; + + // For ConcatLayer, one needs to specify the dimension for concatenation, and + // the other dimensions must be the same for all the bottom blobs. + // By default it will concatenate blobs along the channels dimension. + optional uint32 concat_dim = 65 [ default = 1 ]; + + optional HDF5OutputParameter hdf5_output_param = 1001; +} + +message PReLUParameter { + // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers: + // Surpassing Human-Level Performance on ImageNet Classification, 2015. + + // Initial value of a_i. Default is a_i=0.25 for all i. 
+ optional FillerParameter filler = 1; + // Whether or not slope parameters are shared across channels. + optional bool channel_shared = 2 [ default = false ]; +} diff --git a/fluid/image_classification/caffe2fluid/proto/compile.sh b/fluid/image_classification/caffe2fluid/proto/compile.sh new file mode 100644 index 0000000000000000000000000000000000000000..f621e0066d11595bc48362ad7411eeab57f035dd --- /dev/null +++ b/fluid/image_classification/caffe2fluid/proto/compile.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +#function: +# script used to generate caffepb.py from caffe.proto using protoc +# + +PROTOC=`which protoc` +if [[ -z $PROTOC ]];then + echo "not found protoc, you should first install it following this[https://github.com/google/protobuf/releases]" + exit 1 +fi + +WORK_ROOT=$(dirname `readlink -f "$BASH_SOURCE[0]"`) +PY_NAME="$WORK_ROOT/caffepb.py" +$PROTOC --proto_path=$WORK_ROOT --python_out=$WORK_ROOT $WORK_ROOT/caffe.proto +ret=$? + +if [ $ret -eq 0 ];then + mv $WORK_ROOT/caffe_pb2.py $PY_NAME +fi + +if [ -e "$PY_NAME" ];then + echo "succeed to generate [$PY_NAME]" + exit 0 +else + echo "failed to generate [$PY_NAME]" +fi +exit $ret diff --git a/fluid/image_classification/caffe2fluid/tests/lenet/README.md b/fluid/image_classification/caffe2fluid/tests/lenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..982edc2aa67f43f849bb2523b1a15edaa02f5d28 --- /dev/null +++ b/fluid/image_classification/caffe2fluid/tests/lenet/README.md @@ -0,0 +1,28 @@ +### Convert lenet model from caffe format into paddle format(fluid api) + +### Howto +1, Prepare your caffepb.py + +2, Download a lenet caffe-model + lenet_iter_10000.caffemodel + download address: https://github.com/ethereon/caffe-tensorflow/raw/master/examples/mnist/lenet_iter_10000.caffemodel + md5: cbec75c1c374b6c1981c4a1eb024ae01 + + lenet.prototxt + download address: https://raw.githubusercontent.com/BVLC/caffe/master/examples/mnist/lenet.prototxt + md5: 27384af843338ab90b00c8d1c81de7d5 + 
+ +3, Convert this model(make sure caffepb.py is ready in ../../proto) + convert to npy format + bash ./convert.sh lenet.prototxt lenet.caffemodel lenet.py lenet.npy + + save to fluid format(optional) + bash ./convert.sh lenet.prototxt lenet.caffemodel lenet.py lenet.npy && python ./lenet.py ./lenet.npy ./fluid.model + +4, Use this new model(paddle installed in this python) + use fluid format + python ./predict.py ./fluid.model + + use npy format + python ./predict.py ./lenet.npy diff --git a/fluid/image_classification/caffe2fluid/tests/lenet/convert.sh b/fluid/image_classification/caffe2fluid/tests/lenet/convert.sh new file mode 100755 index 0000000000000000000000000000000000000000..b3ec1a1dce2434a4466cf5d4609de1b4aec9d346 --- /dev/null +++ b/fluid/image_classification/caffe2fluid/tests/lenet/convert.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +#function: +# convert a caffe model +# eg: +# bash ./convert.sh ./model.caffe/lenet.prototxt ./model.caffe/lenet.caffemodel lenet.py lenet.npy + +if [[ $# -ne 4 ]];then + echo "usage:" + echo " bash $0 [PROTOTXT] [CAFFEMODEL] [PY_NAME] [WEIGHT_NAME]" + echo " eg: bash $0 lenet.prototxt lenet.caffemodel lenet.py lenet.npy" + exit 1 +fi + +WORK_ROOT=$(dirname `readlink -f ${BASH_SOURCE[0]}`) +if [[ -z $PYTHON ]];then + PYTHON=`which python` +fi + +PROTOTXT=$1 +CAFFEMODEL=$2 +PY_NAME=$3 +WEIGHT_NAME=$4 +CONVERTER_PY="$WORK_ROOT/../../convert.py" + +$PYTHON $CONVERTER_PY $PROTOTXT --caffemodel $CAFFEMODEL --code-output-path=$PY_NAME --data-output-path=$WEIGHT_NAME +ret=$?
+if [[ $ret -eq 0 ]];then + echo "succeed to convert caffe model[$CAFFEMODEL, $PROTOTXT] to paddle model[$PY_NAME, $WEIGHT_NAME]" +else + echo "failed to convert caffe model[$CAFFEMODEL, $PROTOTXT]" +fi +exit $ret diff --git a/fluid/image_classification/caffe2fluid/tests/lenet/lenet.npy b/fluid/image_classification/caffe2fluid/tests/lenet/lenet.npy new file mode 100644 index 0000000000000000000000000000000000000000..66f773e5ffd54c8f5151b920aecdf3dd4f8c91d2 Binary files /dev/null and b/fluid/image_classification/caffe2fluid/tests/lenet/lenet.npy differ diff --git a/fluid/image_classification/caffe2fluid/tests/lenet/lenet.py b/fluid/image_classification/caffe2fluid/tests/lenet/lenet.py new file mode 100644 index 0000000000000000000000000000000000000000..50e6927483a61c574f1152c6dc438a6b2c8a4d90 --- /dev/null +++ b/fluid/image_classification/caffe2fluid/tests/lenet/lenet.py @@ -0,0 +1,297 @@ +### generated by caffe2fluid, your net is in class "LeNet" ### + +import math +import os +import numpy as np + + +def import_fluid(): + import paddle.v2.fluid as fluid + return fluid + + +def layer(op): + '''Decorator for composable network layers.''' + + def layer_decorated(self, *args, **kwargs): + # Automatically set a name if not provided. + name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) + # Figure out the layer inputs. + if len(self.terminals) == 0: + raise RuntimeError('No input variables found for layer %s.' % name) + elif len(self.terminals) == 1: + layer_input = self.terminals[0] + else: + layer_input = list(self.terminals) + # Perform the operation and get the output. + layer_output = op(self, layer_input, *args, **kwargs) + # Add to layer LUT. + self.layers[name] = layer_output + # This output is now the input for the next layer. + self.feed(layer_output) + # Return self for chained calls. 
+ return self + + return layer_decorated + + +class Network(object): + def __init__(self, inputs, trainable=True): + # The input nodes for this network + self.inputs = inputs + # The current list of terminal nodes + self.terminals = [] + # Mapping from layer names to layers + self.layers = dict(inputs) + # If true, the resulting variables are set as trainable + self.trainable = trainable + # Switch variable for dropout + self.paddle_env = None + self.setup() + + def setup(self): + '''Construct the network. ''' + raise NotImplementedError('Must be implemented by the subclass.') + + def load(self, data_path, exe=None, place=None, ignore_missing=False): + '''Load network weights. + data_path: The path to the numpy-serialized network weights + ignore_missing: If true, serialized weights for missing layers are ignored. + ''' + fluid = import_fluid() + #load fluid mode directly + if os.path.isdir(data_path): + assert (exe is not None), \ + 'must provide a executor to load fluid model' + fluid.io.load_persistables_if_exist(executor=exe, dirname=data_path) + return True + + #load model from a npy file + if exe is None or place is None: + if self.paddle_env is None: + place = fluid.CPUPlace() + exe = fluid.Executor(place) + self.paddle_env = {'place': place, 'exe': exe} + exe = exe.run(fluid.default_startup_program()) + else: + place = self.paddle_env['place'] + exe = self.paddle_env['exe'] + + data_dict = np.load(data_path).item() + for op_name in data_dict: + layer = self.layers[op_name] + for param_name, data in data_dict[op_name].iteritems(): + try: + name = '%s_%s' % (op_name, param_name) + v = fluid.global_scope().find_var(name) + w = v.get_tensor() + w.set(data, place) + except ValueError: + if not ignore_missing: + raise + return True + + def feed(self, *args): + '''Set the input(s) for the next operation by replacing the terminal nodes. + The arguments can be either layer names or the actual layers. 
+ ''' + assert len(args) != 0 + self.terminals = [] + for fed_layer in args: + if isinstance(fed_layer, basestring): + try: + fed_layer = self.layers[fed_layer] + except KeyError: + raise KeyError('Unknown layer name fed: %s' % fed_layer) + self.terminals.append(fed_layer) + return self + + def get_output(self): + '''Returns the current network output.''' + return self.terminals[-1] + + def get_unique_name(self, prefix): + '''Returns an index-suffixed unique name for the given prefix. + This is used for auto-generating layer names based on the type-prefix. + ''' + ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 + return '%s_%d' % (prefix, ident) + + @layer + def conv(self, + input, + k_h, + k_w, + c_o, + s_h, + s_w, + name, + relu=True, + padding=None, + group=1, + biased=True): + if padding is None: + padding = [0, 0] + + # Get the number of channels in the input + c_i, h_i, w_i = input.shape[1:] + + # Verify that the grouping parameter is valid + assert c_i % group == 0 + assert c_o % group == 0 + + fluid = import_fluid() + prefix = name + '_' + output = fluid.layers.conv2d( + input=input, + filter_size=[k_h, k_w], + num_filters=c_o, + stride=[s_h, s_w], + padding=padding, + groups=group, + param_attr=fluid.ParamAttr(name=prefix + "weights"), + bias_attr=fluid.ParamAttr(name=prefix + "biases"), + act="relu" if relu is True else None) + return output + + @layer + def relu(self, input, name): + fluid = import_fluid() + output = fluid.layers.relu(x=input) + return output + + @layer + def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None): + if padding is None: + padding = [0, 0] + + # Get the number of channels in the input + h_i, w_i = input.shape[2:] + fluid = import_fluid() + output = fluid.layers.pool2d( + input=input, + pool_size=[k_h, k_w], + pool_stride=[s_h, s_w], + pool_padding=padding, + pool_type='max') + return output + + @layer + def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None): + if padding is None: + 
padding = [0, 0] + + # Get the number of channels in the input + h_i, w_i = input.shape[2:] + fluid = import_fluid() + output = fluid.layers.pool2d( + input=input, + pool_size=[k_h, k_w], + pool_stride=[s_h, s_w], + pool_padding=padding, + pool_type='avg') + return output + + @layer + def lrn(self, input, radius, alpha, beta, name, bias=1.0): + raise Exception('lrn() not implemented yet') + + @layer + def concat(self, inputs, axis, name): + fluid = import_fluid() + output = fluid.layers.concat(input=inputs, axis=axis) + return output + + @layer + def add(self, inputs, name): + fluid = import_fluid() + output = inputs[0] + for i in inputs[1:]: + output = fluid.layers.elementwise_add(x=output, y=i) + return output + + @layer + def fc(self, input, num_out, name, relu=True, act=None): + fluid = import_fluid() + + if act is None: + act = 'relu' if relu is True else None + + prefix = name + '_' + output = fluid.layers.fc( + name=name, + input=input, + size=num_out, + act=act, + param_attr=fluid.ParamAttr(name=prefix + 'weights'), + bias_attr=fluid.ParamAttr(name=prefix + 'biases')) + return output + + @layer + def softmax(self, input, name): + fluid = import_fluid() + output = fluid.layers.softmax(x=input, name=name) + return output + + @layer + def batch_normalization(self, input, name, scale_offset=True, relu=False): + # NOTE: Currently, only inference is supported + fluid = import_fluid() + prefix = name + '_' + param_attr = None if scale_offset is False else fluid.ParamAttr( + name=prefix + 'scale') + bias_attr = None if scale_offset is False else fluid.ParamAttr( + name=prefix + 'offset') + mean_name = prefix + 'mean' + variance_name = prefix + 'variance' + output = fluid.layers.batch_norm( + name=name, + input=input, + is_test=True, + param_attr=param_attr, + bias_attr=bias_attr, + moving_mean_name=mean_name, + moving_variance_name=variance_name, + epsilon=1e-5, + act='relu' if relu is True else None) + + return output + + @layer + def dropout(self, input, 
keep_prob, name): + raise Exception('dropout() not implemented yet') + + +class LeNet(Network): + def setup(self): + self.feed('data') + self.conv(5, 5, 20, 1, 1, relu=False, name='conv1') + self.max_pool(2, 2, 2, 2, name='pool1') + self.conv(5, 5, 50, 1, 1, relu=False, name='conv2') + self.max_pool(2, 2, 2, 2, name='pool2') + self.fc(500, name='ip1') + self.fc(10, relu=False, name='ip2') + self.softmax(name='prob') + + @classmethod + def convert(cls, npy_model, fluid_path): + import paddle.v2.fluid as fluid + data_layer = fluid.layers.data( + name="data", shape=[1, 28, 28], dtype="float32") + feed_data = {"data": data_layer} + net = cls(feed_data) + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + net.load(data_path=npy_model, exe=exe, place=place) + fluid.io.save_persistables(executor=exe, dirname=fluid_path) + + +if __name__ == "__main__": + #usage: python xxxnet.py xxx.npy ./model + + import sys + npy_weight = sys.argv[1] + fluid_model = sys.argv[2] + LeNet.convert(npy_weight, fluid_model) + exit(0) diff --git a/fluid/image_classification/caffe2fluid/tests/lenet/predict.py b/fluid/image_classification/caffe2fluid/tests/lenet/predict.py new file mode 100644 index 0000000000000000000000000000000000000000..7405cc6f848ea139bc4edd4c3ec0e0af773ea25a --- /dev/null +++ b/fluid/image_classification/caffe2fluid/tests/lenet/predict.py @@ -0,0 +1,74 @@ +#!/bin/env python + +#function: +# demo to show how to use converted model using caffe2fluid +# + +import numpy as np +import paddle.v2 as paddle +import paddle.v2.fluid as fluid + +from lenet import LeNet as MyNet + + +def test_model(exe, test_program, fetch_list, test_reader, feeder): + acc_set = [] + + for data in test_reader(): + acc_np, pred = exe.run(program=test_program, + feed=feeder.feed(data), + fetch_list=fetch_list) + acc_set.append(float(acc_np)) + + acc_val = np.array(acc_set).mean() + return float(acc_val) + + +def main(model_path): + """ main + """ + 
print('load fluid model in %s' % (model_path)) + + with_gpu = False + paddle.init(use_gpu=with_gpu) + + #1, define network topology + images = fluid.layers.data(name='image', shape=[1, 28, 28], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + + net = MyNet({'data': images}) + prediction = net.layers['prob'] + acc = fluid.layers.accuracy(input=prediction, label=label) + + place = fluid.CUDAPlace(0) if with_gpu is True else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + #2, load weights + if model_path.find('.npy') > 0: + net.load(data_path=model_path, exe=exe, place=place) + else: + net.load(data_path=model_path, exe=exe) + + #3, test this model + test_program = fluid.default_main_program().clone() + test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128) + + feeder = fluid.DataFeeder(feed_list=[images, label], place=place) + fetch_list = [acc, prediction] + + print('go to test model using test set') + acc_val = test_model(exe, test_program, \ + fetch_list, test_reader, feeder) + + print('test accuracy is [%.4f], expected value[0.919]' % (acc_val)) + + +if __name__ == "__main__": + import sys + if len(sys.argv) == 2: + fluid_model_path = sys.argv[1] + else: + fluid_model_path = './model.fluid' + + main(fluid_model_path) diff --git a/mt_with_external_memory/README.md b/mt_with_external_memory/README.md index 1b478bd846ec5a5083c877f15c86057014375f8a..6643b4eb6c530c9fcaaf435ae999fc03eb628838 100644 --- a/mt_with_external_memory/README.md +++ b/mt_with_external_memory/README.md @@ -116,7 +116,7 @@ 算法实现于以下几个文件中: - `external_memory.py`: 主要实现简化版的 **神经图灵机** 于 `ExternalMemory` 类,对外提供初始化和读写函数。 -- `model.py`: 相关模型配置函数,包括双向 GPU 编码器(`bidirectional_gru_encoder`),带外部记忆强化的解码器(`memory_enhanced_decoder`),带外部记忆强化的序列到序列模型(`memory_enhanced_decoder`)。 +- `model.py`: 相关模型配置函数,包括双向 GPU 
编码器(`bidirectional_gru_encoder`),带外部记忆强化的解码器(`memory_enhanced_decoder`),带外部记忆强化的序列到序列模型(`memory_enhanced_seq2seq`)。 - `data_utils.py`: 相关数据处理辅助函数。 - `train.py`: 模型训练。 - `infer.py`: 部分示例样本的翻译(模型推断)。 @@ -170,6 +170,7 @@ class ExternalMemory(object): a learnable gate function. :type enable_interpolation: bool """ + pass def _content_addressing(self, key_vector): """Get write/read head's addressing weights via content-based addressing. @@ -194,6 +195,7 @@ class ExternalMemory(object): :param write_key: Key vector for write heads to generate writing content and addressing signals. :type write_key: LayerOutput + """ pass def read(self, read_key): @@ -410,7 +412,7 @@ paddle.dataset.wmt14.test(dict_size) 命令行输入: ```bash -python mt_with_external_memory.py +python train.py ``` 或自定义部分参数, 例如: