Commit cc992072 authored by walloollaw, committed by qingqing01

add more help info in argmax when axis is not set (#874)

Parent ac7a2931
......@@ -24,7 +24,8 @@ This tool is used to convert a Caffe model to a Fluid model
- Save weights as fluid model file
```
python alexnet.py alexnet.npy ./fluid
python alexnet.py alexnet.npy ./fluid #only infer the last layer's result
python alexnet.py alexnet.npy ./fluid fc8,prob #infer these 2 layers' results
```
3. Use the converted model to infer
......
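For step 3 ("Use the converted model to infer"), a minimal sketch of loading the saved model with the Fluid 1.x API might look like the following; the `./fluid` directory and the `model`/`params` file names come from this patch, while the `[3, 227, 227]` AlexNet input shape is an assumption, not something the diff states.

```
import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()
exe = fluid.Executor(place)

# 'model' and 'params' are the file names written by save_inference_model below
program, feed_names, fetch_targets = fluid.io.load_inference_model(
    dirname='./fluid', executor=exe,
    model_filename='model', params_filename='params')

# dummy input; the assumed [3, 227, 227] shape may differ for other models
img = np.random.rand(1, 3, 227, 227).astype('float32')
results = exe.run(program, feed={feed_names[0]: img}, fetch_list=fetch_targets)
print([r.shape for r in results])
```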
......@@ -43,7 +43,7 @@ def build_model(net_file, net_name):
(net_file, net_name))
net_path = os.path.dirname(net_file)
module_name = os.path.basename(net_file).rstrip('.py')
module_name = os.path.splitext(os.path.basename(net_file))[0]
if net_path not in sys.path:
sys.path.insert(0, net_path)
......@@ -51,7 +51,7 @@ def build_model(net_file, net_name):
m = __import__(module_name, fromlist=[net_name])
MyNet = getattr(m, net_name)
except Exception as e:
print('failed to load module[%s]' % (module_name))
print('failed to load module[%s.%s]' % (module_name, net_name))
print(e)
return None
......@@ -153,7 +153,6 @@ def load_inference_model(dirname, exe):
def infer(model_path, imgfile, net_file=None, net_name=None, debug=True):
""" do inference using a model which consist 'xxx.py' and 'xxx.npy'
"""
fluid = import_fluid()
place = fluid.CPUPlace()
......
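A quick illustration of why the `module_name` line above is replaced: `str.rstrip('.py')` strips a set of characters rather than a suffix, so it can eat into the real module name, while `os.path.splitext` removes only the extension. The `happy.py` name below is purely illustrative.

```
import os

# rstrip removes any trailing run of '.', 'p', 'y' characters
print('happy.py'.rstrip('.py'))                                  # -> 'ha'   (wrong)
print(os.path.splitext(os.path.basename('nets/happy.py'))[0])    # -> 'happy' (right)
```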
......@@ -67,7 +67,7 @@ if [[ -z $only_convert ]];then
imgfile="data/65.jpeg"
# FIXME:
# only looks at the first line of the prototxt file for the network name, which may not be correct
net_name=`grep "name" $proto_file | head -n1 | perl -ne 'if(/^\s*name\s*:\s*\"([^\"]+)\"/){ print $1."\n";}'`
net_name=`grep "name" $proto_file | head -n1 | perl -ne 'if(/^name\s*:\s*\"([^\"]+)\"/){ print $1."\n";}'`
if [[ -z $net_name ]];then
net_name="MyNet"
fi
......
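For readers who prefer Python to the perl one-liner, an equivalent sketch is below; the `guess_net_name` helper is made up here and is not part of the repo.

```
import re

def guess_net_name(proto_file, default='MyNet'):
    """Mirror of the grep/head/perl pipeline above: inspect only the first
    line containing "name" and accept it if it is a top-level name: "..."
    entry, otherwise fall back to the default."""
    with open(proto_file) as f:
        for line in f:
            if 'name' not in line:
                continue
            m = re.match(r'^name\s*:\s*"([^"]+)"', line)
            return m.group(1) if m else default
    return default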
......@@ -30,8 +30,9 @@ def set_args(f, params):
kwargs = {}
for arg_name in arg_list:
try:
v = getattr(node.layer.parameters, arg_name, None)
v = getattr(params, arg_name, None)
except Exception as e:
# extracting caffe's parameters may fail here
v = None
if v is not None:
......
......@@ -27,7 +27,9 @@ def argmax_shape(input_shape, out_max_val=False, top_k=1, axis=-1):
axis += len(input_shape)
assert (axis + 1 == len(input_shape)
), 'only can be applied on the last dimension now'
), 'can only be applied to the last dimension [axis:%d, input shape:%s] now, '\
'make sure you have set the axis param in your xxx.prototxt file' \
% (axis, str(input_shape))
output_shape = input_shape
output_shape[-1] = top_k
......@@ -56,14 +58,13 @@ def argmax_layer(input, name, out_max_val=False, top_k=1, axis=-1):
if axis < 0:
axis += len(input.shape)
assert (axis + 1 == len(input_shape)
), 'only can be applied on the last dimension now'
topk_var, index_var = fluid.layers.topk(input=input, k=top_k)
if out_max_val is True:
output = fluid.layers.concate([topk_var, index_var], axis=axis)
index_var = fluid.layers.cast(index_var, dtype=topk_var.dtype)
output = fluid.layers.concat([index_var, topk_var], axis=axis)
else:
output = topk_var
output = index_var
return output
......
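A NumPy sketch (not part of the patch) of the shapes the rewritten argmax_layer should produce: the top_k indices replace the last dimension, and when out_max_val is set the values are concatenated after the indices, matching the concat order above.

```
import numpy as np

def argmax_ref(x, out_max_val=False, top_k=1):
    # top_k indices along the last dimension, cast to float so they can be
    # concatenated with the values (as the layer above does)
    indices = np.argsort(-x, axis=-1)[..., :top_k].astype('float32')
    if not out_max_val:
        return indices
    values = -np.sort(-x, axis=-1)[..., :top_k]
    return np.concatenate([indices, values], axis=-1)   # indices first, then values

x = np.random.rand(2, 10).astype('float32')
print(argmax_ref(x).shape)                    # (2, 1)
print(argmax_ref(x, out_max_val=True).shape)  # (2, 2)
```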
......@@ -124,10 +124,18 @@ class Graph(object):
for node in self.topologically_sorted():
# If the node has learned parameters, display the first one's shape.
# In case of convolutions, this corresponds to the weights.
data_shape = node.data[0].shape if node.data else '--'
out_shape = node.output_shape or '--'
s.append('{:<20} {:<30} {:>20} {:>20}'.format(
node.kind, node.name, data_shape, tuple(out_shape)))
if node.data is None:
data_shape = '--'
out_shape = node.output_shape or '--'
s.append('{:<20} {:<30} {:>20} {:>20}'.format(
node.kind, node.name, data_shape, tuple(out_shape)))
else:
for d in node.data:
#data_shape = node.data[0].shape if node.data else '--'
data_shape = d.shape
out_shape = node.output_shape or '--'
s.append('{:<20} {:<30} {:>20} {:>20}'.format(
node.kind, node.name, data_shape, tuple(out_shape)))
return '\n'.join(s)
......
""" this module is used as a template for generating sub class of Network
"""
class MyNet(object):
### automatically generated by caffe2fluid ###
inputs_info = "INPUTS_INFO"
custom_layers_path = "CAFFE2FLUID_CUSTOM_LAYERS"
def custom_layer_factory(self):
import os
pk_paths = []
default = os.path.dirname(os.path.abspath(__file__))
location = os.environ.get('CAFFE2FLUID_CUSTOM_LAYERS', default)
pk_name = 'custom_layers'
pk_dir = os.path.join(location, pk_name)
pk_paths.append((location, pk_dir))
location = MyNet.custom_layers_path
pk_dir = os.path.join(MyNet.custom_layers_path, pk_name)
pk_paths.append((location, pk_dir))
for loc, pk_dir in pk_paths:
if os.path.exists(pk_dir):
if loc not in sys.path:
sys.path.insert(0, loc)
break
try:
from custom_layers import make_custom_layer
return make_custom_layer
except Exception as e:
print('maybe you should set $CAFFE2FLUID_CUSTOM_LAYERS first')
raise e
@classmethod
def input_shapes(cls):
return cls.inputs_info
@classmethod
def convert(cls, npy_model, fluid_path, outputs=None):
fluid = import_fluid()
shapes = cls.input_shapes()
input_name = shapes.keys()[0]
feed_data = {}
for name, shape in shapes.items():
data_layer = fluid.layers.data(
name=name, shape=shape, dtype="float32")
feed_data[name] = data_layer
net = cls(feed_data)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
net.load(data_path=npy_model, exe=exe, place=place)
output_vars = []
if outputs is None:
output_vars.append(net.get_output())
else:
if type(outputs) is list:
for n in outputs:
assert n in net.layers, 'not found layer with this name[%s]' % (
n)
output_vars.append(net.layers[n])
fluid.io.save_inference_model(
fluid_path, [input_name],
output_vars,
exe,
main_program=None,
model_filename='model',
params_filename='params')
return 0
def main():
""" a tool used to convert caffe model to fluid
"""
import sys
import os
filename = os.path.splitext(os.path.basename(sys.argv[0]))[0]
if len(sys.argv) < 3:
print('usage:')
print(' python %s %s.npy [save_dir] [layer names separated by comma]' \
% (sys.argv[0], filename))
print(' eg: python %s %s.npy ./fluid' % (sys.argv[0], filename))
print(' eg: python %s %s.npy ./fluid layer_name1,layer_name2' \
% (sys.argv[0], filename))
return 1
npy_weight = sys.argv[1]
fluid_model = sys.argv[2]
outputs = None
if len(sys.argv) >= 4:
outputs = sys.argv[3].split(',')
ret = MyNet.convert(npy_weight, fluid_model, outputs)
if ret == 0:
outputs = 'last output layer' if outputs is None else outputs
print('successfully converted to fluid format with output layers[%s]'
' in directory[%s]' % (outputs, fluid_model))
else:
print('failed to convert model to fluid format')
return ret
def generate_net_code(net_name, inputs_info):
""" generate framework of a custom net code which represent a subclass of Network
Args:
@net_name (str): class name for this net
@inputs_info (str): a str which represents a dict, eg: '{"data": [3, 32, 32]}'
Returns:
net_codes (str): codes for this subclass
"""
import os
import inspect
net_codes = str(inspect.getsource(MyNet))
net_codes = net_codes.replace('MyNet(object)', '%s(Network)' % net_name)
net_codes = net_codes.replace('"INPUTS_INFO"', inputs_info)
custom_layer_dir = os.path.dirname(os.path.abspath(__file__))
net_codes = net_codes.replace('CAFFE2FLUID_CUSTOM_LAYERS', custom_layer_dir)
return net_codes
def generate_main_code(net_name):
""" generate a piece of code for 'main' function
Args:
@net_name (str): class name for this net
Returns:
main_codes (str): codes for this main function
"""
import inspect
main_codes = str(inspect.getsource(main))
main_codes = main_codes.replace('MyNet', net_name)
return main_codes
if __name__ == "__main__":
""" just for testing
"""
print generate_net_code('Attribute', "{'data': [3, 277, 277]}")
print generate_main_code('Attribute')
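A sketch of how the two generators are meant to be combined, mirroring what the emitter further down does; the `AlexNet` name and output path are made up, and the emitter additionally prepends the imports and the Network base class, so the file written here is not runnable on its own.

```
from net_template import generate_net_code, generate_main_code

net_code = generate_net_code('AlexNet', '{"data": [3, 227, 227]}')
main_code = generate_main_code('AlexNet')
with open('alexnet.py', 'w') as f:
    f.write(net_code + '\n\n\n' + main_code)
```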
......@@ -290,20 +290,15 @@ class Network(object):
input, dropout_prob=drop_prob, is_test=is_test, name=name)
return output
def custom_layer_factory(self):
""" get a custom layer maker provided by subclass
"""
raise NotImplementedError(
'[custom_layer_factory] must be implemented by the subclass.')
@layer
def custom_layer(self, inputs, kind, name, *args, **kwargs):
""" make custom layer from the package specified by '$CAFFE2FLUID_CUSTOM_LAYERS'
""" make custom layer
"""
#fluid = import_fluid()
#import custom package
default = os.path.dirname(os.path.abspath(__file__))
p = os.environ.get('CAFFE2FLUID_CUSTOM_LAYERS', default)
pk = os.path.join(p, 'custom_layers')
assert os.path.exists(pk) is True, "not found custom_layer package [%s],"\
"you need to set $CAFFE2FLUID_CUSTOM_LAYERS" % (pk)
if p not in sys.path:
sys.path.insert(0, p)
from custom_layers import make_custom_layer
return make_custom_layer(kind, inputs, name, *args, **kwargs)
layer_factory = self.custom_layer_factory()
return layer_factory(kind, inputs, name, *args, **kwargs)
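A toy, self-contained sketch of the factory indirection introduced above, with all names made up; in the real code the subclass is generated from net_template and its factory returns make_custom_layer.

```
class Base(object):
    def custom_layer_factory(self):
        raise NotImplementedError(
            '[custom_layer_factory] must be implemented by the subclass.')

    def custom_layer(self, inputs, kind, name, *args, **kwargs):
        # the base class no longer touches sys.path itself; it just asks the
        # subclass for a layer maker and calls it
        layer_factory = self.custom_layer_factory()
        return layer_factory(kind, inputs, name, *args, **kwargs)

class Sub(Base):
    def custom_layer_factory(self):
        # stand-in for make_custom_layer
        return lambda kind, inputs, name, *a, **kw: '%s layer [%s]' % (kind, name)

print(Sub().custom_layer([1, 2, 3], 'ArgMax', 'argmax_1'))
```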
......@@ -198,18 +198,10 @@ class TensorFlowEmitter(object):
codes.append(network_source + '\n')
return self.statement('\n'.join(codes))
def emit_class_def(self, name):
return self.statement('class %s(Network):' % (name))
def emit_setup_def(self):
return self.statement('def setup(self):')
def emit_shape_def(self, input_nodes):
self.outdent()
func_def = self.statement('@classmethod')
func_def += self.statement('def input_shapes(cls):')
self.indent()
def get_inputs_info(self, input_nodes):
input_shapes = {}
for n in input_nodes:
name = n.name
......@@ -218,51 +210,7 @@ class TensorFlowEmitter(object):
input_shapes[name] = ', '.join(shape)
input_shapes = ['"%s": [%s]' % (n, l) for n, l in input_shapes.items()]
shape_str = ','.join(input_shapes)
func_def += self.statement('return {%s}' % (shape_str))
return '\n\n' + func_def
def emit_convert_def(self, input_nodes):
codes = []
inputs = {}
#codes.append('shapes = cls.input_shapes()')
codes.append('shapes = cls.input_shapes()')
codes.append('input_name = shapes.keys()[0]')
codes.append('input_shape = shapes[input_name]')
for n in input_nodes:
name = n.name
layer_var = name + '_layer'
layer_def = '%s = fluid.layers.data(name="%s", shape=shapes["%s"],'\
' dtype="float32")' % (layer_var, name, name)
#layer_var, layer_def = data_layer_def(n.name, n.output_shape)
codes.append(layer_def)
inputs[name] = layer_var
input_dict = ','.join(['"%s": %s' % (n, l) for n, l in inputs.items()])
codes.append('feed_data = {' + input_dict + '}')
codes.append('net = cls(feed_data)')
codes.append("place = fluid.CPUPlace()")
codes.append("exe = fluid.Executor(place)")
codes.append("exe.run(fluid.default_startup_program())")
codes.append("net.load(data_path=npy_model, exe=exe, place=place)")
codes.append("output_vars = [net.get_output()]")
codes.append("fluid.io.save_inference_model(" \
"fluid_path, [input_name],output_vars," \
"exe, main_program=None, model_filename='model'," \
"params_filename='params')")
codes.append(
"print('save fluid model as [model] and [params] in directory [%s]' % (fluid_path))"
)
self.outdent()
func_def = self.statement('@classmethod')
func_def += self.statement('def convert(cls, npy_model, fluid_path):')
self.indent()
func_def += self.statement('fluid = import_fluid()')
for l in codes:
func_def += self.statement(l)
return '\n' + func_def
return '{%s}' % (shape_str)
def emit_main_def(self, name):
if name is None:
......@@ -271,22 +219,7 @@ class TensorFlowEmitter(object):
self.prefix = ''
main_def = self.statement('if __name__ == "__main__":')
self.indent()
main_def += self.statement(
"#usage: save as an inference model for online service\n")
main_def += self.statement("import sys")
main_def += self.statement("if len(sys.argv) != 3:")
self.indent()
main_def += self.statement("print('usage:')")
main_def += self.statement(
"print('\tpython %s [xxxnet.npy] [save_dir]' % (sys.argv[0]))")
main_def += self.statement("exit(1)")
self.outdent()
main_def += self.statement("npy_weight = sys.argv[1]")
main_def += self.statement("fluid_model = sys.argv[2]")
main_def += self.statement("%s.convert(npy_weight, fluid_model)" %
(name))
main_def += self.statement("exit(0)")
main_def += self.statement('exit(main())')
return '\n\n' + main_def
def emit_parents(self, chain):
......@@ -301,10 +234,17 @@ class TensorFlowEmitter(object):
return self.statement('self.' + node.emit())
def emit(self, name, chains, input_nodes=None):
from ..net_template import generate_net_code
from ..net_template import generate_main_code
self.net_name = name
inputs_info = self.get_inputs_info(input_nodes)
s = self.emit_imports()
s += self.emit_class_def(name)
s += generate_net_code(name, inputs_info) + '\n'
self.indent()
# define the net using api
s += self.emit_setup_def()
self.indent()
blocks = []
......@@ -315,8 +255,9 @@ class TensorFlowEmitter(object):
b += self.emit_node(node)
blocks.append(b[:-1])
s = s + '\n\n'.join(blocks)
s += self.emit_shape_def(input_nodes)
s += self.emit_convert_def(input_nodes)
# define the main function
s += '\n\n\n' + generate_main_code(name)
s += self.emit_main_def(name)
return s
......@@ -367,9 +308,6 @@ class Transformer(object):
transformers = [
# Reshape the parameters to TensorFlow's ordering
DataReshaper({
# (c_o, c_i, h, w) -> (h, w, c_i, c_o) for TF
NodeKind.Convolution: (0, 1, 2, 3),
# (c_o, c_i) -> (c_i, c_o)
NodeKind.InnerProduct: (1, 0)
}),
......
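The (1, 0) entry that survives in the DataReshaper map is just a weight transpose; a NumPy sketch with made-up shapes:

```
import numpy as np

w = np.random.rand(1000, 4096).astype('float32')   # (c_o, c_i) as stored by Caffe
print(w.transpose((1, 0)).shape)                    # (4096, 1000), i.e. (c_i, c_o)
```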
......@@ -66,12 +66,14 @@ class DataInjector(object):
def adjust_parameters(self, node, data):
if not self.did_use_pb:
return data
# When using the protobuf-backend, each parameter initially has four dimensions.
# In certain cases (like FC layers), we want to eliminate the singleton dimensions.
# This implementation takes care of the common cases. However, it does leave the
# potential for future issues.
# The Caffe-backend does not suffer from this problem.
data = list(data)
squeeze_indices = [1] # Squeeze biases.
if node.kind == NodeKind.InnerProduct:
squeeze_indices.append(0) # Squeeze FC.
......@@ -80,8 +82,22 @@ class DataInjector(object):
if idx >= len(data):
continue
shape_old = data[idx].shape
data[idx] = np.squeeze(data[idx])
d = data[idx]
assert len(
d.shape
) == 4, 'invalid shape[%s] from caffe when adjust_parameters' % (
str(d.shape))
shape_old = d.shape
sq_axis = None
if idx == 0:
sq_axis = (0, 1)
elif idx == 1:
sq_axis = (0, 1, 2)
else:
continue
data[idx] = np.squeeze(d, axis=sq_axis)
shape_new = data[idx].shape
if len(shape_old) != len(shape_new):
debug('squeeze idx:%d, with kind:%s,name:%s' % \
......@@ -131,18 +147,19 @@ class DataReshaper(object):
for node in graph.nodes:
if node.data is None:
continue
if node.kind not in self.reshaped_node_types:
# Check for 2+ dimensional data
if any(len(tensor.shape) > 1 for tensor in node.data):
notice('parameters not reshaped for node: {}'.format(node))
continue
transpose_order = self.map(node.kind)
weights = node.data[0]
if (node.kind == NodeKind.InnerProduct
) and self.has_spatial_parent(node):
if node.kind == NodeKind.InnerProduct:
# The FC layer connected to the spatial layer needs to be
# re-wired to match the new spatial ordering.
in_shape = node.get_only_parent().output_shape
#in_shape = node.get_only_parent().output_shape
fc_shape = weights.shape
output_channels = fc_shape[0]
weights = weights.reshape((output_channels, -1))
......
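A NumPy sketch of why the explicit squeeze axes above are safer than a bare np.squeeze for the 4-D blobs delivered by the protobuf backend; the shapes are illustrative, not taken from a specific model.

```
import numpy as np

# fully-connected blobs as delivered by the protobuf backend
fc_weights = np.zeros((1, 1, 10, 512), dtype='float32')   # idx 0
fc_bias = np.zeros((1, 1, 1, 10), dtype='float32')        # idx 1

print(np.squeeze(fc_weights, axis=(0, 1)).shape)    # (10, 512)
print(np.squeeze(fc_bias, axis=(0, 1, 2)).shape)    # (10,)

# the unrestricted squeeze that was replaced would also drop a legitimate
# size-1 axis, e.g. an FC layer with a single output channel:
print(np.squeeze(np.zeros((1, 1, 1, 512))).shape)   # (512,) -- c_o == 1 is lost
```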