diff --git a/fluid/image_classification/caffe2fluid/kaffe/caffe/resolver.py b/fluid/image_classification/caffe2fluid/kaffe/caffe/resolver.py
index 6ad7767ed8a88f1c0258ad36cc35221c33b641e5..6f439d7b265fcf7f01abba59d29648c666bcc9cd 100644
--- a/fluid/image_classification/caffe2fluid/kaffe/caffe/resolver.py
+++ b/fluid/image_classification/caffe2fluid/kaffe/caffe/resolver.py
@@ -9,8 +9,8 @@ def import_caffepb():
     p = os.path.dirname(p)
     p = os.path.join(p, '../../proto')
     sys.path.insert(0, p)
-    import caffepb
-    return caffepb
+    import caffe_pb2
+    return caffe_pb2
 
 
 class CaffeResolver(object):
diff --git a/fluid/image_classification/caffe2fluid/kaffe/custom_layers/__init__.py b/fluid/image_classification/caffe2fluid/kaffe/custom_layers/__init__.py
index bc480853a0c32dfbad359c3f8e6f9ef04912b020..703c6a0a8091df79c73465be8c52248af518f3ca 100644
--- a/fluid/image_classification/caffe2fluid/kaffe/custom_layers/__init__.py
+++ b/fluid/image_classification/caffe2fluid/kaffe/custom_layers/__init__.py
@@ -60,16 +60,16 @@ def compute_output_shape(kind, node):
 
 
 def make_node(template, kind, node):
-    """ make a TensorFlowNode for custom layer which means construct
+    """ make a PaddleNode for custom layer which means construct
         a piece of code to define a layer implemented in 'custom_layers'
 
     Args:
-        @template (TensorFlowNode): a factory to new a instance of TensorFLowNode
+        @template (PaddleNode): a factory to create a new instance of PaddleNode
         @kind (str): type of custom layer
         @node (graph.Node): a layer in the net
 
     Returns:
-        instance of TensorFlowNode
+        instance of PaddleNode
     """
     assert kind in custom_layers, "layer[%s] not exist in custom layers" % (
         kind)
diff --git a/fluid/image_classification/caffe2fluid/kaffe/graph.py b/fluid/image_classification/caffe2fluid/kaffe/graph.py
index c7e3f8c44af44053b1bd67bc49438181300e3567..9d006aa9bc84dd081c7bd3d20c50e041a79da645 100644
--- a/fluid/image_classification/caffe2fluid/kaffe/graph.py
+++ b/fluid/image_classification/caffe2fluid/kaffe/graph.py
@@ -216,15 +216,25 @@ class GraphBuilder(object):
         Newer models use the "Input layer" type.
         '''
         nodes = [Node(name, NodeKind.Data) for name in self.params.input]
-        if len(nodes):
-            input_dim = map(int, self.params.input_dim)
-            if not input_dim:
-                if len(self.params.input_shape) > 0:
-                    input_dim = map(int, self.params.input_shape[0].dim)
-                else:
-                    raise KaffeError('Dimensions for input not specified.')
-            for node in nodes:
-                node.output_shape = tuple(input_dim)
+        inputs_num = len(nodes)
+        if inputs_num > 0:
+            input_dims_num = len(self.params.input_dim)
+            if input_dims_num > 0 and input_dims_num != inputs_num * 4:
+                raise KaffeError('invalid input_dim[%d] param in prototxt' %
+                                 (input_dims_num))
+
+            input_dims = [[] for _ in range(inputs_num)]
+            for i in range(input_dims_num):
+                dim = self.params.input_dim[i]
+                which = int(i / 4)
+                input_dims[which].append(int(dim))
+
+            for i in range(inputs_num):
+                if len(self.params.input_shape) == inputs_num:
+                    input_dim = map(int, self.params.input_shape[i].dim)
+                    input_dims[i] = input_dim
+
+                nodes[i].output_shape = tuple(input_dims[i])
         return nodes
 
     def build(self):
diff --git a/fluid/image_classification/caffe2fluid/kaffe/net_template.py b/fluid/image_classification/caffe2fluid/kaffe/net_template.py
index c0de75aa353ab189e81f2e1a58425c46ad77ed94..9fccacb15df79460d11a37bed99823abe364a885 100644
--- a/fluid/image_classification/caffe2fluid/kaffe/net_template.py
+++ b/fluid/image_classification/caffe2fluid/kaffe/net_template.py
@@ -5,7 +5,7 @@ class MyNet(object):
 
     ### automatically generated by caffe2fluid ###
     inputs_info = "INPUTS_INFO"
-    custom_layers_path = "CAFFE2FLUID_CUSTOM_LAYERS"
+    custom_layers_path = "_CAFFE2FLUID_CUSTOM_LAYERS_"
 
     def custom_layer_factory(self):
         import os
@@ -55,22 +55,30 @@ class MyNet(object):
         exe.run(fluid.default_startup_program())
         net.load(data_path=npy_model, exe=exe, place=place)
         output_vars = []
+
+        model_filename = 'model'
+        params_filename = 'params'
         if outputs is None:
             output_vars.append(net.get_output())
         else:
-            if type(outputs) is list:
-                for n in outputs:
-                    assert n in net.layers, 'not found layer with this name[%s]' % (
-                        n)
-                    output_vars.append(net.layers[n])
+            if outputs[0] == 'dump_all':
+                model_filename = None
+                params_filename = None
+                output_vars.append(net.get_output())
+            else:
+                if type(outputs) is list:
+                    for n in outputs:
+                        assert n in net.layers, 'not found layer with this name[%s]' % (
+                            n)
+                        output_vars.append(net.layers[n])
 
         fluid.io.save_inference_model(
             fluid_path, [input_name],
             output_vars,
             exe,
             main_program=None,
-            model_filename='model',
-            params_filename='params')
+            model_filename=model_filename,
+            params_filename=params_filename)
         return 0
 
 
diff --git a/fluid/image_classification/caffe2fluid/kaffe/paddle/network.py b/fluid/image_classification/caffe2fluid/kaffe/paddle/network.py
index 85fd0b7bb3996cfa613013da97969a6f63162bb5..dabe9fa03c105dd2d4d9acc335c8e81df3377119 100644
--- a/fluid/image_classification/caffe2fluid/kaffe/paddle/network.py
+++ b/fluid/image_classification/caffe2fluid/kaffe/paddle/network.py
@@ -262,6 +262,13 @@ class Network(object):
     @layer
     def softmax(self, input, name):
         fluid = import_fluid()
+        shape = input.shape
+        if len(shape) > 2:
+            for sz in shape[2:]:
+                assert sz == 1, "invalid input shape[%s] for softmax" % (
+                    str(shape))
+            input = fluid.layers.reshape(input, shape[0:2])
+
         output = fluid.layers.softmax(input)
         return output
 
diff --git a/fluid/image_classification/caffe2fluid/kaffe/paddle/transformer.py b/fluid/image_classification/caffe2fluid/kaffe/paddle/transformer.py
index 77695ecba105fd10f5cfa244f37c6ae29a6d7291..8607b8748a60aa3a72f77a589727190efa2b8a36 100644
--- a/fluid/image_classification/caffe2fluid/kaffe/paddle/transformer.py
+++ b/fluid/image_classification/caffe2fluid/kaffe/paddle/transformer.py
@@ -11,9 +11,9 @@ from . import network
 
 def get_padding_type(kernel_params, input_shape, output_shape):
     '''Translates Caffe's numeric padding to one of ('SAME', 'VALID').
-    Caffe supports arbitrary padding values, while TensorFlow only
+    Caffe supports arbitrary padding values, while Paddle only
     supports 'SAME' and 'VALID' modes. So, not all Caffe paddings
-    can be translated to TensorFlow. There are some subtleties to
+    can be translated to Paddle. There are some subtleties to
     how the padding edge-cases are handled. These are described here:
     https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto
     '''
@@ -24,11 +24,11 @@ def get_padding_type(kernel_params, input_shape, output_shape):
     return None
 
 
-class TensorFlowNode(object):
-    '''An intermediate representation for TensorFlow operations.'''
+class PaddleNode(object):
+    '''An intermediate representation for Paddle operations.'''
 
     def __init__(self, op, *args, **kwargs):
-        # A string corresponding to the TensorFlow operation
+        # A string corresponding to the Paddle operation
         self.op = op
         # Positional arguments for the operation
         self.args = args
@@ -71,10 +71,10 @@ class MaybeActivated(object):
 
     def __call__(self, *args, **kwargs):
         kwargs.update(self.inject_kwargs)
-        return TensorFlowNode(*args, **kwargs)
+        return PaddleNode(*args, **kwargs)
 
 
-class TensorFlowMapper(NodeMapper):
+class PaddleMapper(NodeMapper):
     def get_kernel_params(self, node):
         kernel_params = node.layer.kernel_parameters
         input_shape = node.get_only_parent().output_shape
@@ -102,7 +102,7 @@ class TensorFlowMapper(NodeMapper):
             kernel_params.stride_h, kernel_params.stride_w, **kwargs)
 
     def map_relu(self, node):
-        return TensorFlowNode('relu')
+        return PaddleNode('relu')
 
     def map_pooling(self, node):
         pool_type = node.parameters.pool
@@ -118,21 +118,20 @@ class TensorFlowMapper(NodeMapper):
         global_pool = getattr(node.layer.parameters, 'global_pooling', False)
         if global_pool:
             input_shape = node.get_only_parent().output_shape
-            return TensorFlowNode(pool_op, input_shape.height,
-                                  input_shape.width, 1, 1, ceil_mode)
+            return PaddleNode(pool_op, input_shape.height, input_shape.width, 1,
+                              1, ceil_mode)
         else:
             (kernel_params, padding) = self.get_kernel_params(node)
-            return TensorFlowNode(pool_op, kernel_params.kernel_h,
-                                  kernel_params.kernel_w,
-                                  kernel_params.stride_h,
-                                  kernel_params.stride_w, ceil_mode, **padding)
+            return PaddleNode(pool_op, kernel_params.kernel_h,
+                              kernel_params.kernel_w, kernel_params.stride_h,
+                              kernel_params.stride_w, ceil_mode, **padding)
 
     def map_sigmoid(self, node):
-        return TensorFlowNode('sigmoid')
+        return PaddleNode('sigmoid')
 
     def map_custom(self, node):
         from .. import custom_layers
-        return custom_layers.make_node(TensorFlowNode, node.kind, node)
+        return custom_layers.make_node(PaddleNode, node.kind, node)
 
     def map_inner_product(self, node):
         #TODO: Axis
@@ -142,24 +141,24 @@ class TensorFlowMapper(NodeMapper):
         return MaybeActivated(node)('fc', node.parameters.num_output)
 
     def map_softmax(self, node):
-        return TensorFlowNode('softmax')
+        return PaddleNode('softmax')
 
     def map_lrn(self, node):
         params = node.parameters
         # The window size must be an odd value. For a window
-        # size of (2*n+1), TensorFlow defines depth_radius = n.
+        # size of (2*n+1), Paddle defines depth_radius = n.
         assert params.local_size % 2 == 1
-        # Caffe scales by (alpha/(2*n+1)), whereas TensorFlow
+        # Caffe scales by (alpha/(2*n+1)), whereas Paddle
         # just scales by alpha (as does Krizhevsky's paper).
         # We'll account for that here.
         alpha = params.alpha / float(params.local_size)
-        return TensorFlowNode('lrn', params.local_size, alpha, params.beta)
+        return PaddleNode('lrn', params.local_size, alpha, params.beta)
 
     def map_concat(self, node):
-        return TensorFlowNode('concat', node.parameters.axis)
+        return PaddleNode('concat', node.parameters.axis)
 
     def map_dropout(self, node):
-        return TensorFlowNode('dropout', node.parameters.dropout_ratio)
+        return PaddleNode('dropout', node.parameters.dropout_ratio)
 
     def map_batch_norm(self, node):
         scale_offset = len(node.data) == 4
@@ -177,21 +176,20 @@ class TensorFlowMapper(NodeMapper):
         operations = {0: 'multiply', 1: 'add', 2: 'max'}
         op_code = node.parameters.operation
         try:
-            return TensorFlowNode(operations[op_code])
+            return PaddleNode(operations[op_code])
         except KeyError:
             raise KaffeError('Unknown elementwise operation: {}'.format(
                 op_code))
 
     def map_scale(self, node):
         params = node.parameters
-        return TensorFlowNode(
-            'scale', axis=params.axis, num_axes=params.num_axes)
+        return PaddleNode('scale', axis=params.axis, num_axes=params.num_axes)
 
     def commit(self, chains):
         return chains
 
 
-class TensorFlowEmitter(object):
+class PaddleEmitter(object):
     def __init__(self, tab=None):
         self.tab = tab or ' ' * 4
         self.prefix = ''
@@ -309,7 +307,7 @@ class Transformer(object):
             ]),
 
             # Rename nodes
-            # Slashes are used for scoping in TensorFlow. Replace slashes
+            # Slashes are used for scoping in Paddle. Replace slashes
             # in node names with underscores.
             # (Caffe's GoogLeNet implementation uses slashes)
             NodeRenamer(lambda node: node.name.replace('/', '_'))
@@ -324,7 +322,7 @@ class Transformer(object):
     def transform_data(self):
         if self.params is None:
             transformers = [
-                # Reshape the parameters to TensorFlow's ordering
+                # Reshape the parameters to Paddle's ordering
                 DataReshaper({
                     # (c_o, c_i) -> (c_i, c_o)
                     NodeKind.InnerProduct: (1, 0)
@@ -345,9 +343,9 @@ class Transformer(object):
 
     def transform_source(self):
         if self.source is None:
-            mapper = TensorFlowMapper(self.graph)
+            mapper = PaddleMapper(self.graph)
             chains = mapper.map()
-            emitter = TensorFlowEmitter()
+            emitter = PaddleEmitter()
             input_nodes = self.graph.get_input_nodes()
             self.source = emitter.emit(self.graph.name, chains, input_nodes)
         return self.source
diff --git a/fluid/image_classification/caffe2fluid/kaffe/shapes.py b/fluid/image_classification/caffe2fluid/kaffe/shapes.py
index 13873fa35f7ae95dd17e3dd6dad3ae0580bc6e12..379cfce6dd3d4c562fd5b89d3b13c467f65c83f8 100644
--- a/fluid/image_classification/caffe2fluid/kaffe/shapes.py
+++ b/fluid/image_classification/caffe2fluid/kaffe/shapes.py
@@ -58,19 +58,22 @@ def shape_scalar(node):
 def shape_data(node):
     if node.output_shape:
         # Old-style input specification
-        return node.output_shape
-    try:
-        # New-style input specification
-        return map(int, node.parameters.shape[0].dim)
-    except:
-        # We most likely have a data layer on our hands. The problem is,
-        # Caffe infers the dimensions of the data from the source (eg: LMDB).
-        # We want to avoid reading datasets here. Fail for now.
-        # This can be temporarily fixed by transforming the data layer to
-        # Caffe's "input" layer (as is usually used in the "deploy" version).
-        # TODO: Find a better solution for this.
-        raise KaffeError('Cannot determine dimensions of data layer.\n'
-                         'See comments in function shape_data for more info.')
+        shape = node.output_shape
+    else:
+        try:
+            # New-style input specification
+            shape = map(int, node.parameters.shape[0].dim)
+        except:
+            # We most likely have a data layer on our hands. The problem is,
+            # Caffe infers the dimensions of the data from the source (eg: LMDB).
+            # We want to avoid reading datasets here. Fail for now.
+            # This can be temporarily fixed by transforming the data layer to
+            # Caffe's "input" layer (as is usually used in the "deploy" version).
+            # TODO: Find a better solution for this.
+            raise KaffeError(
+                'Cannot determine dimensions of data layer.\n'
+                'See comments in function shape_data for more info.')
+    return shape
 
 
 def shape_mem_data(node):
diff --git a/fluid/image_classification/caffe2fluid/proto/compile.sh b/fluid/image_classification/caffe2fluid/proto/compile.sh
index f621e0066d11595bc48362ad7411eeab57f035dd..5743d9cb2b6e9590044ab61daf2d185438f026a9 100755
--- a/fluid/image_classification/caffe2fluid/proto/compile.sh
+++ b/fluid/image_classification/caffe2fluid/proto/compile.sh
@@ -11,14 +11,10 @@ if [[ -z $PROTOC ]];then
 fi
 
 WORK_ROOT=$(dirname `readlink -f "$BASH_SOURCE[0]"`)
-PY_NAME="$WORK_ROOT/caffepb.py"
+PY_NAME="$WORK_ROOT/caffe_pb2.py"
 $PROTOC --proto_path=$WORK_ROOT --python_out=$WORK_ROOT $WORK_ROOT/caffe.proto
 ret=$?
 
-if [ $ret -eq 0 ];then
-    mv $WORK_ROOT/caffe_pb2.py $PY_NAME
-fi
-
 if [ -e "$PY_NAME" ];then
     echo "succeed to generate [$PY_NAME]"
     exit 0