提交 d41c1571 编写于 作者: W walloollaw 提交者: qingqing01

caffe2fluid: fix bug in softmax (#918)

support multiple data layers;
fix bug in importing caffe_pb2;
上级 278969b6
...@@ -9,8 +9,8 @@ def import_caffepb(): ...@@ -9,8 +9,8 @@ def import_caffepb():
p = os.path.dirname(p) p = os.path.dirname(p)
p = os.path.join(p, '../../proto') p = os.path.join(p, '../../proto')
sys.path.insert(0, p) sys.path.insert(0, p)
import caffepb import caffe_pb2
return caffepb return caffe_pb2
class CaffeResolver(object): class CaffeResolver(object):
......
...@@ -60,16 +60,16 @@ def compute_output_shape(kind, node): ...@@ -60,16 +60,16 @@ def compute_output_shape(kind, node):
def make_node(template, kind, node): def make_node(template, kind, node):
""" make a TensorFlowNode for custom layer which means construct """ make a PaddleNode for custom layer which means construct
a piece of code to define a layer implemented in 'custom_layers' a piece of code to define a layer implemented in 'custom_layers'
Args: Args:
@template (TensorFlowNode): a factory to new a instance of TensorFLowNode @template (PaddleNode): a factory to new a instance of PaddleNode
@kind (str): type of custom layer @kind (str): type of custom layer
@node (graph.Node): a layer in the net @node (graph.Node): a layer in the net
Returns: Returns:
instance of TensorFlowNode instance of PaddleNode
""" """
assert kind in custom_layers, "layer[%s] not exist in custom layers" % ( assert kind in custom_layers, "layer[%s] not exist in custom layers" % (
kind) kind)
......
...@@ -216,15 +216,25 @@ class GraphBuilder(object): ...@@ -216,15 +216,25 @@ class GraphBuilder(object):
Newer models use the "Input layer" type. Newer models use the "Input layer" type.
''' '''
nodes = [Node(name, NodeKind.Data) for name in self.params.input] nodes = [Node(name, NodeKind.Data) for name in self.params.input]
if len(nodes): inputs_num = len(nodes)
input_dim = map(int, self.params.input_dim) if inputs_num > 0:
if not input_dim: input_dims_num = len(self.params.input_dim)
if len(self.params.input_shape) > 0: if input_dims_num > 0 and input_dims_num != inputs_num * 4:
input_dim = map(int, self.params.input_shape[0].dim) raise KaffeError('invalid input_dim[%d] param in prototxt' %
else: (input_dims_num))
raise KaffeError('Dimensions for input not specified.')
for node in nodes: input_dims = [[]] * inputs_num
node.output_shape = tuple(input_dim) for i in range(input_dims_num):
dim = self.params.input_dim[i]
which = int(i / 4)
input_dims[which].append(int(dim))
for i in range(inputs_num):
if len(self.params.input_shape) == inputs_num:
input_dim = map(int, self.params.input_shape[i].dim)
input_dims[i] = input_dim
nodes[i].output_shape = tuple(input_dims[i])
return nodes return nodes
def build(self): def build(self):
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
class MyNet(object): class MyNet(object):
### automatically generated by caffe2fluid ### ### automatically generated by caffe2fluid ###
inputs_info = "INPUTS_INFO" inputs_info = "INPUTS_INFO"
custom_layers_path = "CAFFE2FLUID_CUSTOM_LAYERS" custom_layers_path = "_CAFFE2FLUID_CUSTOM_LAYERS_"
def custom_layer_factory(self): def custom_layer_factory(self):
import os import os
...@@ -55,22 +55,30 @@ class MyNet(object): ...@@ -55,22 +55,30 @@ class MyNet(object):
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
net.load(data_path=npy_model, exe=exe, place=place) net.load(data_path=npy_model, exe=exe, place=place)
output_vars = [] output_vars = []
model_filename = 'model'
params_filename = 'params'
if outputs is None: if outputs is None:
output_vars.append(net.get_output()) output_vars.append(net.get_output())
else: else:
if type(outputs) is list: if outputs[0] == 'dump_all':
for n in outputs: model_filename = None
assert n in net.layers, 'not found layer with this name[%s]' % ( params_filename = None
n) output_vars.append(net.get_output())
output_vars.append(net.layers[n]) else:
if type(outputs) is list:
for n in outputs:
assert n in net.layers, 'not found layer with this name[%s]' % (
n)
output_vars.append(net.layers[n])
fluid.io.save_inference_model( fluid.io.save_inference_model(
fluid_path, [input_name], fluid_path, [input_name],
output_vars, output_vars,
exe, exe,
main_program=None, main_program=None,
model_filename='model', model_filename=model_filename,
params_filename='params') params_filename=model_filename)
return 0 return 0
...@@ -125,7 +133,8 @@ def generate_net_code(net_name, inputs_info): ...@@ -125,7 +133,8 @@ def generate_net_code(net_name, inputs_info):
net_codes = net_codes.replace('"INPUTS_INFO"', inputs_info) net_codes = net_codes.replace('"INPUTS_INFO"', inputs_info)
custom_layer_dir = os.path.dirname(os.path.abspath(__file__)) custom_layer_dir = os.path.dirname(os.path.abspath(__file__))
net_codes = net_codes.replace('CAFFE2FLUID_CUSTOM_LAYERS', custom_layer_dir) net_codes = net_codes.replace('_CAFFE2FLUID_CUSTOM_LAYERS_',
custom_layer_dir)
return net_codes return net_codes
......
...@@ -262,6 +262,13 @@ class Network(object): ...@@ -262,6 +262,13 @@ class Network(object):
@layer @layer
def softmax(self, input, name): def softmax(self, input, name):
fluid = import_fluid() fluid = import_fluid()
shape = input.shape
if len(shape) > 2:
for sz in shape[2:]:
assert sz == 1, "invalid input shape[%s] for softmax" % (
str(shape))
input = fluid.layers.reshape(input, shape[0:2])
output = fluid.layers.softmax(input) output = fluid.layers.softmax(input)
return output return output
......
...@@ -11,9 +11,9 @@ from . import network ...@@ -11,9 +11,9 @@ from . import network
def get_padding_type(kernel_params, input_shape, output_shape): def get_padding_type(kernel_params, input_shape, output_shape):
'''Translates Caffe's numeric padding to one of ('SAME', 'VALID'). '''Translates Caffe's numeric padding to one of ('SAME', 'VALID').
Caffe supports arbitrary padding values, while TensorFlow only Caffe supports arbitrary padding values, while Paddle only
supports 'SAME' and 'VALID' modes. So, not all Caffe paddings supports 'SAME' and 'VALID' modes. So, not all Caffe paddings
can be translated to TensorFlow. There are some subtleties to can be translated to Paddle. There are some subtleties to
how the padding edge-cases are handled. These are described here: how the padding edge-cases are handled. These are described here:
https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto
''' '''
...@@ -24,11 +24,11 @@ def get_padding_type(kernel_params, input_shape, output_shape): ...@@ -24,11 +24,11 @@ def get_padding_type(kernel_params, input_shape, output_shape):
return None return None
class TensorFlowNode(object): class PaddleNode(object):
'''An intermediate representation for TensorFlow operations.''' '''An intermediate representation for Paddle operations.'''
def __init__(self, op, *args, **kwargs): def __init__(self, op, *args, **kwargs):
# A string corresponding to the TensorFlow operation # A string corresponding to the Paddle operation
self.op = op self.op = op
# Positional arguments for the operation # Positional arguments for the operation
self.args = args self.args = args
...@@ -71,10 +71,10 @@ class MaybeActivated(object): ...@@ -71,10 +71,10 @@ class MaybeActivated(object):
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):
kwargs.update(self.inject_kwargs) kwargs.update(self.inject_kwargs)
return TensorFlowNode(*args, **kwargs) return PaddleNode(*args, **kwargs)
class TensorFlowMapper(NodeMapper): class PaddleMapper(NodeMapper):
def get_kernel_params(self, node): def get_kernel_params(self, node):
kernel_params = node.layer.kernel_parameters kernel_params = node.layer.kernel_parameters
input_shape = node.get_only_parent().output_shape input_shape = node.get_only_parent().output_shape
...@@ -102,7 +102,7 @@ class TensorFlowMapper(NodeMapper): ...@@ -102,7 +102,7 @@ class TensorFlowMapper(NodeMapper):
kernel_params.stride_h, kernel_params.stride_w, **kwargs) kernel_params.stride_h, kernel_params.stride_w, **kwargs)
def map_relu(self, node): def map_relu(self, node):
return TensorFlowNode('relu') return PaddleNode('relu')
def map_pooling(self, node): def map_pooling(self, node):
pool_type = node.parameters.pool pool_type = node.parameters.pool
...@@ -118,21 +118,20 @@ class TensorFlowMapper(NodeMapper): ...@@ -118,21 +118,20 @@ class TensorFlowMapper(NodeMapper):
global_pool = getattr(node.layer.parameters, 'global_pooling', False) global_pool = getattr(node.layer.parameters, 'global_pooling', False)
if global_pool: if global_pool:
input_shape = node.get_only_parent().output_shape input_shape = node.get_only_parent().output_shape
return TensorFlowNode(pool_op, input_shape.height, return PaddleNode(pool_op, input_shape.height, input_shape.width, 1,
input_shape.width, 1, 1, ceil_mode) 1, ceil_mode)
else: else:
(kernel_params, padding) = self.get_kernel_params(node) (kernel_params, padding) = self.get_kernel_params(node)
return TensorFlowNode(pool_op, kernel_params.kernel_h, return PaddleNode(pool_op, kernel_params.kernel_h,
kernel_params.kernel_w, kernel_params.kernel_w, kernel_params.stride_h,
kernel_params.stride_h, kernel_params.stride_w, ceil_mode, **padding)
kernel_params.stride_w, ceil_mode, **padding)
def map_sigmoid(self, node): def map_sigmoid(self, node):
return TensorFlowNode('sigmoid') return PaddleNode('sigmoid')
def map_custom(self, node): def map_custom(self, node):
from .. import custom_layers from .. import custom_layers
return custom_layers.make_node(TensorFlowNode, node.kind, node) return custom_layers.make_node(PaddleNode, node.kind, node)
def map_inner_product(self, node): def map_inner_product(self, node):
#TODO: Axis #TODO: Axis
...@@ -142,24 +141,24 @@ class TensorFlowMapper(NodeMapper): ...@@ -142,24 +141,24 @@ class TensorFlowMapper(NodeMapper):
return MaybeActivated(node)('fc', node.parameters.num_output) return MaybeActivated(node)('fc', node.parameters.num_output)
def map_softmax(self, node): def map_softmax(self, node):
return TensorFlowNode('softmax') return PaddleNode('softmax')
def map_lrn(self, node): def map_lrn(self, node):
params = node.parameters params = node.parameters
# The window size must be an odd value. For a window # The window size must be an odd value. For a window
# size of (2*n+1), TensorFlow defines depth_radius = n. # size of (2*n+1), Paddle defines depth_radius = n.
assert params.local_size % 2 == 1 assert params.local_size % 2 == 1
# Caffe scales by (alpha/(2*n+1)), whereas TensorFlow # Caffe scales by (alpha/(2*n+1)), whereas Paddle
# just scales by alpha (as does Krizhevsky's paper). # just scales by alpha (as does Krizhevsky's paper).
# We'll account for that here. # We'll account for that here.
alpha = params.alpha / float(params.local_size) alpha = params.alpha / float(params.local_size)
return TensorFlowNode('lrn', params.local_size, alpha, params.beta) return PaddleNode('lrn', params.local_size, alpha, params.beta)
def map_concat(self, node): def map_concat(self, node):
return TensorFlowNode('concat', node.parameters.axis) return PaddleNode('concat', node.parameters.axis)
def map_dropout(self, node): def map_dropout(self, node):
return TensorFlowNode('dropout', node.parameters.dropout_ratio) return PaddleNode('dropout', node.parameters.dropout_ratio)
def map_batch_norm(self, node): def map_batch_norm(self, node):
scale_offset = len(node.data) == 4 scale_offset = len(node.data) == 4
...@@ -177,21 +176,20 @@ class TensorFlowMapper(NodeMapper): ...@@ -177,21 +176,20 @@ class TensorFlowMapper(NodeMapper):
operations = {0: 'multiply', 1: 'add', 2: 'max'} operations = {0: 'multiply', 1: 'add', 2: 'max'}
op_code = node.parameters.operation op_code = node.parameters.operation
try: try:
return TensorFlowNode(operations[op_code]) return PaddleNode(operations[op_code])
except KeyError: except KeyError:
raise KaffeError('Unknown elementwise operation: {}'.format( raise KaffeError('Unknown elementwise operation: {}'.format(
op_code)) op_code))
def map_scale(self, node): def map_scale(self, node):
params = node.parameters params = node.parameters
return TensorFlowNode( return PaddleNode('scale', axis=params.axis, num_axes=params.num_axes)
'scale', axis=params.axis, num_axes=params.num_axes)
def commit(self, chains): def commit(self, chains):
return chains return chains
class TensorFlowEmitter(object): class PaddleEmitter(object):
def __init__(self, tab=None): def __init__(self, tab=None):
self.tab = tab or ' ' * 4 self.tab = tab or ' ' * 4
self.prefix = '' self.prefix = ''
...@@ -309,7 +307,7 @@ class Transformer(object): ...@@ -309,7 +307,7 @@ class Transformer(object):
]), ]),
# Rename nodes # Rename nodes
# Slashes are used for scoping in TensorFlow. Replace slashes # Slashes are used for scoping in Paddle. Replace slashes
# in node names with underscores. # in node names with underscores.
# (Caffe's GoogLeNet implementation uses slashes) # (Caffe's GoogLeNet implementation uses slashes)
NodeRenamer(lambda node: node.name.replace('/', '_')) NodeRenamer(lambda node: node.name.replace('/', '_'))
...@@ -324,7 +322,7 @@ class Transformer(object): ...@@ -324,7 +322,7 @@ class Transformer(object):
def transform_data(self): def transform_data(self):
if self.params is None: if self.params is None:
transformers = [ transformers = [
# Reshape the parameters to TensorFlow's ordering # Reshape the parameters to Paddle's ordering
DataReshaper({ DataReshaper({
# (c_o, c_i) -> (c_i, c_o) # (c_o, c_i) -> (c_i, c_o)
NodeKind.InnerProduct: (1, 0) NodeKind.InnerProduct: (1, 0)
...@@ -345,9 +343,9 @@ class Transformer(object): ...@@ -345,9 +343,9 @@ class Transformer(object):
def transform_source(self): def transform_source(self):
if self.source is None: if self.source is None:
mapper = TensorFlowMapper(self.graph) mapper = PaddleMapper(self.graph)
chains = mapper.map() chains = mapper.map()
emitter = TensorFlowEmitter() emitter = PaddleEmitter()
input_nodes = self.graph.get_input_nodes() input_nodes = self.graph.get_input_nodes()
self.source = emitter.emit(self.graph.name, chains, input_nodes) self.source = emitter.emit(self.graph.name, chains, input_nodes)
return self.source return self.source
...@@ -58,19 +58,22 @@ def shape_scalar(node): ...@@ -58,19 +58,22 @@ def shape_scalar(node):
def shape_data(node): def shape_data(node):
if node.output_shape: if node.output_shape:
# Old-style input specification # Old-style input specification
return node.output_shape shape = node.output_shape
try: else:
# New-style input specification try:
return map(int, node.parameters.shape[0].dim) # New-style input specification
except: shape = map(int, node.parameters.shape[0].dim)
# We most likely have a data layer on our hands. The problem is, except:
# Caffe infers the dimensions of the data from the source (eg: LMDB). # We most likely have a data layer on our hands. The problem is,
# We want to avoid reading datasets here. Fail for now. # Caffe infers the dimensions of the data from the source (eg: LMDB).
# This can be temporarily fixed by transforming the data layer to # We want to avoid reading datasets here. Fail for now.
# Caffe's "input" layer (as is usually used in the "deploy" version). # This can be temporarily fixed by transforming the data layer to
# TODO: Find a better solution for this. # Caffe's "input" layer (as is usually used in the "deploy" version).
raise KaffeError('Cannot determine dimensions of data layer.\n' # TODO: Find a better solution for this.
'See comments in function shape_data for more info.') raise KaffeError(
'Cannot determine dimensions of data layer.\n'
'See comments in function shape_data for more info.')
return shape
def shape_mem_data(node): def shape_mem_data(node):
......
...@@ -11,14 +11,10 @@ if [[ -z $PROTOC ]];then ...@@ -11,14 +11,10 @@ if [[ -z $PROTOC ]];then
fi fi
WORK_ROOT=$(dirname `readlink -f "$BASH_SOURCE[0]"`) WORK_ROOT=$(dirname `readlink -f "$BASH_SOURCE[0]"`)
PY_NAME="$WORK_ROOT/caffepb.py" PY_NAME="$WORK_ROOT/caffe_pb2.py"
$PROTOC --proto_path=$WORK_ROOT --python_out=$WORK_ROOT $WORK_ROOT/caffe.proto $PROTOC --proto_path=$WORK_ROOT --python_out=$WORK_ROOT $WORK_ROOT/caffe.proto
ret=$? ret=$?
if [ $ret -eq 0 ];then
mv $WORK_ROOT/caffe_pb2.py $PY_NAME
fi
if [ -e "$PY_NAME" ];then if [ -e "$PY_NAME" ];then
echo "succeed to generate [$PY_NAME]" echo "succeed to generate [$PY_NAME]"
exit 0 exit 0
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册