Commit d41c1571 authored by walloollaw, committed by qingqing01

caffe2fluid: fix bug in softmax; (#918)

support multiple data layers;
fix bug with caffe_pb2;
Parent 278969b6
@@ -9,8 +9,8 @@ def import_caffepb():
     p = os.path.dirname(p)
     p = os.path.join(p, '../../proto')
     sys.path.insert(0, p)
-    import caffepb
-    return caffepb
+    import caffe_pb2
+    return caffe_pb2
 
 
 class CaffeResolver(object):
......
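The resolver hunk above matches the build-script change at the bottom of this commit: the `caffe_pb2` module that protoc emits by default is now imported directly, instead of a hand-renamed `caffepb`. A minimal sketch of using the generated module to parse a deploy prototxt; the file path and helper name are illustrative, not from this commit:

```python
from google.protobuf import text_format

import caffe_pb2  # generated from caffe.proto by proto/compile.sh


def load_net_param(prototxt_path):
    # Parse a deploy prototxt into a NetParameter message.
    net = caffe_pb2.NetParameter()
    with open(prototxt_path) as f:
        text_format.Merge(f.read(), net)
    return net


net = load_net_param('deploy.prototxt')  # hypothetical input file
print(list(net.input), list(net.input_dim))
```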
@@ -60,16 +60,16 @@ def compute_output_shape(kind, node):
 
 def make_node(template, kind, node):
-    """ make a TensorFlowNode for custom layer which means construct
+    """ make a PaddleNode for custom layer which means construct
         a piece of code to define a layer implemented in 'custom_layers'
 
     Args:
-        @template (TensorFlowNode): a factory to new a instance of TensorFLowNode
+        @template (PaddleNode): a factory to new a instance of PaddleNode
         @kind (str): type of custom layer
         @node (graph.Node): a layer in the net
 
     Returns:
-        instance of TensorFlowNode
+        instance of PaddleNode
     """
     assert kind in custom_layers, "layer[%s] not exist in custom layers" % (
         kind)
......
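For context, `make_node` only asserts that `kind` is a key of the `custom_layers` registry before instantiating the template. A hedged sketch of what a registry entry could look like; the `register` helper and its field names are guesses, only the dict-keyed-by-kind contract is implied by the assert above:

```python
custom_layers = {}


def register(kind, shape_func, layer_func):
    # Hypothetical registration helper: one entry per custom layer kind.
    # make_node() asserts `kind in custom_layers` before building a node.
    custom_layers[kind] = {'shape': shape_func, 'layer': layer_func}
```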
@@ -216,15 +216,25 @@ class GraphBuilder(object):
         Newer models use the "Input layer" type.
         '''
         nodes = [Node(name, NodeKind.Data) for name in self.params.input]
-        if len(nodes):
-            input_dim = map(int, self.params.input_dim)
-            if not input_dim:
-                if len(self.params.input_shape) > 0:
-                    input_dim = map(int, self.params.input_shape[0].dim)
-                else:
-                    raise KaffeError('Dimensions for input not specified.')
-            for node in nodes:
-                node.output_shape = tuple(input_dim)
+        inputs_num = len(nodes)
+        if inputs_num > 0:
+            input_dims_num = len(self.params.input_dim)
+            if input_dims_num > 0 and input_dims_num != inputs_num * 4:
+                raise KaffeError('invalid input_dim[%d] param in prototxt' %
+                                 (input_dims_num))
+            # one list per input; note [[]] * n would alias a single list
+            input_dims = [[] for _ in range(inputs_num)]
+            for i in range(input_dims_num):
+                dim = self.params.input_dim[i]
+                which = int(i / 4)
+                input_dims[which].append(int(dim))
+            for i in range(inputs_num):
+                if len(self.params.input_shape) == inputs_num:
+                    input_dim = map(int, self.params.input_shape[i].dim)
+                    input_dims[i] = input_dim
+                nodes[i].output_shape = tuple(input_dims[i])
         return nodes
 
     def build(self):
......
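The rewritten `make_input_nodes` accepts one shape per input: either a flat `input_dim` list carrying exactly four values per input, or one `input_shape` entry per input. A standalone sketch of the same splitting arithmetic for a hypothetical two-input deploy prototxt:

```python
# Hypothetical prototxt header with two inputs:
#   input: "image"  input: "label"
#   input_dim: 1  input_dim: 3  input_dim: 224  input_dim: 224
#   input_dim: 1  input_dim: 1  input_dim: 1    input_dim: 1
input_names = ['image', 'label']
flat_dims = [1, 3, 224, 224, 1, 1, 1, 1]

assert len(flat_dims) == 4 * len(input_names), 'need 4 dims per input'
shapes = [tuple(flat_dims[i * 4:(i + 1) * 4]) for i in range(len(input_names))]
print(shapes)  # [(1, 3, 224, 224), (1, 1, 1, 1)]
```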
@@ -5,7 +5,7 @@
 class MyNet(object):
     ### automatically generated by caffe2fluid ###
     inputs_info = "INPUTS_INFO"
-    custom_layers_path = "CAFFE2FLUID_CUSTOM_LAYERS"
+    custom_layers_path = "_CAFFE2FLUID_CUSTOM_LAYERS_"
 
     def custom_layer_factory(self):
         import os
@@ -55,22 +55,30 @@ class MyNet(object):
     exe.run(fluid.default_startup_program())
 
     net.load(data_path=npy_model, exe=exe, place=place)
     output_vars = []
+    model_filename = 'model'
+    params_filename = 'params'
     if outputs is None:
         output_vars.append(net.get_output())
     else:
-        if type(outputs) is list:
-            for n in outputs:
-                assert n in net.layers, 'not found layer with this name[%s]' % (
-                    n)
-                output_vars.append(net.layers[n])
+        if outputs[0] == 'dump_all':
+            model_filename = None
+            params_filename = None
+            output_vars.append(net.get_output())
+        else:
+            if type(outputs) is list:
+                for n in outputs:
+                    assert n in net.layers, 'not found layer with this name[%s]' % (
+                        n)
+                    output_vars.append(net.layers[n])
 
     fluid.io.save_inference_model(
         fluid_path, [input_name],
         output_vars,
         exe,
         main_program=None,
-        model_filename='model',
-        params_filename='params')
+        model_filename=model_filename,
+        params_filename=params_filename)
     return 0
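With this hunk, passing `dump_all` as the first output name switches `fluid.io.save_inference_model` to its one-file-per-variable layout (both filenames set to None) instead of the combined `model`/`params` pair. A hedged loading sketch; the `./fluid` path is illustrative, while the `load_inference_model` arguments follow the Fluid API:

```python
import paddle.fluid as fluid

place = fluid.CPUPlace()
exe = fluid.Executor(place)

# Model saved via the 'dump_all' branch: one file per variable.
program, feed_names, fetch_targets = fluid.io.load_inference_model(
    './fluid', exe)

# Model saved the default way: combined 'model' + 'params' files.
program, feed_names, fetch_targets = fluid.io.load_inference_model(
    './fluid', exe, model_filename='model', params_filename='params')
```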
@@ -125,7 +133,8 @@ def generate_net_code(net_name, inputs_info):
     net_codes = net_codes.replace('"INPUTS_INFO"', inputs_info)
 
     custom_layer_dir = os.path.dirname(os.path.abspath(__file__))
-    net_codes = net_codes.replace('CAFFE2FLUID_CUSTOM_LAYERS', custom_layer_dir)
+    net_codes = net_codes.replace('_CAFFE2FLUID_CUSTOM_LAYERS_',
+                                  custom_layer_dir)
     return net_codes
......
@@ -262,6 +262,13 @@ class Network(object):
     @layer
     def softmax(self, input, name):
         fluid = import_fluid()
+        shape = input.shape
+        if len(shape) > 2:
+            for sz in shape[2:]:
+                assert sz == 1, "invalid input shape[%s] for softmax" % (
+                    str(shape))
+            input = fluid.layers.reshape(input, shape[0:2])
         output = fluid.layers.softmax(input)
         return output
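The bug being fixed: a Caffe softmax after a global pool or fc layer commonly sees an `N x C x 1 x 1` blob, while the Fluid softmax used here expects a 2-D tensor, so trailing singleton dimensions are squeezed first (and anything non-singleton is rejected by the assert). The shape check, replayed on plain tuples:

```python
def squeezable_to_2d(shape):
    # True for shapes like (N, C, 1, 1): softmax can then run on (N, C).
    return len(shape) <= 2 or all(sz == 1 for sz in shape[2:])


print(squeezable_to_2d((8, 1000, 1, 1)))  # True  -> reshaped to (8, 1000)
print(squeezable_to_2d((8, 21, 64, 64)))  # False -> the assert above fires
```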
......
@@ -11,9 +11,9 @@ from . import network
 
 def get_padding_type(kernel_params, input_shape, output_shape):
     '''Translates Caffe's numeric padding to one of ('SAME', 'VALID').
-    Caffe supports arbitrary padding values, while TensorFlow only
+    Caffe supports arbitrary padding values, while Paddle only
     supports 'SAME' and 'VALID' modes. So, not all Caffe paddings
-    can be translated to TensorFlow. There are some subtleties to
+    can be translated to Paddle. There are some subtleties to
     how the padding edge-cases are handled. These are described here:
     https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto
     '''
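Beyond the docstring rename, the numeric-to-named translation is the usual one: a Caffe pad maps to 'SAME' when the output equals ceil(input/stride), to 'VALID' when it equals ceil((input - kernel + 1)/stride), and to None otherwise. A sketch of that check using the standard SAME/VALID formulas, not code copied from this file:

```python
import math


def infer_padding_type(in_hw, out_hw, kernel, stride):
    # 'SAME': out = ceil(in / stride)
    same = tuple(int(math.ceil(i / float(s))) for i, s in zip(in_hw, stride))
    if tuple(out_hw) == same:
        return 'SAME'
    # 'VALID': out = ceil((in - k + 1) / stride)
    valid = tuple(
        int(math.ceil((i - k + 1) / float(s)))
        for i, k, s in zip(in_hw, kernel, stride))
    if tuple(out_hw) == valid:
        return 'VALID'
    return None  # arbitrary Caffe padding with no SAME/VALID equivalent


print(infer_padding_type((224, 224), (112, 112), (3, 3), (2, 2)))  # SAME
```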
@@ -24,11 +24,11 @@ def get_padding_type(kernel_params, input_shape, output_shape):
     return None
 
 
-class TensorFlowNode(object):
-    '''An intermediate representation for TensorFlow operations.'''
+class PaddleNode(object):
+    '''An intermediate representation for Paddle operations.'''
 
     def __init__(self, op, *args, **kwargs):
-        # A string corresponding to the TensorFlow operation
+        # A string corresponding to the Paddle operation
         self.op = op
         # Positional arguments for the operation
         self.args = args
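`PaddleNode` (née `TensorFlowNode`) is a thin IR record: an op name plus the positional and keyword arguments of the call to be generated. A runnable sketch of the contract; the `emit` rendering is illustrative of what the emitter does with these nodes, not this file's exact code:

```python
class PaddleNode(object):
    '''Sketch of the IR node: an op name plus call arguments.'''

    def __init__(self, op, *args, **kwargs):
        self.op = op          # e.g. 'max_pool', 'relu', 'softmax'
        self.args = args      # positional args for the generated call
        self.kwargs = kwargs  # keyword args for the generated call

    def emit(self):
        params = [repr(a) for a in self.args]
        params += ['%s=%r' % (k, v) for k, v in sorted(self.kwargs.items())]
        return '.%s(%s)' % (self.op, ', '.join(params))


print(PaddleNode('max_pool', 2, 2, 2, 2, name='pool1').emit())
# .max_pool(2, 2, 2, 2, name='pool1')
```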
@@ -71,10 +71,10 @@ class MaybeActivated(object):
 
     def __call__(self, *args, **kwargs):
         kwargs.update(self.inject_kwargs)
-        return TensorFlowNode(*args, **kwargs)
+        return PaddleNode(*args, **kwargs)
 
 
-class TensorFlowMapper(NodeMapper):
+class PaddleMapper(NodeMapper):
     def get_kernel_params(self, node):
         kernel_params = node.layer.kernel_parameters
         input_shape = node.get_only_parent().output_shape
@@ -102,7 +102,7 @@ class TensorFlowMapper(NodeMapper):
             kernel_params.stride_h, kernel_params.stride_w, **kwargs)
 
     def map_relu(self, node):
-        return TensorFlowNode('relu')
+        return PaddleNode('relu')
 
     def map_pooling(self, node):
         pool_type = node.parameters.pool
@@ -118,21 +118,20 @@ class TensorFlowMapper(NodeMapper):
         global_pool = getattr(node.layer.parameters, 'global_pooling', False)
         if global_pool:
             input_shape = node.get_only_parent().output_shape
-            return TensorFlowNode(pool_op, input_shape.height,
-                                  input_shape.width, 1, 1, ceil_mode)
+            return PaddleNode(pool_op, input_shape.height, input_shape.width, 1,
+                              1, ceil_mode)
         else:
             (kernel_params, padding) = self.get_kernel_params(node)
-            return TensorFlowNode(pool_op, kernel_params.kernel_h,
-                                  kernel_params.kernel_w,
-                                  kernel_params.stride_h,
-                                  kernel_params.stride_w, ceil_mode, **padding)
+            return PaddleNode(pool_op, kernel_params.kernel_h,
+                              kernel_params.kernel_w, kernel_params.stride_h,
+                              kernel_params.stride_w, ceil_mode, **padding)
 
     def map_sigmoid(self, node):
-        return TensorFlowNode('sigmoid')
+        return PaddleNode('sigmoid')
 
     def map_custom(self, node):
         from .. import custom_layers
-        return custom_layers.make_node(TensorFlowNode, node.kind, node)
+        return custom_layers.make_node(PaddleNode, node.kind, node)
 
     def map_inner_product(self, node):
         #TODO: Axis
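Note the global-pooling branch above: it is lowered to an ordinary pool whose kernel covers the whole input, with stride 1. Worked through for a hypothetical 7x7 feature map:

```python
input_h, input_w = 7, 7                # spatial extent entering the pool
kernel_h, kernel_w = input_h, input_w  # global pool == full-extent kernel
stride = 1
out_h = (input_h - kernel_h) // stride + 1
out_w = (input_w - kernel_w) // stride + 1
print(out_h, out_w)  # 1 1 -> one value per channel, as in Caffe's global pool
```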
@@ -142,24 +141,24 @@ class TensorFlowMapper(NodeMapper):
         return MaybeActivated(node)('fc', node.parameters.num_output)
 
     def map_softmax(self, node):
-        return TensorFlowNode('softmax')
+        return PaddleNode('softmax')
 
     def map_lrn(self, node):
         params = node.parameters
         # The window size must be an odd value. For a window
-        # size of (2*n+1), TensorFlow defines depth_radius = n.
+        # size of (2*n+1), Paddle defines depth_radius = n.
         assert params.local_size % 2 == 1
-        # Caffe scales by (alpha/(2*n+1)), whereas TensorFlow
+        # Caffe scales by (alpha/(2*n+1)), whereas Paddle
         # just scales by alpha (as does Krizhevsky's paper).
         # We'll account for that here.
         alpha = params.alpha / float(params.local_size)
-        return TensorFlowNode('lrn', params.local_size, alpha, params.beta)
+        return PaddleNode('lrn', params.local_size, alpha, params.beta)
 
     def map_concat(self, node):
-        return TensorFlowNode('concat', node.parameters.axis)
+        return PaddleNode('concat', node.parameters.axis)
 
     def map_dropout(self, node):
-        return TensorFlowNode('dropout', node.parameters.dropout_ratio)
+        return PaddleNode('dropout', node.parameters.dropout_ratio)
 
     def map_batch_norm(self, node):
         scale_offset = len(node.data) == 4
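On the LRN conversion a few lines up: Caffe folds a 1/local_size factor into its normalization, while the target op (per the comment) applies `alpha` as given, so the mapper divides once at conversion time. With typical AlexNet parameters:

```python
local_size = 5        # Caffe's window (2*n+1); must be odd
caffe_alpha = 0.0001  # alpha as written in the prototxt
paddle_alpha = caffe_alpha / float(local_size)
print(paddle_alpha)   # 2e-05 -> the value passed to PaddleNode('lrn', ...)
```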
@@ -177,21 +176,20 @@ class TensorFlowMapper(NodeMapper):
         operations = {0: 'multiply', 1: 'add', 2: 'max'}
         op_code = node.parameters.operation
         try:
-            return TensorFlowNode(operations[op_code])
+            return PaddleNode(operations[op_code])
         except KeyError:
             raise KaffeError('Unknown elementwise operation: {}'.format(
                 op_code))
 
     def map_scale(self, node):
         params = node.parameters
-        return TensorFlowNode(
-            'scale', axis=params.axis, num_axes=params.num_axes)
+        return PaddleNode('scale', axis=params.axis, num_axes=params.num_axes)
 
     def commit(self, chains):
         return chains
 
 
-class TensorFlowEmitter(object):
+class PaddleEmitter(object):
     def __init__(self, tab=None):
         self.tab = tab or ' ' * 4
         self.prefix = ''
@@ -309,7 +307,7 @@ class Transformer(object):
             ]),
 
             # Rename nodes
-            # Slashes are used for scoping in TensorFlow. Replace slashes
+            # Slashes are used for scoping in Paddle. Replace slashes
            # in node names with underscores.
            # (Caffe's GoogLeNet implementation uses slashes)
             NodeRenamer(lambda node: node.name.replace('/', '_'))
@@ -324,7 +322,7 @@ class Transformer(object):
     def transform_data(self):
         if self.params is None:
             transformers = [
-                # Reshape the parameters to TensorFlow's ordering
+                # Reshape the parameters to Paddle's ordering
                 DataReshaper({
                     # (c_o, c_i) -> (c_i, c_o)
                     NodeKind.InnerProduct: (1, 0)
@@ -345,9 +343,9 @@ class Transformer(object):
 
     def transform_source(self):
         if self.source is None:
-            mapper = TensorFlowMapper(self.graph)
+            mapper = PaddleMapper(self.graph)
             chains = mapper.map()
-            emitter = TensorFlowEmitter()
+            emitter = PaddleEmitter()
             input_nodes = self.graph.get_input_nodes()
             self.source = emitter.emit(self.graph.name, chains, input_nodes)
         return self.source
@@ -58,19 +58,22 @@ def shape_scalar(node):
 
 def shape_data(node):
     if node.output_shape:
         # Old-style input specification
-        return node.output_shape
-    try:
-        # New-style input specification
-        return map(int, node.parameters.shape[0].dim)
-    except:
-        # We most likely have a data layer on our hands. The problem is,
-        # Caffe infers the dimensions of the data from the source (eg: LMDB).
-        # We want to avoid reading datasets here. Fail for now.
-        # This can be temporarily fixed by transforming the data layer to
-        # Caffe's "input" layer (as is usually used in the "deploy" version).
-        # TODO: Find a better solution for this.
-        raise KaffeError('Cannot determine dimensions of data layer.\n'
-                         'See comments in function shape_data for more info.')
+        shape = node.output_shape
+    else:
+        try:
+            # New-style input specification
+            shape = map(int, node.parameters.shape[0].dim)
+        except:
+            # We most likely have a data layer on our hands. The problem is,
+            # Caffe infers the dimensions of the data from the source (eg: LMDB).
+            # We want to avoid reading datasets here. Fail for now.
+            # This can be temporarily fixed by transforming the data layer to
+            # Caffe's "input" layer (as is usually used in the "deploy" version).
+            # TODO: Find a better solution for this.
+            raise KaffeError(
+                'Cannot determine dimensions of data layer.\n'
+                'See comments in function shape_data for more info.')
+    return shape
 
 
 def shape_mem_data(node):
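The `shape_data` restructure is behavior-preserving: both the old-style path (dimensions already resolved onto the node from net-level `input`/`input_dim`) and the new-style path (an Input layer with a `shape` message) now exit through one `return shape`. A mirror of that control flow on plain Python values, with hypothetical field access standing in for the protobuf objects:

```python
def shape_data_sketch(output_shape, parameters):
    if output_shape:  # old-style: dimensions already resolved on the node
        shape = output_shape
    else:
        try:          # new-style: Input layer with shape { dim: ... }
            shape = list(map(int, parameters['shape'][0]['dim']))
        except Exception:  # plain Data layer (e.g. LMDB source): give up
            raise ValueError('cannot determine data layer dimensions')
    return shape


print(shape_data_sketch(None, {'shape': [{'dim': [1, 3, 224, 224]}]}))
# [1, 3, 224, 224]
```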
......
@@ -11,14 +11,10 @@ if [[ -z $PROTOC ]];then
 fi
 
 WORK_ROOT=$(dirname `readlink -f "$BASH_SOURCE[0]"`)
-PY_NAME="$WORK_ROOT/caffepb.py"
+PY_NAME="$WORK_ROOT/caffe_pb2.py"
 $PROTOC --proto_path=$WORK_ROOT --python_out=$WORK_ROOT $WORK_ROOT/caffe.proto
 ret=$?
-if [ $ret -eq 0 ];then
-    mv $WORK_ROOT/caffe_pb2.py $PY_NAME
-fi
 if [ -e "$PY_NAME" ];then
     echo "succeed to generate [$PY_NAME]"
     exit 0
......