提交 71f3172c 编写于 作者: W walloollaw 提交者: qingqing01

caffe2fluid: support intermediate layer precision compare (#965)

上级 f93838a4
......@@ -20,8 +20,8 @@ def calc_diff(f1, f2):
d1 = np.load(f1)
d2 = np.load(f2)
print d1.shape
print d2.shape
#print d1.shape
#print d2.shape
#print d1[0, 0, 0:10, 0:10]
#print d2[0, 0, 0:10, 0:10]
#d1 = d1[:, :, 1:-2, 1:-2]
......
......@@ -78,6 +78,54 @@ def dump_results(results, names, root):
np.save(filename + '.npy', res)
def normalize_name(name_map):
    """ replace every '/' with '_' in both the keys and the values of 'name_map',
        the result is returned as a new dict and 'name_map' itself is not modified
    """
    normalized = {}
    for key, value in name_map.items():
        normalized[key.replace('/', '_')] = value.replace('/', '_')
    return normalized
def rename_layer_name(names, net):
    """ remap the names of paddle's outputs to the names used for dumping results

    The names of caffe's output layers may have been changed by 'INPLACE' operations,
    and paddle's layers may have been fused, so the relationship recorded in
    'net.name_trace' is used to rebuild the mapping between them.

    Args:
        names (list): names of the fetched paddle layers
        net (object): network object, its optional attribute 'name_trace' holds
            the traces stored under the keys 'caffe' and 'paddle'

    Returns:
        list: one name for each item of 'names', in the same order; a name which
            has no record in the trace is kept unchanged
    """
    pd2cf_name = {}

    # 'name_trace' stays None when the loaded weights carry no trace info,
    # in which case there is nothing to remap and the names are kept as they are
    name_trace = getattr(net, 'name_trace', None)
    if name_trace:
        #build a mapping from paddle's name to caffe's name
        real2cf = normalize_name(name_trace['caffe']['real2chg'])
        pd_trace = name_trace['paddle']
        pd2real = normalize_name(pd_trace['chg2real'])
        pd_deleted = normalize_name(pd_trace['deleted'])

        for pd_name, real_name in pd2real.items():
            if real_name in real2cf:
                pd2cf_name[pd_name] = '%s.%s.%s.both_changed' \
                    % (real2cf[real_name], real_name, pd_name)
            else:
                pd2cf_name[pd_name] = '%s.%s.pd_changed' % (real_name, pd_name)

        for pd_name in pd_deleted:
            assert pd_name not in pd2cf_name, "this name[%s] has already exist" % (
                pd_name)
            pd2cf_name[pd_name] = '%s.pd_deleted' % (pd_name)

        for real_name, cf_name in real2cf.items():
            if cf_name not in pd2cf_name:
                pd2cf_name[cf_name] = '%s.cf_deleted' % (cf_name)

            if real_name not in pd2cf_name:
                pd2cf_name[real_name] = '%s.%s.cf_changed' % (cf_name, real_name)

    ret = []
    for name in names:
        new_name = pd2cf_name.get(name, name)
        print('remap paddle name[%s] to output name[%s]' % (name, new_name))
        ret.append(new_name)
    return ret
def load_model(exe, place, net_file, net_name, net_weight, debug):
""" load model using xxxnet.py and xxxnet.npy
"""
......@@ -117,7 +165,8 @@ def load_model(exe, place, net_file, net_name, net_weight, debug):
'feed_names': feed_names,
'fetch_vars': fetch_list_var,
'fetch_names': fetch_list_name,
'feed_shapes': feed_shapes
'feed_shapes': feed_shapes,
'net': net
}
......@@ -171,6 +220,7 @@ def infer(model_path, imgfile, net_file=None, net_name=None, debug=True):
fetch_targets = ret['fetch_vars']
fetch_list_name = ret['fetch_names']
feed_shapes = ret['feed_shapes']
net = ret['net']
input_name = feed_names[0]
input_shape = feed_shapes[0]
......@@ -182,7 +232,8 @@ def infer(model_path, imgfile, net_file=None, net_name=None, debug=True):
if debug is True:
dump_path = 'results.paddle'
dump_results(results, fetch_list_name, dump_path)
dump_names = rename_layer_name(fetch_list_name, net)
dump_results(results, dump_names, dump_path)
print('all result of layers dumped to [%s]' % (dump_path))
else:
result = results[0]
......
......@@ -19,4 +19,6 @@ if [[ $# -eq 3 ]];then
else
caffe_file="./results/${model_name}.caffe/${2}.npy"
fi
python ./compare.py $paddle_file $caffe_file
cmd="python ./compare.py $paddle_file $caffe_file"
echo $cmd
eval $cmd
......@@ -3,7 +3,7 @@
#function:
# a tool used to compare all layers' results
#
#set -x
if [[ $# -ne 1 ]];then
echo "usage:"
echo " bash $0 [model_name]"
......@@ -13,11 +13,20 @@ fi
model_name=$1
prototxt="models.caffe/$model_name/${model_name}.prototxt"
layers=$(cat $prototxt | perl -ne 'if(/^\s+name\s*:\s*\"([^\"]+)/){print $1."\n";}')
cat $prototxt | grep name | perl -ne 'if(/^\s*name\s*:\s+\"([^\"]+)/){ print $1."\n";}' >.layer_names
final_layer=$(cat $prototxt | perl -ne 'if(/^\s*top\s*:\s+\"([^\"]+)/){ print $1."\n";}' | tail -n1)
ret=$(grep "^$final_layer$" .layer_names | wc -l)
if [[ $ret -eq 0 ]];then
echo $final_layer >>.layer_names
fi
for i in $layers;do
for i in $(cat .layer_names);do
i=${i//\//_}
cf_npy="results/${model_name}.caffe/${i}.npy"
pd_npy="results/${model_name}.paddle/${i}.npy"
#pd_npy="results/${model_name}.paddle/${i}.npy"
#pd_npy=$(find results/${model_name}.paddle -iname "${i}*.npy" | head -n1)
pd_npy=$(find results/${model_name}.paddle -iname "${i}.*npy" | grep deleted -v | head -n1)
if [[ ! -e $cf_npy ]];then
echo "caffe's result not exist[$cf_npy]"
......
......@@ -29,8 +29,8 @@ fi
mkdir -p $results_root
model_prototxt="models.caffe/$model_name/${model_name}.prototxt"
model_caffemodel="models.caffe/${model_name}/${model_name}.caffemodel"
prototxt="models.caffe/$model_name/${model_name}.prototxt"
caffemodel="models.caffe/${model_name}/${model_name}.caffemodel"
#1, dump layers' results from paddle
paddle_results="$results_root/${model_name}.paddle"
......@@ -51,7 +51,7 @@ PYTHON=`which cfpython`
if [[ -z $PYTHON ]];then
PYTHON=`which python`
fi
$PYTHON ./infer.py caffe $model_prototxt $model_caffemodel $paddle_results/data.npy
$PYTHON ./infer.py caffe $prototxt $caffemodel $paddle_results/data.npy
if [[ $? -ne 0 ]] || [[ ! -e "results.caffe" ]];then
echo "not found caffe's results, maybe failed to do inference with caffe"
exit 1
......@@ -59,10 +59,25 @@ fi
mv results.caffe $caffe_results
#3, extract layer names
cat $model_prototxt | grep name | perl -ne 'if(/^\s*name:\s+\"([^\"]+)/){ print $1."\n";}' >.layer_names
cat $prototxt | grep name | perl -ne 'if(/^\s*name\s*:\s+\"([^\"]+)/){ print $1."\n";}' >.layer_names
final_layer=$(cat $prototxt | perl -ne 'if(/^\s*top\s*:\s+\"([^\"]+)/){ print $1."\n";}' | tail -n1)
ret=$(grep "^$final_layer$" .layer_names | wc -l)
if [[ $ret -eq 0 ]];then
echo $final_layer >>.layer_names
fi
#4, compare one by one
for i in $(cat ".layer_names" | tail -n1);do
#for i in $(cat .layer_names);do
for i in $(cat .layer_names | tail -n1);do
i=${i//\//_}
echo "process $i"
$PYTHON compare.py $caffe_results/${i}.npy $paddle_results/${i}.npy
pd_npy=$(find $paddle_results/ -iname "${i}.*npy" | grep deleted -v | head -n1)
#pd_npy="$paddle_results/${i}.npy"
if [[ -f $pd_npy ]];then
$PYTHON compare.py $caffe_results/${i}.npy $pd_npy
else
echo "not found npy file[${i}.*npy] for layer[$i]"
exit 1
fi
done
......@@ -71,7 +71,9 @@ if [[ -z $only_convert ]];then
if [[ -z $net_name ]];then
net_name="MyNet"
fi
$PYTHON ./infer.py dump $net_file $weight_file $imgfile $net_name
cmd="$PYTHON ./infer.py dump $net_file $weight_file $imgfile $net_name"
echo $cmd
eval $cmd
ret=$?
fi
exit $ret
#!/bin/bash
#
#script to test all models
#
#every model is processed even if a previous one failed; the exit status of
#this script is 1 when tools/diff.sh returned non-zero for any model, else 0
#
models="alexnet vgg16 googlenet resnet152 resnet101 resnet50"
failed=0
for i in $models;do
    echo "begin to process $i"
    bash ./tools/diff.sh "$i" 2>&1
    #save the status right away, any later command would overwrite $?
    ret=$?
    echo "finished to process $i with ret[$ret]"
    if [[ $ret -ne 0 ]];then
        failed=1
    fi
done
exit $failed
......@@ -58,11 +58,13 @@ def argmax_layer(input, name, out_max_val=False, top_k=1, axis=-1):
if axis < 0:
axis += len(input.shape)
topk_var, index_var = fluid.layers.topk(input=input, k=top_k)
if out_max_val is True:
topk_var, index_var = fluid.layers.topk(input=input, k=top_k)
index_var = fluid.layers.cast(index_var, dtype=topk_var.dtype)
output = fluid.layers.concat([index_var, topk_var], axis=axis)
output = fluid.layers.concat(
[index_var, topk_var], axis=axis, name=name)
else:
topk_var, index_var = fluid.layers.topk(input=input, k=top_k, name=name)
output = index_var
return output
......
......@@ -43,7 +43,7 @@ def axpy_layer(inputs, name):
x = inputs[1]
y = inputs[2]
output = fluid.layers.elementwise_mul(x, alpha, axis=0)
output = fluid.layers.elementwise_add(output, y)
output = fluid.layers.elementwise_add(output, y, name=name)
return output
......
......@@ -63,9 +63,10 @@ class Node(object):
class Graph(object):
def __init__(self, nodes=None, name=None):
def __init__(self, nodes=None, name=None, trace={}):
self.nodes = nodes or []
self.node_lut = {node.name: node for node in self.nodes}
self.output_trace = trace
if name is None or name == '':
self.name = 'MyNet'
else:
......@@ -81,6 +82,15 @@ class Graph(object):
except KeyError:
raise KaffeError('Layer not found: %s' % name)
def add_name_trace(self, trace, which='caffe'):
    """ store 'trace' in this graph's output trace under the key 'which',
        an existing entry with the same key is overwritten
    """
    self.output_trace.update({which: trace})
def get_name_trace(self, which=None):
    """ return the trace stored under the key 'which',
        or the whole output trace when 'which' is None
    """
    if which is None:
        return self.output_trace
    return self.output_trace[which]
def get_input_nodes(self):
    """ collect the nodes of this graph which have no parents, in graph order
    """
    input_nodes = []
    for node in self.nodes:
        if len(node.parents) == 0:
            input_nodes.append(node)
    return input_nodes
......@@ -116,7 +126,7 @@ class Graph(object):
*NodeKind.compute_output_shape(node))
def replaced(self, new_nodes):
    """ build a new Graph from 'new_nodes' which keeps this graph's name and output trace
    """
    # the trace has to be handed over, otherwise the recorded names are lost
    # as soon as the graph is rebuilt
    return Graph(nodes=new_nodes, name=self.name, trace=self.output_trace)
def transformed(self, transformers):
graph = self
......@@ -262,6 +272,7 @@ class GraphBuilder(object):
# The current implementation only supports single-output nodes (note that a node can still
# have multiple children, since multiple child nodes can refer to the single top's name).
node_outputs = {}
output_trace = {}
for layer in layers:
node = graph.get_node(layer.name)
for input_name in layer.bottom:
......@@ -291,7 +302,26 @@ class GraphBuilder(object):
#
# For both cases, future references to this top re-routes to this node.
node_outputs[output_name] = node
if output_name in output_trace:
output_trace[output_name].append(node.name)
else:
output_trace[output_name] = [output_name, node.name]
#build a mapping from real-name to changed-name(for caffe's INPLACE inference)
real2chg = {}
deleted = {}
for k, v in output_trace.items():
real2chg[v[-1]] = k
for n in v:
if n in real2chg:
continue
if n not in deleted:
deleted[n] = '%s.%s' % (k, v[-1])
graph.add_name_trace({
'real2chg': real2chg,
'deleted': deleted
}, 'caffe')
graph.compute_output_shapes()
return graph
......
......@@ -216,7 +216,7 @@ class LayerAdapter(object):
s_w = self.get_kernel_value(
params.stride_w, params.stride, 1, default=1)
p_h = self.get_kernel_value(params.pad_h, params.pad, 0, default=0)
p_w = self.get_kernel_value(params.pad_h, params.pad, 1, default=0)
p_w = self.get_kernel_value(params.pad_w, params.pad, 1, default=0)
return KernelParameters(k_h, k_w, s_h, s_w, p_h, p_w)
......
......@@ -47,6 +47,8 @@ class Network(object):
self.trainable = trainable
# Switch variable for dropout
self.paddle_env = None
self.output_names = []
self.name_trace = None
self.setup()
def setup(self):
......@@ -79,6 +81,10 @@ class Network(object):
data_dict = np.load(data_path).item()
for op_name in data_dict:
if op_name == 'caffe2fluid_name_trace':
self.name_trace = data_dict[op_name]
continue
layer = self.layers[op_name]
for param_name, data in data_dict[op_name].iteritems():
try:
......@@ -117,6 +123,15 @@ class Network(object):
ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
return '%s_%d' % (prefix, ident)
def get_unique_output_name(self, prefix, layertype):
    '''Returns an index-suffixed output name of the form
    "<prefix>.<layertype>.output.<index>".
    The index is one more than the number of previously generated output names
    that start with the prefix. The new name is appended to self.output_names
    before it is returned.
    '''
    used = 0
    for existing in self.output_names:
        if existing.startswith(prefix):
            used += 1

    unique_name = '%s.%s.output.%d' % (prefix, layertype, used + 1)
    self.output_names.append(unique_name)
    return unique_name
@layer
def conv(self,
input,
......@@ -152,6 +167,7 @@ class Network(object):
act = None
output = fluid.layers.conv2d(
name=self.get_unique_output_name(name, 'conv2d'),
input=input,
filter_size=[k_h, k_w],
num_filters=c_o,
......@@ -170,7 +186,8 @@ class Network(object):
@layer
def relu(self, input, name):
    """ apply fluid's relu to 'input', the name of the created layer is
        generated from 'name' by get_unique_output_name
    """
    fluid = import_fluid()
    # create the layer exactly once, with the generated name
    output = fluid.layers.relu(
        name=self.get_unique_output_name(name, 'relu'), x=input)
    return output
def pool(self, pool_type, input, k_h, k_w, s_h, s_w, ceil_mode, padding,
......@@ -182,6 +199,7 @@ class Network(object):
fluid = import_fluid()
output = fluid.layers.pool2d(
name=name,
input=input,
pool_size=k_hw,
pool_stride=s_hw,
......@@ -200,8 +218,16 @@ class Network(object):
ceil_mode,
padding=[0, 0],
name=None):
return self.pool('max', input, k_h, k_w, s_h, s_w, ceil_mode, padding,
name)
return self.pool(
'max',
input,
k_h,
k_w,
s_h,
s_w,
ceil_mode,
padding,
name=self.get_unique_output_name(name, 'max_pool'))
@layer
def avg_pool(self,
......@@ -213,25 +239,41 @@ class Network(object):
ceil_mode,
padding=[0, 0],
name=None):
return self.pool('avg', input, k_h, k_w, s_h, s_w, ceil_mode, padding,
name)
return self.pool(
'avg',
input,
k_h,
k_w,
s_h,
s_w,
ceil_mode,
padding,
name=self.get_unique_output_name(name, 'avg_pool'))
@layer
def sigmoid(self, input, name):
    """ apply fluid's sigmoid to 'input', the name of the created layer is
        generated from 'name' by get_unique_output_name
    """
    fluid = import_fluid()
    # single return: the named call must be the one that is executed
    return fluid.layers.sigmoid(
        input, name=self.get_unique_output_name(name, 'sigmoid'))
@layer
def lrn(self, input, radius, alpha, beta, name, bias=1.0):
    """ apply fluid's lrn to 'input', with n=radius, k=bias and the given
        alpha and beta; the name of the created layer is generated from 'name'
        by get_unique_output_name
    """
    fluid = import_fluid()
    # create the layer exactly once, with the generated name
    output = fluid.layers.lrn(input=input,
                              n=radius,
                              k=bias,
                              alpha=alpha,
                              beta=beta,
                              name=self.get_unique_output_name(name, 'lrn'))
    return output
@layer
def concat(self, inputs, axis, name):
    """ concatenate 'inputs' along 'axis' with fluid's concat, the name of the
        created layer is generated from 'name' by get_unique_output_name
    """
    fluid = import_fluid()
    # create the layer exactly once, with the generated name
    output = fluid.layers.concat(
        input=inputs,
        axis=axis,
        name=self.get_unique_output_name(name, 'concat'))
    return output
@layer
......@@ -239,7 +281,8 @@ class Network(object):
fluid = import_fluid()
output = inputs[0]
for i in inputs[1:]:
output = fluid.layers.elementwise_add(x=output, y=i)
output = fluid.layers.elementwise_add(
x=output, y=i, name=self.get_unique_output_name(name, 'add'))
return output
@layer
......@@ -251,7 +294,7 @@ class Network(object):
prefix = name + '_'
output = fluid.layers.fc(
name=name,
name=self.get_unique_output_name(name, 'fc'),
input=input,
size=num_out,
act=act,
......@@ -269,7 +312,8 @@ class Network(object):
str(shape))
input = fluid.layers.reshape(input, shape[0:2])
output = fluid.layers.softmax(input)
output = fluid.layers.softmax(
input, name=self.get_unique_output_name(name, 'softmax'))
return output
@layer
......@@ -289,7 +333,7 @@ class Network(object):
mean_name = prefix + 'mean'
variance_name = prefix + 'variance'
output = fluid.layers.batch_norm(
name=name,
name=self.get_unique_output_name(name, 'batch_norm'),
input=input,
is_test=True,
param_attr=param_attr,
......@@ -308,7 +352,10 @@ class Network(object):
output = input
else:
output = fluid.layers.dropout(
input, dropout_prob=drop_prob, is_test=is_test)
input,
dropout_prob=drop_prob,
is_test=is_test,
name=self.get_unique_output_name(name, 'dropout'))
return output
@layer
......@@ -328,8 +375,16 @@ class Network(object):
offset_param = fluid.layers.create_parameter(
shape=scale_shape, dtype=input.dtype, name=name, attr=offset_attr)
output = fluid.layers.elementwise_mul(input, scale_param, axis=axis)
output = fluid.layers.elementwise_add(output, offset_param, axis=axis)
output = fluid.layers.elementwise_mul(
input,
scale_param,
axis=axis,
name=self.get_unique_output_name(name, 'scale_mul'))
output = fluid.layers.elementwise_add(
output,
offset_param,
axis=axis,
name=self.get_unique_output_name(name, 'scale_add'))
return output
def custom_layer_factory(self):
......@@ -342,5 +397,6 @@ class Network(object):
def custom_layer(self, inputs, kind, name, *args, **kwargs):
    """ make a custom layer of type 'kind' through the custom layer factory,
        the generated unique output name is passed to the factory instead of 'name'
    """
    output_name = self.get_unique_output_name(name, kind)
    make_layer = self.custom_layer_factory()
    return make_layer(kind, inputs, output_name, *args, **kwargs)
......@@ -3,9 +3,9 @@ import numpy as np
from ..errors import KaffeError, print_stderr
from ..graph import GraphBuilder, NodeMapper
from ..layers import NodeKind
from ..transformers import (DataInjector, DataReshaper, NodeRenamer, ReLUFuser,
BatchNormScaleBiasFuser, BatchNormPreprocessor,
ParameterNamer)
from ..transformers import (DataInjector, DataReshaper, NodeRenamer,
SubNodeFuser, ReLUFuser, BatchNormScaleBiasFuser,
BatchNormPreprocessor, ParameterNamer)
from . import network
......@@ -18,7 +18,7 @@ def get_padding_type(kernel_params, input_shape, output_shape):
https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto
'''
k_h, k_w, s_h, s_w, p_h, p_w = kernel_params
if p_h * p_w > 0:
if p_h > 0 or p_w > 0:
return [p_h, p_w]
else:
return None
......@@ -315,6 +315,23 @@ class Transformer(object):
self.graph = graph.transformed(transformers)
#for the purpose of recording name mapping because of fused nodes
trace = SubNodeFuser.traced_names()
chg2real = {}
deleted = {}
for k, v in trace.items():
chg2real[k] = v[-1] #mapping from changed-name to real-name
for n in v:
if n in chg2real:
continue
if n not in deleted:
deleted[n] = '%s.%s' % (k, v[-1])
self.graph.add_name_trace({
'chg2real': chg2real,
'deleted': deleted
}, 'paddle')
# Display the graph
if self.verbose:
print_stderr(self.graph)
......@@ -339,6 +356,8 @@ class Transformer(object):
node.name: node.data
for node in self.graph.nodes if node.data
}
self.params['caffe2fluid_name_trace'] = self.graph.get_name_trace()
return self.params
def transform_source(self):
......
......@@ -181,6 +181,20 @@ class SubNodeFuser(object):
'''
An abstract helper for merging a single-child with its single-parent.
'''
_traced_names = {}
@classmethod
def traced_names(cls):
    """ return the dict of names recorded by 'trace' (the dict itself, not a copy)
    """
    recorded = cls._traced_names
    return recorded
@classmethod
def trace(cls, fname, tname):
    """ record a names mapping: 'tname' is appended to the list of names
        traced for 'fname', the list is created on first use
    """
    cls._traced_names.setdefault(fname, []).append(tname)
def __call__(self, graph):
nodes = graph.nodes
......@@ -234,6 +248,7 @@ class ReLUFuser(SubNodeFuser):
child.kind == NodeKind.ReLU)
def merge(self, parent, child):
SubNodeFuser.trace(parent.name, child.name)
parent.metadata['relu'] = True
parent.metadata['relu_negative_slope'] = child.parameters.negative_slope
......@@ -255,6 +270,7 @@ class BatchNormScaleBiasFuser(SubNodeFuser):
child.parameters.bias_term == True)
def merge(self, parent, child):
SubNodeFuser.trace(parent.name, child.name)
parent.scale_bias_node = child
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册