提交 71f3172c 编写于 作者: W walloollaw 提交者: qingqing01

caffe2fluid: support intermediate layer precision compare (#965)

上级 f93838a4
...@@ -20,8 +20,8 @@ def calc_diff(f1, f2): ...@@ -20,8 +20,8 @@ def calc_diff(f1, f2):
d1 = np.load(f1) d1 = np.load(f1)
d2 = np.load(f2) d2 = np.load(f2)
print d1.shape #print d1.shape
print d2.shape #print d2.shape
#print d1[0, 0, 0:10, 0:10] #print d1[0, 0, 0:10, 0:10]
#print d2[0, 0, 0:10, 0:10] #print d2[0, 0, 0:10, 0:10]
#d1 = d1[:, :, 1:-2, 1:-2] #d1 = d1[:, :, 1:-2, 1:-2]
......
...@@ -78,6 +78,54 @@ def dump_results(results, names, root): ...@@ -78,6 +78,54 @@ def dump_results(results, names, root):
np.save(filename + '.npy', res) np.save(filename + '.npy', res)
def normalize_name(name_map):
    """Return a copy of `name_map` with '/' replaced by '_' in keys and values.

    Caffe layer names may contain '/', which is awkward in dumped npy file
    names, so both sides of the mapping are normalized the same way.
    """
    return {
        k.replace('/', '_'): v.replace('/', '_')
        for k, v in name_map.items()
    }


def rename_layer_name(names, net):
    """Re-map paddle output-layer names for result comparison.

    The names of output layers from caffe may be changed by 'INPLACE'
    operations, and paddle's layers may be fused, so their relationship
    must be rebuilt before layer outputs can be compared one-to-one.

    Args:
        names (list): paddle output layer names to remap.
        net: converted network object; assumed to carry a 'name_trace'
            attribute with 'caffe' and 'paddle' sub-traces -- TODO confirm
            against the converter that produced it.

    Returns:
        list: remapped names, in the same order as `names`.
    """
    # build a mapping from paddle's name to caffe's name
    trace = getattr(net, 'name_trace', None)
    cf_trace = trace['caffe']
    real2cf = normalize_name(cf_trace['real2chg'])

    pd_trace = trace['paddle']
    pd2real = normalize_name(pd_trace['chg2real'])
    pd_deleted = normalize_name(pd_trace['deleted'])

    pd2cf_name = {}
    for pd_name, real_name in pd2real.items():
        if real_name in real2cf:
            pd2cf_name[pd_name] = '%s.%s.%s.both_changed' \
                % (real2cf[real_name], real_name, pd_name)
        else:
            pd2cf_name[pd_name] = '%s.%s.pd_changed' % (real_name, pd_name)

    # only the keys matter here; iterating keys avoids shadowing the
    # 'trace' dict above with the (unused) mapped values
    for pd_name in pd_deleted:
        assert pd_name not in pd2cf_name, "this name[%s] has already exist" % (
            pd_name)
        pd2cf_name[pd_name] = '%s.pd_deleted' % (pd_name)

    for real_name, cf_name in real2cf.items():
        if cf_name not in pd2cf_name:
            pd2cf_name[cf_name] = '%s.cf_deleted' % (cf_name)

        if real_name not in pd2cf_name:
            pd2cf_name[real_name] = '%s.%s.cf_changed' % (cf_name, real_name)

    ret = []
    for name in names:
        new_name = pd2cf_name[name] if name in pd2cf_name else name
        print('remap paddle name[%s] to output name[%s]' % (name, new_name))
        ret.append(new_name)
    return ret
def load_model(exe, place, net_file, net_name, net_weight, debug): def load_model(exe, place, net_file, net_name, net_weight, debug):
""" load model using xxxnet.py and xxxnet.npy """ load model using xxxnet.py and xxxnet.npy
""" """
...@@ -117,7 +165,8 @@ def load_model(exe, place, net_file, net_name, net_weight, debug): ...@@ -117,7 +165,8 @@ def load_model(exe, place, net_file, net_name, net_weight, debug):
'feed_names': feed_names, 'feed_names': feed_names,
'fetch_vars': fetch_list_var, 'fetch_vars': fetch_list_var,
'fetch_names': fetch_list_name, 'fetch_names': fetch_list_name,
'feed_shapes': feed_shapes 'feed_shapes': feed_shapes,
'net': net
} }
...@@ -171,6 +220,7 @@ def infer(model_path, imgfile, net_file=None, net_name=None, debug=True): ...@@ -171,6 +220,7 @@ def infer(model_path, imgfile, net_file=None, net_name=None, debug=True):
fetch_targets = ret['fetch_vars'] fetch_targets = ret['fetch_vars']
fetch_list_name = ret['fetch_names'] fetch_list_name = ret['fetch_names']
feed_shapes = ret['feed_shapes'] feed_shapes = ret['feed_shapes']
net = ret['net']
input_name = feed_names[0] input_name = feed_names[0]
input_shape = feed_shapes[0] input_shape = feed_shapes[0]
...@@ -182,7 +232,8 @@ def infer(model_path, imgfile, net_file=None, net_name=None, debug=True): ...@@ -182,7 +232,8 @@ def infer(model_path, imgfile, net_file=None, net_name=None, debug=True):
if debug is True: if debug is True:
dump_path = 'results.paddle' dump_path = 'results.paddle'
dump_results(results, fetch_list_name, dump_path) dump_names = rename_layer_name(fetch_list_name, net)
dump_results(results, dump_names, dump_path)
print('all result of layers dumped to [%s]' % (dump_path)) print('all result of layers dumped to [%s]' % (dump_path))
else: else:
result = results[0] result = results[0]
......
...@@ -19,4 +19,6 @@ if [[ $# -eq 3 ]];then ...@@ -19,4 +19,6 @@ if [[ $# -eq 3 ]];then
else else
caffe_file="./results/${model_name}.caffe/${2}.npy" caffe_file="./results/${model_name}.caffe/${2}.npy"
fi fi
python ./compare.py $paddle_file $caffe_file cmd="python ./compare.py $paddle_file $caffe_file"
echo $cmd
eval $cmd
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#function: #function:
# a tool used to compare all layers' results # a tool used to compare all layers' results
# #
#set -x
if [[ $# -ne 1 ]];then if [[ $# -ne 1 ]];then
echo "usage:" echo "usage:"
echo " bash $0 [model_name]" echo " bash $0 [model_name]"
...@@ -13,11 +13,20 @@ fi ...@@ -13,11 +13,20 @@ fi
model_name=$1 model_name=$1
prototxt="models.caffe/$model_name/${model_name}.prototxt" prototxt="models.caffe/$model_name/${model_name}.prototxt"
layers=$(cat $prototxt | perl -ne 'if(/^\s+name\s*:\s*\"([^\"]+)/){print $1."\n";}') cat $prototxt | grep name | perl -ne 'if(/^\s*name\s*:\s+\"([^\"]+)/){ print $1."\n";}' >.layer_names
final_layer=$(cat $prototxt | perl -ne 'if(/^\s*top\s*:\s+\"([^\"]+)/){ print $1."\n";}' | tail -n1)
ret=$(grep "^$final_layer$" .layer_names | wc -l)
if [[ $ret -eq 0 ]];then
echo $final_layer >>.layer_names
fi
for i in $layers;do for i in $(cat .layer_names);do
i=${i//\//_}
cf_npy="results/${model_name}.caffe/${i}.npy" cf_npy="results/${model_name}.caffe/${i}.npy"
pd_npy="results/${model_name}.paddle/${i}.npy" #pd_npy="results/${model_name}.paddle/${i}.npy"
#pd_npy=$(find results/${model_name}.paddle -iname "${i}*.npy" | head -n1)
pd_npy=$(find results/${model_name}.paddle -iname "${i}.*npy" | grep deleted -v | head -n1)
if [[ ! -e $cf_npy ]];then if [[ ! -e $cf_npy ]];then
echo "caffe's result not exist[$cf_npy]" echo "caffe's result not exist[$cf_npy]"
......
...@@ -29,8 +29,8 @@ fi ...@@ -29,8 +29,8 @@ fi
mkdir -p $results_root mkdir -p $results_root
model_prototxt="models.caffe/$model_name/${model_name}.prototxt" prototxt="models.caffe/$model_name/${model_name}.prototxt"
model_caffemodel="models.caffe/${model_name}/${model_name}.caffemodel" caffemodel="models.caffe/${model_name}/${model_name}.caffemodel"
#1, dump layers' results from paddle #1, dump layers' results from paddle
paddle_results="$results_root/${model_name}.paddle" paddle_results="$results_root/${model_name}.paddle"
...@@ -51,7 +51,7 @@ PYTHON=`which cfpython` ...@@ -51,7 +51,7 @@ PYTHON=`which cfpython`
if [[ -z $PYTHON ]];then if [[ -z $PYTHON ]];then
PYTHON=`which python` PYTHON=`which python`
fi fi
$PYTHON ./infer.py caffe $model_prototxt $model_caffemodel $paddle_results/data.npy $PYTHON ./infer.py caffe $prototxt $caffemodel $paddle_results/data.npy
if [[ $? -ne 0 ]] || [[ ! -e "results.caffe" ]];then if [[ $? -ne 0 ]] || [[ ! -e "results.caffe" ]];then
echo "not found caffe's results, maybe failed to do inference with caffe" echo "not found caffe's results, maybe failed to do inference with caffe"
exit 1 exit 1
...@@ -59,10 +59,25 @@ fi ...@@ -59,10 +59,25 @@ fi
mv results.caffe $caffe_results mv results.caffe $caffe_results
#3, extract layer names #3, extract layer names
cat $model_prototxt | grep name | perl -ne 'if(/^\s*name:\s+\"([^\"]+)/){ print $1."\n";}' >.layer_names cat $prototxt | grep name | perl -ne 'if(/^\s*name\s*:\s+\"([^\"]+)/){ print $1."\n";}' >.layer_names
final_layer=$(cat $prototxt | perl -ne 'if(/^\s*top\s*:\s+\"([^\"]+)/){ print $1."\n";}' | tail -n1)
ret=$(grep "^$final_layer$" .layer_names | wc -l)
if [[ $ret -eq 0 ]];then
echo $final_layer >>.layer_names
fi
#4, compare one by one #4, compare one by one
for i in $(cat ".layer_names" | tail -n1);do #for i in $(cat .layer_names);do
for i in $(cat .layer_names | tail -n1);do
i=${i//\//_}
echo "process $i" echo "process $i"
$PYTHON compare.py $caffe_results/${i}.npy $paddle_results/${i}.npy pd_npy=$(find $paddle_results/ -iname "${i}.*npy" | grep deleted -v | head -n1)
#pd_npy="$paddle_results/${i}.npy"
if [[ -f $pd_npy ]];then
$PYTHON compare.py $caffe_results/${i}.npy $pd_npy
else
echo "not found npy file[${i}.*npy] for layer[$i]"
exit 1
fi
done done
...@@ -71,7 +71,9 @@ if [[ -z $only_convert ]];then ...@@ -71,7 +71,9 @@ if [[ -z $only_convert ]];then
if [[ -z $net_name ]];then if [[ -z $net_name ]];then
net_name="MyNet" net_name="MyNet"
fi fi
$PYTHON ./infer.py dump $net_file $weight_file $imgfile $net_name cmd="$PYTHON ./infer.py dump $net_file $weight_file $imgfile $net_name"
echo $cmd
eval $cmd
ret=$? ret=$?
fi fi
exit $ret exit $ret
#!/bin/bash
#
# script to test all supported models by running tools/diff.sh on each
#
models="alexnet vgg16 googlenet resnet152 resnet101 resnet50"
for i in $models; do
    echo "begin to process $i"
    bash ./tools/diff.sh "$i" 2>&1
    # capture the exit status explicitly so nothing can clobber it
    # before it is reported
    ret=$?
    echo "finished to process $i with ret[$ret]"
done
...@@ -58,11 +58,13 @@ def argmax_layer(input, name, out_max_val=False, top_k=1, axis=-1): ...@@ -58,11 +58,13 @@ def argmax_layer(input, name, out_max_val=False, top_k=1, axis=-1):
if axis < 0: if axis < 0:
axis += len(input.shape) axis += len(input.shape)
topk_var, index_var = fluid.layers.topk(input=input, k=top_k)
if out_max_val is True: if out_max_val is True:
topk_var, index_var = fluid.layers.topk(input=input, k=top_k)
index_var = fluid.layers.cast(index_var, dtype=topk_var.dtype) index_var = fluid.layers.cast(index_var, dtype=topk_var.dtype)
output = fluid.layers.concat([index_var, topk_var], axis=axis) output = fluid.layers.concat(
[index_var, topk_var], axis=axis, name=name)
else: else:
topk_var, index_var = fluid.layers.topk(input=input, k=top_k, name=name)
output = index_var output = index_var
return output return output
......
...@@ -43,7 +43,7 @@ def axpy_layer(inputs, name): ...@@ -43,7 +43,7 @@ def axpy_layer(inputs, name):
x = inputs[1] x = inputs[1]
y = inputs[2] y = inputs[2]
output = fluid.layers.elementwise_mul(x, alpha, axis=0) output = fluid.layers.elementwise_mul(x, alpha, axis=0)
output = fluid.layers.elementwise_add(output, y) output = fluid.layers.elementwise_add(output, y, name=name)
return output return output
......
...@@ -63,9 +63,10 @@ class Node(object): ...@@ -63,9 +63,10 @@ class Node(object):
class Graph(object): class Graph(object):
def __init__(self, nodes=None, name=None): def __init__(self, nodes=None, name=None, trace={}):
self.nodes = nodes or [] self.nodes = nodes or []
self.node_lut = {node.name: node for node in self.nodes} self.node_lut = {node.name: node for node in self.nodes}
self.output_trace = trace
if name is None or name == '': if name is None or name == '':
self.name = 'MyNet' self.name = 'MyNet'
else: else:
...@@ -81,6 +82,15 @@ class Graph(object): ...@@ -81,6 +82,15 @@ class Graph(object):
except KeyError: except KeyError:
raise KaffeError('Layer not found: %s' % name) raise KaffeError('Layer not found: %s' % name)
def add_name_trace(self, trace, which='caffe'):
    """Record a name-mapping trace under the given framework key.

    `which` selects the framework slot (defaults to 'caffe'); any
    previously stored trace for that key is replaced.
    """
    self.output_trace.update({which: trace})
def get_name_trace(self, which=None):
    """Return the stored trace for one framework key, or the whole
    trace dict when `which` is None.
    """
    if which is None:
        return self.output_trace
    return self.output_trace[which]
def get_input_nodes(self): def get_input_nodes(self):
return [node for node in self.nodes if len(node.parents) == 0] return [node for node in self.nodes if len(node.parents) == 0]
...@@ -116,7 +126,7 @@ class Graph(object): ...@@ -116,7 +126,7 @@ class Graph(object):
*NodeKind.compute_output_shape(node)) *NodeKind.compute_output_shape(node))
def replaced(self, new_nodes): def replaced(self, new_nodes):
return Graph(nodes=new_nodes, name=self.name) return Graph(nodes=new_nodes, name=self.name, trace=self.output_trace)
def transformed(self, transformers): def transformed(self, transformers):
graph = self graph = self
...@@ -262,6 +272,7 @@ class GraphBuilder(object): ...@@ -262,6 +272,7 @@ class GraphBuilder(object):
# The current implementation only supports single-output nodes (note that a node can still # The current implementation only supports single-output nodes (note that a node can still
# have multiple children, since multiple child nodes can refer to the single top's name). # have multiple children, since multiple child nodes can refer to the single top's name).
node_outputs = {} node_outputs = {}
output_trace = {}
for layer in layers: for layer in layers:
node = graph.get_node(layer.name) node = graph.get_node(layer.name)
for input_name in layer.bottom: for input_name in layer.bottom:
...@@ -291,7 +302,26 @@ class GraphBuilder(object): ...@@ -291,7 +302,26 @@ class GraphBuilder(object):
# #
# For both cases, future references to this top re-routes to this node. # For both cases, future references to this top re-routes to this node.
node_outputs[output_name] = node node_outputs[output_name] = node
if output_name in output_trace:
output_trace[output_name].append(node.name)
else:
output_trace[output_name] = [output_name, node.name]
#build a mapping from real-name to changed-name(for caffe's INPLACE inference)
real2chg = {}
deleted = {}
for k, v in output_trace.items():
real2chg[v[-1]] = k
for n in v:
if n in real2chg:
continue
if n not in deleted:
deleted[n] = '%s.%s' % (k, v[-1])
graph.add_name_trace({
'real2chg': real2chg,
'deleted': deleted
}, 'caffe')
graph.compute_output_shapes() graph.compute_output_shapes()
return graph return graph
......
...@@ -216,7 +216,7 @@ class LayerAdapter(object): ...@@ -216,7 +216,7 @@ class LayerAdapter(object):
s_w = self.get_kernel_value( s_w = self.get_kernel_value(
params.stride_w, params.stride, 1, default=1) params.stride_w, params.stride, 1, default=1)
p_h = self.get_kernel_value(params.pad_h, params.pad, 0, default=0) p_h = self.get_kernel_value(params.pad_h, params.pad, 0, default=0)
p_w = self.get_kernel_value(params.pad_h, params.pad, 1, default=0) p_w = self.get_kernel_value(params.pad_w, params.pad, 1, default=0)
return KernelParameters(k_h, k_w, s_h, s_w, p_h, p_w) return KernelParameters(k_h, k_w, s_h, s_w, p_h, p_w)
......
...@@ -47,6 +47,8 @@ class Network(object): ...@@ -47,6 +47,8 @@ class Network(object):
self.trainable = trainable self.trainable = trainable
# Switch variable for dropout # Switch variable for dropout
self.paddle_env = None self.paddle_env = None
self.output_names = []
self.name_trace = None
self.setup() self.setup()
def setup(self): def setup(self):
...@@ -79,6 +81,10 @@ class Network(object): ...@@ -79,6 +81,10 @@ class Network(object):
data_dict = np.load(data_path).item() data_dict = np.load(data_path).item()
for op_name in data_dict: for op_name in data_dict:
if op_name == 'caffe2fluid_name_trace':
self.name_trace = data_dict[op_name]
continue
layer = self.layers[op_name] layer = self.layers[op_name]
for param_name, data in data_dict[op_name].iteritems(): for param_name, data in data_dict[op_name].iteritems():
try: try:
...@@ -117,6 +123,15 @@ class Network(object): ...@@ -117,6 +123,15 @@ class Network(object):
ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
return '%s_%d' % (prefix, ident) return '%s_%d' % (prefix, ident)
def get_unique_output_name(self, prefix, layertype):
    """Return an index-suffixed unique name for the given prefix.

    Used for auto-generating layer output names based on the
    type-prefix; every generated name is remembered in
    self.output_names so later calls keep incrementing the index.
    """
    existing = [n for n in self.output_names if n.startswith(prefix)]
    new_name = '%s.%s.output.%d' % (prefix, layertype, len(existing) + 1)
    self.output_names.append(new_name)
    return new_name
@layer @layer
def conv(self, def conv(self,
input, input,
...@@ -152,6 +167,7 @@ class Network(object): ...@@ -152,6 +167,7 @@ class Network(object):
act = None act = None
output = fluid.layers.conv2d( output = fluid.layers.conv2d(
name=self.get_unique_output_name(name, 'conv2d'),
input=input, input=input,
filter_size=[k_h, k_w], filter_size=[k_h, k_w],
num_filters=c_o, num_filters=c_o,
...@@ -170,7 +186,8 @@ class Network(object): ...@@ -170,7 +186,8 @@ class Network(object):
@layer @layer
def relu(self, input, name): def relu(self, input, name):
fluid = import_fluid() fluid = import_fluid()
output = fluid.layers.relu(x=input) output = fluid.layers.relu(
name=self.get_unique_output_name(name, 'relu'), x=input)
return output return output
def pool(self, pool_type, input, k_h, k_w, s_h, s_w, ceil_mode, padding, def pool(self, pool_type, input, k_h, k_w, s_h, s_w, ceil_mode, padding,
...@@ -182,6 +199,7 @@ class Network(object): ...@@ -182,6 +199,7 @@ class Network(object):
fluid = import_fluid() fluid = import_fluid()
output = fluid.layers.pool2d( output = fluid.layers.pool2d(
name=name,
input=input, input=input,
pool_size=k_hw, pool_size=k_hw,
pool_stride=s_hw, pool_stride=s_hw,
...@@ -200,8 +218,16 @@ class Network(object): ...@@ -200,8 +218,16 @@ class Network(object):
ceil_mode, ceil_mode,
padding=[0, 0], padding=[0, 0],
name=None): name=None):
return self.pool('max', input, k_h, k_w, s_h, s_w, ceil_mode, padding, return self.pool(
name) 'max',
input,
k_h,
k_w,
s_h,
s_w,
ceil_mode,
padding,
name=self.get_unique_output_name(name, 'max_pool'))
@layer @layer
def avg_pool(self, def avg_pool(self,
...@@ -213,25 +239,41 @@ class Network(object): ...@@ -213,25 +239,41 @@ class Network(object):
ceil_mode, ceil_mode,
padding=[0, 0], padding=[0, 0],
name=None): name=None):
return self.pool('avg', input, k_h, k_w, s_h, s_w, ceil_mode, padding, return self.pool(
name) 'avg',
input,
k_h,
k_w,
s_h,
s_w,
ceil_mode,
padding,
name=self.get_unique_output_name(name, 'avg_pool'))
@layer @layer
def sigmoid(self, input, name): def sigmoid(self, input, name):
fluid = import_fluid() fluid = import_fluid()
return fluid.layers.sigmoid(input) return fluid.layers.sigmoid(
input, name=self.get_unique_output_name(name, 'sigmoid'))
@layer @layer
def lrn(self, input, radius, alpha, beta, name, bias=1.0): def lrn(self, input, radius, alpha, beta, name, bias=1.0):
fluid = import_fluid() fluid = import_fluid()
output = fluid.layers.lrn(input=input, \ output = fluid.layers.lrn(input=input,
n=radius, k=bias, alpha=alpha, beta=beta, name=name) n=radius,
k=bias,
alpha=alpha,
beta=beta,
name=self.get_unique_output_name(name, 'lrn'))
return output return output
@layer @layer
def concat(self, inputs, axis, name): def concat(self, inputs, axis, name):
fluid = import_fluid() fluid = import_fluid()
output = fluid.layers.concat(input=inputs, axis=axis) output = fluid.layers.concat(
input=inputs,
axis=axis,
name=self.get_unique_output_name(name, 'concat'))
return output return output
@layer @layer
...@@ -239,7 +281,8 @@ class Network(object): ...@@ -239,7 +281,8 @@ class Network(object):
fluid = import_fluid() fluid = import_fluid()
output = inputs[0] output = inputs[0]
for i in inputs[1:]: for i in inputs[1:]:
output = fluid.layers.elementwise_add(x=output, y=i) output = fluid.layers.elementwise_add(
x=output, y=i, name=self.get_unique_output_name(name, 'add'))
return output return output
@layer @layer
...@@ -251,7 +294,7 @@ class Network(object): ...@@ -251,7 +294,7 @@ class Network(object):
prefix = name + '_' prefix = name + '_'
output = fluid.layers.fc( output = fluid.layers.fc(
name=name, name=self.get_unique_output_name(name, 'fc'),
input=input, input=input,
size=num_out, size=num_out,
act=act, act=act,
...@@ -269,7 +312,8 @@ class Network(object): ...@@ -269,7 +312,8 @@ class Network(object):
str(shape)) str(shape))
input = fluid.layers.reshape(input, shape[0:2]) input = fluid.layers.reshape(input, shape[0:2])
output = fluid.layers.softmax(input) output = fluid.layers.softmax(
input, name=self.get_unique_output_name(name, 'softmax'))
return output return output
@layer @layer
...@@ -289,7 +333,7 @@ class Network(object): ...@@ -289,7 +333,7 @@ class Network(object):
mean_name = prefix + 'mean' mean_name = prefix + 'mean'
variance_name = prefix + 'variance' variance_name = prefix + 'variance'
output = fluid.layers.batch_norm( output = fluid.layers.batch_norm(
name=name, name=self.get_unique_output_name(name, 'batch_norm'),
input=input, input=input,
is_test=True, is_test=True,
param_attr=param_attr, param_attr=param_attr,
...@@ -308,7 +352,10 @@ class Network(object): ...@@ -308,7 +352,10 @@ class Network(object):
output = input output = input
else: else:
output = fluid.layers.dropout( output = fluid.layers.dropout(
input, dropout_prob=drop_prob, is_test=is_test) input,
dropout_prob=drop_prob,
is_test=is_test,
name=self.get_unique_output_name(name, 'dropout'))
return output return output
@layer @layer
...@@ -328,8 +375,16 @@ class Network(object): ...@@ -328,8 +375,16 @@ class Network(object):
offset_param = fluid.layers.create_parameter( offset_param = fluid.layers.create_parameter(
shape=scale_shape, dtype=input.dtype, name=name, attr=offset_attr) shape=scale_shape, dtype=input.dtype, name=name, attr=offset_attr)
output = fluid.layers.elementwise_mul(input, scale_param, axis=axis) output = fluid.layers.elementwise_mul(
output = fluid.layers.elementwise_add(output, offset_param, axis=axis) input,
scale_param,
axis=axis,
name=self.get_unique_output_name(name, 'scale_mul'))
output = fluid.layers.elementwise_add(
output,
offset_param,
axis=axis,
name=self.get_unique_output_name(name, 'scale_add'))
return output return output
def custom_layer_factory(self): def custom_layer_factory(self):
...@@ -342,5 +397,6 @@ class Network(object): ...@@ -342,5 +397,6 @@ class Network(object):
def custom_layer(self, inputs, kind, name, *args, **kwargs): def custom_layer(self, inputs, kind, name, *args, **kwargs):
""" make custom layer """ make custom layer
""" """
name = self.get_unique_output_name(name, kind)
layer_factory = self.custom_layer_factory() layer_factory = self.custom_layer_factory()
return layer_factory(kind, inputs, name, *args, **kwargs) return layer_factory(kind, inputs, name, *args, **kwargs)
...@@ -3,9 +3,9 @@ import numpy as np ...@@ -3,9 +3,9 @@ import numpy as np
from ..errors import KaffeError, print_stderr from ..errors import KaffeError, print_stderr
from ..graph import GraphBuilder, NodeMapper from ..graph import GraphBuilder, NodeMapper
from ..layers import NodeKind from ..layers import NodeKind
from ..transformers import (DataInjector, DataReshaper, NodeRenamer, ReLUFuser, from ..transformers import (DataInjector, DataReshaper, NodeRenamer,
BatchNormScaleBiasFuser, BatchNormPreprocessor, SubNodeFuser, ReLUFuser, BatchNormScaleBiasFuser,
ParameterNamer) BatchNormPreprocessor, ParameterNamer)
from . import network from . import network
...@@ -18,7 +18,7 @@ def get_padding_type(kernel_params, input_shape, output_shape): ...@@ -18,7 +18,7 @@ def get_padding_type(kernel_params, input_shape, output_shape):
https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto
''' '''
k_h, k_w, s_h, s_w, p_h, p_w = kernel_params k_h, k_w, s_h, s_w, p_h, p_w = kernel_params
if p_h * p_w > 0: if p_h > 0 or p_w > 0:
return [p_h, p_w] return [p_h, p_w]
else: else:
return None return None
...@@ -315,6 +315,23 @@ class Transformer(object): ...@@ -315,6 +315,23 @@ class Transformer(object):
self.graph = graph.transformed(transformers) self.graph = graph.transformed(transformers)
#for the purpose of recording name mapping because of fused nodes
trace = SubNodeFuser.traced_names()
chg2real = {}
deleted = {}
for k, v in trace.items():
chg2real[k] = v[-1] #mapping from changed-name to real-name
for n in v:
if n in chg2real:
continue
if n not in deleted:
deleted[n] = '%s.%s' % (k, v[-1])
self.graph.add_name_trace({
'chg2real': chg2real,
'deleted': deleted
}, 'paddle')
# Display the graph # Display the graph
if self.verbose: if self.verbose:
print_stderr(self.graph) print_stderr(self.graph)
...@@ -339,6 +356,8 @@ class Transformer(object): ...@@ -339,6 +356,8 @@ class Transformer(object):
node.name: node.data node.name: node.data
for node in self.graph.nodes if node.data for node in self.graph.nodes if node.data
} }
self.params['caffe2fluid_name_trace'] = self.graph.get_name_trace()
return self.params return self.params
def transform_source(self): def transform_source(self):
......
...@@ -181,6 +181,20 @@ class SubNodeFuser(object): ...@@ -181,6 +181,20 @@ class SubNodeFuser(object):
''' '''
An abstract helper for merging a single-child with its single-parent. An abstract helper for merging a single-child with its single-parent.
''' '''
# Class-level registry shared by all SubNodeFuser subclasses: maps a kept
# (parent) node name to the list of node names fused into it.
# NOTE(review): being a class attribute, it accumulates across all fuser
# instances in the process -- presumably intentional; confirm if multiple
# models are converted in one run.
_traced_names = {}

@classmethod
def traced_names(cls):
    # Expose the accumulated fname -> [tname, ...] mapping so callers can
    # rebuild name relationships after fusion (e.g. for output comparison).
    return cls._traced_names

@classmethod
def trace(cls, fname, tname):
    """ recording the names mapping,
    the value of 'fname' will be replaced by value of 'tname'
    """
    # create the list lazily on first trace of this fname
    if fname not in cls._traced_names:
        cls._traced_names[fname] = []
    cls._traced_names[fname].append(tname)
def __call__(self, graph): def __call__(self, graph):
nodes = graph.nodes nodes = graph.nodes
...@@ -234,6 +248,7 @@ class ReLUFuser(SubNodeFuser): ...@@ -234,6 +248,7 @@ class ReLUFuser(SubNodeFuser):
child.kind == NodeKind.ReLU) child.kind == NodeKind.ReLU)
def merge(self, parent, child): def merge(self, parent, child):
SubNodeFuser.trace(parent.name, child.name)
parent.metadata['relu'] = True parent.metadata['relu'] = True
parent.metadata['relu_negative_slope'] = child.parameters.negative_slope parent.metadata['relu_negative_slope'] = child.parameters.negative_slope
...@@ -255,6 +270,7 @@ class BatchNormScaleBiasFuser(SubNodeFuser): ...@@ -255,6 +270,7 @@ class BatchNormScaleBiasFuser(SubNodeFuser):
child.parameters.bias_term == True) child.parameters.bias_term == True)
def merge(self, parent, child): def merge(self, parent, child):
SubNodeFuser.trace(parent.name, child.name)
parent.scale_bias_node = child parent.scale_bias_node = child
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册