caffe2fluid: minimize the result difference generated by caffe-model and fluid-model (#827)

* fix code style problems * fix bug when loading fluid model * fix code style problem in brain.py

caffe2fluid: minimize the result difference generated by caffe-model and fluid-model (#827)
* fix code style problems * fix bug when loading fluid model * fix code style problem in brain.py
fa5587d6 · walloollaw · qingqing01 · 6fa8a94b · fa5587d6 · fa5587d6
10 changed file
--- a/.gitignore
+++ b/.gitignore
 .DS_Store
 *.pyc
+.*~
--- a/fluid/image_classification/caffe2fluid/README.md
+++ b/fluid/image_classification/caffe2fluid/README.md
@@ -18,19 +18,19 @@ This tool is used to convert a Caffe model to Fluid model


 ### Tested models
- Lenet on mnist dataset
+- Lenet

 - ResNets:(ResNet-50, ResNet-101, ResNet-152)
-    model addr: `https://onedrive.live.com/?authkey=%21AAFW2-FVoxeVRck&id=4006CBB8476FF777%2117887&cid=4006CBB8476FF777`_
+[model addr](https://onedrive.live.com/?authkey=%21AAFW2-FVoxeVRck&id=4006CBB8476FF777%2117887&cid=4006CBB8476FF777)

 - GoogleNet:
-    model addr: `https://gist.github.com/jimmie33/7ea9f8ac0da259866b854460f4526034`_
+[model addr](https://gist.github.com/jimmie33/7ea9f8ac0da259866b854460f4526034)

 - VGG:
-    model addr: `https://gist.github.com/ksimonyan/211839e770f7b538e2d8`_
+[model addr](https://gist.github.com/ksimonyan/211839e770f7b538e2d8)

 - AlexNet:
-    model addr: `https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet`_
+[model addr](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet)

 ### Notes
 Some of this code come from here: https://github.com/ethereon/caffe-tensorflow
--- a/fluid/image_classification/caffe2fluid/examples/imagenet/compare.py
+++ b/fluid/image_classification/caffe2fluid/examples/imagenet/compare.py
+#!/usr/bin/python
+
+#
+#a tool to compare tensors in two files or two directories
+#
+
+import sys
+import os
+
+
+def walk_dir(rootdir):
+    """ yield the path, relative to 'rootdir', of every file found under it
+
+    Sub-directories are walked recursively (via os.walk). The yielded path
+    keeps the sub-directory part, so joining it with a root directory gives
+    the location of the file; a file placed directly in 'rootdir' is yielded
+    as its bare name.
+    """
+    for subdir, _, filenames in os.walk(rootdir):
+        for filename in filenames:
+            yield os.path.relpath(os.path.join(subdir, filename), rootdir)
+
+
+def calc_diff(f1, f2):
+    """ compute the difference between the tensors stored in two '.npy' files
+
+    Args:
+        f1, f2: paths of two files, each of them is loaded with numpy.load
+
+    Returns:
+        a tuple (max_df, sq_df): the max and the mean square of the
+        element-wise absolute difference of the two flattened tensors,
+        or (0.0, 0.0) when both tensors are empty
+
+    Raises:
+        AssertionError: if the two tensors do not hold the same number
+            of elements
+    """
+    import numpy as np
+
+    d1 = np.load(f1).flatten()
+    d2 = np.load(f2).flatten()
+
+    #a flattened tensor has a single dimension, so 'size' is its number of
+    #elements; this avoids the python2-only builtin 'reduce'
+    if d1.size != d2.size:
+        print(d1.shape)
+        print(d2.shape)
+        #raised explicitly so that the check survives 'python -O'
+        raise AssertionError("their shape is not consistent")
+
+    if d1.size == 0:
+        #nothing to compare, and np.max can not reduce an empty tensor
+        return 0.0, 0.0
+
+    #no try/except here: a failure has to reach the caller instead of being
+    #turned into (-1.0, -1.0), which passes any 'less than' threshold check
+    df = np.abs(d1 - d2)
+    return np.max(df), np.mean(df * df)
+
+
+def compare(path1, path2):
+    """ compare the tensors stored in two '.npy' files or in two directories
+
+    When both paths end with '.npy' the two files are compared directly.
+    Otherwise each '.npy' name yielded by walk_dir(path2) is joined to
+    'path1' and to 'path2', and the two resulting files are compared.
+
+    Returns:
+        0 if every compared pair is close enough,
+        1 if one of the two paths does not exist
+
+    Raises:
+        AssertionError: if a pair of tensors differs by more than the
+            tolerated thresholds (max_df: 1e-5, sq_df: 1e-10)
+    """
+
+    def diff(f1, f2):
+        max_df, sq_df = calc_diff(f1, f2)
+        print('compare %s <=> %s with result[max_df:%.4e, sq_df:%.4e]' %
+              (f1, f2, max_df, sq_df))
+        #raised explicitly so that the checks are not stripped by 'python -O';
+        #written as 'not (x < limit)' so that a NaN difference is rejected too
+        if not (max_df < 1e-5):
+            raise AssertionError(
+                'max_df is too large with value[%.6e]' % (max_df))
+        if not (sq_df < 1e-10):
+            raise AssertionError(
+                'sq_df is too large with value[%.6e]' % (sq_df))
+
+    for path in (path1, path2):
+        if not os.path.exists(path):
+            print('not found %s' % (path))
+            return 1
+
+    #test the extension itself, not the presence of '.npy' somewhere in the path
+    if path1.endswith('.npy') and path2.endswith('.npy'):
+        diff(path1, path2)
+        #same status as the directory case (a bare 'return' also exits with 0)
+        return 0
+
+    for f in walk_dir(path2):
+        if not f.endswith('.npy'):
+            continue
+
+        diff(os.path.join(path1, f), os.path.join(path2, f))
+
+    print('all checking succeed to pass')
+    return 0
+
+
+if __name__ == "__main__":
+    #no argument: compare the two default result directories
+    #two arguments: compare the given files or directories
+    if len(sys.argv) == 1:
+        path1 = 'lenet.tf/results'
+        path2 = 'lenet.paddle/results'
+    elif len(sys.argv) == 3:
+        path1 = sys.argv[1]
+        path2 = sys.argv[2]
+    else:
+        print('usage:')
+        print(' %s [path1] [path2]' % (sys.argv[0]))
+        #sys.exit instead of the builtin 'exit', which is only installed by
+        #the 'site' module
+        sys.exit(1)
+
+    print('compare inner result in %s %s' % (path1, path2))
+    #the value returned by compare() becomes the exit status
+    sys.exit(compare(path1, path2))
--- a/fluid/image_classification/caffe2fluid/examples/imagenet/diff.sh
+++ b/fluid/image_classification/caffe2fluid/examples/imagenet/diff.sh
+#!/bin/bash
+
+#
+#function:
+#   a tool used to check the difference of models' results generated by caffe model and paddle model
+#
+#howto:
+#   bash diff.sh resnet50 #when this has been finished, you can get the difference in precision
+#
+#notes:
+#   0, in order to infer using caffe, we need pycaffe installed
+#   1, prepare your caffe model in 'models.caffe/', eg: 'models.caffe/resnet101/resnet101.[prototxt|caffemodel]'
+#   2, converted paddle model will be in 'models'
+#   3, results of layers will be stored in 'results/${model_name}.[paddle|caffe]'
+#   4, only the last layer will be checked by default
+
+model_name="resnet50"
+#no trailing '/', it is added where the paths are built
+results_root="results"
+
+if [[ -n $1 ]];then
+    if [ "$1" = "-h" ];then
+        echo "usage:"
+        echo "  bash $0 [model_name]"
+        echo "  eg:bash $0 resnet50"
+        exit 0
+    fi
+    model_name=$1
+fi
+
+mkdir -p "$results_root"
+
+model_prototxt="models.caffe/${model_name}/${model_name}.prototxt"
+model_caffemodel="models.caffe/${model_name}/${model_name}.caffemodel"
+
+#1, dump layers' results from paddle
+#   run.sh is expected to leave its results in 'results.paddle'
+paddle_results="$results_root/${model_name}.paddle"
+rm -rf "$paddle_results"
+rm -rf "results.paddle"
+bash run.sh "$model_name" "./models.caffe/$model_name" "./models/$model_name"
+if [[ $? -ne 0 ]] || [[ ! -e "results.paddle" ]];then
+    echo "not found paddle's results, maybe failed to convert"
+    exit 1
+fi
+mv results.paddle "$paddle_results"
+
+#2, dump layers' results from caffe
+#   the input is the 'data.npy' dumped in step 1, so both models get the same data
+#   NOTE(review): 'cfpython' is not defined in this script, presumably a python
+#   interpreter with pycaffe available -- confirm
+caffe_results="$results_root/${model_name}.caffe"
+rm -rf "$caffe_results"
+rm -rf "results.caffe"
+cfpython ./infer.py caffe "$model_prototxt" "$model_caffemodel" "$paddle_results/data.npy"
+if [[ $? -ne 0 ]] || [[ ! -e "results.caffe" ]];then
+    echo "not found caffe's results, maybe failed to do inference with caffe"
+    exit 1
+fi
+mv results.caffe "$caffe_results"
+
+#3, extract layer names
+#   keep the quoted value of every line beginning with 'name:'
+grep name "$model_prototxt" | perl -ne 'if(/^\s*name:\s+\"([^\"]+)/){ print $1."\n";}' >.layer_names
+
+#4, compare one by one
+#   'tail -n1' keeps only the last extracted name
+for i in $(tail -n1 ".layer_names");do
+    echo "process $i"
+    python compare.py "$caffe_results/${i}.npy" "$paddle_results/${i}.npy"
+done
--- a/fluid/image_classification/caffe2fluid/examples/imagenet/infer.py
+++ b/fluid/image_classification/caffe2fluid/examples/imagenet/infer.py
@@ -10,8 +10,11 @@ import os
 import sys
 import inspect
 import numpy as np
-import paddle.v2 as paddle
-import paddle.v2.fluid as fluid
+
+
+def import_fluid():
+    """ import the fluid module of paddle only when it is called, and return it
+    """
+    from paddle import fluid
+    return fluid


 def load_data(imgfile, shape):
@@ -52,8 +55,10 @@ def build_model(net_file, net_name):
        print(e)
        return None

-    input_name = 'data'
-    input_shape = MyNet.input_shapes()[input_name]
+    fluid = import_fluid()
+    inputs_dict = MyNet.input_shapes()
+    input_name = inputs_dict.keys()[0]
+    input_shape = inputs_dict[input_name]
    images = fluid.layers.data(name='image', shape=input_shape, dtype='float32')
    #label = fluid.layers.data(name='label', shape=[1], dtype='int64')

@@ -64,7 +69,7 @@ def build_model(net_file, net_name):

 def dump_results(results, names, root):
    if os.path.exists(root) is False:
-        os.path.mkdir(root)
+        os.mkdir(root)

    for i in range(len(names)):
        n = names[i]
@@ -73,9 +78,12 @@ def dump_results(results, names, root):
        np.save(filename + '.npy', res)


-def infer(net_file, net_name, model_file, imgfile, debug=False):
+def infer(net_file, net_name, model_file, imgfile, debug=True):
    """ do inference using a model which consist 'xxx.py' and 'xxx.npy'
    """
+
+    fluid = import_fluid()
+
    #1, build model
    net, input_shape = build_model(net_file, net_name)
    prediction = net.get_output()
@@ -109,34 +117,79 @@ def infer(net_file, net_name, model_file, imgfile, debug=False):
                      fetch_list=fetch_list_var)

    if debug is True:
-        dump_path = 'results.layers'
+        dump_path = 'results.paddle'
        dump_results(results, fetch_list_name, dump_path)
-        print('all results dumped to [%s]' % (dump_path))
+        print('all result of layers dumped to [%s]' % (dump_path))
    else:
        result = results[0]
        print('predicted class:', np.argmax(result))

+    return 0
+
+
+def caffe_infer(prototxt, caffemodel, datafile):
+    """ do inference using pycaffe for debug,
+        all intermediate results will be dumped to 'results.caffe'
+
+    Args:
+        prototxt, caffemodel: passed to caffe.Net to build the network
+            in TEST phase
+        datafile: the input to feed; when its name contains '.npy' it is
+            loaded with np.load, otherwise it is read by load_data() using
+            the shape of the input blob
+
+    Returns:
+        0 once the results have been dumped
+    """
+    import caffe
+
+    net = caffe.Net(prototxt, caffemodel, caffe.TEST)
+    #list() because the keys of a dict can not be indexed in python3
+    input_layer = list(net.blobs.keys())[0]
+    print('got name of input layer is:%s' % (input_layer))
+    #drop the first dimension of the blob's shape
+    input_shape = list(net.blobs[input_layer].data.shape[1:])
+
+    if '.npy' in datafile:
+        np_images = np.load(datafile)
+    else:
+        np_images = load_data(datafile, input_shape)
+
+    inputs = {input_layer: np_images}
+    net.forward_all(**inputs)
+
+    suffix = '_output'
+    results = []
+    names = []
+    for k, v in net.blobs.items():
+        #str.rstrip() removes a set of characters and not a suffix (it turns
+        #'concat' into 'conca'), so the suffix is cut explicitly
+        if k.endswith(suffix):
+            k = k[:-len(suffix)]
+        #the name is used to build a file name, so it can not keep any '/'
+        k = k.replace('/', '_')
+        names.append(k)
+        results.append(v.data.copy())
+
+    dump_path = 'results.caffe'
+    dump_results(results, names, dump_path)
+    print('all result of layers dumped to [%s]' % (dump_path))
+    return 0
+

 if __name__ == "__main__":
    """ maybe more convenient to use 'run.sh' to call this tool
    """
    net_file = 'models/resnet50/resnet50.py'
    weight_file = 'models/resnet50/resnet50.npy'
-    imgfile = 'data/65.jpeg'
+    datafile = 'data/65.jpeg'
    net_name = 'ResNet50'

    argc = len(sys.argv)
-    if argc == 5:
+    #check argc first: sys.argv[1] does not exist when no argument is given,
+    #and that case has to fall through to the default settings above
+    if argc > 1 and sys.argv[1] == 'caffe':
+        if len(sys.argv) != 5:
+            print('usage:')
+            print('\tpython %s caffe [prototxt] [caffemodel] [datafile]' %
+                  (sys.argv[0]))
+            sys.exit(1)
+        prototxt = sys.argv[2]
+        caffemodel = sys.argv[3]
+        datafile = sys.argv[4]
+        sys.exit(caffe_infer(prototxt, caffemodel, datafile))
+    elif argc == 5:
        net_file = sys.argv[1]
        weight_file = sys.argv[2]
-        imgfile = sys.argv[3]
+        datafile = sys.argv[3]
        net_name = sys.argv[4]
    elif argc > 1:
        print('usage:')
-        print('\tpython %s [net_file] [weight_file] [imgfile] [net_name]' %
+        print('\tpython %s [net_file] [weight_file] [datafile] [net_name]' %
              (sys.argv[0]))
        print('\teg:python %s %s %s %s %s' % (sys.argv[0], net_file,
-                                              weight_file, imgfile, net_name))
+                                              weight_file, datafile, net_name))
        sys.exit(1)

-    infer(net_file, net_name, weight_file, imgfile)
+    infer(net_file, net_name, weight_file, datafile)
--- a/fluid/image_classification/caffe2fluid/examples/imagenet/run.sh
+++ b/fluid/image_classification/caffe2fluid/examples/imagenet/run.sh
@@ -3,7 +3,7 @@
 #function:
 #   a tool used to:
 #       1, convert a caffe model
-#       2, do inference using this model
+#       2, do inference(only in fluid) using this model
 #
 #usage:
 #   bash run.sh resnet50 ./models.caffe/resnet50 ./models/resnet50
@@ -65,7 +65,12 @@ if [[ -z $only_convert ]];then
        PYTHON=`which python`
    fi
    imgfile="data/65.jpeg"
-    net_name=`grep "name" $proto_file | head -n1 | perl -ne 'if(/\"([^\"]+)\"/){ print $1."\n";}'`
+    #FIXME:
+    #   only the first 'name' line of the prototxt file is used as the name of this network, which may not be correct
+    net_name=`grep "name" $proto_file | head -n1 | perl -ne 'if(/^\s*name\s*:\s*\"([^\"]+)\"/){ print $1."\n";}'`
+    if [[ -z $net_name ]];then
+        net_name="MyNet"
+    fi
    $PYTHON ./infer.py $net_file $weight_file $imgfile $net_name
    ret=$?
 fi

--- a/fluid/image_classification/caffe2fluid/kaffe/graph.py
+++ b/fluid/image_classification/caffe2fluid/kaffe/graph.py
@@ -52,7 +52,10 @@ class Graph(object):
    def __init__(self, nodes=None, name=None):
        self.nodes = nodes or []
        self.node_lut = {node.name: node for node in self.nodes}
-        self.name = name
+        if name is None or name == '':
+            self.name = 'MyNet'
+        else:
+            self.name = name

    def add_node(self, node):
        self.nodes.append(node)

--- a/fluid/image_classification/caffe2fluid/kaffe/paddle/network.py
+++ b/fluid/image_classification/caffe2fluid/kaffe/paddle/network.py
@@ -4,7 +4,7 @@ import numpy as np


 def import_fluid():
-    import paddle.v2.fluid as fluid
+    import paddle.fluid as fluid
    return fluid


@@ -64,7 +64,7 @@ class Network(object):
        if os.path.isdir(data_path):
            assert (exe is not None), \
                'must provide a executor to load fluid model'
-            fluid.io.load_persistables_if_exist(executor=exe, dirname=data_path)
+            fluid.io.load_persistables(executor=exe, dirname=data_path)
            return True

        #load model from a npy file
@@ -161,56 +161,28 @@ class Network(object):
        output = fluid.layers.relu(x=input)
        return output

-    def _adjust_pad_if_needed(self, i_hw, k_hw, s_hw, p_hw):
-        #adjust the padding if needed
-        i_h, i_w = i_hw
-        k_h, k_w = k_hw
-        s_h, s_w = s_hw
-        p_h, p_w = p_hw
-
-        def is_consistent(i, k, s, p):
-            o = i + 2 * p - k
-            if o % s == 0:
-                return True
-            else:
-                return False
-
-        real_p_h = 0
-        real_p_w = 0
-        if is_consistent(i_h, k_h, s_h, p_h) is False:
-            real_p_h = int(k_h / 2)
-
-        if is_consistent(i_w, k_w, s_w, p_w) is False:
-            real_p_w = int(k_w / 2)
-
-        return [real_p_h, real_p_w]
-
    def pool(self, pool_type, input, k_h, k_w, s_h, s_w, name, padding):
        # Get the number of channels in the input
        in_hw = input.shape[2:]
        k_hw = [k_h, k_w]
        s_hw = [s_h, s_w]

-        if padding is None:
-            #fix bug about the difference between conv and pool
-            #more info: https://github.com/BVLC/caffe/issues/1318
-            padding = self._adjust_pad_if_needed(in_hw, k_hw, s_hw, [0, 0])
-
        fluid = import_fluid()
        output = fluid.layers.pool2d(
            input=input,
            pool_size=k_hw,
            pool_stride=s_hw,
            pool_padding=padding,
+            ceil_mode=True,
            pool_type=pool_type)
        return output

    @layer
-    def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None):
+    def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=[0, 0]):
        return self.pool('max', input, k_h, k_w, s_h, s_w, name, padding)

    @layer
-    def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None):
+    def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=[0, 0]):
        return self.pool('avg', input, k_h, k_w, s_h, s_w, name, padding)

    @layer
@@ -258,7 +230,12 @@ class Network(object):
        return output

    @layer
-    def batch_normalization(self, input, name, scale_offset=True, relu=False):
+    def batch_normalization(self,
+                            input,
+                            name,
+                            scale_offset=True,
+                            eps=1e-5,
+                            relu=False):
        # NOTE: Currently, only inference is supported
        fluid = import_fluid()
        prefix = name + '_'
@@ -276,7 +253,7 @@ class Network(object):
            bias_attr=bias_attr,
            moving_mean_name=mean_name,
            moving_variance_name=variance_name,
-            epsilon=1e-5,
+            epsilon=eps,
            act='relu' if relu is True else None)

        return output

--- a/fluid/image_classification/caffe2fluid/kaffe/paddle/transformer.py
+++ b/fluid/image_classification/caffe2fluid/kaffe/paddle/transformer.py
@@ -142,7 +142,13 @@ class TensorFlowMapper(NodeMapper):

    def map_batch_norm(self, node):
        scale_offset = len(node.data) == 4
-        kwargs = {} if scale_offset else {'scale_offset': False}
+
+        #this default value comes from caffe's param in batch_norm
+        default_eps = 1e-5
+        kwargs = {'scale_offset': scale_offset}
+        if node.parameters.eps != default_eps:
+            kwargs['eps'] = node.parameters.eps
+
        return MaybeActivated(
            node, default=False)('batch_normalization', **kwargs)

@@ -236,7 +242,7 @@ class TensorFlowEmitter(object):
        func_def = self.statement('@classmethod')
        func_def += self.statement('def convert(cls, npy_model, fluid_path):')
        self.indent()
-        func_def += self.statement('import paddle.v2.fluid as fluid')
+        func_def += self.statement('fluid = import_fluid()')
        for l in codes:
            func_def += self.statement(l)
        return '\n' + func_def

--- a/fluid/policy_gradient/brain.py
+++ b/fluid/policy_gradient/brain.py
@@ -30,15 +30,11 @@ class PolicyGradient:
        acts = fluid.layers.data(name='acts', shape=[1], dtype='int64')
        vt = fluid.layers.data(name='vt', shape=[1], dtype='float32')
        # fc1
-        fc1 = fluid.layers.fc(
-            input=obs,
-            size=10,
-            act="tanh"  # tanh activation
-        )
+        fc1 = fluid.layers.fc(input=obs, size=10, act="tanh")  # tanh activation
        # fc2
        all_act_prob = fluid.layers.fc(input=fc1,
-                                            size=self.n_actions,
-                                            act="softmax")
+                                       size=self.n_actions,
+                                       act="softmax")
        self.inferece_program = fluid.defaul_main_program().clone()
        # to maximize total reward (log_p * R) is to minimize -(log_p * R)
        neg_log_prob = fluid.layers.cross_entropy(
@@ -53,10 +49,9 @@ class PolicyGradient:
        self.exe.run(fluid.default_startup_program())

    def choose_action(self, observation):
-        prob_weights = self.exe.run(
-            self.inferece_program,
-            feed={"obs": observation[np.newaxis, :]},
-            fetch_list=[self.all_act_prob])
+        prob_weights = self.exe.run(self.inferece_program,
+                                    feed={"obs": observation[np.newaxis, :]},
+                                    fetch_list=[self.all_act_prob])
        prob_weights = np.array(prob_weights[0])
        action = np.random.choice(
            range(prob_weights.shape[1]),