Commit f45818e7 authored by Luo Tao

create new variable in scope

Parent 6e735e1e
@@ -67,6 +67,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + [
     'clip',
     'SimpleDistributeTranspiler',
     'DistributeTranspiler',
+    'InferenceTranspiler',
     'memory_optimize',
     'release_memory',
     'profiler',
@@ -21,7 +21,20 @@ from . import core

 class InferenceTranspiler:
     def transpile(self, program, scope, place):
         '''
-        Transpile the program to a inference program by fused batch normalization.
+        Transpile the program. Only fusing batch normalization is supported for now.
+
+        :param program: program to transpile
+        :type program: Program
+        :param scope: inference scope
+        :type scope: Scope
+        :param place: inference place
+        :type place: Place
+        '''
+        self.fuse_batch_norm(program, scope, place)
+
+    def fuse_batch_norm(self, program, scope, place):
+        '''
+        Transpile the program by fused batch normalization.

         The batch normalization followed the convolution or fully connected layer
         can be integrated with them. Doing so will give us a forward acceleration,
@@ -57,8 +70,6 @@ class InferenceTranspiler:
         :type scope: Scope
         :param place: inference place
         :type place: Place
-        :return: program by fused batch normalization
-        :rtype: Program
         '''
         self.scope = scope
         self.place = place
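
For context, the arithmetic behind the fusion can be sketched in a few lines of NumPy. This is an illustrative sketch only; the toy shapes and the names w_conv, b_conv, gamma, beta, mean and var are hypothetical, not identifiers from the patch:

import numpy as np

# Toy shapes: conv weight (out_c, in_c, kh, kw), batch-norm params (out_c,).
out_c, in_c, kh, kw = 4, 3, 3, 3
w_conv = np.random.rand(out_c, in_c, kh, kw).astype("float32")
b_conv = np.zeros(out_c, dtype="float32")  # conv bias (zero if the conv has none)
gamma = np.ones(out_c, dtype="float32")    # batch-norm Scale
beta = np.zeros(out_c, dtype="float32")    # batch-norm Bias
mean = np.zeros(out_c, dtype="float32")    # batch-norm saved mean
var = np.ones(out_c, dtype="float32")      # batch-norm saved variance

tmp = gamma / np.sqrt(var + 1e-5)            # same epsilon as the patch uses
w_fused = w_conv * tmp.reshape(-1, 1, 1, 1)  # fold the per-channel scale into the conv weight
b_fused = (b_conv - mean) * tmp + beta       # fold the shift into a new bias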
@@ -96,7 +107,7 @@ class InferenceTranspiler:
         # TODO(luotao): use clone() method to flush the program.desc in force,
         # since some large program.desc will not be flushed immediately.
         # And a better solution will be considered later.
-        return program.clone()
+        program = program.clone()

     # ====================== private transpiler functions =====================
     def _insert_bias_op(self, index, current_op, bn_op):
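
Since transpile() no longer returns a program, a caller that wants to keep the original untouched clones it first and transpiles the copy, as the updated test below does. A minimal usage sketch, assuming inference_program, inference_scope and place are already set up elsewhere:

import paddle.fluid as fluid

# Sketch only: inference_program, inference_scope and place come from elsewhere.
inference_transpiler_program = inference_program.clone()
t = fluid.InferenceTranspiler()
t.transpile(inference_transpiler_program, inference_scope, place)  # edits the copy in place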
@@ -142,11 +153,25 @@ class InferenceTranspiler:
         :type with_bias: Int
         '''

-        def _load_tensor(param_name):
-            return self.scope.find_var(param_name[0]).get_tensor()
+        def _update_param(op, old_param_name, new_param):
+            # To keep the original variables unchanged, create new variables
+            # in scope to store the new parameters.
+            old_param_name = old_param_name[0]
+            old_var = self.block.vars[old_param_name]
+            new_param_name = old_param_name + '_fuse_bn'
+            new_var = self.block.create_parameter(
+                name=new_param_name.encode('ascii'),
+                type=old_var.type,
+                dtype=old_var.dtype,
+                shape=old_var.shape)
+            op.rename_input(old_param_name, new_param_name)
+            self.scope.var(new_param_name)
+
+            tensor = self.scope.find_var(new_param_name).get_tensor()
+            tensor.set(np.array(new_param), self.place)

         def _load_param(param_name):
-            return np.array(_load_tensor(param_name))
+            return np.array(self.scope.find_var(param_name[0]).get_tensor())

         bias_bn = _load_param(bn_op.input("Bias"))  #Bias
         scale_bn = _load_param(bn_op.input("Scale"))  #Scale
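
Why create new variables instead of overwriting the old tensors: the updated test below runs both the original program and the transpiled copy against the same scope, so clobbering the old parameters would silently change the baseline results. A trivial sketch of the naming scheme only (the parameter name here is hypothetical):

# Hypothetical parameter name; _update_param derives the new name from the old one.
old_param_name = 'conv2d_0.w_0'
new_param_name = old_param_name + '_fuse_bn'
print(new_param_name)  # conv2d_0.w_0_fuse_bn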
@@ -155,8 +180,6 @@
         # TODO(luotao1): consider only conv2d now. fc would be delt later.
         current_param = _load_param(current_op.input("Filter"))
-        current_tensor = _load_tensor(current_op.input("Filter"))
-
         std_bn = np.float32(np.sqrt(np.add(var_bn, 1e-5)))
         tmp = np.float32(np.divide(scale_bn, std_bn))
@@ -167,8 +190,6 @@
         bias = np.zeros(bias_bn.shape)
         bias = np.float32(
             np.add(np.multiply(np.subtract(bias, mean_bn), tmp), bias_bn))
-        bias_tensor = _load_tensor(bias_op.input("Y"))
-        bias_tensor.set(bias, self.place)

         # re-compute weight of conv2d
         tmp = tmp.reshape(tmp.shape[0], -1)
@@ -176,8 +197,9 @@
         dst_param = np.float32(np.multiply(dst_param, tmp))
         dst_param = dst_param.reshape(current_param.shape)
-        # set the updated parameters
-        current_tensor.set(np.array(dst_param), self.place)
+        # update parameters
+        _update_param(current_op, current_op.input("Filter"), dst_param)
+        _update_param(bias_op, bias_op.input("Y"), bias)

         # collect the renamed input
         self.input_map[bn_op.output("Y")[0]] = bias_op.output("Out")[0]
@@ -226,16 +226,17 @@ def infer(use_cuda, save_dirname=None):
         batch_size = 1
         tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32")

+        # Use inference_transpiler to speedup
+        inference_transpiler_program = inference_program.clone()
+        t = fluid.InferenceTranspiler()
+        t.transpile(inference_transpiler_program, inference_scope, place)
+
         # Construct feed as a dictionary of {feed_target_name: feed_target_data}
         # and results will contain a list of data corresponding to fetch_targets.
         results = exe.run(inference_program,
                           feed={feed_target_names[0]: tensor_img},
                           fetch_list=fetch_targets)

-        # Use inference_transpiler to speedup
-        t = fluid.InferenceTranspiler()
-        inference_transpiler_program = t.transpile(inference_program,
-                                                   inference_scope, place)

         transpiler_results = exe.run(inference_transpiler_program,
                                      feed={feed_target_names[0]: tensor_img},
                                      fetch_list=fetch_targets)
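
A natural follow-up to running both programs on the same input is to compare their outputs. A hedged sketch, assuming each fetch list holds NumPy arrays and picking an arbitrary tolerance:

# Sketch: the fused program should reproduce the original outputs numerically.
assert len(results[0]) == len(transpiler_results[0])
for i in range(len(results[0])):
    numpy.testing.assert_almost_equal(
        results[0][i], transpiler_results[0][i], decimal=5)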