From 5483258b7e8c6e56968b1d63091539ed34609b1c Mon Sep 17 00:00:00 2001
From: Luo Tao <luotao02@baidu.com>
Date: Tue, 10 Apr 2018 19:01:18 +0800
Subject: [PATCH] fuse batch norm for conv operator with bias

---
 python/paddle/fluid/inference_transpiler.py   | 44 +++++++++++++++----
 .../tests/book/test_image_classification.py   | 12 +++--
 2 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/python/paddle/fluid/inference_transpiler.py b/python/paddle/fluid/inference_transpiler.py
index 3791e93576..194f7adf46 100644
--- a/python/paddle/fluid/inference_transpiler.py
+++ b/python/paddle/fluid/inference_transpiler.py
@@ -45,10 +45,11 @@ class InferenceTranspiler:
           - conv->elementwise_add->any_other_op
         
         The transpile stages are:
-        1. insert elementwise_add op when bias == 0, and adjust its input and output.
+        1. insert elementwise_add op when bias == 0.
         2. fuse the batch_norm's parameters to conv and elementwise_add operators.
-        3. remove batch_norm ops and its variables which are not used in any other ops.
-        4. remove unused variables.
+        3. remove batch_norm ops which are not used in any other ops.
+        4. adjust the input of any_other_op to be the output of the elementwise_add operator.
+        5. remove unused variables.
 
         :param program: program to transpile 
         :type program: Program
@@ -62,24 +63,35 @@ class InferenceTranspiler:
         self.scope = scope
         self.place = place
         self.block = program.block(0)
+        self.input_map = {}  # input names to be adjusted -> replacements
+
         i = 0
         while i < len(self.block.ops):
             current_op = self.block.ops[i]
             # TODO(luotao1): consider only conv2d now. fc would be delt later.
             if current_op.type in ['conv2d']:
                 next_op = self.block.ops[i + 1]
-                # TODO(luotao1): consider only conv2d without bias now.
-                # If conv2d with bias, the next_op.type is elementwise_add.
+                # conv2d without bias
                 if (next_op.type == 'batch_norm'):
                     # insert bias op
                     bias_op = self._insert_bias_op(i + 1, current_op, next_op)
                     # fuse batch_norm
-                    self._fuse_param(current_op, next_op, bias_op)
+                    self._fuse_param(current_op, next_op, bias_op, 0)
                     # remove batch_norm_op
                     self.block.remove_op(i + 2)
                     i = i + 1
+                # conv2d with bias: next_op.type is elementwise_add
+                elif (next_op.type == 'elementwise_add'):
+                    next_next_op = self.block.ops[i + 2]
+                    if (next_next_op.type == 'batch_norm'):
+                        # fuse batch_norm
+                        self._fuse_param(current_op, next_next_op, next_op, 1)
+                        # remove batch_norm_op
+                        self.block.remove_op(i + 2)
+                        i = i + 1
             i = i + 1
 
+        self._adjust_input()
         self._remove_unused_var()
         return program
 
@@ -113,7 +125,7 @@ class InferenceTranspiler:
             attrs={"axis": 1})  # dim_start=1
         return bias_op
 
-    def _fuse_param(self, current_op, bn_op, bias_op):
+    def _fuse_param(self, current_op, bn_op, bias_op, with_bias):
         '''
         fuse the batch_norm_op' parameters to current_op (conv or fc)
         
@@ -123,6 +135,8 @@ class InferenceTranspiler:
         :type bn_op: Operator
         :param bias_op: elementwise_add operator for adding bias
         :type bias_op: Operator
+        :param with_bias: 1 if the current operator has a bias; otherwise 0.
+        :type with_bias: int
         '''
 
         def _load_tensor(param_name):
@@ -144,7 +158,10 @@ class InferenceTranspiler:
         tmp = np.float32(np.divide(scale_bn, std_bn))
 
         # add bias of batch_norm_op to conv2d
-        bias = np.zeros(bias_bn.shape)
+        if with_bias:
+            bias = _load_param(bias_op.input("Y"))
+        else:
+            bias = np.zeros(bias_bn.shape)
         bias = np.float32(
             np.add(np.multiply(np.subtract(bias, mean_bn), tmp), bias_bn))
         bias_tensor = _load_tensor(bias_op.input("Y"))
@@ -159,6 +176,17 @@ class InferenceTranspiler:
         # set the updated parameters
         current_tensor.set(np.array(dst_param), self.place)
 
+        # collect the renamed input
+        self.input_map[bn_op.output("Y")[0]] = bias_op.output("Out")[0]
+
+    def _adjust_input(self):
+        for i in range(len(self.block.ops)):
+            current_op = self.block.ops[i]
+            for input_arg in current_op.input_arg_names:
+                if input_arg in self.input_map:
+                    current_op.rename_input(input_arg,
+                                            self.input_map[input_arg])
+
     def _remove_unused_var(self):
         '''
         remove unused varibles in program
diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py
index bca42a89cd..5e47bcb2cb 100644
--- a/python/paddle/fluid/tests/book/test_image_classification.py
+++ b/python/paddle/fluid/tests/book/test_image_classification.py
@@ -26,7 +26,13 @@ import numpy as np
 
 
 def resnet_cifar10(input, depth=32):
-    def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
+    def conv_bn_layer(input,
+                      ch_out,
+                      filter_size,
+                      stride,
+                      padding,
+                      act='relu',
+                      bias_attr=False):
         tmp = fluid.layers.conv2d(
             input=input,
             filter_size=filter_size,
@@ -34,7 +40,7 @@ def resnet_cifar10(input, depth=32):
             stride=stride,
             padding=padding,
             act=None,
-            bias_attr=False)
+            bias_attr=bias_attr)
         return fluid.layers.batch_norm(input=tmp, act=act)
 
     def shortcut(input, ch_in, ch_out, stride):
@@ -45,7 +51,7 @@ def resnet_cifar10(input, depth=32):
 
     def basicblock(input, ch_in, ch_out, stride):
         tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
-        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None)
+        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True)
         short = shortcut(input, ch_in, ch_out, stride)
         return fluid.layers.elementwise_add(x=tmp, y=short, act='relu')
 
-- 
GitLab