Commit 5483258b authored by Luo Tao

fuse batch norm for conv operator with bias

Parent ea0cf6f3
@@ -45,10 +45,11 @@ class InferenceTranspiler:
     - conv->elementwise_add->any_other_op

     The transpile stages are:
-    1. insert elementwise_add op when bias == 0, and adjust its input and output.
+    1. insert elementwise_add op when bias == 0.
     2. fuse the batch_norm's parameters to conv and elementwise_add operators.
-    3. remove batch_norm ops and its variables which are not used in any other ops.
-    4. remove unused variables.
+    3. remove batch_norm ops which are not used in any other ops.
+    4. adjust the input of any_other_op to be the output of elementwise_add operator.
+    5. remove unused variables.

     :param program: program to transpile
     :type program: Program
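A minimal numpy sketch (toy scalars, not the Paddle code in this diff) of why stage 2 is possible at all: at inference time batch_norm applies a fixed per-channel affine map, so it can always be absorbed into the preceding conv's weights plus an elementwise_add bias.

    import numpy as np

    y = np.random.rand(8).astype(np.float32)  # stand-in for one conv output channel
    scale, offset = 1.5, 0.3                  # learned batch_norm scale and shift
    mean, var, eps = 0.2, 0.9, 1e-5           # saved statistics and epsilon

    # batch_norm at inference time is a fixed affine map ...
    bn_out = scale * (y - mean) / np.sqrt(var + eps) + offset
    # ... so it folds into a weight multiplier and a bias term
    a = scale / np.sqrt(var + eps)   # multiplies the conv weights
    b = offset - mean * a            # becomes the elementwise_add bias
    assert np.allclose(bn_out, a * y + b, atol=1e-6)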
@@ -62,24 +63,35 @@ class InferenceTranspiler:
         self.scope = scope
         self.place = place
         self.block = program.block(0)
+        self.input_map = {}  # store the input names that should be adjusted

         i = 0
         while i < len(self.block.ops):
             current_op = self.block.ops[i]
             # TODO(luotao1): consider only conv2d now. fc would be dealt with later.
             if current_op.type in ['conv2d']:
                 next_op = self.block.ops[i + 1]
-                # TODO(luotao1): consider only conv2d without bias now.
-                # If conv2d with bias, the next_op.type is elementwise_add.
+                # conv2d without bias
                 if (next_op.type == 'batch_norm'):
                     # insert bias op
                     bias_op = self._insert_bias_op(i + 1, current_op, next_op)
                     # fuse batch_norm
-                    self._fuse_param(current_op, next_op, bias_op)
+                    self._fuse_param(current_op, next_op, bias_op, 0)
                     # remove batch_norm_op
                     self.block.remove_op(i + 2)
                     i = i + 1
+                # conv2d with bias, the next_op.type is elementwise_add
+                elif (next_op.type == 'elementwise_add'):
+                    next_next_op = self.block.ops[i + 2]
+                    if (next_next_op.type == 'batch_norm'):
+                        # fuse batch_norm
+                        self._fuse_param(current_op, next_next_op, next_op, 1)
+                        # remove batch_norm_op
+                        self.block.remove_op(i + 2)
+                        i = i + 1
             i = i + 1

+        self._adjust_input()
         self._remove_unused_var()
         return program
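The branch structure above only inspects op types. A toy rendering of the matching condition (plain strings instead of real Operator objects; detection only, no rewriting) shows which sequences qualify for fusion:

    def find_fusible(op_types):
        """Return indices of conv2d ops whose following batch_norm can be folded."""
        fusible = []
        i = 0
        while i < len(op_types) - 1:
            if op_types[i] == 'conv2d':
                if op_types[i + 1] == 'batch_norm':
                    fusible.append(i)   # conv2d without bias
                elif (op_types[i + 1] == 'elementwise_add'
                      and i + 2 < len(op_types)
                      and op_types[i + 2] == 'batch_norm'):
                    fusible.append(i)   # conv2d with bias (the new case)
            i += 1
        return fusible

    print(find_fusible(['conv2d', 'batch_norm', 'relu']))                     # [0]
    print(find_fusible(['conv2d', 'elementwise_add', 'batch_norm', 'relu']))  # [0]
    print(find_fusible(['conv2d', 'relu']))                                   # []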
@@ -113,7 +125,7 @@ class InferenceTranspiler:
             attrs={"axis": 1})  # dim_start=1
         return bias_op

-    def _fuse_param(self, current_op, bn_op, bias_op):
+    def _fuse_param(self, current_op, bn_op, bias_op, with_bias):
         '''
         fuse the batch_norm_op's parameters into current_op (conv or fc)
@@ -123,6 +135,8 @@ class InferenceTranspiler:
         :type bn_op: Operator
         :param bias_op: elementwise_add operator for adding bias
         :type bias_op: Operator
+        :param with_bias: If current operator has bias, with_bias = 1; otherwise 0.
+        :type with_bias: Int
         '''

         def _load_tensor(param_name):
@@ -144,7 +158,10 @@ class InferenceTranspiler:
         tmp = np.float32(np.divide(scale_bn, std_bn))

         # add bias of batch_norm_op to conv2d
-        bias = np.zeros(bias_bn.shape)
+        if with_bias:
+            bias = _load_param(bias_op.input("Y"))
+        else:
+            bias = np.zeros(bias_bn.shape)
         bias = np.float32(
             np.add(np.multiply(np.subtract(bias, mean_bn), tmp), bias_bn))
         bias_tensor = _load_tensor(bias_op.input("Y"))
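A quick numpy check (scalar stand-ins, assuming std_bn = sqrt(var + eps) as computed earlier in this method) that the with_bias branch is numerically equivalent to running batch_norm after the biased conv:

    import numpy as np

    x = np.random.rand(4).astype(np.float32)
    w = np.float32(2.0)                    # one conv output channel's weight
    bias = np.float32(0.5)                 # existing elementwise_add bias
    scale_bn, bias_bn = np.float32(1.2), np.float32(-0.1)
    mean_bn, std_bn = np.float32(0.3), np.float32(0.8)   # std_bn = sqrt(var + eps)

    # reference: conv with bias, then batch_norm
    reference = scale_bn * ((w * x + bias) - mean_bn) / std_bn + bias_bn

    # fused form, following the formulas in _fuse_param
    tmp = np.float32(scale_bn / std_bn)
    fused_bias = np.float32((bias - mean_bn) * tmp + bias_bn)
    fused_w = np.float32(w * tmp)          # the dst_param weight update
    assert np.allclose(reference, fused_w * x + fused_bias, atol=1e-6)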
@@ -159,6 +176,17 @@ class InferenceTranspiler:
         # set the updated parameters
         current_tensor.set(np.array(dst_param), self.place)

+        # collect the renamed input
+        self.input_map[bn_op.output("Y")[0]] = bias_op.output("Out")[0]
+
+    def _adjust_input(self):
+        for i in range(len(self.block.ops)):
+            current_op = self.block.ops[i]
+            for input_arg in current_op.input_arg_names:
+                if input_arg in self.input_map:
+                    current_op.rename_input(input_arg,
+                                            self.input_map[input_arg])
+
     def _remove_unused_var(self):
         '''
         remove unused variables in program
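_adjust_input is a plain rename pass. A toy version over dict-based ops (hypothetical variable names, not Paddle's Operator API) shows the effect: once batch_norm's output variable is gone, downstream ops must read the elementwise_add output instead.

    input_map = {'batch_norm_0.out': 'elementwise_add_0.out'}
    ops = [{'type': 'relu', 'inputs': ['batch_norm_0.out']}]

    # rename every input that points at a removed batch_norm output
    for op in ops:
        op['inputs'] = [input_map.get(name, name) for name in op['inputs']]

    print(ops)  # [{'type': 'relu', 'inputs': ['elementwise_add_0.out']}]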
@@ -26,7 +26,13 @@ import numpy as np

 def resnet_cifar10(input, depth=32):
-    def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
+    def conv_bn_layer(input,
+                      ch_out,
+                      filter_size,
+                      stride,
+                      padding,
+                      act='relu',
+                      bias_attr=False):
         tmp = fluid.layers.conv2d(
             input=input,
             filter_size=filter_size,
@@ -34,7 +40,7 @@ def resnet_cifar10(input, depth=32):
             stride=stride,
             padding=padding,
             act=None,
-            bias_attr=False)
+            bias_attr=bias_attr)
         return fluid.layers.batch_norm(input=tmp, act=act)

     def shortcut(input, ch_in, ch_out, stride):
@@ -45,7 +51,7 @@ def resnet_cifar10(input, depth=32):
     def basicblock(input, ch_in, ch_out, stride):
         tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
-        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None)
+        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True)
         short = shortcut(input, ch_in, ch_out, stride)
         return fluid.layers.elementwise_add(x=tmp, y=short, act='relu')
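With bias_attr now threaded through conv_bn_layer and enabled for the second conv in basicblock, the test model exercises both transpile paths: conv2d followed directly by batch_norm, and conv2d -> elementwise_add -> batch_norm.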