Commit f45818e7 authored by Luo Tao

create new variable in scope

Parent 6e735e1e
@@ -67,6 +67,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + [
     'clip',
     'SimpleDistributeTranspiler',
     'DistributeTranspiler',
+    'InferenceTranspiler',
     'memory_optimize',
     'release_memory',
     'profiler',
......
@@ -21,7 +21,20 @@ from . import core
 class InferenceTranspiler:
     def transpile(self, program, scope, place):
         '''
-        Transpile the program to a inference program by fused batch normalization.
+        Transpile the program. Only fusing batch normalization is supported now.
+
+        :param program: program to transpile
+        :type program: Program
+        :param scope: inference scope
+        :type scope: Scope
+        :param place: inference place
+        :type place: Place
+        '''
+        self.fuse_batch_norm(program, scope, place)
+
+    def fuse_batch_norm(self, program, scope, place):
+        '''
+        Transpile the program by fusing batch normalization.

         The batch normalization following the convolution or fully connected layer
         can be integrated with them. Doing so will give us a forward acceleration,
@@ -57,8 +70,6 @@ class InferenceTranspiler:
         :type scope: Scope
         :param place: inference place
         :type place: Place
-        :return: program by fused batch normalization
-        :rtype: Program
         '''
         self.scope = scope
         self.place = place
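With this change, transpile() mutates the given program in place instead of returning a transpiled clone, so a caller that wants to keep the original must clone it first. A minimal sketch of the intended call pattern, assuming a previously saved inference model (the path "my_model_dir" and the variable names here are hypothetical, not from the diff):

```python
import paddle.fluid as fluid

place = fluid.CPUPlace()
exe = fluid.Executor(place)
scope = fluid.core.Scope()
with fluid.scope_guard(scope):
    # "my_model_dir" is a hypothetical path to a saved inference model
    [program, feed_names, fetch_targets] = fluid.io.load_inference_model(
        "my_model_dir", exe)
    t = fluid.InferenceTranspiler()
    transpiled_program = program.clone()  # keep the original program intact
    t.transpile(transpiled_program, scope, place)
```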
@@ -96,7 +107,7 @@ class InferenceTranspiler:
         # TODO(luotao): use clone() method to flush the program.desc in force,
         # since some large program.desc will not be flushed immediately.
         # And a better solution will be considered later.
-        return program.clone()
+        program = program.clone()

    # ====================== private transpiler functions =====================

    def _insert_bias_op(self, index, current_op, bn_op):
@@ -142,11 +153,25 @@ class InferenceTranspiler:
         :type with_bias: Int
         '''

-        def _load_tensor(param_name):
-            return self.scope.find_var(param_name[0]).get_tensor()
+        def _update_param(op, old_param_name, new_param):
+            # To keep the original variables unchanged, create new variables
+            # in scope to store the new parameters.
+            old_param_name = old_param_name[0]
+            old_var = self.block.vars[old_param_name]
+            new_param_name = old_param_name + '_fuse_bn'
+            new_var = self.block.create_parameter(
+                name=new_param_name.encode('ascii'),
+                type=old_var.type,
+                dtype=old_var.dtype,
+                shape=old_var.shape)
+            op.rename_input(old_param_name, new_param_name)
+            self.scope.var(new_param_name)
+            tensor = self.scope.find_var(new_param_name).get_tensor()
+            tensor.set(np.array(new_param), self.place)

         def _load_param(param_name):
-            return np.array(_load_tensor(param_name))
+            return np.array(self.scope.find_var(param_name[0]).get_tensor())

         bias_bn = _load_param(bn_op.input("Bias"))  #Bias
         scale_bn = _load_param(bn_op.input("Scale"))  #Scale
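The new _update_param helper is what the commit message refers to: rather than overwriting a parameter tensor in place, it creates a fresh '_fuse_bn' variable in scope and renames the op input to point at it, so the original, un-transpiled program keeps reading the untouched parameters. A toy illustration with a plain dict standing in for the shared scope (the parameter name is made up for illustration):

```python
# both the original and the transpiled program resolve parameters
# by name in the same shared scope
scope = {"conv2d_0.w_0": "original filter"}

# old behavior (removed above): overwrite in place; the original
# program would then silently read the fused weights
# scope["conv2d_0.w_0"] = "fused filter"

# new behavior: store the fused weights under a renamed key and point
# only the transpiled op at it; the original entry stays untouched
scope["conv2d_0.w_0_fuse_bn"] = "fused filter"
```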
@@ -155,8 +180,6 @@ class InferenceTranspiler:

         # TODO(luotao1): consider only conv2d now. fc would be dealt with later.
         current_param = _load_param(current_op.input("Filter"))
-        current_tensor = _load_tensor(current_op.input("Filter"))
-
         std_bn = np.float32(np.sqrt(np.add(var_bn, 1e-5)))
         tmp = np.float32(np.divide(scale_bn, std_bn))
@@ -167,8 +190,6 @@ class InferenceTranspiler:
         bias = np.zeros(bias_bn.shape)
         bias = np.float32(
             np.add(np.multiply(np.subtract(bias, mean_bn), tmp), bias_bn))
-        bias_tensor = _load_tensor(bias_op.input("Y"))
-        bias_tensor.set(bias, self.place)

         # re-compute weight of conv2d
         tmp = tmp.reshape(tmp.shape[0], -1)
@@ -176,8 +197,9 @@ class InferenceTranspiler:
         dst_param = np.float32(np.multiply(dst_param, tmp))
         dst_param = dst_param.reshape(current_param.shape)
-        # set the updated parameters
-        current_tensor.set(np.array(dst_param), self.place)
+        # update parameters
+        _update_param(current_op, current_op.input("Filter"), dst_param)
+        _update_param(bias_op, bias_op.input("Y"), bias)

         # collect the renamed input
         self.input_map[bn_op.output("Y")[0]] = bias_op.output("Out")[0]
......
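The arithmetic in the hunks above folds a batch_norm op into the preceding conv2d: with tmp = scale / sqrt(var + eps), the fused filter is the original filter scaled by tmp per output channel, and the fused bias is (0 - mean) * tmp + bias. A self-contained numpy sketch, not part of the commit, that checks this fold with a matrix product standing in for the convolution:

```python
import numpy as np

np.random.seed(0)
c_out, c_in = 4, 3
w = np.random.rand(c_out, c_in).astype("float32")  # stand-in conv filter
x = np.random.rand(c_in).astype("float32")
scale_bn = np.random.rand(c_out).astype("float32")
bias_bn = np.random.rand(c_out).astype("float32")
mean_bn = np.random.rand(c_out).astype("float32")
var_bn = np.random.rand(c_out).astype("float32")

# reference: convolution followed by batch normalization
tmp = scale_bn / np.sqrt(var_bn + 1e-5)
y_ref = (w.dot(x) - mean_bn) * tmp + bias_bn

# fused: scaled filter plus an elementwise-add bias, mirroring how
# dst_param and bias are computed in the transpiler
w_fused = w * tmp.reshape(-1, 1)
b_fused = (0 - mean_bn) * tmp + bias_bn
y_fused = w_fused.dot(x) + b_fused

assert np.allclose(y_ref, y_fused, atol=1e-5)
```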
@@ -226,16 +226,17 @@ def infer(use_cuda, save_dirname=None):
         batch_size = 1
         tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32")

+        # Use inference_transpiler to speedup
+        inference_transpiler_program = inference_program.clone()
+        t = fluid.InferenceTranspiler()
+        t.transpile(inference_transpiler_program, inference_scope, place)
+
         # Construct feed as a dictionary of {feed_target_name: feed_target_data}
         # and results will contain a list of data corresponding to fetch_targets.
         results = exe.run(inference_program,
                           feed={feed_target_names[0]: tensor_img},
                           fetch_list=fetch_targets)

-        # Use inference_transpiler to speedup
-        t = fluid.InferenceTranspiler()
-        inference_transpiler_program = t.transpile(inference_program,
-                                                   inference_scope, place)
-
         transpiler_results = exe.run(inference_transpiler_program,
                                      feed={feed_target_names[0]: tensor_img},
                                      fetch_list=fetch_targets)
......
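The test's actual comparison of results against transpiler_results is elided above. A plausible check, assuming both fetch lists hold numpy arrays; the tolerance here is an assumption, not taken from the test:

```python
# hedged sketch: the transpiled program should reproduce the original
# outputs up to float32 rounding error introduced by the fusion
assert len(results[0]) == len(transpiler_results[0])
for i in range(len(results[0])):
    numpy.testing.assert_almost_equal(
        results[0][i], transpiler_results[0][i], decimal=4)
```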