未验证 提交 2f8c7e02 编写于 作者: T Tao Luo 提交者: GitHub

remove unused inference_transpiler unit-tests (#19130)

* remove unused inference_transpiler unit-tests

test=develop

* remove InferenceTranspiler usage in quantize_transpiler.py

test=develop
上级 708bd979
...@@ -472,7 +472,7 @@ paddle.fluid.contrib.op_freq_statistic (ArgSpec(args=['program'], varargs=None, ...@@ -472,7 +472,7 @@ paddle.fluid.contrib.op_freq_statistic (ArgSpec(args=['program'], varargs=None,
paddle.fluid.contrib.QuantizeTranspiler ('paddle.fluid.contrib.quantize.quantize_transpiler.QuantizeTranspiler', ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.QuantizeTranspiler ('paddle.fluid.contrib.quantize.quantize_transpiler.QuantizeTranspiler', ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size', 'moving_rate'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000, 0.9)), ('document', '14b39f1fcd5667ff556b1aad94357d1d')) paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size', 'moving_rate'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000, 0.9)), ('document', '14b39f1fcd5667ff556b1aad94357d1d'))
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.QuantizeTranspiler.freeze_program (ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)), ('document', '909675a1ab055c69b436a7893fcae4fd')) paddle.fluid.contrib.QuantizeTranspiler.freeze_program (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '909675a1ab055c69b436a7893fcae4fd'))
paddle.fluid.contrib.QuantizeTranspiler.training_transpile (ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6dd9909f10b283ba2892a99058a72884')) paddle.fluid.contrib.QuantizeTranspiler.training_transpile (ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6dd9909f10b283ba2892a99058a72884'))
paddle.fluid.contrib.distributed_batch_reader (ArgSpec(args=['batch_reader'], varargs=None, keywords=None, defaults=None), ('document', 'b60796eb0a481484dd34e345f0eaa4d5')) paddle.fluid.contrib.distributed_batch_reader (ArgSpec(args=['batch_reader'], varargs=None, keywords=None, defaults=None), ('document', 'b60796eb0a481484dd34e345f0eaa4d5'))
paddle.fluid.contrib.Compressor ('paddle.fluid.contrib.slim.core.compressor.Compressor', ('document', 'a5417774a94aa9ae5560a42b96527e7d')) paddle.fluid.contrib.Compressor ('paddle.fluid.contrib.slim.core.compressor.Compressor', ('document', 'a5417774a94aa9ae5560a42b96527e7d'))
......
...@@ -25,7 +25,6 @@ from paddle.fluid.layer_helper import LayerHelper ...@@ -25,7 +25,6 @@ from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.layers.nn import autoincreased_step_counter from paddle.fluid.layers.nn import autoincreased_step_counter
from paddle.fluid.framework import Variable from paddle.fluid.framework import Variable
from paddle.fluid.executor import global_scope from paddle.fluid.executor import global_scope
from paddle.fluid.transpiler.inference_transpiler import InferenceTranspiler
__all__ = ['QuantizeTranspiler'] __all__ = ['QuantizeTranspiler']
...@@ -221,7 +220,7 @@ class QuantizeTranspiler(object): ...@@ -221,7 +220,7 @@ class QuantizeTranspiler(object):
self.activation_quantize_type == 'range_abs_max': self.activation_quantize_type == 'range_abs_max':
self.global_step = autoincreased_step_counter() self.global_step = autoincreased_step_counter()
def freeze_program(self, program, place, fuse_bn=False, scope=None): def freeze_program(self, program, place, scope=None):
"""Freeze input training program for inference. """Freeze input training program for inference.
Args: Args:
...@@ -232,10 +231,6 @@ class QuantizeTranspiler(object): ...@@ -232,10 +231,6 @@ class QuantizeTranspiler(object):
scope = global_scope() if scope is None else scope scope = global_scope() if scope is None else scope
program = default_main_program() if program is None else program program = default_main_program() if program is None else program
if fuse_bn:
bn_fuse_transpiler = BNFuseTranspiler()
bn_fuse_transpiler.transpile(program, place)
persistable_vars = [ persistable_vars = [
v.name v.name
for v in filter(lambda var: var.persistable, program.list_vars()) for v in filter(lambda var: var.persistable, program.list_vars())
...@@ -564,58 +559,3 @@ class QuantizeTranspiler(object): ...@@ -564,58 +559,3 @@ class QuantizeTranspiler(object):
'Scale': scale}, 'Scale': scale},
outputs={"Out": dequant_var}) outputs={"Out": dequant_var})
return dequant_var return dequant_var
class BNFuseTranspiler(InferenceTranspiler):
def _fuse_param(self, current_op, bn_op, bias_op, with_bias):
def _update_param(op, param_name, new_param):
var = self.block.vars[param_name]
tensor = self.scope.find_var(param_name).get_tensor()
tensor.set(np.array(new_param), self.place)
def _load_param(param_name):
return np.array(self.scope.find_var(param_name).get_tensor())
bias_bn = _load_param(bn_op.input("Bias")[0]) #Bias
scale_bn = _load_param(bn_op.input("Scale")[0]) #Scale
mean_bn = _load_param(bn_op.input("Mean")[0]) #Mean
var_bn = _load_param(bn_op.input("Variance")[0]) #Variance
if current_op.type in ['conv2d', 'depthwise_conv2d']:
current_param = _load_param(
_original_var_name(current_op.input("Filter")[0]))
elif current_op.type == 'mul':
current_param = _load_param(
_original_var_name(current_op.input("Y")[0]))
std_bn = np.float32(np.sqrt(np.add(var_bn, 1e-5)))
tmp = np.float32(np.divide(scale_bn, std_bn))
# add bias of batch_norm_op to conv2d
if with_bias:
bias = _load_param(bias_op.input("Y"))
else:
bias = np.zeros(bias_bn.shape)
bias = np.float32(
np.add(np.multiply(np.subtract(bias, mean_bn), tmp), bias_bn))
# re-compute weight of conv2d/fc
tmp = tmp.reshape(tmp.shape[0], -1)
dst_param = current_param.reshape((tmp.shape[0], -1))
dst_param = np.float32(np.multiply(dst_param, tmp))
dst_param = dst_param.reshape(current_param.shape)
# update parameters
if current_op.type in ['conv2d', 'depthwise_conv2d']:
_update_param(current_op,
_original_var_name(current_op.input("Filter")[0]),
dst_param)
elif current_op.type == 'mul':
_update_param(current_op,
_original_var_name(current_op.input("Y")[0]),
dst_param)
_update_param(bias_op, bias_op.input("Y")[0], bias)
# collect the renamed input
self.input_map[bn_op.output("Y")[0]] = bias_op.output("Out")[0]
...@@ -242,31 +242,16 @@ def infer(use_cuda, save_dirname=None): ...@@ -242,31 +242,16 @@ def infer(use_cuda, save_dirname=None):
batch_size = 1 batch_size = 1
tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32") tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32")
# Use inference_transpiler to speedup
inference_transpiler_program = inference_program.clone()
t = fluid.transpiler.InferenceTranspiler()
t.transpile(inference_transpiler_program, place)
# Construct feed as a dictionary of {feed_target_name: feed_target_data} # Construct feed as a dictionary of {feed_target_name: feed_target_data}
# and results will contain a list of data corresponding to fetch_targets. # and results will contain a list of data corresponding to fetch_targets.
results = exe.run(inference_program, results = exe.run(inference_program,
feed={feed_target_names[0]: tensor_img}, feed={feed_target_names[0]: tensor_img},
fetch_list=fetch_targets) fetch_list=fetch_targets)
transpiler_results = exe.run(inference_transpiler_program,
feed={feed_target_names[0]: tensor_img},
fetch_list=fetch_targets)
assert len(results[0]) == len(transpiler_results[0])
for i in range(len(results[0])):
np.testing.assert_almost_equal(
results[0][i], transpiler_results[0][i], decimal=4)
print("infer results: ", results[0]) print("infer results: ", results[0])
fluid.io.save_inference_model(save_dirname, feed_target_names, fluid.io.save_inference_model(save_dirname, feed_target_names,
fetch_targets, exe, fetch_targets, exe, inference_program)
inference_transpiler_program)
def main(net_type, use_cuda, is_local=True): def main(net_type, use_cuda, is_local=True):
......
...@@ -221,31 +221,16 @@ def infer(use_cuda, save_dirname=None): ...@@ -221,31 +221,16 @@ def infer(use_cuda, save_dirname=None):
batch_size = 1 batch_size = 1
tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32") tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32")
# Use inference_transpiler to speedup
inference_transpiler_program = inference_program.clone()
t = fluid.transpiler.InferenceTranspiler()
t.transpile(inference_transpiler_program, place)
# Construct feed as a dictionary of {feed_target_name: feed_target_data} # Construct feed as a dictionary of {feed_target_name: feed_target_data}
# and results will contain a list of data corresponding to fetch_targets. # and results will contain a list of data corresponding to fetch_targets.
results = exe.run(inference_program, results = exe.run(inference_program,
feed={feed_target_names[0]: tensor_img}, feed={feed_target_names[0]: tensor_img},
fetch_list=fetch_targets) fetch_list=fetch_targets)
transpiler_results = exe.run(inference_transpiler_program,
feed={feed_target_names[0]: tensor_img},
fetch_list=fetch_targets)
assert len(results[0]) == len(transpiler_results[0])
for i in range(len(results[0])):
np.testing.assert_almost_equal(
results[0][i], transpiler_results[0][i], decimal=4)
print("infer results: ", results[0]) print("infer results: ", results[0])
fluid.io.save_inference_model(save_dirname, feed_target_names, fluid.io.save_inference_model(save_dirname, feed_target_names,
fetch_targets, exe, fetch_targets, exe, inference_program)
inference_transpiler_program)
def main(net_type, use_cuda, is_local=True): def main(net_type, use_cuda, is_local=True):
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
from __future__ import print_function from __future__ import print_function
from .distribute_transpiler import DistributeTranspiler, DistributeTranspilerConfig from .distribute_transpiler import DistributeTranspiler, DistributeTranspilerConfig
from .inference_transpiler import InferenceTranspiler
from .memory_optimization_transpiler import memory_optimize, release_memory from .memory_optimization_transpiler import memory_optimize, release_memory
from .ps_dispatcher import HashName, RoundRobin from .ps_dispatcher import HashName, RoundRobin
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import sys
import numpy as np
from .. import core
from ..framework import Program
from ..executor import global_scope
class InferenceTranspiler(object):
'''
Convert the fluid program to optimized inference program.
There are several optimizations:
- fuse convolution and batch normalization
- fuse batch normalization and relu (MKLDNN only)
Examples:
.. code-block:: python
# As InferenceTranspiler will modify the original program,
# please clone before use it.
inference_transpiler_program = program.clone()
t = fluid.InferenceTranspiler()
t.transpile(inference_transpiler_program, place)
'''
def transpile(self, program, place, scope=None):
'''
Run the transpiler.
Args:
program (Program): program to transpile
place (Place): inference place
scope (Scope|None): inference Scope
'''
sys.stderr.write("InferenceTranspiler is deprecated since it's not "
"safe. Users should be "
"responsible for constructing the inference program\n")
if not isinstance(program, Program):
raise TypeError("program should be as Program type")
if not isinstance(place, core.CPUPlace) and not isinstance(
place, core.CUDAPlace):
raise TypeError("place should be as CPUPlace/CUDAPlace type")
if scope is None:
scope = global_scope()
if not isinstance(scope, core._Scope):
raise TypeError("scope should be as Scope type or None")
use_mkldnn = bool(os.getenv("FLAGS_use_mkldnn", False))
if use_mkldnn:
self._depthwise_conv_mkldnn(program)
self._fuse_batch_norm(program, place, scope)
if use_mkldnn:
self._fuse_conv_bias_mkldnn(program)
self._fuse_conv_relu_mkldnn(program)
self._fuse_conv_eltwise_mkldnn(program)
self._fuse_conv_relu_mkldnn(
program) # ResNet residual block merging
self._fuse_bn_relu_mkldnn(program)
self._fuse_mul_add_mkldnn(program)
self._is_test_pass(program)
def _is_test_pass(self, program):
'''
Transpile the program setting is_test = true for all layers and
inserts is_test attribute to pooling and activation layers.
As a result some operators might run faster
:param program: program to transpile
:type program: Program
'''
self.block = program.block(0)
i = 0
while i < len(self.block.ops):
current_op = self.block.ops[i]
if current_op.has_attr("is_test"):
current_op._set_attr("is_test", True)
elif current_op.type in [
"pool2d", "sigmoid", "logsigmoid", "softshrink", "exp",
"brelu", "pow", "leaky_relu", "stanh", "relu", "tanh",
"tanh_shrink", "sqrt", "abs", "ceil", "elu", "floor", "cos",
"sin", "round", "reciprocal", "hard_shrink", "hard_sigmoid",
"relu6", "soft_relu", "swish", "thresholded_relu", "log",
"square", "softplus", "softsign"
]:
current_op._set_attr("is_test", True)
i = i + 1
# TODO(luotao): use clone() method to flush the program.desc in force,
# since some large program.desc will not be flushed immediately.
# And a better solution will be considered later.
program = program.clone()
def _depthwise_conv_mkldnn(self, program):
'''
Transpile the program by replacing depthwise_conv2d to conv2d for MKLDNN program.
The result is:
- before:
- any_other_op->depthwise_conv->any_other_op
- after:
- any_other_op->conv->any_other_op
:param program: program to transpile
:type program: Program
'''
self.block = program.block(0)
i = 0
while i < len(self.block.ops):
current_op = self.block.ops[i]
if current_op.type == 'depthwise_conv2d':
current_op.desc.set_type("conv2d")
i = i + 1
# TODO(luotao): use clone() method to flush the program.desc in force,
# since some large program.desc will not be flushed immediately.
# And a better solution will be considered later.
program = program.clone()
def _fuse_conv_eltwise_mkldnn(self, program):
'''
Transpile the program fusing elementwise_add into conv for MKLDNN
program. Elementwise add following convolution OP can be fused by adding
'fuse_residual_connection' attribute to convolution OP and replacing its output
Tensor with second parameter of elementwise_add.
The result of fuse is:
- before:
- conv->elementwise_add->any_other_op
- after:
- conv->any_other_op
:param program: program to transpile
:type program: Program
'''
self.block = program.block(0)
i = 0
while i < len(self.block.ops):
current_op = self.block.ops[i]
if current_op.type in ['conv2d']:
next_op = self.block.ops[i + 1]
if next_op.type == 'elementwise_add':
self._fuse_conv_eltwise(i, current_op, next_op)
self.block._remove_op(i + 1) # Remove old conv
self.block._remove_op(i + 1) # Remove elementwise_add
i = i + 1
self._adjust_input()
self._remove_unused_var()
# TODO(luotao): use clone() method to flush the program.desc in force,
# since some large program.desc will not be flushed immediately.
# And a better solution will be considered later.
program = program.clone()
def _fuse_conv_relu_mkldnn(self, program):
'''
Transpile the program by fused relu activation for MKLDNN program.
Relu activation following convolution OP can be fused by adding
'fuse_relu' attribute to convolution OP.
The result of fuse is:
- before:
- conv->relu->any_other_op
- after:
- conv->any_other_op
:param program: program to transpile
:type program: Program
'''
self.block = program.block(0)
i = 0
while i < len(self.block.ops):
current_op = self.block.ops[i]
if current_op.type in ['conv2d']:
next_op = self.block.ops[i + 1]
if next_op.type == 'relu':
# modify bnorm OP to include relu
current_op._set_attr("fuse_relu", True)
# remove relu OP
self.block._remove_op(i + 1)
i = i + 1
# TODO(luotao): use clone() method to flush the program.desc in force,
# since some large program.desc will not be flushed immediately.
# And a better solution will be considered later.
program = program.clone()
def _fuse_bn_relu_mkldnn(self, program):
'''
Transpile the program by fused relu activation for MKLDNN program.
Relu activation following batch norm OP can be fused by adding
:math:`fuse_with_relu` attribute to batch norm OP.
The result of fuse is:
- before:
- batch_norm->relu->any_other_op
- after:
- batch_norm->any_other_op
:param program: program to transpile
:type program: Program
'''
self.block = program.block(0)
i = 0
while i < len(self.block.ops) - 1:
current_op = self.block.ops[i]
if current_op.type in ['batch_norm']:
next_op = self.block.ops[i + 1]
if next_op.type == 'relu':
# modify bnorm OP to include relu
current_op._set_attr("fuse_with_relu", True)
# remove relu OP
self.block._remove_op(i + 1)
i = i + 1
self._remove_unused_var()
# TODO(luotao): use clone() method to flush the program.desc in force,
# since some large program.desc will not be flushed immediately.
# And a better solution will be considered later.
program = program.clone()
def _fuse_conv_bias_mkldnn(self, program):
'''
Transpile the program by fused convolution and elementwise_add.
Replace conv2d and elementwise_add ops with a new conv2d op
based on an old conv2d op and the :math:`Bias` taken from
elementwise_add.
For input :math:`X`:
- Conv process: :math:`X = input * W`
- Elementwise_add process: :math` X = X + bias`
After fuse into one operation:
.. math::
X = input * W + bias
The operator transformation is:
- before:
- conv->elementwise_add->any_other_op
- after:
- conv->any_other_op
The transpile stages are:
1. Extract bias and output variables from elementwise_add.
2. Extract Input, Weight and attributes from conv op.
3. Create a new convolution op based on extracted params.
4. Remove old conv op.
5. Remove elementwise_add.
5. Remove unused variables.
Args:
program (Program): program to transpile
'''
self.block = program.block(0)
i = 0
while i < len(self.block.ops) - 2:
current_op = self.block.ops[i]
next_op = self.block.ops[i + 1]
# conv2d with bias
if current_op.type in ['conv2d'] and \
next_op.type in ['elementwise_add']:
self._fuse_conv_bias(i, current_op, next_op)
self.block._remove_op(i + 1) # Remove old conv
self.block._remove_op(i + 1) # Remove elementwise_add
i = i + 1
self._remove_unused_var()
# TODO(luotao): use clone() method to flush the program.desc in force,
# since some large program.desc will not be flushed immediately.
# And a better solution will be considered later.
program = program.clone()
def _fuse_batch_norm(self, program, place, scope):
'''
Transpile the program by fused batch normalization.
The batch normalization followed the convolution or fully connected layer
can be integrated with them. Doing so will give us a forward acceleration,
especially in environments like mobile or embedded.
For input :math:`X`:
- Conv process: :math:`X = input * W + bias`
- Batch norm process: :math:`X' = (X - mean) / std`
- Scale Process: :math:`Y = a * X' + b`
After fuse into one operation:
.. math::
Y &= (input * W + bias - mean) / std * a + b \\\\
&= input * a * W / std + ((bias - mean) / std * a + b)
The operator transformation is:
- before:
- conv->batch_norm->any_other_op (bias == 0)
- conv->elementwise_add->batch_norm->any_other_op (bias != 0)
- after:
- conv->elementwise_add->any_other_op
The transpile stages are:
1. insert elementwise_add op when bias == 0.
2. fuse the batch_norm's parameters to conv and elementwise_add operators.
3. remove batch_norm ops which are not used in any other ops.
4. adjust the input of any_other_op to be the output of elementwise_add operator.
5. remove unused variables.
Args:
program (Program): program to transpile
place (Place): inference place
scope (Scope): inference Scope
'''
self.scope = scope
self.place = place
self.block = program.block(0)
self.input_map = {} # store the input names should be adjusted
i = 0
while i < len(self.block.ops) - 2:
current_op = self.block.ops[i]
# TODO(luotao1): consider only conv2d now. fc would be delt later.
if current_op.type in ['conv2d']:
# TODO(luotao1): consider single chain network now.
# For branch network, we counldn't use block.ops[i + 1] as
# the judgment condition.
next_op = self.block.ops[i + 1]
# conv2d without bias
if (next_op.type == 'batch_norm'):
# insert bias op
bias_op = self._insert_bias_op(i + 1, current_op, next_op)
# fuse batch_norm
self._fuse_param(current_op, next_op, bias_op, 0)
# remove batch_norm_op
self.block._remove_op(i + 2)
i = i + 1
# conv2d with bias, the next_op.type is elementwise_add
elif (next_op.type == 'elementwise_add'):
next_next_op = self.block.ops[i + 2]
if (next_next_op.type == 'batch_norm'):
# fuse batch_norm
self._fuse_param(current_op, next_next_op, next_op, 1)
# remove batch_norm_op
self.block._remove_op(i + 2)
i = i + 1
i = i + 1
self._adjust_input()
self._remove_unused_var()
# TODO(luotao): use clone() method to flush the program.desc in force,
# since some large program.desc will not be flushed immediately.
# And a better solution will be considered later.
program = program.clone()
def _fuse_mul_add_mkldnn(self, program):
'''
Transpile the program by fusing Mul+Add layers to FC layer with the MKL-DNN inner product.
The MUL following a Elementwise_add layer can be replaced by the MKL-DNN FC.
The Elementwise add's bias input 'Y' has to be added into the
MKL-DNN-based FC input 'Bias'.
The operator transformation is:
- before:
- MUL->elementwise_add -> any_other_op
- after:
- FC -> any_other_op
The transpile stages are:
1. insert a new MKL-DNN-based FC operator with `Bias` input
taken from the Elementwise add's input 'Y' (bias),
2. fuse the parameters of MUL and Elemenwise add,
3. remove the MUL, elementwise_add operators,
4. make the input of the deleted Elementwise add operator to be the input of the
new FC operator,
5. remove unused variables,
Args:
program (Program): program to transpile
'''
self.block = program.block(0)
self.input_map = {} # store the input names should be adjusted
i = 0
while i < len(self.block.ops):
# find a elementwise add op
if self.block.ops[i].type == 'elementwise_add':
add_op = self.block.ops[i]
add_idx = i
mul_idx = -1
# find the preceding mul op
for j in reversed(range(add_idx)):
if self.block.ops[j].type == 'mul':
mul_out_name = self.block.ops[j].output_arg_names[0]
if self.block.ops[j].output_arg_names[
0] in add_op.input_arg_names:
mul_op = self.block.ops[j]
mul_idx = j
break
if mul_idx < 0:
i += 1
continue
# create and insert a new fc op
fc_op_new = self._insert_fc_op(add_idx + 1, mul_op, add_op)
# remove the old operators
self.block._remove_op(add_idx)
self.block._remove_op(mul_idx)
# restart scanning for elementwise add from the deleted mul's index
i = mul_idx
i += 1
self._adjust_input()
self._remove_unused_var()
program = program.clone()
# ====================== private transpiler functions =====================
def _insert_bias_op(self, index, current_op, bn_op):
'''
Construct elementwise_add operator for adding bias
and insert it into program.
:param index: insert location of bias_op
:type index: Int
:param current_op: current operator (conv or fc)
:type current_op: Operator
:param bn_op: batch norm operator
:type bn_op: Operator
:return: bias_op
:rtype: Operator
'''
# The input of bias_op is current_op's output and Bias of bn_op
# The output of bias_op is bn_op's output
x_var = self.block.var(current_op.output("Output")[0])
y_var = self.block.var(bn_op.input("Bias")[0])
out_var = self.block.var(bn_op.output("Y")[0])
bias_op = self.block._insert_op(
index,
type="elementwise_add",
inputs={"X": x_var,
"Y": y_var},
outputs={"Out": out_var},
attrs={"axis": 1}) # dim_start=1
return bias_op
def _fuse_param(self, current_op, bn_op, bias_op, with_bias):
'''
fuse the batch_norm_op' parameters to current_op (conv or fc)
:param current_op: current operator (conv or fc)
:type current_op: Operator
:param bn_op: batch norm operator
:type bn_op: Operator
:param bias_op: elementwise_add operator for adding bias
:type bias_op: Operator
:param with_bias: If current operator has bias, with_bias = 1; otherwise 0.
:type with_bias: Int
'''
def _update_param(op, old_param_name, new_param):
# For the sake of remaining the original variables the same as before,
# create new variables in scope to store the new parameters.
old_param_name = old_param_name[0]
old_var = self.block.vars[old_param_name]
new_param_name = old_param_name + '_fuse_bn'
new_var = self.block.create_parameter(
name=new_param_name.encode('ascii'),
type=old_var.type,
dtype=old_var.dtype,
shape=old_var.shape)
op._rename_input(old_param_name, new_param_name)
self.scope.var(new_param_name)
tensor = self.scope.find_var(new_param_name).get_tensor()
tensor.set(np.array(new_param), self.place)
def _load_param(param_name):
return np.array(self.scope.find_var(param_name[0]).get_tensor())
bias_bn = _load_param(bn_op.input("Bias")) #Bias
scale_bn = _load_param(bn_op.input("Scale")) #Scale
mean_bn = _load_param(bn_op.input("Mean")) #Mean
var_bn = _load_param(bn_op.input("Variance")) #Variance
# TODO(luotao1): consider only conv2d now. fc would be delt later.
current_param = _load_param(current_op.input("Filter"))
std_bn = np.float32(np.sqrt(np.add(var_bn, 1e-5)))
tmp = np.float32(np.divide(scale_bn, std_bn))
# add bias of batch_norm_op to conv2d
if with_bias:
bias = _load_param(bias_op.input("Y"))
else:
bias = np.zeros(bias_bn.shape)
bias = np.float32(
np.add(np.multiply(np.subtract(bias, mean_bn), tmp), bias_bn))
# re-compute weight of conv2d
tmp = tmp.reshape(tmp.shape[0], -1)
dst_param = current_param.reshape((tmp.shape[0], -1))
dst_param = np.float32(np.multiply(dst_param, tmp))
dst_param = dst_param.reshape(current_param.shape)
# update parameters
_update_param(current_op, current_op.input("Filter"), dst_param)
_update_param(bias_op, bias_op.input("Y"), bias)
# collect the renamed input
self.input_map[bn_op.output("Y")[0]] = bias_op.output("Out")[0]
def _fuse_conv_bias(self, index, conv_op, elementwise_add_op):
'''
fuse the conv op with elementwise_add
:param index: index of the conv_op in ops list
:type index: Int
:param conv_op: convolution operator
:type conv_op: Operator
:param elementwise_add_op: convolution's bias operator
:type elementwise_add_op: Operator
'''
bias_var = self.block.var(elementwise_add_op.input("Y")[0])
out_var = self.block.var(elementwise_add_op.output("Out")[0])
filter_var = self.block.var(conv_op.input("Filter")[0])
in_var = self.block.var(conv_op.input("Input")[0])
attrs = {name: conv_op.attr(name) for name in conv_op.attr_names}
self.block._insert_op(
index,
type="conv2d",
inputs={"Input": in_var,
"Filter": filter_var,
"Bias": bias_var},
outputs={"Output": out_var},
attrs=attrs)
def _insert_fc_op(self, index, mul_op, add_op):
'''
Construct a new FC operator by copying the old Mul and adding the
'Y' input taken from the Elementwise add's input 'Y'.
:param index: insert location of FC
:type index: Int
:param mul_op: MUL operator to be copied
:type mul_op: Operator
:param add_op: Elementwise add operator taken bias from
:type add_op: Operator
:return: fc_op_new
:type: Operator
'''
def get_op_outputs(op, names):
result = {}
for name in names:
result[name] = self.block.var(op.output(name)[0])
return result
fc_inputs = {}
fc_inputs['Input'] = self.block.var(mul_op.input('X')[0])
fc_inputs['W'] = self.block.var(mul_op.input('Y')[0])
fc_inputs['Bias'] = self.block.var(add_op.input('Y')[0])
fc_outputs = get_op_outputs(add_op, ['Out'])
fc_attrs = {}
fc_attrs['use_mkldnn'] = True
fc_op_new = self.block._insert_op(
index,
type='fc',
inputs=fc_inputs,
outputs=fc_outputs,
attrs=fc_attrs)
return fc_op_new
def _fuse_conv_eltwise(self, index, conv_op, eltwise_op):
'''
fuse the conv op with elementwise_add
:param conv_op: convolution operator
:type conv_op: Operator
:param eltwise_op: operator adding data from skip connection
:type eltwise_op: Operator
'''
eltwise_input = "X"
if eltwise_op.input("X")[0] == conv_op.output("Output")[0]:
eltwise_input = "Y"
residual_var = self.block.vars[eltwise_op.input(eltwise_input)[0]]
out_var = self.block.vars[eltwise_op.output("Out")[0]]
filter_var = self.block.vars[conv_op.input("Filter")[0]]
in_var = self.block.vars[conv_op.input("Input")[0]]
bias_var = self.block.vars[conv_op.input("Bias")[0]]
conv_op._set_attr("fuse_residual_connection", True)
attrs = {name: conv_op.attr(name) for name in conv_op.attr_names}
self.block._insert_op(
index,
type="conv2d",
inputs={
"Input": in_var,
"Filter": filter_var,
"Bias": bias_var,
"ResidualData": residual_var
},
outputs={"Output": out_var},
attrs=attrs)
def _adjust_input(self):
for i in range(len(self.block.ops)):
current_op = self.block.ops[i]
for input_arg in current_op.input_arg_names:
if input_arg in self.input_map:
current_op._rename_input(input_arg,
self.input_map[input_arg])
def _remove_unused_var(self):
'''
remove unused varibles in program
'''
args = []
for i in range(len(self.block.ops)):
current_op = self.block.ops[i]
args += current_op.input_arg_names
args += current_op.output_arg_names
args = list(set(args)) # unique the input and output arguments
for var in list(self.block.vars.keys()):
if var not in args:
self.block._remove_var(var)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册