diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index d14167bdf7603187d058b242c138d4446b644e30..ecc659152169182b002e8b7af60ae78fc9f7854d 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -472,7 +472,7 @@ paddle.fluid.contrib.op_freq_statistic (ArgSpec(args=['program'], varargs=None,
 paddle.fluid.contrib.QuantizeTranspiler ('paddle.fluid.contrib.quantize.quantize_transpiler.QuantizeTranspiler', ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size', 'moving_rate'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000, 0.9)), ('document', '14b39f1fcd5667ff556b1aad94357d1d'))
 paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.contrib.QuantizeTranspiler.freeze_program (ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)), ('document', '909675a1ab055c69b436a7893fcae4fd'))
+paddle.fluid.contrib.QuantizeTranspiler.freeze_program (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '909675a1ab055c69b436a7893fcae4fd'))
 paddle.fluid.contrib.QuantizeTranspiler.training_transpile (ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6dd9909f10b283ba2892a99058a72884'))
 paddle.fluid.contrib.distributed_batch_reader (ArgSpec(args=['batch_reader'], varargs=None, keywords=None, defaults=None), ('document', 'b60796eb0a481484dd34e345f0eaa4d5'))
 paddle.fluid.contrib.Compressor ('paddle.fluid.contrib.slim.core.compressor.Compressor', ('document', 'a5417774a94aa9ae5560a42b96527e7d'))
diff --git a/python/paddle/fluid/contrib/quantize/quantize_transpiler.py b/python/paddle/fluid/contrib/quantize/quantize_transpiler.py
index 8eddf18cece50fd7bc6db31294d078fe6a5b95cd..471a796eb3e0a75a1fa0a9eb28499c9b168a3ee3 100644
--- a/python/paddle/fluid/contrib/quantize/quantize_transpiler.py
+++ b/python/paddle/fluid/contrib/quantize/quantize_transpiler.py
@@ -25,7 +25,6 @@ from paddle.fluid.layer_helper import LayerHelper
 from paddle.fluid.layers.nn import autoincreased_step_counter
 from paddle.fluid.framework import Variable
 from paddle.fluid.executor import global_scope
-from paddle.fluid.transpiler.inference_transpiler import InferenceTranspiler
 
 __all__ = ['QuantizeTranspiler']
 
@@ -221,7 +220,7 @@ class QuantizeTranspiler(object):
             self.activation_quantize_type == 'range_abs_max':
             self.global_step = autoincreased_step_counter()
 
-    def freeze_program(self, program, place, fuse_bn=False, scope=None):
+    def freeze_program(self, program, place, scope=None):
         """Freeze input training program for inference.
 
         Args:
@@ -232,10 +231,6 @@ class QuantizeTranspiler(object):
         scope = global_scope() if scope is None else scope
         program = default_main_program() if program is None else program
 
-        if fuse_bn:
-            bn_fuse_transpiler = BNFuseTranspiler()
-            bn_fuse_transpiler.transpile(program, place)
-
         persistable_vars = [
             v.name
             for v in filter(lambda var: var.persistable, program.list_vars())
@@ -564,58 +559,3 @@ class QuantizeTranspiler(object):
                     'Scale': scale},
             outputs={"Out": dequant_var})
         return dequant_var
-
-
-class BNFuseTranspiler(InferenceTranspiler):
-    def _fuse_param(self, current_op, bn_op, bias_op, with_bias):
-        def _update_param(op, param_name, new_param):
-            var = self.block.vars[param_name]
-            tensor = self.scope.find_var(param_name).get_tensor()
-            tensor.set(np.array(new_param), self.place)
-
-        def _load_param(param_name):
-            return np.array(self.scope.find_var(param_name).get_tensor())
-
-        bias_bn = _load_param(bn_op.input("Bias")[0])  #Bias
-        scale_bn = _load_param(bn_op.input("Scale")[0])  #Scale
-        mean_bn = _load_param(bn_op.input("Mean")[0])  #Mean
-        var_bn = _load_param(bn_op.input("Variance")[0])  #Variance
-
-        if current_op.type in ['conv2d', 'depthwise_conv2d']:
-            current_param = _load_param(
-                _original_var_name(current_op.input("Filter")[0]))
-        elif current_op.type == 'mul':
-            current_param = _load_param(
-                _original_var_name(current_op.input("Y")[0]))
-
-        std_bn = np.float32(np.sqrt(np.add(var_bn, 1e-5)))
-        tmp = np.float32(np.divide(scale_bn, std_bn))
-
-        # add bias of batch_norm_op to conv2d
-        if with_bias:
-            bias = _load_param(bias_op.input("Y"))
-        else:
-            bias = np.zeros(bias_bn.shape)
-        bias = np.float32(
-            np.add(np.multiply(np.subtract(bias, mean_bn), tmp), bias_bn))
-
-        # re-compute weight of conv2d/fc
-        tmp = tmp.reshape(tmp.shape[0], -1)
-        dst_param = current_param.reshape((tmp.shape[0], -1))
-        dst_param = np.float32(np.multiply(dst_param, tmp))
-        dst_param = dst_param.reshape(current_param.shape)
-
-        # update parameters
-        if current_op.type in ['conv2d', 'depthwise_conv2d']:
-            _update_param(current_op,
-                          _original_var_name(current_op.input("Filter")[0]),
-                          dst_param)
-        elif current_op.type == 'mul':
-            _update_param(current_op,
-                          _original_var_name(current_op.input("Y")[0]),
-                          dst_param)
-
-        _update_param(bias_op, bias_op.input("Y")[0], bias)
-
-        # collect the renamed input
-        self.input_map[bn_op.output("Y")[0]] = bias_op.output("Out")[0]
diff --git a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py
index a2e700803dcf3a2da5b7f1e15b68fb8b274a939a..bde77b3d316b555d82a76d32a9f8b2f0724d203e 100644
--- a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py
+++ b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py
@@ -242,31 +242,16 @@ def infer(use_cuda, save_dirname=None):
         batch_size = 1
         tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32")
 
-        # Use inference_transpiler to speedup
-        inference_transpiler_program = inference_program.clone()
-        t = fluid.transpiler.InferenceTranspiler()
-        t.transpile(inference_transpiler_program, place)
-
         # Construct feed as a dictionary of {feed_target_name: feed_target_data}
         # and results will contain a list of data corresponding to fetch_targets.
         results = exe.run(inference_program,
                           feed={feed_target_names[0]: tensor_img},
                           fetch_list=fetch_targets)
 
-        transpiler_results = exe.run(inference_transpiler_program,
-                                     feed={feed_target_names[0]: tensor_img},
-                                     fetch_list=fetch_targets)
-
-        assert len(results[0]) == len(transpiler_results[0])
-        for i in range(len(results[0])):
-            np.testing.assert_almost_equal(
-                results[0][i], transpiler_results[0][i], decimal=4)
-
         print("infer results: ", results[0])
 
         fluid.io.save_inference_model(save_dirname, feed_target_names,
-                                      fetch_targets, exe,
-                                      inference_transpiler_program)
+                                      fetch_targets, exe, inference_program)
 
 
 def main(net_type, use_cuda, is_local=True):
diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py
index c91bd27895dcd8bf04cb62d7521f0fa9496fe7d9..95d71d72c156484eddc4eaf26aaa61bb5a93b1b1 100644
--- a/python/paddle/fluid/tests/book/test_image_classification.py
+++ b/python/paddle/fluid/tests/book/test_image_classification.py
@@ -221,31 +221,16 @@ def infer(use_cuda, save_dirname=None):
         batch_size = 1
         tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32")
 
-        # Use inference_transpiler to speedup
-        inference_transpiler_program = inference_program.clone()
-        t = fluid.transpiler.InferenceTranspiler()
-        t.transpile(inference_transpiler_program, place)
-
         # Construct feed as a dictionary of {feed_target_name: feed_target_data}
         # and results will contain a list of data corresponding to fetch_targets.
         results = exe.run(inference_program,
                           feed={feed_target_names[0]: tensor_img},
                           fetch_list=fetch_targets)
 
-        transpiler_results = exe.run(inference_transpiler_program,
-                                     feed={feed_target_names[0]: tensor_img},
-                                     fetch_list=fetch_targets)
-
-        assert len(results[0]) == len(transpiler_results[0])
-        for i in range(len(results[0])):
-            np.testing.assert_almost_equal(
-                results[0][i], transpiler_results[0][i], decimal=4)
-
         print("infer results: ", results[0])
 
         fluid.io.save_inference_model(save_dirname, feed_target_names,
-                                      fetch_targets, exe,
-                                      inference_transpiler_program)
+                                      fetch_targets, exe, inference_program)
 
 
 def main(net_type, use_cuda, is_local=True):
diff --git a/python/paddle/fluid/transpiler/__init__.py b/python/paddle/fluid/transpiler/__init__.py
index c9a8176a72fb744963ae466e965a25bdfb0a44de..c5d2502ddbb4afa1dba1f97e8867174469382abe 100644
--- a/python/paddle/fluid/transpiler/__init__.py
+++ b/python/paddle/fluid/transpiler/__init__.py
@@ -15,7 +15,6 @@
 from __future__ import print_function
 
 from .distribute_transpiler import DistributeTranspiler, DistributeTranspilerConfig
-from .inference_transpiler import InferenceTranspiler
 from .memory_optimization_transpiler import memory_optimize, release_memory
 from .ps_dispatcher import HashName, RoundRobin
 
diff --git a/python/paddle/fluid/transpiler/inference_transpiler.py b/python/paddle/fluid/transpiler/inference_transpiler.py
deleted file mode 100644
index 8917fb75128f5a9fb6f40f4a6520223693840573..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/transpiler/inference_transpiler.py
+++ /dev/null
@@ -1,661 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-
-import os
-import sys
-import numpy as np
-from .. import core
-from ..framework import Program
-from ..executor import global_scope
-
-
-class InferenceTranspiler(object):
-    '''
-    Convert the fluid program to an optimized inference program.
-
-    There are several optimizations:
-
-    - fuse convolution and batch normalization
-    - fuse batch normalization and relu (MKLDNN only)
-
-    Examples:
-
-    .. code-block:: python
-
-        # As InferenceTranspiler will modify the original program,
-        # please clone it before use.
-        inference_transpiler_program = program.clone()
-        t = fluid.InferenceTranspiler()
-        t.transpile(inference_transpiler_program, place)
-    '''
-
-    def transpile(self, program, place, scope=None):
-        '''
-        Run the transpiler.
-
-        Args:
-            program (Program): program to transpile
-            place (Place): inference place
-            scope (Scope|None): inference Scope
-        '''
-        sys.stderr.write("InferenceTranspiler is deprecated since it's not "
-                         "safe. Users should be "
-                         "responsible for constructing the inference program\n")
-        if not isinstance(program, Program):
-            raise TypeError("program should be as Program type")
-        if not isinstance(place, core.CPUPlace) and not isinstance(
-                place, core.CUDAPlace):
-            raise TypeError("place should be as CPUPlace/CUDAPlace type")
-        if scope is None:
-            scope = global_scope()
-        if not isinstance(scope, core._Scope):
-            raise TypeError("scope should be as Scope type or None")
-        use_mkldnn = bool(os.getenv("FLAGS_use_mkldnn", False))
-
-        if use_mkldnn:
-            self._depthwise_conv_mkldnn(program)
-
-        self._fuse_batch_norm(program, place, scope)
-        if use_mkldnn:
-            self._fuse_conv_bias_mkldnn(program)
-            self._fuse_conv_relu_mkldnn(program)
-            self._fuse_conv_eltwise_mkldnn(program)
-            self._fuse_conv_relu_mkldnn(
-                program)  # ResNet residual block merging
-            self._fuse_bn_relu_mkldnn(program)
-            self._fuse_mul_add_mkldnn(program)
-
-        self._is_test_pass(program)
-
-    def _is_test_pass(self, program):
-        '''
-        Transpile the program, setting is_test = true for all layers and
-        inserting the is_test attribute into pooling and activation layers.
-        As a result some operators might run faster.
-        :param program: program to transpile
-        :type program: Program
-        '''
-        self.block = program.block(0)
-
-        i = 0
-        while i < len(self.block.ops):
-            current_op = self.block.ops[i]
-            if current_op.has_attr("is_test"):
-                current_op._set_attr("is_test", True)
-            elif current_op.type in [
-                    "pool2d", "sigmoid", "logsigmoid", "softshrink", "exp",
-                    "brelu", "pow", "leaky_relu", "stanh", "relu", "tanh",
-                    "tanh_shrink", "sqrt", "abs", "ceil", "elu", "floor", "cos",
-                    "sin", "round", "reciprocal", "hard_shrink", "hard_sigmoid",
-                    "relu6", "soft_relu", "swish", "thresholded_relu", "log",
-                    "square", "softplus", "softsign"
-            ]:
-                current_op._set_attr("is_test", True)
-            i = i + 1
-        # TODO(luotao): use clone() method to flush the program.desc in force,
-        # since some large program.desc will not be flushed immediately.
-        # And a better solution will be considered later.
-        program = program.clone()
-
-    def _depthwise_conv_mkldnn(self, program):
-        '''
-        Transpile the program by replacing depthwise_conv2d with conv2d for MKLDNN program.
-        The result is:
-        - before:
-          - any_other_op->depthwise_conv->any_other_op
-        - after:
-          - any_other_op->conv->any_other_op
-        :param program: program to transpile
-        :type program: Program
-        '''
-        self.block = program.block(0)
-
-        i = 0
-        while i < len(self.block.ops):
-            current_op = self.block.ops[i]
-            if current_op.type == 'depthwise_conv2d':
-                current_op.desc.set_type("conv2d")
-            i = i + 1
-
-        # TODO(luotao): use clone() method to flush the program.desc in force,
-        # since some large program.desc will not be flushed immediately.
-        # And a better solution will be considered later.
-        program = program.clone()
-
-    def _fuse_conv_eltwise_mkldnn(self, program):
-        '''
-        Transpile the program by fusing elementwise_add into conv for MKLDNN
-        program. Elementwise add following convolution OP can be fused by adding
-        'fuse_residual_connection' attribute to convolution OP and replacing its output
-        Tensor with the second parameter of elementwise_add.
-        The result of fuse is:
-        - before:
-          - conv->elementwise_add->any_other_op
-        - after:
-          - conv->any_other_op
-        :param program: program to transpile
-        :type program: Program
-        '''
-        self.block = program.block(0)
-
-        i = 0
-        while i < len(self.block.ops):
-            current_op = self.block.ops[i]
-            if current_op.type in ['conv2d']:
-                next_op = self.block.ops[i + 1]
-                if next_op.type == 'elementwise_add':
-                    self._fuse_conv_eltwise(i, current_op, next_op)
-                    self.block._remove_op(i + 1)  # Remove old conv
-                    self.block._remove_op(i + 1)  # Remove elementwise_add
-            i = i + 1
-        self._adjust_input()
-        self._remove_unused_var()
-        # TODO(luotao): use clone() method to flush the program.desc in force,
-        # since some large program.desc will not be flushed immediately.
-        # And a better solution will be considered later.
-        program = program.clone()
-
-    def _fuse_conv_relu_mkldnn(self, program):
-        '''
-        Transpile the program by fusing relu activation for MKLDNN program.
-        Relu activation following convolution OP can be fused by adding
-        'fuse_relu' attribute to convolution OP.
-        The result of fuse is:
-        - before:
-          - conv->relu->any_other_op
-        - after:
-          - conv->any_other_op
-        :param program: program to transpile
-        :type program: Program
-        '''
-        self.block = program.block(0)
-
-        i = 0
-        while i < len(self.block.ops):
-            current_op = self.block.ops[i]
-            if current_op.type in ['conv2d']:
-                next_op = self.block.ops[i + 1]
-                if next_op.type == 'relu':
-                    # modify conv OP to include relu
-                    current_op._set_attr("fuse_relu", True)
-                    # remove relu OP
-                    self.block._remove_op(i + 1)
-            i = i + 1
-
-        # TODO(luotao): use clone() method to flush the program.desc in force,
-        # since some large program.desc will not be flushed immediately.
-        # And a better solution will be considered later.
-        program = program.clone()
-
-    def _fuse_bn_relu_mkldnn(self, program):
-        '''
-        Transpile the program by fusing relu activation for MKLDNN program.
-
-        Relu activation following batch norm OP can be fused by adding
-        :math:`fuse_with_relu` attribute to batch norm OP.
-
-        The result of fuse is:
-
-        - before:
-
-          - batch_norm->relu->any_other_op
-
-        - after:
-
-          - batch_norm->any_other_op
-
-        :param program: program to transpile
-        :type program: Program
-        '''
-        self.block = program.block(0)
-
-        i = 0
-        while i < len(self.block.ops) - 1:
-            current_op = self.block.ops[i]
-            if current_op.type in ['batch_norm']:
-                next_op = self.block.ops[i + 1]
-                if next_op.type == 'relu':
-                    # modify bnorm OP to include relu
-                    current_op._set_attr("fuse_with_relu", True)
-                    # remove relu OP
-                    self.block._remove_op(i + 1)
-            i = i + 1
-
-        self._remove_unused_var()
-        # TODO(luotao): use clone() method to flush the program.desc in force,
-        # since some large program.desc will not be flushed immediately.
-        # And a better solution will be considered later.
-        program = program.clone()
-
-    def _fuse_conv_bias_mkldnn(self, program):
-        '''
-        Transpile the program by fusing convolution and elementwise_add.
-
-        Replace conv2d and elementwise_add ops with a new conv2d op
-        based on an old conv2d op and the :math:`Bias` taken from
-        elementwise_add.
-
-        For input :math:`X`:
-
-        - Conv process:            :math:`X = input * W`
-        - Elementwise_add process: :math:`X = X + bias`
-
-        After fusing into one operation:
-
-        .. math::
-
-            X = input * W + bias
-
-        The operator transformation is:
-
-        - before:
-
-          - conv->elementwise_add->any_other_op
-
-        - after:
-
-          - conv->any_other_op
-
-        The transpile stages are:
-
-        1. Extract bias and output variables from elementwise_add.
-        2. Extract Input, Weight and attributes from conv op.
-        3. Create a new convolution op based on extracted params.
-        4. Remove old conv op.
-        5. Remove elementwise_add.
-        6. Remove unused variables.
-
-        Args:
-            program (Program): program to transpile
-
-        '''
-        self.block = program.block(0)
-
-        i = 0
-        while i < len(self.block.ops) - 2:
-            current_op = self.block.ops[i]
-            next_op = self.block.ops[i + 1]
-            # conv2d with bias
-            if current_op.type in ['conv2d'] and \
-                    next_op.type in ['elementwise_add']:
-                self._fuse_conv_bias(i, current_op, next_op)
-                self.block._remove_op(i + 1)  # Remove old conv
-                self.block._remove_op(i + 1)  # Remove elementwise_add
-            i = i + 1
-
-        self._remove_unused_var()
-        # TODO(luotao): use clone() method to flush the program.desc in force,
-        # since some large program.desc will not be flushed immediately.
-        # And a better solution will be considered later.
-        program = program.clone()
-
-    def _fuse_batch_norm(self, program, place, scope):
-        '''
-        Transpile the program by fusing batch normalization.
-
-        The batch normalization following the convolution or fully connected layer
-        can be integrated with them. Doing so will give us a forward acceleration,
-        especially in environments like mobile or embedded.
-
-        For input :math:`X`:
-
-        - Conv process:        :math:`X = input * W + bias`
-        - Batch norm process:  :math:`X' = (X - mean) / std`
-        - Scale Process:       :math:`Y = a * X' + b`
-
-        After fusing into one operation:
-
-        .. math::
-
-            Y &= (input * W + bias - mean) / std * a + b \\\\
-              &= input * a * W / std + ((bias - mean) / std * a + b)
-
-        The operator transformation is:
-
-        - before:
-
-          - conv->batch_norm->any_other_op (bias == 0)
-          - conv->elementwise_add->batch_norm->any_other_op (bias != 0)
-
-        - after:
-
-          - conv->elementwise_add->any_other_op
-
-        The transpile stages are:
-
-        1. insert elementwise_add op when bias == 0.
-        2. fuse the batch_norm's parameters to conv and elementwise_add operators.
-        3. remove batch_norm ops which are not used in any other ops.
-        4. adjust the input of any_other_op to be the output of elementwise_add operator.
-        5. remove unused variables.
-
-        Args:
-            program (Program): program to transpile
-            place (Place): inference place
-            scope (Scope): inference Scope
-
-        '''
-        self.scope = scope
-        self.place = place
-        self.block = program.block(0)
-        self.input_map = {}  # store the input names that should be adjusted
-
-        i = 0
-        while i < len(self.block.ops) - 2:
-            current_op = self.block.ops[i]
-            # TODO(luotao1): consider only conv2d now. fc would be dealt with later.
-            if current_op.type in ['conv2d']:
-                # TODO(luotao1): consider single chain network now.
-                # For branch network, we couldn't use block.ops[i + 1] as
-                # the judgment condition.
-                next_op = self.block.ops[i + 1]
-                # conv2d without bias
-                if (next_op.type == 'batch_norm'):
-                    # insert bias op
-                    bias_op = self._insert_bias_op(i + 1, current_op, next_op)
-                    # fuse batch_norm
-                    self._fuse_param(current_op, next_op, bias_op, 0)
-                    # remove batch_norm_op
-                    self.block._remove_op(i + 2)
-                    i = i + 1
-                # conv2d with bias, the next_op.type is elementwise_add
-                elif (next_op.type == 'elementwise_add'):
-                    next_next_op = self.block.ops[i + 2]
-                    if (next_next_op.type == 'batch_norm'):
-                        # fuse batch_norm
-                        self._fuse_param(current_op, next_next_op, next_op, 1)
-                        # remove batch_norm_op
-                        self.block._remove_op(i + 2)
-                        i = i + 1
-            i = i + 1
-        self._adjust_input()
-        self._remove_unused_var()
-        # TODO(luotao): use clone() method to flush the program.desc in force,
-        # since some large program.desc will not be flushed immediately.
-        # And a better solution will be considered later.
-        program = program.clone()
-
-    def _fuse_mul_add_mkldnn(self, program):
-        '''
-        Transpile the program by fusing Mul+Add layers to FC layer with the MKL-DNN inner product.
-        A MUL followed by an Elementwise_add layer can be replaced by the MKL-DNN FC.
-        The Elementwise add's bias input 'Y' has to be added into the
-        MKL-DNN-based FC input 'Bias'.
-        The operator transformation is:
-        - before:
-          - MUL->elementwise_add -> any_other_op
-        - after:
-          - FC -> any_other_op
-        The transpile stages are:
-        1. insert a new MKL-DNN-based FC operator with `Bias` input
-           taken from the Elementwise add's input 'Y' (bias),
-        2. fuse the parameters of MUL and Elementwise add,
-        3. remove the MUL, elementwise_add operators,
-        4. make the input of the deleted Elementwise add operator to be the input of the
-           new FC operator,
-        5. remove unused variables,
-        Args:
-            program (Program): program to transpile
-        '''
-
-        self.block = program.block(0)
-        self.input_map = {}  # store the input names that should be adjusted
-        i = 0
-        while i < len(self.block.ops):
-            # find an elementwise add op
-            if self.block.ops[i].type == 'elementwise_add':
-                add_op = self.block.ops[i]
-                add_idx = i
-                mul_idx = -1
-                # find the preceding mul op
-                for j in reversed(range(add_idx)):
-                    if self.block.ops[j].type == 'mul':
-                        mul_out_name = self.block.ops[j].output_arg_names[0]
-                        if self.block.ops[j].output_arg_names[
-                                0] in add_op.input_arg_names:
-                            mul_op = self.block.ops[j]
-                            mul_idx = j
-                            break
-                if mul_idx < 0:
-                    i += 1
-                    continue
-                # create and insert a new fc op
-                fc_op_new = self._insert_fc_op(add_idx + 1, mul_op, add_op)
-                # remove the old operators
-                self.block._remove_op(add_idx)
-                self.block._remove_op(mul_idx)
-                # restart scanning for elementwise add from the deleted mul's index
-                i = mul_idx
-            i += 1
-        self._adjust_input()
-        self._remove_unused_var()
-        program = program.clone()
-
-    # ====================== private transpiler functions =====================
-    def _insert_bias_op(self, index, current_op, bn_op):
-        '''
-        Construct elementwise_add operator for adding bias
-        and insert it into program.
-
-        :param index: insert location of bias_op
-        :type index: Int
-        :param current_op: current operator (conv or fc)
-        :type current_op: Operator
-        :param bn_op: batch norm operator
-        :type bn_op: Operator
-        :return: bias_op
-        :rtype: Operator
-        '''
-        # The input of bias_op is current_op's output and Bias of bn_op
-        # The output of bias_op is bn_op's output
-        x_var = self.block.var(current_op.output("Output")[0])
-        y_var = self.block.var(bn_op.input("Bias")[0])
-        out_var = self.block.var(bn_op.output("Y")[0])
-
-        bias_op = self.block._insert_op(
-            index,
-            type="elementwise_add",
-            inputs={"X": x_var,
-                    "Y": y_var},
-            outputs={"Out": out_var},
-            attrs={"axis": 1})  # dim_start=1
-        return bias_op
-
-    def _fuse_param(self, current_op, bn_op, bias_op, with_bias):
-        '''
-        fuse the batch_norm op's parameters to current_op (conv or fc)
-
-        :param current_op: current operator (conv or fc)
-        :type current_op: Operator
-        :param bn_op: batch norm operator
-        :type bn_op: Operator
-        :param bias_op: elementwise_add operator for adding bias
-        :type bias_op: Operator
-        :param with_bias: If current operator has bias, with_bias = 1; otherwise 0.
-        :type with_bias: Int
-        '''
-
-        def _update_param(op, old_param_name, new_param):
-            # For the sake of keeping the original variables the same as before,
-            # create new variables in scope to store the new parameters.
-            old_param_name = old_param_name[0]
-            old_var = self.block.vars[old_param_name]
-            new_param_name = old_param_name + '_fuse_bn'
-            new_var = self.block.create_parameter(
-                name=new_param_name.encode('ascii'),
-                type=old_var.type,
-                dtype=old_var.dtype,
-                shape=old_var.shape)
-            op._rename_input(old_param_name, new_param_name)
-            self.scope.var(new_param_name)
-
-            tensor = self.scope.find_var(new_param_name).get_tensor()
-            tensor.set(np.array(new_param), self.place)
-
-        def _load_param(param_name):
-            return np.array(self.scope.find_var(param_name[0]).get_tensor())
-
-        bias_bn = _load_param(bn_op.input("Bias"))  #Bias
-        scale_bn = _load_param(bn_op.input("Scale"))  #Scale
-        mean_bn = _load_param(bn_op.input("Mean"))  #Mean
-        var_bn = _load_param(bn_op.input("Variance"))  #Variance
-
-        # TODO(luotao1): consider only conv2d now. fc would be dealt with later.
-        current_param = _load_param(current_op.input("Filter"))
-        std_bn = np.float32(np.sqrt(np.add(var_bn, 1e-5)))
-        tmp = np.float32(np.divide(scale_bn, std_bn))
-
-        # add bias of batch_norm_op to conv2d
-        if with_bias:
-            bias = _load_param(bias_op.input("Y"))
-        else:
-            bias = np.zeros(bias_bn.shape)
-        bias = np.float32(
-            np.add(np.multiply(np.subtract(bias, mean_bn), tmp), bias_bn))
-
-        # re-compute weight of conv2d
-        tmp = tmp.reshape(tmp.shape[0], -1)
-        dst_param = current_param.reshape((tmp.shape[0], -1))
-        dst_param = np.float32(np.multiply(dst_param, tmp))
-        dst_param = dst_param.reshape(current_param.shape)
-
-        # update parameters
-        _update_param(current_op, current_op.input("Filter"), dst_param)
-        _update_param(bias_op, bias_op.input("Y"), bias)
-
-        # collect the renamed input
-        self.input_map[bn_op.output("Y")[0]] = bias_op.output("Out")[0]
-
-    def _fuse_conv_bias(self, index, conv_op, elementwise_add_op):
-        '''
-        fuse the conv op with elementwise_add
-
-        :param index: index of the conv_op in ops list
-        :type index: Int
-        :param conv_op: convolution operator
-        :type conv_op: Operator
-        :param elementwise_add_op: convolution's bias operator
-        :type elementwise_add_op: Operator
-        '''
-
-        bias_var = self.block.var(elementwise_add_op.input("Y")[0])
-        out_var = self.block.var(elementwise_add_op.output("Out")[0])
-        filter_var = self.block.var(conv_op.input("Filter")[0])
-        in_var = self.block.var(conv_op.input("Input")[0])
-        attrs = {name: conv_op.attr(name) for name in conv_op.attr_names}
-
-        self.block._insert_op(
-            index,
-            type="conv2d",
-            inputs={"Input": in_var,
-                    "Filter": filter_var,
-                    "Bias": bias_var},
-            outputs={"Output": out_var},
-            attrs=attrs)
-
-    def _insert_fc_op(self, index, mul_op, add_op):
-        '''
-        Construct a new FC operator by copying the old Mul and adding the
-        'Y' input taken from the Elementwise add's input 'Y'.
-        :param index: insert location of FC
-        :type index: Int
-        :param mul_op: MUL operator to be copied
-        :type mul_op: Operator
-        :param add_op: Elementwise add operator to take the bias from
-        :type add_op: Operator
-        :return: fc_op_new
-        :rtype: Operator
-        '''
-
-        def get_op_outputs(op, names):
-            result = {}
-            for name in names:
-                result[name] = self.block.var(op.output(name)[0])
-            return result
-
-        fc_inputs = {}
-        fc_inputs['Input'] = self.block.var(mul_op.input('X')[0])
-        fc_inputs['W'] = self.block.var(mul_op.input('Y')[0])
-        fc_inputs['Bias'] = self.block.var(add_op.input('Y')[0])
-        fc_outputs = get_op_outputs(add_op, ['Out'])
-        fc_attrs = {}
-        fc_attrs['use_mkldnn'] = True
-
-        fc_op_new = self.block._insert_op(
-            index,
-            type='fc',
-            inputs=fc_inputs,
-            outputs=fc_outputs,
-            attrs=fc_attrs)
-        return fc_op_new
-
-    def _fuse_conv_eltwise(self, index, conv_op, eltwise_op):
-        '''
-        fuse the conv op with elementwise_add
-
-        :param conv_op: convolution operator
-        :type conv_op: Operator
-        :param eltwise_op: operator adding data from skip connection
-        :type eltwise_op: Operator
-        '''
-
-        eltwise_input = "X"
-        if eltwise_op.input("X")[0] == conv_op.output("Output")[0]:
-            eltwise_input = "Y"
-
-        residual_var = self.block.vars[eltwise_op.input(eltwise_input)[0]]
-        out_var = self.block.vars[eltwise_op.output("Out")[0]]
-        filter_var = self.block.vars[conv_op.input("Filter")[0]]
-        in_var = self.block.vars[conv_op.input("Input")[0]]
-        bias_var = self.block.vars[conv_op.input("Bias")[0]]
-
-        conv_op._set_attr("fuse_residual_connection", True)
-        attrs = {name: conv_op.attr(name) for name in conv_op.attr_names}
-
-        self.block._insert_op(
-            index,
-            type="conv2d",
-            inputs={
-                "Input": in_var,
-                "Filter": filter_var,
-                "Bias": bias_var,
-                "ResidualData": residual_var
-            },
-            outputs={"Output": out_var},
-            attrs=attrs)
-
-    def _adjust_input(self):
-        for i in range(len(self.block.ops)):
-            current_op = self.block.ops[i]
-            for input_arg in current_op.input_arg_names:
-                if input_arg in self.input_map:
-                    current_op._rename_input(input_arg,
-                                             self.input_map[input_arg])
-
-    def _remove_unused_var(self):
-        '''
-        remove unused variables in program
-        '''
-        args = []
-        for i in range(len(self.block.ops)):
-            current_op = self.block.ops[i]
-            args += current_op.input_arg_names
-            args += current_op.output_arg_names
-        args = list(set(args))  # unique the input and output arguments
-
-        for var in list(self.block.vars.keys()):
-            if var not in args:
-                self.block._remove_var(var)
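Note: with InferenceTranspiler removed, callers of QuantizeTranspiler.freeze_program simply drop the fuse_bn argument and construct the inference program themselves. A minimal sketch of the flow against the new signature; the toy network, executor setup, and variable names below are illustrative only and not part of this patch:

    import paddle.fluid as fluid
    from paddle.fluid.contrib import QuantizeTranspiler

    # Illustrative toy network; any trained program is handled the same way.
    main = fluid.Program()
    startup = fluid.Program()
    with fluid.program_guard(main, startup):
        img = fluid.layers.data(name='img', shape=[784], dtype='float32')
        pred = fluid.layers.fc(input=img, size=10, act='softmax')

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    t = QuantizeTranspiler()             # defaults: 8-bit weights/activations
    t.training_transpile(main, startup)  # insert fake-quant ops for training
    exe.run(startup)                     # initialize parameters
    # ... training iterations would run here ...

    test_program = main.clone(for_test=True)
    # New signature after this patch: freeze_program(program, place, scope=None).
    # The old fuse_bn=True path is gone; fold batch norm into conv weights
    # beforehand if that behavior is still needed.
    t.freeze_program(test_program, place)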