# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import os
import pickle
from collections import OrderedDict
from collections import Iterable

import numpy as np

from .... import io
from .... import compiler
from ....framework import Program
from ....framework import program_guard
from ....framework import Parameter
from ....framework import Variable
from ....executor import Executor
from ....io import save_inference_model, load_inference_model, save_persistables

__all__ = ['GraphWrapper', 'VarWrapper', 'OpWrapper']

OPTIMIZER_OPS = [
    'momentum',
    'lars_momentum',
    'adagrad',
    'adam',
    'adamax',
    'decayed_adagrad',
    'adadelta',
    'rmsprop',
]


class VarWrapper(object):
    def __init__(self, var, graph):
        assert isinstance(var, Variable)
        assert isinstance(graph, GraphWrapper)
        self._var = var
        self._graph = graph

    def __eq__(self, v):
        """
        Overwrite this function to support the `in` syntax in Python.
        """
        return self._var.name == v._var.name

    def name(self):
        """
        Get the name of the variable.
        """
        return self._var.name

    def shape(self):
        """
        Get the shape of the variable.
        """
        return self._var.shape

    def set_shape(self, shape):
        """
        Set the shape of the variable.
        """
        self._var.desc.set_shape(shape)

    def inputs(self):
        """
        Get all the operators that use this variable as output.

        Returns:
            list: A list of operators.
        """
        ops = []
        for op in self._graph.ops():
            if self in op.all_outputs():
                ops.append(op)
        return ops

    def outputs(self):
        """
        Get all the operators that use this variable as input.

        Returns:
            list: A list of operators.
        """
        ops = []
        for op in self._graph.ops():
            if self in op.all_inputs():
                ops.append(op)
        return ops


class OpWrapper(object):
    def __init__(self, op, graph):
        assert isinstance(graph, GraphWrapper)
        self._op = op
        self._graph = graph

    def __eq__(self, op):
        """
        Overwrite this function to support the `in` syntax in Python.
        """
        return self.idx() == op.idx()

    def all_inputs(self):
        """
        Get all the input variables of this operator.
        """
        return [
            self._graph.var(var_name) for var_name in self._op.input_arg_names
        ]

    def all_outputs(self):
        """
        Get all the output variables of this operator.
        """
        return [
            self._graph.var(var_name)
            for var_name in self._op.output_arg_names
        ]

    def idx(self):
        """
        Get the id of this operator.
        """
        return self._op.idx

    def type(self):
        """
        Get the type of this operator.
        """
        return self._op.type

    def is_bwd_op(self):
        """
        Whether this operator is a backward op.
        """
        return self.type().endswith('_grad')

    def is_opt_op(self):
        """
        Whether this operator is an optimizer op.
        """
        return self.type() in OPTIMIZER_OPS

    def inputs(self, name):
        """
        Get all the variables by the input name.
        """
        return [self._graph.var(var_name) for var_name in self._op.input(name)]

    def outputs(self, name):
        """
        Get all the variables by the output name.
        """
        return [
            self._graph.var(var_name) for var_name in self._op.output(name)
        ]

    def set_attr(self, key, value):
        """
        Set the value of an attribute by the attribute's name.

        Args:
            key(str): the attribute name.
            value(bool|int|str|float|list): the value of the attribute.
        """
        self._op._set_attr(key, value)

    def attr(self, name):
        """
        Get the attribute by name.

        Args:
            name(str): the attribute name.

        Returns:
            bool|int|str|float|list: The attribute value. The return value
            can be any valid attribute type.
        """
        return self._op.attr(name)


class GraphWrapper(object):
    """
    It is a wrapper of paddle.fluid.framework.Program with some special
    functions for the paddle slim framework.
    """

    def __init__(self, program=None, in_nodes=[], out_nodes=[]):
        """
        Args:
            program(framework.Program): The program to be wrapped.
            in_nodes(dict): A dict to indicate the input nodes of the graph.
                            The key is a user-defined and human-readable name.
                            The value is the name of a Variable.
            out_nodes(dict): A dict to indicate the output nodes of the graph.
                             The key is a user-defined and human-readable name.
                             The value is the name of a Variable.
        """
        super(GraphWrapper, self).__init__()
        self.program = Program() if program is None else program
        self.persistables = {}
        for var in self.program.list_vars():
            if var.persistable:
                self.persistables[var.name] = var
        self.compiled_graph = None
        in_nodes = [] if in_nodes is None else in_nodes
        self.in_nodes = OrderedDict(in_nodes)
        self.out_nodes = OrderedDict(out_nodes)
        self._attrs = OrderedDict()

    def all_parameters(self):
        """
        Get all the parameters in this graph.

        Returns:
            list: A list of VarWrapper instances.
        """
        params = []
        for block in self.program.blocks:
            for param in block.all_parameters():
                params.append(VarWrapper(param, self))
        return params

    def is_parameter(self, var):
        """
        Whether the given variable is a parameter.

        Args:
            var(VarWrapper): The given variable.
        """
        return isinstance(var._var, Parameter)

    def is_persistable(self, var):
        """
        Whether the given variable is persistable.

        Args:
            var(VarWrapper): The given variable.
        """
        return var._var.persistable

    def compile(self, for_parallel=True, for_test=False, mem_opt=False):
        """
        Compile the program in this wrapper to framework.CompiledProgram for
        next running. This function must be called if the program is modified.

        Args:
            for_parallel(bool): Whether to run the program in a data parallel
                                way. default: True.
            for_test(bool): Whether the compiled program is used for test.
            mem_opt(bool): Whether to enable the inplace and memory optimize
                           build strategies. default: False.
        """
        target = self.program
        if for_test:
            loss = None
        else:
            loss = self.out_nodes['loss']
        if for_parallel:
            # disable memory optimize for stable training
            build_strategy = compiler.BuildStrategy()
            build_strategy.enable_inplace = mem_opt
            build_strategy.memory_optimize = mem_opt
            # build_strategy.async_mode = False
            self.compiled_graph = compiler.CompiledProgram(
                target).with_data_parallel(
                    loss_name=loss, build_strategy=build_strategy)
        else:
            self.compiled_graph = compiler.CompiledProgram(target)

    def ops(self):
        """
        Return all the operator nodes included in the graph as a list.
        """
        ops = []
        for block in self.program.blocks:
            for op in block.ops:
                ops.append(OpWrapper(op, self))
        return ops

    def vars(self):
        """
        Get all the variables.
        """
        return [VarWrapper(var, self) for var in self.program.list_vars()]

    def var(self, name):
        """
        Get the variable by variable name.
        """
        return VarWrapper(self.program.global_block().var(name), self)

    def clone(self, for_test=False):
        """
        Clone a new graph from the current graph.

        Returns:
            (GraphWrapper): The wrapper of a new graph.
        """
        return GraphWrapper(
            self.program.clone(for_test),
            copy.deepcopy(self.in_nodes), copy.deepcopy(self.out_nodes))

    def merge(self, graph):
        """
        Merge a graph into the current graph.

        Args:
            graph(GraphWrapper): The graph to be merged into the current graph.
""" for var in graph.program.list_vars(): new_var = self.program.global_block()._clone_variable( var, force_persistable=False) new_var.stop_gradient = var.stop_gradient # TODO: parameters should be cloned for op in graph.ops(): op = op._op inputs = {} outputs = {} attrs = {} for input_name in op.input_names: inputs[input_name] = [ self.var(in_var_name)._var for in_var_name in op.input(input_name) ] for output_name in op.output_names: outputs[output_name] = [ self.var(out_var_name)._var for out_var_name in op.output(output_name) ] for attr_name in op.attr_names: attrs[attr_name] = op.attr(attr_name) self.program.global_block().append_op( type=op.type, inputs=inputs, outputs=outputs, attrs=attrs) def program(self): """ Get the program in current wrapper. """ return self.program def pre_ops(self, op): """ Get all the previous operators of target operator. Args: op(OpWrapper): Target operator.. Returns: list: A list of operators. """ ops = [] for p in self.ops(): for in_var in op.all_inputs(): if in_var in p.all_outputs(): ops.append(p) return ops def next_ops(self, op): """ Get all the next operators of target operator. Args: op(OpWrapper): Target operator.. Returns: list: A list of operators. """ ops = [] for p in self.ops(): for out_var in op.all_outputs(): if out_var in p.all_inputs(): ops.append(p) return ops def get_param_by_op(self, op): """ Get the parameters used by target operator. """ assert isinstance(op, OpWrapper) params = [] for var in op.all_inputs(): if isinstance(var._var, Parameter): params.append(var) assert len(params) > 0 return params def numel_params(self): """ Get the number of elements in all parameters. """ ret = 0 for param in self.all_parameters(): ret += np.product(param.shape()) return ret def get_optimize_graph(self, optimizer, place, scope, no_grad_var_names=[]): """ Get a new graph for training by appending some backward operators and optimization operators. Args: optimizer: The optimzier used to generate training graph. place: The place to run the graph. scope: The scope used to run the graph. Some new variable will be added into this scope. no_grad_var_names(list): Names of variables that should be ignored while computing gradients. default: []. Returns: (GraphWrapper): The wrapper of new graph with backward ops and optimization ops. """ graph = self.clone() startup_program = Program() with program_guard( main_program=graph.program, startup_program=startup_program): target_name = None if 'loss' in graph.out_nodes: target_name = graph.out_nodes['loss'] elif 'cost' in graph.out_nodes: target_name = graph.out_nodes['cost'] target = graph.var(target_name)._var # The learning rate variable may be created in other program. # Update information in optimizer to make # learning rate variable being accessible in current program. if isinstance(optimizer._learning_rate, Variable): optimizer._learning_rate_map[ graph.program] = optimizer._learning_rate optimizer.minimize(target, no_grad_set=no_grad_var_names) exe = Executor(place) exe.run(program=startup_program, scope=scope) return graph def flops(self, only_conv=False): """ Get the flops of current graph. Args: only_conv: Only calculating the conv layers. default: False. Returns: int: The flops of current graph. 
""" flops = 0 for op in self.ops(): if op.type() in ['conv2d', 'depthwise_conv2d']: filter_shape = op.inputs("Filter")[0].shape() input_shape = op.inputs("Input")[0].shape() output_shape = op.outputs("Output")[0].shape() c_out, c_in, k_h, k_w = filter_shape _, _, h_out, w_out = output_shape groups = op.attr("groups") kernel_ops = k_h * k_w * (c_in / groups) if len(op.inputs("Bias")) > 0: with_bias = 1 else: with_bias = 0 flops += 2 * h_out * w_out * c_out * (kernel_ops + with_bias) elif op.type() == 'pool2d' and not only_conv: input_shape = op.inputs("X")[0].shape() output_shape = op.outputs("Out")[0].shape() _, c_out, h_out, w_out = output_shape k_size = op.attr("ksize") flops += h_out * w_out * c_out * (k_size[0]**2) elif op.type() == 'mul' and not only_conv: x_shape = list(op.inputs("X")[0].shape()) y_shape = op.inputs("Y")[0].shape() if x_shape[0] == -1: x_shape[0] = 1 flops += 2 * x_shape[0] * x_shape[1] * y_shape[1] elif op.type() in ['relu', 'sigmoid', 'batch_norm' ] and not only_conv: input_shape = list(op.inputs("X")[0].shape()) if input_shape[0] == -1: input_shape[0] = 1 flops += np.product(input_shape) return flops def save_persistables(self, path, exe): """ Save all the persistable variables into file. Args: path(str): The path to save the persistables. exe(framework.Executor): The executor used to save the persistables. """ # update persistables from program for var in self.program.list_vars(): if var.persistable and var.name not in self.persistables: self.persistables[var.name] = var io.save_vars(exe.exe, path, vars=self.persistables.values()) def load_persistables(self, path, exe): """ Load the persistable variables from file. Args: path(str): The path to load the persistables. exe(framework.Executor): The executor used to load the persistables. """ def if_exist(var): return os.path.exists(os.path.join(path, var.name)) io.load_vars( exe.exe, path, vars=self.persistables.values(), predicate=if_exist) def update_param_shape(self, scope): """ Update the shape of parameters in the graph according to tensors in scope. It is used after loading pruned parameters from file. """ for param in self.all_parameters(): tensor_shape = np.array(scope.find_var(param.name()).get_tensor( )).shape param.set_shape(tensor_shape) def infer_shape(self): """ Update the groups of convolution layer according to current filters. It is used after loading pruned parameters from file. """ for op in self.ops(): if op.type() != 'conditional_block': op._op.desc.infer_shape(op._op.block.desc) def update_groups_of_conv(self): for op in self.ops(): if op.type() == 'depthwise_conv2d': op.set_attr('groups', op.inputs('Filter')[0].shape()[0])