import copy import itertools from framework import Variable, default_main_program, default_startup_program, unique_name, dtype_is_floating from paddle.v2.fluid.initializer import Constant, Xavier class LayerHelper(object): def __init__(self, layer_type, **kwargs): self.kwargs = kwargs self.layer_type = layer_type name = self.kwargs.get('name', None) if name is None: self.kwargs['name'] = unique_name(self.layer_type) @property def name(self): return self.kwargs['name'] @property def main_program(self): prog = self.kwargs.get('main_program', None) if prog is None: return default_main_program() else: return prog @property def startup_program(self): prog = self.kwargs.get('startup_program', None) if prog is None: return default_startup_program() else: return prog def append_op(self, *args, **kwargs): return self.main_program.current_block().append_op(*args, **kwargs) def multiple_input(self, input_param_name='input'): inputs = self.kwargs.get(input_param_name, []) type_error = TypeError( "Input of {0} layer should be Variable or sequence of Variable". format(self.layer_type)) if isinstance(inputs, Variable): inputs = [inputs] elif not isinstance(inputs, list) and not isinstance(inputs, tuple): raise type_error else: for each in inputs: if not isinstance(each, Variable): raise type_error return inputs def input(self, input_param_name='input'): inputs = self.multiple_input(input_param_name) if len(inputs) != 1: raise "{0} layer only takes one input".format(self.layer_type) return inputs[0] @property def param_attr(self): default = {'name': None} actual = self.kwargs.get('param_attr', None) if actual is None: actual = default for default_field in default.keys(): if default_field not in actual: actual[default_field] = default[default_field] return actual @property def bias_attr(self): default = {'name': None} bias_attr = self.kwargs.get('bias_attr', None) if bias_attr is None: bias_attr = default if isinstance(bias_attr, dict): for default_field in default.keys(): if default_field not in bias_attr: bias_attr[default_field] = default[default_field] return bias_attr def multiple_param_attr(self, length): param_attr = self.param_attr if isinstance(param_attr, dict): param_attr = [param_attr] if len(param_attr) != 1 and len(param_attr) != length: raise ValueError("parameter number mismatch") elif len(param_attr) == 1 and length != 1: tmp = [None] * length for i in xrange(length): tmp[i] = copy.deepcopy(param_attr[0]) param_attr = tmp return param_attr def iter_inputs_and_params(self, input_param_name='input'): inputs = self.multiple_input(input_param_name) param_attrs = self.multiple_param_attr(len(inputs)) for ipt, param_attr in itertools.izip(inputs, param_attrs): yield ipt, param_attr def input_dtype(self, input_param_name='input'): inputs = self.multiple_input(input_param_name) dtype = None for each in inputs: if dtype is None: dtype = each.dtype elif dtype != each.dtype: raise ValueError("Data Type mismatch") return dtype def create_parameter(self, attr, shape, dtype, suffix='w', initializer=None): # Deepcopy the attr so that parameters can be shared in program attr_copy = copy.deepcopy(attr) if initializer is not None: attr_copy['initializer'] = initializer else: attr_copy['initializer'] = self._get_default_initializer(dtype) if attr_copy['name'] is None: attr_copy['name'] = unique_name(".".join([self.name, suffix])) self.startup_program.global_block().create_parameter( dtype=dtype, shape=shape, **attr_copy) return self.main_program.global_block().create_parameter( name=attr_copy['name'], dtype=dtype, shape=shape, trainable=attr_copy.get('trainable', True)) def create_tmp_variable(self, dtype): return self.main_program.current_block().create_var( name=unique_name(".".join([self.name, 'tmp'])), dtype=dtype, persistable=False) def create_variable(self, *args, **kwargs): return self.main_program.current_block().create_var(*args, **kwargs) def create_global_variable(self, persistable=False, *args, **kwargs): return self.main_program.global_block().create_var( *args, persistable=persistable, **kwargs) def set_variable_initializer(self, var, initializer): assert isinstance(var, Variable) self.startup_program.global_block().create_var( name=var.name, type=var.type, dtype=var.dtype, shape=var.shape, persistable=True, initializer=initializer) def append_bias_op(self, input_var, bias_initializer, dim_start=1, dim_end=None): """ Append bias operator and return its output. If the user does not set bias_attr, append_bias_op will return input_var :param input_var: the input variable. The len(input_var.shape) is larger or equal than 2. :bias_initializer: an instance of a subclass of Initializer used to initialize the bias :param dim_start: :param dim_end: the shape of the bias will be input_var.shape[dim_start:dim_end]. The bias is broadcasted to other dimensions and added to input_var to get the output """ size = list(input_var.shape[dim_start:dim_end]) bias_attr = self.bias_attr if not bias_attr: return input_var b = self.create_parameter( attr=bias_attr, shape=size, dtype=input_var.dtype, suffix='b', initializer=bias_initializer) tmp = self.create_tmp_variable(dtype=input_var.dtype) self.append_op( type='elementwise_add', inputs={'X': [input_var], 'Y': [b]}, outputs={'Out': [tmp]}, attrs={'axis': dim_start}) return tmp def append_activation(self, input_var): act = self.kwargs.get('act', None) if act is None: return input_var if isinstance(act, basestring): act = {'type': act} tmp = self.create_tmp_variable(dtype=input_var.dtype) act_type = act.pop('type') self.append_op( type=act_type, inputs={"X": [input_var]}, outputs={"Y": [tmp]}, attrs=act) return tmp def _get_default_initializer(self, dtype): if dtype is None or dtype_is_floating(dtype) is True: return Xavier() else: # For integer and boolean types, initialize with all zeros return Constant()