diff --git a/python/paddle/v2/framework/layers.py b/python/paddle/v2/framework/layers.py index e473e4822a1bcf3177ced913ef47b807fd25c2d2..f40c3cf43a6a400f67732ebd4f55afd35f98c01c 100644 --- a/python/paddle/v2/framework/layers.py +++ b/python/paddle/v2/framework/layers.py @@ -22,12 +22,36 @@ def fc(input, num_flatten_dims=1, main_program=None, startup_program=None): - # create helper + """ + Fully Connected Layer. + + Args: + input: The input tensor to the function + size: The size of the layer + param_attr: The parameters/weights to the FC Layer + bias_attr: The bias parameter for the FC layer + name: Name/alias of the function + act: Activation to be applied to the output of FC layer + num_flatten_dims: Number of columns in input + main_program: Name of the main program that calls this + startup_program: Name of the startup program + + This function can take in multiple inputs and performs the Fully Connected + function (linear transformation) on top of each of them. + So for input x, the output will be : Wx + b. Where W is the parameter, + b the bias and x is the input. + + The function also applies an activation (non-linearity) on top of the + output, if activation is passed in the input. + + All the input variables of this function are passed in as local variables + to the LayerHelper constructor. + + """ helper = LayerHelper('fc', **locals()) dtype = helper.input_dtype() - # mul mul_results = [] for input_var, param_attr in helper.iter_inputs_and_params(): input_shape = input_var.shape @@ -68,6 +92,26 @@ def embedding(input, param_attr=None, main_program=None, startup_program=None): + """ + Embedding Layer. + + Args: + input: The input to the function + size: The size of the layer + data_type: The type of data : float32, float_16, int etc + is_sparse: A flag that decleares whether the input is sparse + param_attr: Parameters for this layer + main_program: Name of the main program that calls this + startup_program: Name of the startup program + + This function can take in the input (which is a vector of IDs) and + performs a lookup in the lookup_table using these IDs, to result into + the embedding of each ID in the input. + + All the input variables of this function are passed in as local variables + to the LayerHelper constructor. + + """ helper = LayerHelper('embedding', **locals()) w = helper.create_parameter( attr=helper.param_attr, shape=size, dtype=data_type) @@ -89,6 +133,28 @@ def data(name, main_program=None, startup_program=None, stop_gradient=True): + """ + Data Layer. + + Args: + name: The name/alias of the function + shape: Tuple declaring the shape. + data_type: The type of data : float32, float_16, int etc + type: The output type. By default it is LOD_TENSOR. + append_batch_size: Whether or not to append the data as a batch. + main_program: Name of the main program that calls this + startup_program: Name of the startup program + stop_gradient: A boolean that mentions whether gradient should flow. + + This function takes in input and based on whether data has + to be returned back as a minibatch, it creates the global variable using + the helper functions. The global variables can be accessed by all the + following operations and layers in the graph. + + All the input variables of this function are passed in as local variables + to the LayerHelper constructor. + + """ helper = LayerHelper('data', **locals()) shape = list(shape) for i in xrange(len(shape)): @@ -110,11 +176,32 @@ def data(name, def _convert_(name): + """ + Formatting. + + Args: + name: The name/alias + + This function takes in a name and converts it to a standard format of + group1_group2. Where as per the regular expression, group1 can have + alphabets and numbers and group2 has capital alphabets. + + """ s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name) return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() def _create_op_func_(op_type): + """ + Create an Operator for a Function. + + Args: + op_type: The name of the operator to be created + + This function takes in the operator type (sigmoid, mean , average etc) and + creates the operator functionality. + + """ op_proto = OpProtoHolder.instance().get_op_proto(op_type) not_intermediate_outputs = \ filter(lambda output: not output.intermediate, op_proto.outputs) @@ -122,24 +209,26 @@ def _create_op_func_(op_type): filter(lambda output: output.intermediate, op_proto.outputs) if len(not_intermediate_outputs) != 1: - raise ValueError( - "Only one not intermediate output operator can be automatically generated" - ) + raise ValueError("Only one non intermediate output operator can be", + "automatically generated") if not_intermediate_outputs[0].duplicable: raise ValueError( - "Only not duplicable op can be automatically generated") + "Only non duplicable op can be automatically generated") for output in intermediate_outputs: if output.duplicable: - raise ValueError( - "Only when all intermediate ops are not duplicable, " - "this op can be automatically generated") + raise ValueError("The op can be automatically generated only when ", + "all intermediate ops are not duplicable") o_name = not_intermediate_outputs[0].name intermediate_output_names = [output.name for output in intermediate_outputs] def infer_and_check_data_type(op_proto, **kwargs): + """ + This function performs the sanity check for data_type and + instance type. + """ dtype = None for ipt in op_proto.inputs: name = _convert_(ipt.name) @@ -160,6 +249,11 @@ def _create_op_func_(op_type): return dtype def func(**kwargs): + """ + This function implements the function for the operator. This process + involves doing the sanity check (using the function above), reading + inputs from protobuf and applying the activations on top. + """ helper = LayerHelper(op_type, **kwargs) dtype = infer_and_check_data_type(op_proto, **kwargs) @@ -200,6 +294,11 @@ _create_op_func_('transpose') def fill_constant(data_type, shape, value=None, program=None): + """ + This function creates a tensor , with shape as mentioned in the input and + specified data_type and fills this up with a constant value that + comes in the input. + """ helper = LayerHelper('fill_constant', **locals()) out = helper.create_tmp_variable(dtype=data_type) helper.append_op( @@ -212,6 +311,10 @@ def fill_constant(data_type, shape, value=None, program=None): def cast(x, data_type, main_program=None): + """ + This function takes in the input with input_data_type + and casts it to the output_data_type as the output. + """ helper = LayerHelper('cast', **locals()) out = helper.create_tmp_variable(dtype=data_type) helper.append_op( @@ -224,6 +327,10 @@ def cast(x, data_type, main_program=None): def concat(input, axis, main_program=None, startup_program=None): + """ + This function concats the input along the axis mentioned + and returns that as the output. + """ helper = LayerHelper('concat', **locals()) out = helper.create_tmp_variable(dtype=helper.input_dtype()) helper.append_op( @@ -235,6 +342,10 @@ def concat(input, axis, main_program=None, startup_program=None): def sums(input, main_program=None, startup_program=None): + """ + This function takes in the input and performs the sum operation on it + and returns that as the output. + """ helper = LayerHelper('sum', **locals()) out = helper.create_tmp_variable(dtype=helper.input_dtype()) helper.append_op(type='sum', inputs={'X': input}, outputs={'Out': out}) @@ -242,6 +353,10 @@ def sums(input, main_program=None, startup_program=None): def cos_sim(X, Y, **kwargs): + """ + This function performs the cosine similarity between two tensors + X and Y and returns that as the output. + """ helper = LayerHelper('cos_sim', **kwargs) out = helper.create_tmp_variable(dtype=X.data_type) xnorm = helper.create_tmp_variable(dtype=X.data_type) @@ -257,6 +372,9 @@ def cos_sim(X, Y, **kwargs): def cross_entropy(input, label, **kwargs): + """ + This function computes cross_entropy using the input and label. + """ helper = LayerHelper('cross_entropy', **kwargs) out = helper.create_tmp_variable(dtype=input.data_type) helper.append_op( @@ -269,6 +387,10 @@ def cross_entropy(input, label, **kwargs): def square_error_cost(input, label, **kwargs): + """ + This functions returns the squared error cost using the input and label. + The output is appending the op to do the above. + """ helper = LayerHelper('square_error_cost', **kwargs) minus_out = helper.create_tmp_variable(dtype=input.data_type) helper.append_op( @@ -284,6 +406,10 @@ def square_error_cost(input, label, **kwargs): def accuracy(input, label, k=1, **kwargs): + """ + This function computes the accuracy using the input and label. + The output is the top_k inputs and their indices. + """ helper = LayerHelper("accuracy", **kwargs) topk_out = helper.create_tmp_variable(dtype=input.data_type) topk_indices = helper.create_tmp_variable(dtype="int64") @@ -316,6 +442,11 @@ def sequence_conv(input, param_attr=None, main_program=None, startup_program=None): + """ + This function creates the op for sequence_conv, using the inputs and + other convolutional configurations for the filters and stride as given + in the input parameters to the function. + """ # FIXME(dzh) : want to unify the argument of python layer # function. So we ignore some unecessary attributes. # such as, padding_trainable, context_start. @@ -356,6 +487,13 @@ def conv2d(input, param_attr=None, main_program=None, startup_program=None): + """ + This function creates the op for a 2-dimensional Convolution. + This is performed using the parameters of filters(size, dimensionality etc) + , stride and other configurations for a Convolution operation. + This funciton can also append an activation on top of the + conv-2d output, if mentioned in the input parameters. + """ helper = LayerHelper('conv2d', **locals()) dtype = helper.input_dtype() @@ -402,6 +540,11 @@ def conv2d(input, def sequence_pool(input, pool_type, **kwargs): + """ + This function add the operator for sequence pooling. + This is applied on top of the input using pool_type mentioned + in the parameters. + """ helper = LayerHelper('sequence_pool', input=input, **kwargs) dtype = helper.input_dtype() pool_out = helper.create_tmp_variable(dtype) @@ -425,6 +568,10 @@ def pool2d(input, global_pooling=False, main_program=None, startup_program=None): + """ + This function adds the operator for pooling in 2 dimensions, using the + pooling configurations mentioned in input parameters. + """ if pool_type not in ["max", "avg"]: raise ValueError( "Unknown pool_type: '%s'. It can only be 'max' or 'avg'.", @@ -465,6 +612,10 @@ def batch_norm(input, data_layout='NCHW', main_program=None, startup_program=None): + """ + This function helps create an operator to implement + the BatchNorm layer using the configurations from the input parameters. + """ helper = LayerHelper('batch_norm', **locals()) dtype = helper.input_dtype() @@ -536,8 +687,10 @@ def batch_norm(input, class BlockGuard(object): """ - BlockGuard used to create sub-block in program by using Python `with` - keyword. + BlockGuard class. + + BlockGuard class is used to create a sub-block in a program by + using the Python `with` keyword. """ def __init__(self, main_program): @@ -556,6 +709,12 @@ class BlockGuard(object): class StaticRNNGuard(BlockGuard): + """ + StaticRNNGuard class. + + StaticRNNGuard class is used to create a StaticRNN block in a program. + """ + def __init__(self, rnn): if not isinstance(rnn, StaticRNN): raise TypeError("StaticRNNGuard takes an StaticRNN") @@ -576,12 +735,18 @@ class StaticRNNGuard(BlockGuard): class StaticRNNMemoryLink(object): """ - :param init: the initial variable for Memory - :type init: Variable - :param pre_mem: the memory variable in previous time step - :type pre_mem: Variable - :param mem: the memory variable in current time step - :type mem: Variable + StaticRNNMemoryLink class. + + Args: + init: the initial variable for Memory + init: Variable + pre_mem: the memory variable in previous time step + pre_mem: Variable + mem: the memory variable in current time step + mem: Variable + + StaticRNNMemoryLink class is used to create a link between two + memory cells of a StaticRNN. """ def __init__(self, init, pre_mem, mem=None): @@ -591,6 +756,12 @@ class StaticRNNMemoryLink(object): class StaticRNN(object): + """ + StaticRNN class. + + StaticRNN class is used to create a StaticRNN. The RNN will have its + own parameters like inputs, outputs, memories, status and length. + """ BEFORE_RNN_BLOCK = 0 IN_RNN_BLOCK = 1 AFTER_RNN_BLOCK = 2 @@ -619,15 +790,15 @@ class StaticRNN(object): init_value=0.0, init_batch_dim_idx=0, ref_batch_dim_idx=1): - ''' - :param init: boot memory, if not set, a shape, batch_ref must be provided - :param shape: shape of the boot memory - :param batch_ref: batch size reference variable - :param init_value: the init value of boot memory - :param init_batch_dim_idx: the index of batch size in init's dimension - :param ref_batch_dim_idx: the index of batch size in batch_ref's dimension - :return: boot memory - ''' + """ + Args: + init: boot memory, if not set, a shape, batch_ref must be provided + shape: shape of the boot memory + batch_ref: batch size reference variable + init_value: the init value of boot memory + init_batch_dim_idx: the index of batch size in init's dimension + ref_batch_dim_idx: the index of batch size in batch_ref's dimension + """ self._assert_in_rnn_block_('memory') if init is None: if shape is None or batch_ref is None: @@ -799,6 +970,10 @@ def lstm(x, forget_bias=None, main_program=None, startup_program=None): + """ + This function helps create an operator for the LSTM (Long Short Term + Memory) cell that can be used inside an RNN. + """ helper = LayerHelper('lstm_unit', **locals()) rnn = StaticRNN() with rnn.step(): @@ -834,6 +1009,10 @@ def lstm(x, def lod_rank_table(x, level=0, main_program=None): + """ + This function creates an operator for creating a LOD_RANK_TABLE + using the input x. + """ helper = LayerHelper("lod_rank_table", **locals()) table = helper.create_variable( type=core.VarDesc.VarType.LOD_RANK_TABLE, @@ -847,6 +1026,10 @@ def lod_rank_table(x, level=0, main_program=None): def lod_tensor_to_array(x, table, main_program=None): + """ + This function creates an operator to convert an LOD_Tensor to + an array. + """ helper = LayerHelper("lod_tensor_to_array", **locals()) array = helper.create_variable( name=unique_name("lod_tensor_to_array"), @@ -861,6 +1044,10 @@ def lod_tensor_to_array(x, table, main_program=None): def array_to_lod_tensor(x, table, main_program=None): + """ + This function creates an operator to convert an array to a + LOD_Tensor. + """ helper = LayerHelper("array_to_lod_tensor", **locals()) tmp = helper.create_tmp_variable(dtype=x.data_type) helper.append_op( @@ -872,6 +1059,11 @@ def array_to_lod_tensor(x, table, main_program=None): def fill_constant(shape, dtype, value, main_program=None): + """ + This function creates a tensor , with shape as mentioned in the input and + specified data_type and fills this up with a constant value that + comes in the input. It also sets the stop_gradient to be True. + """ helper = LayerHelper("fill_constant", **locals()) out = helper.create_tmp_variable(dtype=dtype) helper.append_op( @@ -888,14 +1080,27 @@ def fill_constant(shape, dtype, value, main_program=None): def ones(shape, dtype, main_program=None): + """ + This function performs the same function as fill_constant() declared above + with the constant value being 1.0. + """ return fill_constant(value=1.0, **locals()) def zeros(shape, dtype, main_program=None): + """ + This function performs the same function as fill_constant() declared above + with the constant value being 0.0. + """ return fill_constant(value=0.0, **locals()) def increment(x, value=1.0, in_place=True, main_program=None): + """ + This function creates an operator to increment each value in the input + `x` by an amount: `value` as mentioned in the input parameter. This + operation is performed in-place by default. + """ helper = LayerHelper("increment", **locals()) if in_place: out = x @@ -910,6 +1115,10 @@ def increment(x, value=1.0, in_place=True, main_program=None): def array_write(x, i, array=None, main_program=None): + """ + This function creates an operator to write the data out as a + LOD_TENSOR_ARRAY. + """ helper = LayerHelper('array_write', **locals()) if array is None: array = helper.create_variable( @@ -925,6 +1134,10 @@ def array_write(x, i, array=None, main_program=None): def array_read(array, i, main_program=None): + """ + This function creates an operator to read the data in as a + LOD_TENSOR_ARRAY. + """ helper = LayerHelper('array_read', **locals()) if not isinstance( array, @@ -940,6 +1153,10 @@ def array_read(array, i, main_program=None): def shrink_memory(x, i, table, main_program=None): + """ + This function creates an operator to shrink_rnn_memory using the RankTable + as mentioned in the input parameter. + """ helper = LayerHelper('shrink_memory', **locals()) out = helper.create_tmp_variable(dtype=x.data_type) helper.append_op( @@ -953,6 +1170,10 @@ def shrink_memory(x, i, table, main_program=None): def array_length(array, main_program=None): + """ + This function creates an operator to find the length of the + LOD_TENSOR_ARRAY. + """ helper = LayerHelper('array_length', **locals()) tmp = helper.create_tmp_variable(dtype='int64') tmp.stop_gradient = True