Commit b5901a3a authored by: K kavyasrinet, committed by: Yi Wang

Adding documentation for every function in layers.py (#5529)

* Adding operator assignment

* Adding documentation to layers.py

* Removing file from another PR
Parent 5e13e706
......@@ -22,12 +22,36 @@ def fc(input,
num_flatten_dims=1,
main_program=None,
startup_program=None):
# create helper
"""
Fully Connected Layer.
Args:
input: The input tensor(s) to the layer
size: The output size of the layer
param_attr: The parameter/weight attributes of the FC layer
bias_attr: The bias attributes of the FC layer
name: Name/alias of the layer
act: Activation to be applied to the output of the FC layer
num_flatten_dims: Number of leading dimensions of the input that are
flattened into the row dimension when the input is treated as a matrix
main_program: The main program that calls this
startup_program: The startup program
This function can take in multiple inputs and performs the fully connected
(linear transformation) operation on each of them, so for input x the
output is Wx + b, where W is the weight and b is the bias.
The function also applies an activation (non-linearity) on top of the
output, if an activation is passed in.
All the input variables of this function are passed in as local variables
to the LayerHelper constructor.
"""
helper = LayerHelper('fc', **locals())
dtype = helper.input_dtype()
# mul
mul_results = []
for input_var, param_attr in helper.iter_inputs_and_params():
input_shape = input_var.shape
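# Usage sketch for the fc layer above; a minimal example, assuming this
# module is importable as `layers` (the exact package path depends on the
# checkout) and that 'sigmoid' is a registered activation op.
import layers

# Declare a 784-dimensional float input and stack a fully connected layer
# on top of it: hidden = sigmoid(W * image + b).
image = layers.data(name='image', shape=[784], data_type='float32')
hidden = layers.fc(input=image, size=128, act='sigmoid')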
......@@ -68,6 +92,26 @@ def embedding(input,
param_attr=None,
main_program=None,
startup_program=None):
"""
Embedding Layer.
Args:
input: The input (a tensor of IDs) to the layer
size: The shape of the lookup table, e.g. [vocab_size, embedding_dim]
data_type: The type of data: float32, float16, int etc
is_sparse: A flag declaring whether the lookup should use a sparse update
param_attr: Parameter attributes for this layer
main_program: The main program that calls this
startup_program: The startup program
This function takes in the input (a vector of IDs) and looks these IDs up
in the lookup_table, returning the embedding of each ID in the input.
All the input variables of this function are passed in as local variables
to the LayerHelper constructor.
"""
helper = LayerHelper('embedding', **locals())
w = helper.create_parameter(
attr=helper.param_attr, shape=size, dtype=data_type)
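# Usage sketch for the embedding layer above; the `layers` import path and
# the vocabulary size (10000) are assumptions.
import layers

# Each input element is a word ID; the lookup table has shape
# [vocab_size, embedding_dim] and returns a 32-dimensional vector per ID.
word_ids = layers.data(name='word_ids', shape=[1], data_type='int64')
word_emb = layers.embedding(input=word_ids, size=[10000, 32],
                            data_type='float32', is_sparse=True)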
......@@ -89,6 +133,28 @@ def data(name,
main_program=None,
startup_program=None,
stop_gradient=True):
"""
Data Layer.
Args:
name: The name/alias of the variable
shape: Tuple declaring the shape
data_type: The type of data: float32, float16, int etc
type: The output type. By default it is LOD_TENSOR
append_batch_size: Whether or not to prepend a batch-size dimension (-1)
to the shape
main_program: The main program that calls this
startup_program: The startup program
stop_gradient: A boolean that states whether gradients should flow
through this variable
This function declares an input of the network and, based on whether the
data is to be fed as a minibatch (append_batch_size), creates the global
variable using the helper functions. The global variable can be accessed
by all the operators and layers that follow in the graph.
All the input variables of this function are passed in as local variables
to the LayerHelper constructor.
"""
helper = LayerHelper('data', **locals())
shape = list(shape)
for i in xrange(len(shape)):
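# Usage sketch for the data layer above, assuming the module is importable
# as `layers`. With the default append_batch_size=True the declared shape
# [3, 32, 32] is prepended with -1 for the (variable) batch size.
import layers

img = layers.data(name='img', shape=[3, 32, 32], data_type='float32')
label = layers.data(name='label', shape=[1], data_type='int64')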
......@@ -110,11 +176,32 @@ def data(name,
def _convert_(name):
"""
Formatting.
Args:
name: The name/alias to convert
This function takes in a CamelCase name and converts it to the standard
snake_case format group1_group2: an underscore is inserted between a
lowercase letter or digit and the capital letter that follows it, and the
result is lowercased.
"""
s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
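# The transformation performed by _convert_ above, as a self-contained
# sketch that can be run on its own:
import re

def to_snake_case(name):
    # insert '_' before an inner capital letter, then lowercase everything
    s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()

print(to_snake_case('BatchNorm'))   # batch_norm
print(to_snake_case('ReduceSum'))   # reduce_sum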
def _create_op_func_(op_type):
"""
Create a layer function for an operator.
Args:
op_type: The name of the operator for which a function is created
This function takes in an operator type (sigmoid, mean, average etc.) and
creates a module-level layer function that wraps that operator.
"""
op_proto = OpProtoHolder.instance().get_op_proto(op_type)
not_intermediate_outputs = \
filter(lambda output: not output.intermediate, op_proto.outputs)
......@@ -122,24 +209,26 @@ def _create_op_func_(op_type):
filter(lambda output: output.intermediate, op_proto.outputs)
if len(not_intermediate_outputs) != 1:
raise ValueError(
"Only one not intermediate output operator can be automatically generated"
)
raise ValueError("Only one non intermediate output operator can be",
"automatically generated")
if not_intermediate_outputs[0].duplicable:
raise ValueError(
"Only not duplicable op can be automatically generated")
"Only non duplicable op can be automatically generated")
for output in intermediate_outputs:
if output.duplicable:
raise ValueError(
"Only when all intermediate ops are not duplicable, "
"this op can be automatically generated")
raise ValueError("The op can be automatically generated only when ",
"all intermediate ops are not duplicable")
o_name = not_intermediate_outputs[0].name
intermediate_output_names = [output.name for output in intermediate_outputs]
def infer_and_check_data_type(op_proto, **kwargs):
"""
This function performs the sanity check for data_type and
instance type.
"""
dtype = None
for ipt in op_proto.inputs:
name = _convert_(ipt.name)
......@@ -160,6 +249,11 @@ def _create_op_func_(op_type):
return dtype
def func(**kwargs):
"""
This function implements the function for the operator. This process
involves doing the sanity check (using the function above), reading
inputs from protobuf and applying the activations on top.
"""
helper = LayerHelper(op_type, **kwargs)
dtype = infer_and_check_data_type(op_proto, **kwargs)
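# Usage sketch: every _create_op_func_(op_type) call registers a module-level
# function named after the operator, so generated ops can be called like
# ordinary layers. The `layers` import path is an assumption, and so is the
# exact set of registered op types ('sigmoid' and 'mean' are shown here).
import layers

x = layers.data(name='x', shape=[16], data_type='float32')
y = layers.sigmoid(x=x)   # generated by _create_op_func_('sigmoid')
m = layers.mean(x=y)      # generated by _create_op_func_('mean')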
......@@ -200,6 +294,11 @@ _create_op_func_('transpose')
def fill_constant(data_type, shape, value=None, program=None):
"""
This function creates a tensor with the shape given in the input and the
specified data_type, and fills it with the constant value passed in.
"""
helper = LayerHelper('fill_constant', **locals())
out = helper.create_tmp_variable(dtype=data_type)
helper.append_op(
......@@ -212,6 +311,10 @@ def fill_constant(data_type, shape, value=None, program=None):
def cast(x, data_type, main_program=None):
"""
This function takes in the input x and casts it to the given data_type,
returning the cast tensor as the output.
"""
helper = LayerHelper('cast', **locals())
out = helper.create_tmp_variable(dtype=data_type)
helper.append_op(
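# Usage sketch for cast above; the `layers` import path is an assumption.
import layers

# Cast an int64 label tensor to float32.
label = layers.data(name='label', shape=[1], data_type='int64')
label_f32 = layers.cast(x=label, data_type='float32')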
......@@ -224,6 +327,10 @@ def cast(x, data_type, main_program=None):
def concat(input, axis, main_program=None, startup_program=None):
"""
This function concatenates the input tensors along the given axis
and returns the result as the output.
"""
helper = LayerHelper('concat', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype())
helper.append_op(
......@@ -235,6 +342,10 @@ def concat(input, axis, main_program=None, startup_program=None):
def sums(input, main_program=None, startup_program=None):
"""
This function sums the input tensors element-wise
and returns the result as the output.
"""
helper = LayerHelper('sum', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype())
helper.append_op(type='sum', inputs={'X': input}, outputs={'Out': out})
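# Usage sketch for concat and sums above; both take a Python list of
# tensors. The `layers` import path is an assumption.
import layers

a = layers.data(name='a', shape=[8], data_type='float32')
b = layers.data(name='b', shape=[8], data_type='float32')
ab = layers.concat(input=[a, b], axis=1)   # concatenated along axis 1
s = layers.sums(input=[a, b])              # element-wise sum of a and b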
......@@ -242,6 +353,10 @@ def sums(input, main_program=None, startup_program=None):
def cos_sim(X, Y, **kwargs):
"""
This function computes the cosine similarity between the two tensors
X and Y and returns it as the output.
"""
helper = LayerHelper('cos_sim', **kwargs)
out = helper.create_tmp_variable(dtype=X.data_type)
xnorm = helper.create_tmp_variable(dtype=X.data_type)
......@@ -257,6 +372,9 @@ def cos_sim(X, Y, **kwargs):
def cross_entropy(input, label, **kwargs):
"""
This function computes cross_entropy using the input and label.
"""
helper = LayerHelper('cross_entropy', **kwargs)
out = helper.create_tmp_variable(dtype=input.data_type)
helper.append_op(
......@@ -269,6 +387,10 @@ def cross_entropy(input, label, **kwargs):
def square_error_cost(input, label, **kwargs):
"""
This function returns the squared error cost using the input and label,
i.e. the element-wise square of (input - label).
"""
helper = LayerHelper('square_error_cost', **kwargs)
minus_out = helper.create_tmp_variable(dtype=input.data_type)
helper.append_op(
......@@ -284,6 +406,10 @@ def square_error_cost(input, label, **kwargs):
def accuracy(input, label, k=1, **kwargs):
"""
This function computes the accuracy using the input and label.
The top_k values of the input and their indices are computed and compared
with the label to produce the accuracy.
"""
helper = LayerHelper("accuracy", **kwargs)
topk_out = helper.create_tmp_variable(dtype=input.data_type)
topk_indices = helper.create_tmp_variable(dtype="int64")
......@@ -316,6 +442,11 @@ def sequence_conv(input,
param_attr=None,
main_program=None,
startup_program=None):
"""
This function creates the op for sequence_conv, using the input along
with the filter and stride configurations given in the input parameters
to the function.
"""
# FIXME(dzh) : want to unify the argument of python layer
# function. So we ignore some unnecessary attributes.
# such as, padding_trainable, context_start.
......@@ -356,6 +487,13 @@ def conv2d(input,
param_attr=None,
main_program=None,
startup_program=None):
"""
This function creates the op for a 2-dimensional convolution.
The convolution is configured by the filter parameters (size,
dimensionality etc.), the stride and the other settings given in the
input parameters. This function can also append an activation on top of
the conv2d output, if one is given in the input parameters.
"""
helper = LayerHelper('conv2d', **locals())
dtype = helper.input_dtype()
......@@ -402,6 +540,11 @@ def conv2d(input,
def sequence_pool(input, pool_type, **kwargs):
"""
This function adds the operator for sequence pooling.
The pooling is applied to the input using the pool_type given in the
parameters.
"""
helper = LayerHelper('sequence_pool', input=input, **kwargs)
dtype = helper.input_dtype()
pool_out = helper.create_tmp_variable(dtype)
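# Usage sketch for sequence_pool above: reduce a variable-length sequence of
# word embeddings to one vector per sequence. The `layers` import path, the
# vocabulary size and the accepted pool_type spelling ('sum') are assumptions.
import layers

words = layers.data(name='words', shape=[1], data_type='int64')
emb = layers.embedding(input=words, size=[10000, 32], data_type='float32')
sent_vec = layers.sequence_pool(input=emb, pool_type='sum')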
......@@ -425,6 +568,10 @@ def pool2d(input,
global_pooling=False,
main_program=None,
startup_program=None):
"""
This function adds the operator for pooling in 2 dimensions, using the
pooling configurations given in the input parameters.
"""
if pool_type not in ["max", "avg"]:
raise ValueError(
"Unknown pool_type: '%s'. It can only be 'max' or 'avg'.",
......@@ -465,6 +612,10 @@ def batch_norm(input,
data_layout='NCHW',
main_program=None,
startup_program=None):
"""
This function helps create an operator to implement
the BatchNorm layer using the configurations from the input parameters.
"""
helper = LayerHelper('batch_norm', **locals())
dtype = helper.input_dtype()
......@@ -536,8 +687,10 @@ def batch_norm(input,
class BlockGuard(object):
"""
BlockGuard used to create sub-block in program by using Python `with`
keyword.
BlockGuard class.
BlockGuard class is used to create a sub-block in a program by
using the Python `with` keyword.
"""
def __init__(self, main_program):
......@@ -556,6 +709,12 @@ class BlockGuard(object):
class StaticRNNGuard(BlockGuard):
"""
StaticRNNGuard class.
StaticRNNGuard class is used to create a StaticRNN block in a program.
"""
def __init__(self, rnn):
if not isinstance(rnn, StaticRNN):
raise TypeError("StaticRNNGuard takes an StaticRNN")
......@@ -576,12 +735,18 @@ class StaticRNNGuard(BlockGuard):
class StaticRNNMemoryLink(object):
"""
:param init: the initial variable for Memory
:type init: Variable
:param pre_mem: the memory variable in previous time step
:type pre_mem: Variable
:param mem: the memory variable in current time step
:type mem: Variable
StaticRNNMemoryLink class.
Args:
init (Variable): the initial variable for Memory
pre_mem (Variable): the memory variable in the previous time step
mem (Variable): the memory variable in the current time step
StaticRNNMemoryLink class is used to create a link between two
memory cells of a StaticRNN.
"""
def __init__(self, init, pre_mem, mem=None):
......@@ -591,6 +756,12 @@ class StaticRNNMemoryLink(object):
class StaticRNN(object):
"""
StaticRNN class.
StaticRNN class is used to create a StaticRNN. The RNN will have its
own inputs, outputs, memories, states and sequence length.
"""
BEFORE_RNN_BLOCK = 0
IN_RNN_BLOCK = 1
AFTER_RNN_BLOCK = 2
......@@ -619,15 +790,15 @@ class StaticRNN(object):
init_value=0.0,
init_batch_dim_idx=0,
ref_batch_dim_idx=1):
'''
:param init: boot memory, if not set, a shape, batch_ref must be provided
:param shape: shape of the boot memory
:param batch_ref: batch size reference variable
:param init_value: the init value of boot memory
:param init_batch_dim_idx: the index of batch size in init's dimension
:param ref_batch_dim_idx: the index of batch size in batch_ref's dimension
:return: boot memory
'''
"""
Args:
init: boot memory; if not set, shape and batch_ref must be provided
shape: shape of the boot memory
batch_ref: batch size reference variable
init_value: the initial value of the boot memory
init_batch_dim_idx: the index of the batch size dimension in init
ref_batch_dim_idx: the index of the batch size dimension in batch_ref
Returns:
the boot memory
"""
self._assert_in_rnn_block_('memory')
if init is None:
if shape is None or batch_ref is None:
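# Usage sketch for StaticRNN and its memory() method above. The step_input/
# update_memory/output method names, the shapes and the `layers` import path
# are assumptions based on the usual StaticRNN interface of this file.
import layers

# x holds a whole sequence laid out as [seq_len, batch_size, input_dim].
x = layers.data(name='x', shape=[10, 32, 16], data_type='float32',
                append_batch_size=False)
rnn = layers.StaticRNN()
with rnn.step():
    x_t = rnn.step_input(x)                          # one time step of x
    h_pre = rnn.memory(shape=[-1, 16], batch_ref=x)  # boot memory, zeros
    h = layers.fc(input=[x_t, h_pre], size=16, act='sigmoid')
    rnn.update_memory(h_pre, h)                      # carry h to the next step
    rnn.output(h)
out = rnn()                                          # concatenated step outputs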
......@@ -799,6 +970,10 @@ def lstm(x,
forget_bias=None,
main_program=None,
startup_program=None):
"""
This function helps create an operator for the LSTM (Long Short-Term
Memory) cell that can be used inside an RNN.
"""
helper = LayerHelper('lstm_unit', **locals())
rnn = StaticRNN()
with rnn.step():
......@@ -834,6 +1009,10 @@ def lstm(x,
def lod_rank_table(x, level=0, main_program=None):
"""
This function creates an operator for creating a LOD_RANK_TABLE
using the input x.
"""
helper = LayerHelper("lod_rank_table", **locals())
table = helper.create_variable(
type=core.VarDesc.VarType.LOD_RANK_TABLE,
......@@ -847,6 +1026,10 @@ def lod_rank_table(x, level=0, main_program=None):
def lod_tensor_to_array(x, table, main_program=None):
"""
This function creates an operator to convert an LOD_Tensor to
an array.
"""
helper = LayerHelper("lod_tensor_to_array", **locals())
array = helper.create_variable(
name=unique_name("lod_tensor_to_array"),
......@@ -861,6 +1044,10 @@ def lod_tensor_to_array(x, table, main_program=None):
def array_to_lod_tensor(x, table, main_program=None):
"""
This function creates an operator to convert an array to a
LOD_Tensor.
"""
helper = LayerHelper("array_to_lod_tensor", **locals())
tmp = helper.create_tmp_variable(dtype=x.data_type)
helper.append_op(
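# Usage sketch for the three LOD helpers above: build a rank table from x,
# split x into a tensor array by that table, then reassemble it. The `layers`
# import path is an assumption; at run time x must be fed with LOD
# information for level 0 to be meaningful.
import layers

x = layers.data(name='x', shape=[10], data_type='float32')
table = layers.lod_rank_table(x, level=0)
arr = layers.lod_tensor_to_array(x, table)
x_back = layers.array_to_lod_tensor(arr, table)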
......@@ -872,6 +1059,11 @@ def array_to_lod_tensor(x, table, main_program=None):
def fill_constant(shape, dtype, value, main_program=None):
"""
This function creates a tensor with the shape given in the input and the
specified dtype, and fills it with the constant value passed in.
It also sets stop_gradient to True on the result.
"""
helper = LayerHelper("fill_constant", **locals())
out = helper.create_tmp_variable(dtype=dtype)
helper.append_op(
......@@ -888,14 +1080,27 @@ def fill_constant(shape, dtype, value, main_program=None):
def ones(shape, dtype, main_program=None):
"""
This function behaves like fill_constant() declared above,
with the constant value fixed at 1.0.
"""
return fill_constant(value=1.0, **locals())
def zeros(shape, dtype, main_program=None):
"""
This function behaves like fill_constant() declared above,
with the constant value fixed at 0.0.
"""
return fill_constant(value=0.0, **locals())
def increment(x, value=1.0, in_place=True, main_program=None):
"""
This function creates an operator that increments each value in the input
`x` by the given `value`. The operation is performed in place by default.
"""
helper = LayerHelper("increment", **locals())
if in_place:
out = x
......@@ -910,6 +1115,10 @@ def increment(x, value=1.0, in_place=True, main_program=None):
def array_write(x, i, array=None, main_program=None):
"""
This function creates an operator that writes the input `x` into the
LOD_TENSOR_ARRAY `array` at the position given by `i`.
"""
helper = LayerHelper('array_write', **locals())
if array is None:
array = helper.create_variable(
......@@ -925,6 +1134,10 @@ def array_write(x, i, array=None, main_program=None):
def array_read(array, i, main_program=None):
"""
This function creates an operator that reads the element at position `i`
from the LOD_TENSOR_ARRAY `array`.
"""
helper = LayerHelper('array_read', **locals())
if not isinstance(
array,
......@@ -940,6 +1153,10 @@ def array_read(array, i, main_program=None):
def shrink_memory(x, i, table, main_program=None):
"""
This function creates an operator to shrink the RNN memory using the
RankTable given in the input parameters.
"""
helper = LayerHelper('shrink_memory', **locals())
out = helper.create_tmp_variable(dtype=x.data_type)
helper.append_op(
......@@ -953,6 +1170,10 @@ def shrink_memory(x, i, table, main_program=None):
def array_length(array, main_program=None):
"""
This function creates an operator to find the length of the
LOD_TENSOR_ARRAY.
"""
helper = LayerHelper('array_length', **locals())
tmp = helper.create_tmp_variable(dtype='int64')
tmp.stop_gradient = True
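# Usage sketch for the tensor-array helpers above, driven by an int64
# counter created with zeros() and advanced with increment(). The `layers`
# import path is an assumption.
import layers

x = layers.data(name='x', shape=[8], data_type='float32')
i = layers.zeros(shape=[1], dtype='int64')     # position counter, starts at 0
arr = layers.array_write(x, i)                 # arr[0] = x
i = layers.increment(x=i, in_place=True)       # i becomes 1
arr = layers.array_write(x, i, array=arr)      # arr[1] = x
y = layers.array_read(arr, i)                  # read back arr[1]
n = layers.array_length(arr)                   # number of elements written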
......