diff --git a/doc/api/index_en.rst b/doc/api/index_en.rst index b7f470e1f8a9a1c720e7d70832ec069339ddc60f..deeeed26fa6ee24451dff6dad619438440ed60e8 100644 --- a/doc/api/index_en.rst +++ b/doc/api/index_en.rst @@ -7,4 +7,12 @@ Model Config API .. toctree:: :maxdepth: 1 - v2/model_configs.rst \ No newline at end of file + v2/model_configs.rst + +Data API +-------- + +.. toctree:: + :maxdepth: 1 + + v2/data.rst diff --git a/doc/api/v2/data.rst b/doc/api/v2/data.rst new file mode 100644 index 0000000000000000000000000000000000000000..65e57f2344bd5ee6a68b491d2eb8317a61852422 --- /dev/null +++ b/doc/api/v2/data.rst @@ -0,0 +1,6 @@ +######### +DataTypes +######### + +.. automodule:: paddle.v2.data_type + :members: diff --git a/python/paddle/trainer/PyDataProvider2.py b/python/paddle/trainer/PyDataProvider2.py index 4e3c4db853205bb12272e86295784a6069483ffe..0e752c117c1ecfab72e2da2f830380e9524236e7 100644 --- a/python/paddle/trainer/PyDataProvider2.py +++ b/python/paddle/trainer/PyDataProvider2.py @@ -45,6 +45,23 @@ class CacheType(object): class InputType(object): + """ + InputType is the base class for paddle input types. + + .. note:: + + this is a base class, and should never be used by user. + + :param dim: dimension of input. If the input is an integer, it means the + value range. Otherwise, it means the size of layer. + :type dim: int + :param seq_type: sequence type of input. 0 means it is not a sequence. 1 + means it is a variable length sequence. 2 means it is a + nested sequence. + :type seq_type: int + :param type: data type of input. + :type type: int + """ __slots__ = ['dim', 'seq_type', 'type'] def __init__(self, dim, seq_type, tp): @@ -54,20 +71,61 @@ class InputType(object): def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE): + """ + Dense Vector. It means the input feature is dense float vector. For example, + if the input is an image with 28*28 pixels, the input of Paddle neural + network should be a dense vector with dimension 784. 
+ + :param dim: dimension of this vector. + :type dim: int + :param seq_type: sequence type of input. + :type seq_type: int + :return: An input type object. + :rtype: InputType + """ return InputType(dim, seq_type, DataType.Dense) def sparse_non_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE): + """ + Sparse binary vector. It means the input feature is a sparse vector and + every element in this vector is either zero or one. + + :param dim: dimension of this vector. + :type dim: int + :param seq_type: sequence type of this input. + :type seq_type: int + :return: An input type object. + :rtype: InputType + """ return InputType(dim, seq_type, DataType.SparseNonValue) def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE): + """ + Sparse vector. It means the input feature is a sparse vector. Most of the + elements in this vector are zero; the others can be any float value. + + :param dim: dimension of this vector. + :type dim: int + :param seq_type: sequence type of this input. + :type seq_type: int + :return: An input type object. + :rtype: InputType + """ return InputType(dim, seq_type, DataType.SparseValue) def index_slot(value_range, seq_type=SequenceType.NO_SEQUENCE): - """Data type of integer. + """ + Data type of an integer. + + :param seq_type: sequence type of this input. + :type seq_type: int :param value_range: range of this integer. + :type value_range: int + :return: An input type object + :rtype: InputType """ return InputType(value_range, seq_type, DataType.Index) @@ -76,10 +134,17 @@ dense_vector = dense_slot sparse_binary_vector = sparse_non_value_slot sparse_vector = sparse_value_slot integer_value = index_slot -integer_value.__doc__ = index_slot.__doc__ def dense_vector_sequence(dim): + """ + Data type of a sequence of dense vectors. + + :param dim: dimension of each dense vector.
+ :type dim: int + :return: An input type object + :rtype: InputType + """ return dense_vector(dim, seq_type=SequenceType.SEQUENCE) @@ -88,6 +153,15 @@ def dense_vector_sub_sequence(dim): def sparse_binary_vector_sequence(dim): + """ + Data type of a sequence of sparse vectors in which every element is either + zero or one. + + :param dim: dimension of each sparse vector. + :type dim: int + :return: An input type object + :rtype: InputType + """ return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE) @@ -96,6 +170,15 @@ def sparse_binary_vector_sub_sequence(dim): def sparse_vector_sequence(dim): + """ + Data type of a sequence of sparse vectors in which most elements are zero + and the others can be any float value. + + :param dim: dimension of each sparse vector. + :type dim: int + :return: An input type object + :rtype: InputType + """ return sparse_vector(dim, seq_type=SequenceType.SEQUENCE) @@ -104,8 +187,11 @@ def sparse_vector_sub_sequence(dim): def integer_value_sequence(value_range): - """Data type of a sequence of integer. + """ + Data type of a sequence of integers. + :param value_range: range of each element. + :type value_range: int """ return integer_value(value_range, seq_type=SequenceType.SEQUENCE) @@ -115,7 +201,6 @@ def integer_value_sub_sequence(dim): integer_sequence = integer_value_sequence -integer_sequence.__doc__ = integer_value_sequence.__doc__ class SingleSlotWrapper(object): diff --git a/python/paddle/v2/data_type.py b/python/paddle/v2/data_type.py index 522ddfdaacce44be7cf27bdbfc1009d4a0c0bbe6..d582f76ddf01ed3430a1d075624bbb8e0bf3f2a9 100644 --- a/python/paddle/v2/data_type.py +++ b/python/paddle/v2/data_type.py @@ -12,11 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License.
-from paddle.trainer.PyDataProvider2 import \ - InputType, DataType, dense_vector, sparse_binary_vector,\ - sparse_vector, integer_value, integer_value_sequence +import paddle.trainer.PyDataProvider2 as pydp2 -__all__ = [ - 'InputType', 'DataType', 'dense_vector', 'sparse_binary_vector', - 'sparse_vector', 'integer_value', 'integer_value_sequence' +import_list = [ + nm for nm in dir(pydp2) + if '_' in nm and nm[0] != '_' and ('value' in nm or 'vector' in nm) ] +import_list.extend(['InputType']) + +for nm in import_list: + globals()[nm] = getattr(pydp2, nm) + +__all__ = import_list