From 88cb8eeac4b62d32ef74708e7658effb5f96b53e Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sun, 5 Mar 2017 12:07:39 +0800 Subject: [PATCH] Complete documentation for v2. --- doc/api/index_cn.rst | 26 +++++++- doc/api/index_en.rst | 8 +++ doc/api/v2/data.rst | 79 ++++++++++++++++++++++++- doc/api/v2/model_configs.rst | 7 +++ doc/api/v2/run_logic.rst | 22 +++++++ doc/design/reader/README.md | 74 +++++++++++------------ python/paddle/v2/data_feeder.py | 49 +++++++-------- python/paddle/v2/dataset/__init__.py | 3 + python/paddle/v2/dataset/cifar.py | 2 + python/paddle/v2/dataset/conll05.py | 6 +- python/paddle/v2/dataset/imdb.py | 2 + python/paddle/v2/dataset/imikolov.py | 2 + python/paddle/v2/dataset/mnist.py | 21 +++++++ python/paddle/v2/dataset/movielens.py | 5 ++ python/paddle/v2/dataset/sentiment.py | 11 ++-- python/paddle/v2/dataset/uci_housing.py | 5 ++ python/paddle/v2/event.py | 4 ++ python/paddle/v2/optimizer.py | 11 +++- python/paddle/v2/parameters.py | 6 ++ python/paddle/v2/reader/__init__.py | 65 +++++++++++++++++--- python/paddle/v2/reader/creator.py | 6 +- python/paddle/v2/reader/decorator.py | 50 +++++++++++----- python/paddle/v2/trainer.py | 26 +++++--- 23 files changed, 381 insertions(+), 109 deletions(-) create mode 100644 doc/api/v2/run_logic.rst diff --git a/doc/api/index_cn.rst b/doc/api/index_cn.rst index 874dd9cb227..fca981221e4 100644 --- a/doc/api/index_cn.rst +++ b/doc/api/index_cn.rst @@ -1,2 +1,26 @@ API -=== \ No newline at end of file +=== + +模型配置 API +------------ + +.. toctree:: + :maxdepth: 1 + + v2/model_configs.rst + +数据 API +-------- + +.. toctree:: + :maxdepth: 1 + + v2/data.rst + +训练 API +-------- + +.. toctree:: + :maxdepth: 1 + + v2/run_logic.rst \ No newline at end of file diff --git a/doc/api/index_en.rst b/doc/api/index_en.rst index deeeed26fa6..f0ad0fb2aee 100644 --- a/doc/api/index_en.rst +++ b/doc/api/index_en.rst @@ -16,3 +16,11 @@ Data API :maxdepth: 1 v2/data.rst + +Train API +--------- + +.. 
toctree:: + :maxdepth: 1 + + v2/run_logic.rst \ No newline at end of file diff --git a/doc/api/v2/data.rst b/doc/api/v2/data.rst index 65e57f2344b..49f70b3671b 100644 --- a/doc/api/v2/data.rst +++ b/doc/api/v2/data.rst @@ -2,5 +2,80 @@ DataTypes ######### -.. automodule:: paddle.v2.data_type - :members: +.. automodule:: paddle.v2.data_type + :members: + +########## +DataFeeder +########## + +.. automodule:: paddle.v2.data_feeder + :members: + +###### +Reader +###### + +.. automodule:: paddle.v2.reader + :members: + +.. automodule:: paddle.v2.reader.creator + :members: + +####### +Dataset +####### + +.. automodule:: paddle.v2.dataset + :members: + + +mnist ++++++ + +.. automodule:: paddle.v2.dataset.mnist + :members: + + +cifar ++++++ + +.. automodule:: paddle.v2.dataset.cifar + :members: + +conll05 ++++++++ + +.. automodule:: paddle.v2.dataset.conll05 + :members: + +imdb +++++ + +.. automodule:: paddle.v2.dataset.imdb + :members: + +imikolov +++++++++ + +.. automodule:: paddle.v2.dataset.imikolov + :members: + +movielens ++++++++++ + +.. automodule:: paddle.v2.dataset.movielens + :members: + +sentiment ++++++++++ + +.. automodule:: paddle.v2.dataset.sentiment + :members: + +uci_housing ++++++++++++ + +.. automodule:: paddle.v2.dataset.uci_housing + :members: + diff --git a/doc/api/v2/model_configs.rst b/doc/api/v2/model_configs.rst index b848bd7045a..52e2dcd5305 100644 --- a/doc/api/v2/model_configs.rst +++ b/doc/api/v2/model_configs.rst @@ -33,3 +33,10 @@ Networks .. automodule:: paddle.v2.networks :members: + +========== +Optimizers +========== + +.. automodule:: paddle.v2.optimizer + :members: diff --git a/doc/api/v2/run_logic.rst b/doc/api/v2/run_logic.rst new file mode 100644 index 00000000000..522093f8aee --- /dev/null +++ b/doc/api/v2/run_logic.rst @@ -0,0 +1,22 @@ +========== +Parameters +========== + +.. automodule:: paddle.v2.parameters + :members: + + +======= +Trainer +======= + +.. 
automodule:: paddle.v2.trainer + :members: + + +===== +Event +===== + +.. automodule:: paddle.v2.event + :members: diff --git a/doc/design/reader/README.md b/doc/design/reader/README.md index 03119fdd745..f21f7af520d 100644 --- a/doc/design/reader/README.md +++ b/doc/design/reader/README.md @@ -23,19 +23,19 @@ An example implementation for single item data reader creator: ```python def reader_creator_random_image(width, height): - def reader(): - while True: - yield numpy.random.uniform(-1, 1, size=width*height) - return reader + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height) + return reader ``` An example implementation for multiple item data reader creator: ```python -def reader_creator_random_imageand_label(widht, height, label): - def reader(): - while True: - yield numpy.random.uniform(-1, 1, size=width*height), label - return reader +def reader_creator_random_image_and_label(width, height, label): + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height), label + return reader ``` ## Batch Reader Interface @@ -74,11 +74,11 @@ mnist_train_batch_reader = paddle.batch(mnist_train, 128) Also easy to create custom batch reader: ```python def custom_batch_reader(): - while True: - batch = [] - for i in xrange(128): - batch.append((numpy.random.uniform(-1, 1, 28*28),)) # note that it's a tuple being appended. - yield batch + while True: + batch = [] + for i in xrange(128): + batch.append((numpy.random.uniform(-1, 1, 28*28),)) # note that it's a tuple being appended. 
+ yield batch mnist_random_image_batch_reader = custom_batch_reader ``` @@ -123,16 +123,16 @@ We can do: ```python def reader_creator_random_image(width, height): - def reader(): - while True: - yield numpy.random.uniform(-1, 1, size=width*height) - return reader + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height) + return reader def reader_creator_bool(t): - def reader: - while True: - yield t - return reader + def reader(): + while True: + yield t + return reader true_reader = reader_creator_bool(True) false_reader = reader_creator_bool(False) @@ -172,18 +172,18 @@ We decided to use dictionary (`{"image":0, "label":1}`) instead of list (`["imag ```python def image_reader_creator(image_path, label_path, n): - def reader(): - f = open(image_path) - l = open(label_path) - images = numpy.fromfile( - f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32') - images = images / 255.0 * 2.0 - 1.0 - labels = numpy.fromfile(l, 'ubyte', count=n).astype("int") - for i in xrange(n): - yield images[i, :], labels[i] # a single entry of data is created each time - f.close() - l.close() - return reader + def reader(): + f = open(image_path) + l = open(label_path) + images = numpy.fromfile( + f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32') + images = images / 255.0 * 2.0 - 1.0 + labels = numpy.fromfile(l, 'ubyte', count=n).astype("int") + for i in xrange(n): + yield images[i, :], labels[i] # a single entry of data is created each time + f.close() + l.close() + return reader # images_reader_creator creates a reader reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024) @@ -196,7 +196,7 @@ An example implementation of paddle.train could be: ```python def train(batch_reader, mapping, batch_size, total_pass): - for pass_idx in range(total_pass): - for mini_batch in batch_reader(): # this loop will never end in online learning. 
- do_forward_backward(mini_batch, mapping) + for pass_idx in range(total_pass): + for mini_batch in batch_reader(): # this loop will never end in online learning. + do_forward_backward(mini_batch, mapping) ``` diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py index 3b106e100cf..b7465238be8 100644 --- a/python/paddle/v2/data_feeder.py +++ b/python/paddle/v2/data_feeder.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from py_paddle import swig_paddle from py_paddle import DataProviderConverter + import data_type __all__ = ['DataFeeder'] @@ -29,7 +29,10 @@ class DataFeeder(DataProviderConverter): to feed it to C++ interface. The example usage: - + + + .. code-block:: python + data_types = [('image', paddle.data_type.dense_vector(784)), ('label', paddle.data_type.integer_value(10))] reader_dict = {'image':0, 'label':1} @@ -43,20 +46,24 @@ class DataFeeder(DataProviderConverter): # [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ] # second sample # ] arg = feeder(minibatch_data) + + .. note:: + + This module is for internal use only. Users should use the `reader` + interface. + + + + :param data_types: A list to specify data name and type. Each item is + a tuple of (data_name, data_type). + + :type data_types: list + :param reader_dict: A dictionary to specify the position of each data + in the input data. + :type reader_dict: dict """ def __init__(self, data_types, reader_dict): - """ - :param data_types: A list to specify data name and type. Each item is - a tuple of (data_name, data_type). For example: - [('image', paddle.data_type.dense_vector(784)), - ('label', paddle.data_type.integer_value(10))] - - :type data_types: A list of tuple - :param reader_dict: A dictionary to specify the position of each data - in the input data. 
- :type reader_dict: dict() - """ self.input_names = [] input_types = [] self.reader_dict = reader_dict @@ -70,22 +77,12 @@ class DataFeeder(DataProviderConverter): """ :param dat: A list of mini-batch data. Each sample is a list or tuple one feature or multiple features. - for example: - [ - ([0.2, 0.2], ), # first sample - ([0.8, 0.3], ), # second sample - ] - or, - [ - [[0.2, 0.2], ], # first sample - [[0.8, 0.3], ], # second sample - ] - - :type dat: List + + :type dat: list :param argument: An Arguments object contains this mini-batch data with one or multiple features. The Arguments definition is in the API. - :type argument: swig_paddle.Arguments + :type argument: py_paddle.swig_paddle.Arguments """ def reorder_data(data): diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py index 82f11a7c411..9ef0277609d 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/v2/dataset/__init__.py @@ -11,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +Dataset package. +""" import mnist import imikolov diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py index 5c6f5d85567..d9f7a830ee6 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/v2/dataset/cifar.py @@ -13,6 +13,8 @@ # limitations under the License. """ CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html + +TODO(yuyang18): Complete the comments. """ import cPickle diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py index e96a701c1a9..9eab49ee393 100644 --- a/python/paddle/v2/dataset/conll05.py +++ b/python/paddle/v2/dataset/conll05.py @@ -16,15 +16,17 @@ import tarfile import gzip import itertools from common import download - -__all__ = ['test, get_dict', 'get_embedding'] """ Conll 2005 dataset. 
Paddle semantic role labeling Book and demo use this dataset as an example. Because Conll 2005 is not free in public, the default downloaded URL is test set of Conll 2005 (which is public). Users can change URL and MD5 to their Conll dataset. + +TODO(yuyang18): Complete comments. """ +__all__ = ['test, get_dict', 'get_embedding'] + DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' DATA_MD5 = '387719152ae52d60422c016e92a742fc' WORDDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt' diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py index f27756a38a9..76019d9f540 100644 --- a/python/paddle/v2/dataset/imdb.py +++ b/python/paddle/v2/dataset/imdb.py @@ -13,6 +13,8 @@ # limitations under the License. """ IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz + +TODO(yuyang18): Complete comments. """ import paddle.v2.dataset.common diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py index deb556942d9..97c160f111d 100644 --- a/python/paddle/v2/dataset/imikolov.py +++ b/python/paddle/v2/dataset/imikolov.py @@ -13,6 +13,8 @@ # limitations under the License. """ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/ + +TODO(yuyang18): Complete comments. """ import paddle.v2.dataset.common import tarfile diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index 6a621a2aaad..16f2fcb99de 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -13,6 +13,9 @@ # limitations under the License. """ MNIST dataset. + +This module will download dataset from http://yann.lecun.com/exdb/mnist/ and +parse train set and test set into paddle reader creators. """ import paddle.v2.dataset.common import subprocess @@ -72,6 +75,15 @@ def reader_creator(image_filename, label_filename, buffer_size): def train(): + """ + MNIST train set creator. 
+ + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 9]. + + :return: Train reader creator + :rtype: callable + """ return reader_creator( paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5), @@ -80,6 +92,15 @@ def train(): def test(): + """ + MNIST test set creator. + + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 9]. + + :return: Test reader creator. + :rtype: callable + """ return reader_creator( paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5), diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index c22bcfa38b5..dc65e8f8b6f 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -11,6 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +Movielens 1-M dataset. + +TODO(yuyang18): Complete comments. +""" import zipfile from common import download diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py index cbd08fa7368..71689fd61b6 100644 --- a/python/paddle/v2/dataset/sentiment.py +++ b/python/paddle/v2/dataset/sentiment.py @@ -15,18 +15,19 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -The script fetch and preprocess movie_reviews data set +The script fetch and preprocess movie_reviews data set that provided by NLTK -that provided by NLTK +TODO(yuyang18): Complete dataset. 
""" -import common import collections -import nltk -import numpy as np from itertools import chain + +import nltk from nltk.corpus import movie_reviews +import common + __all__ = ['train', 'test', 'get_word_dict'] NUM_TRAINING_INSTANCES = 1600 NUM_TOTAL_INSTANCES = 2000 diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py index b5a0537af66..27f454b137e 100644 --- a/python/paddle/v2/dataset/uci_housing.py +++ b/python/paddle/v2/dataset/uci_housing.py @@ -11,6 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +UCI Housing dataset. + +TODO(yuyang18): Complete comments. +""" import numpy as np import os diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py index a429e36b63c..1ad52b8baa4 100644 --- a/python/paddle/v2/event.py +++ b/python/paddle/v2/event.py @@ -34,6 +34,10 @@ class WithMetric(object): class TestResult(WithMetric): + """ + Result that trainer.test return. + """ + def __init__(self, evaluator, cost): super(TestResult, self).__init__(evaluator) self.cost = cost diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index 10e255dc945..1a01d95c205 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -1,7 +1,12 @@ import py_paddle.swig_paddle as swig_api -import paddle.trainer_config_helpers.optimizers as v1_optimizers + import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils -import paddle.v2 +import paddle.trainer_config_helpers.optimizers as v1_optimizers +""" +Optimizers(update equation) for SGD method. + +TODO(yuyang18): Complete comments. 
+""" __all__ = [ 'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta', @@ -44,7 +49,7 @@ class Optimizer(object): class Momentum(Optimizer): def __init__(self, momentum=None, sparse=False, **kwargs): learning_method = v1_optimizers.MomentumOptimizer( - momentum=None, sparse=False) + momentum=momentum, sparse=sparse) super(Momentum, self).__init__( learning_method=learning_method, **kwargs) diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index 2a6026bcab1..7ee388f067c 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -10,6 +10,7 @@ __all__ = ['Parameters', 'create'] def create(layers): """ Create parameter pool by topology. + :param layers: :return: """ @@ -67,6 +68,7 @@ class Parameters(object): def keys(self): """ keys are the names of each parameter. + :return: list of parameter name :rtype: list """ @@ -75,6 +77,7 @@ class Parameters(object): def names(self): """ names of each parameter. + :return: list of parameter name :rtype: list """ @@ -83,6 +86,7 @@ class Parameters(object): def has_key(self, key): """ has_key return true if there are such parameter name == key + :param key: Parameter name :type key: basestring :return: True if contains such key @@ -136,6 +140,7 @@ class Parameters(object): def get_shape(self, key): """ get shape of the parameter. + :param key: parameter name :type key: basestring :return: parameter's shape @@ -190,6 +195,7 @@ class Parameters(object): def set(self, parameter_name, value): """ Set parameter by parameter name & matrix. + :param parameter_name: parameter name :type parameter_name: basestring :param value: parameter matrix diff --git a/python/paddle/v2/reader/__init__.py b/python/paddle/v2/reader/__init__.py index 7373dc461b1..3b059735a92 100644 --- a/python/paddle/v2/reader/__init__.py +++ b/python/paddle/v2/reader/__init__.py @@ -11,15 +11,64 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. +""" +At training and testing time, PaddlePaddle programs need to read data. To ease +the users' work to write data reading code, we define that -# It would be too lengthy to require our users to prefix decorators with `decorator`. -# For example, we want the following line -# -# r = paddle.reader.decorator.bufferd(paddle.reader.creator.text("hello.txt")) -# -# to be a shorter version: -# -# r = paddle.reader.buffered(paddle.reader.creator.text("hello.txt")) +- A *reader* is a function that reads data (from file, network, random number + generator, etc) and yields data items. +- A *reader creator* is a function that returns a reader function. +- A *reader decorator* is a function, which accepts one or more readers, and + returns a reader. +- A *batch reader* is a function that reads data (from *reader*, file, network, + random number generator, etc) and yields a batch of data items. + +##################### +Data Reader Interface +##################### + +Indeed, *data reader* doesn't have to be a function that reads and yields data +items. It can be any function with no parameter that creates an iterable +(anything that can be used in :code:`for x in iterable`)\: + +.. code-block:: python + + iterable = data_reader() + +Element produced from the iterable should be a **single** entry of data, +**not** a mini batch. That entry of data could be a single item, or a tuple of +items. +Item should be of `supported type `_ (e.g., numpy 1d +array of float32, int, list of int) + +An example implementation for single item data reader creator: + +.. code-block:: python + + def reader_creator_random_image(width, height): + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height) + return reader + +An example implementation for multiple item data reader creator: + +.. 
code-block:: python + + def reader_creator_random_image_and_label(width, height, label): + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height), label + return reader + + +TODO(yuyang18): Should we add whole design doc here? +""" + +import decorator from decorator import * import creator + +__all__ = decorator.__all__ + ['creator'] diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py index 5a91bb0b8ef..07142056f87 100644 --- a/python/paddle/v2/reader/creator.py +++ b/python/paddle/v2/reader/creator.py @@ -11,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +Creator package contains some simple reader creator, which could be used in user +program. +""" __all__ = ['np_array', 'text_file'] @@ -38,7 +42,7 @@ def np_array(x): def text_file(path): """ Creates a data reader that outputs text line by line from given text file. - Trailing new line ('\n') of each line will be removed. + Trailing new line ('\\\\n') of each line will be removed. :path: path of the text file. :returns: data reader of text file diff --git a/python/paddle/v2/reader/decorator.py b/python/paddle/v2/reader/decorator.py index b7657e27764..40124997247 100644 --- a/python/paddle/v2/reader/decorator.py +++ b/python/paddle/v2/reader/decorator.py @@ -12,25 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__all__ = [ - 'map_readers', 'buffered', 'compose', 'chain', 'shuffle', - 'ComposeNotAligned', 'batched', 'firstn' -] - import itertools import random from Queue import Queue from threading import Thread +__all__ = [ + 'map_readers', 'buffered', 'compose', 'chain', 'shuffle', + 'ComposeNotAligned', 'batched', 'firstn' +] + def map_readers(func, *readers): """ Creates a data reader that outputs return value of function using output of each data readers as arguments. - :param func: function to use. - :param *readers: readers whose outputs will be used as arguments of func. - :returns: the created data reader. + :param func: function to use. The type of func should be (Sample) => Sample + :type func: callable + :param readers: readers whose outputs will be used as arguments of func. + :return: the created data reader. + :rtype: callable """ def reader(): @@ -45,16 +47,19 @@ def map_readers(func, *readers): def shuffle(reader, buf_size): """ - Creates a data reader whose data output is suffled. + Creates a data reader whose data output is shuffled. Output from the iterator that created by original reader will be buffered into shuffle buffer, and then shuffled. The size of shuffle buffer is determined by argument buf_size. :param reader: the original reader whose output will be shuffled. + :type reader: callable :param buf_size: shuffle buffer size. + :type buf_size: int - :returns:the new reader whose output is shuffled. + :return: the new reader whose output is shuffled. + :rtype: callable """ def data_reader(): @@ -88,7 +93,8 @@ def chain(*readers): [0, 0, 0, 1, 1, 1, 2, 2, 2] :param readers: input readers. - :returns: the new data reader. + :return: the new data reader. + :rtype: callable """ def reader(): @@ -115,12 +121,13 @@ def compose(*readers, **kwargs): The composed reader will output: (1, 2, 3, 4, 5) - :*readers: readers that will be composed together. 
- :check_alignment: if True, will check if input readers are aligned + :param readers: readers that will be composed together. + :param check_alignment: if True, will check if input readers are aligned correctly. If False, will not check alignment and trailing outputs will be discarded. Defaults to True. + :type check_alignment: bool - :returns: the new data reader. + :return: the new data reader. :raises ComposeNotAligned: outputs of readers are not aligned. Will not raise when check_alignment is set to False. @@ -161,7 +168,9 @@ def buffered(reader, size): as the buffer is not empty. :param reader: the data reader to read from. + :type reader: callable :param size: max buffer size. + :type size: int :returns: the buffered data reader. """ @@ -196,9 +205,13 @@ def buffered(reader, size): def batched(reader, batch_size): """ Create a batched reader. + :param reader: the data reader to read from. - :param batch_size: batch_size + :type reader: callable + :param batch_size: size of each mini-batch + :type batch_size: int :return: the batched reader. + :rtype: callable """ def batched_reader(): @@ -218,6 +231,13 @@ def batched(reader, batch_size): def firstn(reader, n): """ Limit the max number of samples that reader could return. + + :param reader: the data reader to read from. + :type reader: callable + :param n: the max number of samples that return. + :type n: int + :return: the decorated reader. + :rtype: callable """ # TODO(yuyang18): Check if just drop the reader, could clean the opened diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index 8bcdd122b30..7b706721367 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -9,6 +9,10 @@ from . import optimizer as v2_optimizer from . import parameters as v2_parameters __all__ = ['SGD'] +""" +Trainer package +TODO(yuyang18): Complete comments. 
+""" def default_event_handler(event): @@ -22,14 +26,20 @@ def default_event_handler(event): pass -class SGD(): - def __init__(self, cost, parameters, update_equation): - """ - Simple SGD Trainer. +class SGD(object): + """ + Simple SGD Trainer. + TODO(yuyang18): Complete comments + + :param update_equation: The optimizer object. + :type update_equation: paddle.v2.optimizer.Optimizer + :param cost: Target cost that neural network should be optimized. + :type cost: paddle.v2.config_base.Layer + :param parameters: The parameters dictionary. + :type parameters: paddle.v2.parameters.Parameters + """ - :param update_equation: The optimizer object. - :type update_equation: v2_optimizer.Optimizer - """ + def __init__(self, cost, parameters, update_equation): if not isinstance(parameters, v2_parameters.Parameters): raise TypeError('parameters should be parameters') @@ -56,8 +66,6 @@ class SGD(): Training method. Will train num_passes of input data. :param reader: - :param topology: Network Topology, use one or more Layers to represent it. - :param parameters: The parameter pools. :param num_passes: The total train passes. :param event_handler: Event handler. A method will be invoked when event occurred. -- GitLab