提交 88cb8eea 编写于 作者: Y Yu Yang

Complete documentation for v2.

上级 fd41a87a
API API
=== ===
\ No newline at end of file
模型配置 API
------------
.. toctree::
:maxdepth: 1
v2/model_configs.rst
数据 API
--------
.. toctree::
:maxdepth: 1
v2/data.rst
训练 API
--------
.. toctree::
:maxdepth: 1
v2/run_logic.rst
\ No newline at end of file
...@@ -16,3 +16,11 @@ Data API ...@@ -16,3 +16,11 @@ Data API
:maxdepth: 1 :maxdepth: 1
v2/data.rst v2/data.rst
Train API
---------
.. toctree::
:maxdepth: 1
v2/run_logic.rst
\ No newline at end of file
...@@ -2,5 +2,80 @@ ...@@ -2,5 +2,80 @@
DataTypes DataTypes
######### #########
.. automodule:: paddle.v2.data_type .. automodule:: paddle.v2.data_type
:members: :members:
##########
DataFeeder
##########
.. automodule:: paddle.v2.data_feeder
:members:
######
Reader
######
.. automodule:: paddle.v2.reader
:members:
.. automodule:: paddle.v2.reader.creator
:members:
#######
Dataset
#######
.. automodule:: paddle.v2.dataset
:members:
mnist
+++++
.. automodule:: paddle.v2.dataset.mnist
:members:
cifar
+++++
.. automodule:: paddle.v2.dataset.cifar
:members:
conll05
+++++++
.. automodule:: paddle.v2.dataset.conll05
:members:
imdb
++++
.. automodule:: paddle.v2.dataset.imdb
:members:
imikolov
++++++++
.. automodule:: paddle.v2.dataset.imikolov
:members:
movielens
+++++++++
.. automodule:: paddle.v2.dataset.movielens
:members:
sentiment
+++++++++
.. automodule:: paddle.v2.dataset.sentiment
:members:
uci_housing
+++++++++++
.. automodule:: paddle.v2.dataset.uci_housing
:members:
...@@ -33,3 +33,10 @@ Networks ...@@ -33,3 +33,10 @@ Networks
.. automodule:: paddle.v2.networks .. automodule:: paddle.v2.networks
:members: :members:
==========
Optimizers
==========
.. automodule:: paddle.v2.optimizers
:members:
==========
Parameters
==========
.. automodule:: paddle.v2.parameters
:members:
=======
Trainer
=======
.. automodule:: paddle.v2.trainer
:members:
=====
Event
=====
.. automodule:: paddle.v2.event
:members:
...@@ -23,19 +23,19 @@ An example implementation for single item data reader creator: ...@@ -23,19 +23,19 @@ An example implementation for single item data reader creator:
```python ```python
def reader_creator_random_image(width, height): def reader_creator_random_image(width, height):
def reader(): def reader():
while True: while True:
yield numpy.random.uniform(-1, 1, size=width*height) yield numpy.random.uniform(-1, 1, size=width*height)
return reader return reader
``` ```
An example implementation for multiple item data reader creator: An example implementation for multiple item data reader creator:
```python ```python
def reader_creator_random_imageand_label(widht, height, label): def reader_creator_random_image_and_label(width, height, label):
def reader(): def reader():
while True: while True:
yield numpy.random.uniform(-1, 1, size=width*height), label yield numpy.random.uniform(-1, 1, size=width*height), label
return reader return reader
``` ```
## Batch Reader Interface ## Batch Reader Interface
...@@ -74,11 +74,11 @@ mnist_train_batch_reader = paddle.batch(mnist_train, 128) ...@@ -74,11 +74,11 @@ mnist_train_batch_reader = paddle.batch(mnist_train, 128)
Also easy to create custom batch reader: Also easy to create custom batch reader:
```python ```python
def custom_batch_reader(): def custom_batch_reader():
while True: while True:
batch = [] batch = []
for i in xrange(128): for i in xrange(128):
batch.append((numpy.random.uniform(-1, 1, 28*28),)) # note that it's a tuple being appended. batch.append((numpy.random.uniform(-1, 1, 28*28),)) # note that it's a tuple being appended.
yield batch yield batch
mnist_random_image_batch_reader = custom_batch_reader mnist_random_image_batch_reader = custom_batch_reader
``` ```
...@@ -123,16 +123,16 @@ We can do: ...@@ -123,16 +123,16 @@ We can do:
```python ```python
def reader_creator_random_image(width, height): def reader_creator_random_image(width, height):
def reader(): def reader():
while True: while True:
yield numpy.random.uniform(-1, 1, size=width*height) yield numpy.random.uniform(-1, 1, size=width*height)
return reader return reader
def reader_creator_bool(t): def reader_creator_bool(t):
def reader(): def reader():
while True: while True:
yield t yield t
return reader return reader
true_reader = reader_creator_bool(True) true_reader = reader_creator_bool(True)
false_reader = reader_creator_bool(False) false_reader = reader_creator_bool(False)
...@@ -172,18 +172,18 @@ We decided to use dictionary (`{"image":0, "label":1}`) instead of list (`["imag ...@@ -172,18 +172,18 @@ We decided to use dictionary (`{"image":0, "label":1}`) instead of list (`["imag
```python ```python
def image_reader_creator(image_path, label_path, n): def image_reader_creator(image_path, label_path, n):
def reader(): def reader():
f = open(image_path) f = open(image_path)
l = open(label_path) l = open(label_path)
images = numpy.fromfile( images = numpy.fromfile(
f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32') f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
images = images / 255.0 * 2.0 - 1.0 images = images / 255.0 * 2.0 - 1.0
labels = numpy.fromfile(l, 'ubyte', count=n).astype("int") labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
for i in xrange(n): for i in xrange(n):
yield images[i, :], labels[i] # a single entry of data is created each time yield images[i, :], labels[i] # a single entry of data is created each time
f.close() f.close()
l.close() l.close()
return reader return reader
# image_reader_creator creates a reader # image_reader_creator creates a reader
reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024) reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024)
...@@ -196,7 +196,7 @@ An example implementation of paddle.train could be: ...@@ -196,7 +196,7 @@ An example implementation of paddle.train could be:
```python ```python
def train(batch_reader, mapping, batch_size, total_pass): def train(batch_reader, mapping, batch_size, total_pass):
for pass_idx in range(total_pass): for pass_idx in range(total_pass):
for mini_batch in batch_reader(): # this loop will never end in online learning. for mini_batch in batch_reader(): # this loop will never end in online learning.
do_forward_backward(mini_batch, mapping) do_forward_backward(mini_batch, mapping)
``` ```
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from py_paddle import swig_paddle
from py_paddle import DataProviderConverter from py_paddle import DataProviderConverter
import data_type import data_type
__all__ = ['DataFeeder'] __all__ = ['DataFeeder']
...@@ -29,7 +29,10 @@ class DataFeeder(DataProviderConverter): ...@@ -29,7 +29,10 @@ class DataFeeder(DataProviderConverter):
to feed it to C++ interface. to feed it to C++ interface.
The example usage: The example usage:
.. code-block:: python
data_types = [('image', paddle.data_type.dense_vector(784)), data_types = [('image', paddle.data_type.dense_vector(784)),
('label', paddle.data_type.integer_value(10))] ('label', paddle.data_type.integer_value(10))]
reader_dict = {'image':0, 'label':1} reader_dict = {'image':0, 'label':1}
...@@ -43,20 +46,24 @@ class DataFeeder(DataProviderConverter): ...@@ -43,20 +46,24 @@ class DataFeeder(DataProviderConverter):
# [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ] # second sample # [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ] # second sample
# ] # ]
arg = feeder(minibatch_data) arg = feeder(minibatch_data)
.. note::
This module is for internal use only. Users should use the `reader`
interface.
:param data_types: A list to specify data name and type. Each item is
a tuple of (data_name, data_type).
:type data_types: list
:param reader_dict: A dictionary to specify the position of each data
in the input data.
:type reader_dict: dict
""" """
def __init__(self, data_types, reader_dict): def __init__(self, data_types, reader_dict):
"""
:param data_types: A list to specify data name and type. Each item is
a tuple of (data_name, data_type). For example:
[('image', paddle.data_type.dense_vector(784)),
('label', paddle.data_type.integer_value(10))]
:type data_types: A list of tuple
:param reader_dict: A dictionary to specify the position of each data
in the input data.
:type reader_dict: dict()
"""
self.input_names = [] self.input_names = []
input_types = [] input_types = []
self.reader_dict = reader_dict self.reader_dict = reader_dict
...@@ -70,22 +77,12 @@ class DataFeeder(DataProviderConverter): ...@@ -70,22 +77,12 @@ class DataFeeder(DataProviderConverter):
""" """
:param dat: A list of mini-batch data. Each sample is a list or tuple :param dat: A list of mini-batch data. Each sample is a list or tuple
one feature or multiple features. one feature or multiple features.
for example:
[ :type dat: list
([0.2, 0.2], ), # first sample
([0.8, 0.3], ), # second sample
]
or,
[
[[0.2, 0.2], ], # first sample
[[0.8, 0.3], ], # second sample
]
:type dat: List
:param argument: An Arguments object contains this mini-batch data with :param argument: An Arguments object contains this mini-batch data with
one or multiple features. The Arguments definition is one or multiple features. The Arguments definition is
in the API. in the API.
:type argument: swig_paddle.Arguments :type argument: py_paddle.swig_paddle.Arguments
""" """
def reorder_data(data): def reorder_data(data):
......
...@@ -11,6 +11,9 @@ ...@@ -11,6 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
Dataset package.
"""
import mnist import mnist
import imikolov import imikolov
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
# limitations under the License. # limitations under the License.
""" """
CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
TODO(yuyang18): Complete the comments.
""" """
import cPickle import cPickle
......
...@@ -16,15 +16,17 @@ import tarfile ...@@ -16,15 +16,17 @@ import tarfile
import gzip import gzip
import itertools import itertools
from common import download from common import download
__all__ = ['test, get_dict', 'get_embedding']
""" """
Conll 2005 dataset. Paddle semantic role labeling Book and demo use this Conll 2005 dataset. Paddle semantic role labeling Book and demo use this
dataset as an example. Because Conll 2005 is not free in public, the default dataset as an example. Because Conll 2005 is not free in public, the default
downloaded URL is test set of Conll 2005 (which is public). Users can change downloaded URL is test set of Conll 2005 (which is public). Users can change
URL and MD5 to their Conll dataset. URL and MD5 to their Conll dataset.
TODO(yuyang18): Complete comments.
""" """
# Public API of this module. Every exported name must be its own string:
# a combined entry such as 'test, get_dict' makes `from ... import *` fail,
# because no attribute with that literal name exists.
__all__ = ['test', 'get_dict', 'get_embedding']
DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
DATA_MD5 = '387719152ae52d60422c016e92a742fc' DATA_MD5 = '387719152ae52d60422c016e92a742fc'
WORDDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt' WORDDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt'
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
# limitations under the License. # limitations under the License.
""" """
IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
TODO(yuyang18): Complete comments.
""" """
import paddle.v2.dataset.common import paddle.v2.dataset.common
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
# limitations under the License. # limitations under the License.
""" """
imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
TODO(yuyang18): Complete comments.
""" """
import paddle.v2.dataset.common import paddle.v2.dataset.common
import tarfile import tarfile
......
...@@ -13,6 +13,9 @@ ...@@ -13,6 +13,9 @@
# limitations under the License. # limitations under the License.
""" """
MNIST dataset. MNIST dataset.
This module will download dataset from http://yann.lecun.com/exdb/mnist/ and
parse train set and test set into paddle reader creators.
""" """
import paddle.v2.dataset.common import paddle.v2.dataset.common
import subprocess import subprocess
...@@ -72,6 +75,15 @@ def reader_creator(image_filename, label_filename, buffer_size): ...@@ -72,6 +75,15 @@ def reader_creator(image_filename, label_filename, buffer_size):
def train(): def train():
"""
MNIST train set creator.
It returns a reader creator, each sample in the reader is image pixels in
[0, 1] and label in [0, 9].
:return: Train reader creator
:rtype: callable
"""
return reader_creator( return reader_creator(
paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist',
TRAIN_IMAGE_MD5), TRAIN_IMAGE_MD5),
...@@ -80,6 +92,15 @@ def train(): ...@@ -80,6 +92,15 @@ def train():
def test(): def test():
"""
MNIST test set creator.
It returns a reader creator, each sample in the reader is image pixels in
[0, 1] and label in [0, 9].
:return: Test reader creator.
:rtype: callable
"""
return reader_creator( return reader_creator(
paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist',
TEST_IMAGE_MD5), TEST_IMAGE_MD5),
......
...@@ -11,6 +11,11 @@ ...@@ -11,6 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
Movielens 1-M dataset.
TODO(yuyang18): Complete comments.
"""
import zipfile import zipfile
from common import download from common import download
......
...@@ -15,18 +15,19 @@ ...@@ -15,18 +15,19 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
The script fetch and preprocess movie_reviews data set The script fetch and preprocess movie_reviews data set that provided by NLTK
that provided by NLTK TODO(yuyang18): Complete dataset.
""" """
import common
import collections import collections
import nltk
import numpy as np
from itertools import chain from itertools import chain
import nltk
from nltk.corpus import movie_reviews from nltk.corpus import movie_reviews
import common
__all__ = ['train', 'test', 'get_word_dict'] __all__ = ['train', 'test', 'get_word_dict']
NUM_TRAINING_INSTANCES = 1600 NUM_TRAINING_INSTANCES = 1600
NUM_TOTAL_INSTANCES = 2000 NUM_TOTAL_INSTANCES = 2000
......
...@@ -11,6 +11,11 @@ ...@@ -11,6 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
UCI Housing dataset.
TODO(yuyang18): Complete comments.
"""
import numpy as np import numpy as np
import os import os
......
...@@ -34,6 +34,10 @@ class WithMetric(object): ...@@ -34,6 +34,10 @@ class WithMetric(object):
class TestResult(WithMetric): class TestResult(WithMetric):
"""
Result that trainer.test returns.
"""
def __init__(self, evaluator, cost): def __init__(self, evaluator, cost):
super(TestResult, self).__init__(evaluator) super(TestResult, self).__init__(evaluator)
self.cost = cost self.cost = cost
......
import py_paddle.swig_paddle as swig_api import py_paddle.swig_paddle as swig_api
import paddle.trainer_config_helpers.optimizers as v1_optimizers
import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils
import paddle.v2 import paddle.trainer_config_helpers.optimizers as v1_optimizers
"""
Optimizers (update equations) for the SGD method.
TODO(yuyang18): Complete comments.
"""
__all__ = [ __all__ = [
'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta', 'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta',
...@@ -44,7 +49,7 @@ class Optimizer(object): ...@@ -44,7 +49,7 @@ class Optimizer(object):
class Momentum(Optimizer): class Momentum(Optimizer):
def __init__(self, momentum=None, sparse=False, **kwargs): def __init__(self, momentum=None, sparse=False, **kwargs):
learning_method = v1_optimizers.MomentumOptimizer( learning_method = v1_optimizers.MomentumOptimizer(
momentum=None, sparse=False) momentum=momentum, sparse=sparse)
super(Momentum, self).__init__( super(Momentum, self).__init__(
learning_method=learning_method, **kwargs) learning_method=learning_method, **kwargs)
......
...@@ -10,6 +10,7 @@ __all__ = ['Parameters', 'create'] ...@@ -10,6 +10,7 @@ __all__ = ['Parameters', 'create']
def create(layers): def create(layers):
""" """
Create parameter pool by topology. Create parameter pool by topology.
:param layers: :param layers:
:return: :return:
""" """
...@@ -67,6 +68,7 @@ class Parameters(object): ...@@ -67,6 +68,7 @@ class Parameters(object):
def keys(self): def keys(self):
""" """
keys are the names of each parameter. keys are the names of each parameter.
:return: list of parameter name :return: list of parameter name
:rtype: list :rtype: list
""" """
...@@ -75,6 +77,7 @@ class Parameters(object): ...@@ -75,6 +77,7 @@ class Parameters(object):
def names(self): def names(self):
""" """
names of each parameter. names of each parameter.
:return: list of parameter name :return: list of parameter name
:rtype: list :rtype: list
""" """
...@@ -83,6 +86,7 @@ class Parameters(object): ...@@ -83,6 +86,7 @@ class Parameters(object):
def has_key(self, key): def has_key(self, key):
""" """
has_key return true if there are such parameter name == key has_key return true if there are such parameter name == key
:param key: Parameter name :param key: Parameter name
:type key: basestring :type key: basestring
:return: True if contains such key :return: True if contains such key
...@@ -136,6 +140,7 @@ class Parameters(object): ...@@ -136,6 +140,7 @@ class Parameters(object):
def get_shape(self, key): def get_shape(self, key):
""" """
get shape of the parameter. get shape of the parameter.
:param key: parameter name :param key: parameter name
:type key: basestring :type key: basestring
:return: parameter's shape :return: parameter's shape
...@@ -190,6 +195,7 @@ class Parameters(object): ...@@ -190,6 +195,7 @@ class Parameters(object):
def set(self, parameter_name, value): def set(self, parameter_name, value):
""" """
Set parameter by parameter name & matrix. Set parameter by parameter name & matrix.
:param parameter_name: parameter name :param parameter_name: parameter name
:type parameter_name: basestring :type parameter_name: basestring
:param value: parameter matrix :param value: parameter matrix
......
...@@ -11,15 +11,64 @@ ...@@ -11,15 +11,64 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
At training and testing time, PaddlePaddle programs need to read data. To ease
the users' work to write data reading code, we define that
# It would be too lengthy to require our users to prefix decorators with `decorator`. - A *reader* is a function that reads data (from file, network, random number
# For example, we want the following line generator, etc) and yields data items.
# - A *reader creator* is a function that returns a reader function.
# r = paddle.reader.decorator.bufferd(paddle.reader.creator.text("hello.txt")) - A *reader decorator* is a function, which accepts one or more readers, and
# returns a reader.
# to be a shorter version: - A *batch reader* is a function that reads data (from *reader*, file, network,
# random number generator, etc) and yields a batch of data items.
# r = paddle.reader.buffered(paddle.reader.creator.text("hello.txt"))
#####################
Data Reader Interface
#####################
Indeed, *data reader* doesn't have to be a function that reads and yields data
items. It can be any function with no parameter that creates an iterable
(anything can be used in :code:`for x in iterable`)\:
.. code-block:: python
iterable = data_reader()
Element produced from the iterable should be a **single** entry of data,
**not** a mini batch. That entry of data could be a single item, or a tuple of
items.
Item should be of `supported type <http://www.paddlepaddle.org/doc/ui/data_provider
/pydataprovider2.html?highlight=dense_vector#input-types>`_ (e.g., numpy 1d
array of float32, int, list of int)
An example implementation for single item data reader creator:
.. code-block:: python
def reader_creator_random_image(width, height):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height)
return reader
An example implementation for multiple item data reader creator:
.. code-block:: python
def reader_creator_random_image_and_label(width, height, label):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height), label
return reader
TODO(yuyang18): Should we add whole design doc here?
"""
import decorator
from decorator import * from decorator import *
import creator import creator
__all__ = decorator.__all__ + ['creator']
...@@ -11,6 +11,10 @@ ...@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
Creator package contains some simple reader creators, which could be used in user
programs.
"""
__all__ = ['np_array', 'text_file'] __all__ = ['np_array', 'text_file']
...@@ -38,7 +42,7 @@ def np_array(x): ...@@ -38,7 +42,7 @@ def np_array(x):
def text_file(path): def text_file(path):
""" """
Creates a data reader that outputs text line by line from given text file. Creates a data reader that outputs text line by line from given text file.
Trailing new line ('\n') of each line will be removed. Trailing new line ('\\\\n') of each line will be removed.
:path: path of the text file. :path: path of the text file.
:returns: data reader of text file :returns: data reader of text file
......
...@@ -12,25 +12,27 @@ ...@@ -12,25 +12,27 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
__all__ = [
'map_readers', 'buffered', 'compose', 'chain', 'shuffle',
'ComposeNotAligned', 'batched', 'firstn'
]
import itertools import itertools
import random import random
from Queue import Queue from Queue import Queue
from threading import Thread from threading import Thread
__all__ = [
'map_readers', 'buffered', 'compose', 'chain', 'shuffle',
'ComposeNotAligned', 'batched', 'firstn'
]
def map_readers(func, *readers): def map_readers(func, *readers):
""" """
Creates a data reader that outputs return value of function using Creates a data reader that outputs return value of function using
output of each data readers as arguments. output of each data readers as arguments.
:param func: function to use. :param func: function to use. The type of func should be (Sample) => Sample
:param *readers: readers whose outputs will be used as arguments of func. :type func: callable
:returns: the created data reader. :param readers: readers whose outputs will be used as arguments of func.
:return: the created data reader.
:rtype: callable
""" """
def reader(): def reader():
...@@ -45,16 +47,19 @@ def map_readers(func, *readers): ...@@ -45,16 +47,19 @@ def map_readers(func, *readers):
def shuffle(reader, buf_size): def shuffle(reader, buf_size):
""" """
Creates a data reader whose data output is suffled. Creates a data reader whose data output is shuffled.
Output from the iterator that created by original reader will be Output from the iterator that created by original reader will be
buffered into shuffle buffer, and then shuffled. The size of shuffle buffer buffered into shuffle buffer, and then shuffled. The size of shuffle buffer
is determined by argument buf_size. is determined by argument buf_size.
:param reader: the original reader whose output will be shuffled. :param reader: the original reader whose output will be shuffled.
:type reader: callable
:param buf_size: shuffle buffer size. :param buf_size: shuffle buffer size.
:type buf_size: int
:returns:the new reader whose output is shuffled. :return: the new reader whose output is shuffled.
:rtype: callable
""" """
def data_reader(): def data_reader():
...@@ -88,7 +93,8 @@ def chain(*readers): ...@@ -88,7 +93,8 @@ def chain(*readers):
[0, 0, 0, 1, 1, 1, 2, 2, 2] [0, 0, 0, 1, 1, 1, 2, 2, 2]
:param readers: input readers. :param readers: input readers.
:returns: the new data reader. :return: the new data reader.
:rtype: callable
""" """
def reader(): def reader():
...@@ -115,12 +121,13 @@ def compose(*readers, **kwargs): ...@@ -115,12 +121,13 @@ def compose(*readers, **kwargs):
The composed reader will output: The composed reader will output:
(1, 2, 3, 4, 5) (1, 2, 3, 4, 5)
:*readers: readers that will be composed together. :param readers: readers that will be composed together.
:check_alignment: if True, will check if input readers are aligned :param check_alignment: if True, will check if input readers are aligned
correctly. If False, will not check alignment and trailing outputs correctly. If False, will not check alignment and trailing outputs
will be discarded. Defaults to True. will be discarded. Defaults to True.
:type check_alignment: bool
:returns: the new data reader. :return: the new data reader.
:raises ComposeNotAligned: outputs of readers are not aligned. :raises ComposeNotAligned: outputs of readers are not aligned.
Will not raise when check_alignment is set to False. Will not raise when check_alignment is set to False.
...@@ -161,7 +168,9 @@ def buffered(reader, size): ...@@ -161,7 +168,9 @@ def buffered(reader, size):
as the buffer is not empty. as the buffer is not empty.
:param reader: the data reader to read from. :param reader: the data reader to read from.
:type reader: callable
:param size: max buffer size. :param size: max buffer size.
:type size: int
:returns: the buffered data reader. :returns: the buffered data reader.
""" """
...@@ -196,9 +205,13 @@ def buffered(reader, size): ...@@ -196,9 +205,13 @@ def buffered(reader, size):
def batched(reader, batch_size): def batched(reader, batch_size):
""" """
Create a batched reader. Create a batched reader.
:param reader: the data reader to read from. :param reader: the data reader to read from.
:param batch_size: batch_size :type reader: callable
:param batch_size: size of each mini-batch
:type batch_size: int
:return: the batched reader. :return: the batched reader.
:rtype: callable
""" """
def batched_reader(): def batched_reader():
...@@ -218,6 +231,13 @@ def batched(reader, batch_size): ...@@ -218,6 +231,13 @@ def batched(reader, batch_size):
def firstn(reader, n): def firstn(reader, n):
""" """
Limit the max number of samples that reader could return. Limit the max number of samples that reader could return.
:param reader: the data reader to read from.
:type reader: callable
:param n: the max number of samples to return.
:type n: int
:return: the decorated reader.
:rtype: callable
""" """
# TODO(yuyang18): Check if just drop the reader, could clean the opened # TODO(yuyang18): Check if just drop the reader, could clean the opened
......
...@@ -9,6 +9,10 @@ from . import optimizer as v2_optimizer ...@@ -9,6 +9,10 @@ from . import optimizer as v2_optimizer
from . import parameters as v2_parameters from . import parameters as v2_parameters
__all__ = ['SGD'] __all__ = ['SGD']
"""
Trainer package
TODO(yuyang18): Complete comments.
"""
def default_event_handler(event): def default_event_handler(event):
...@@ -22,14 +26,20 @@ def default_event_handler(event): ...@@ -22,14 +26,20 @@ def default_event_handler(event):
pass pass
class SGD(): class SGD(object):
def __init__(self, cost, parameters, update_equation): """
""" Simple SGD Trainer.
Simple SGD Trainer. TODO(yuyang18): Complete comments
:param update_equation: The optimizer object.
:type update_equation: paddle.v2.optimizer.Optimizer
:param cost: Target cost that the neural network should optimize.
:type cost: paddle.v2.config_base.Layer
:param parameters: The parameters dictionary.
:type parameters: paddle.v2.parameters.Parameters
"""
:param update_equation: The optimizer object. def __init__(self, cost, parameters, update_equation):
:type update_equation: v2_optimizer.Optimizer
"""
if not isinstance(parameters, v2_parameters.Parameters): if not isinstance(parameters, v2_parameters.Parameters):
raise TypeError('parameters should be parameters') raise TypeError('parameters should be parameters')
...@@ -56,8 +66,6 @@ class SGD(): ...@@ -56,8 +66,6 @@ class SGD():
Training method. Will train num_passes of input data. Training method. Will train num_passes of input data.
:param reader: :param reader:
:param topology: Network Topology, use one or more Layers to represent it.
:param parameters: The parameter pools.
:param num_passes: The total train passes. :param num_passes: The total train passes.
:param event_handler: Event handler. A method will be invoked when event :param event_handler: Event handler. A method will be invoked when event
occurred. occurred.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册