diff --git a/doc/api/v2/data.rst b/doc/api/v2/data.rst index b042320bc2922a1ddfa06b5d8479ac9134ae9d89..7fd71e743b8003b22c80d649ee7b5ee531e61422 100644 --- a/doc/api/v2/data.rst +++ b/doc/api/v2/data.rst @@ -1,6 +1,6 @@ -======== -Datasets -======== +================================== +Data Reader Interface and DataSets +================================== DataTypes diff --git a/doc/api/v2/run_logic.rst b/doc/api/v2/run_logic.rst index c383e87c8c1f83e36acc06605a24923d265e198b..9088e30b09ffb4b539b4691798f633918f3c37bb 100644 --- a/doc/api/v2/run_logic.rst +++ b/doc/api/v2/run_logic.rst @@ -26,6 +26,12 @@ Event Inference ========= -.. autofunction:: paddle.v2.infer +.. automodule:: paddle.v2.inference :members: Inference :noindex: + +.. autofunction:: paddle.v2.infer + :members: + :noindex: + + diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py index 3a8b98b8f045b0eb58be69649486cbd0a571f118..d8554d4d8e5b3343b993857abeb025e9ebfe4db9 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/v2/dataset/cifar.py @@ -12,9 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html +CIFAR dataset. + +This module will download dataset from https://www.cs.toronto.edu/~kriz/cifar.html and +parse train set and test set into paddle reader creators. + +The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 +images per class. There are 50000 training images and 10000 test images. + +The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes containing +600 images each. There are 500 training images and 100 testing images per class. -TODO(yuyang18): Complete the comments. """ import cPickle @@ -54,20 +62,56 @@ def reader_creator(filename, sub_name): def train100(): + """ + CIFAR-100 train set creator. 
+ + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 99]. + + :return: Train reader creator + :rtype: callable + """ return reader_creator( download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train') def test100(): + """ + CIFAR-100 test set creator. + + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 99]. + + :return: Test reader creator. + :rtype: callable + """ return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test') def train10(): + """ + CIFAR-10 train set creator. + + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 9]. + + :return: Train reader creator + :rtype: callable + """ return reader_creator( download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch') def test10(): + """ + CIFAR-10 test set creator. + + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 9]. + + :return: Test reader creator. + :rtype: callable + """ return reader_creator( download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch') diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py index f1b0ce16f21ad13d4564242c2359355236093032..854b20f0c3573363c461932eb552263213f9b697 100644 --- a/python/paddle/v2/dataset/conll05.py +++ b/python/paddle/v2/dataset/conll05.py @@ -11,11 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -import tarfile -import gzip -import itertools -from common import download """ Conll 2005 dataset. Paddle semantic role labeling Book and demo use this dataset as an example. Because Conll 2005 is not free in public, the default @@ -25,6 +20,12 @@ URL and MD5 to their Conll dataset. TODO(yuyang18): Complete comments. 
""" +import tarfile +import gzip +import itertools +from common import download + + __all__ = ['test, get_dict', 'get_embedding'] DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index e148ddeca0370cd76128a31ce3a4d488e9737d98..e304c986ba99cc6256e2ae967b2a508e673c29e1 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -14,6 +14,10 @@ """ Movielens 1-M dataset. +GroupLens Research collected and made available rating data sets from the +MovieLens web site (http://movielens.org). Movielens 1-M dataset contains 1 million +ratings from 6000 users on 4000 movies. + TODO(yuyang18): Complete comments. """