From 9f417f129d7385476cf23f1105060967d43a2c39 Mon Sep 17 00:00:00 2001 From: qijun Date: Mon, 27 Mar 2017 16:17:54 +0800 Subject: [PATCH] add some dateset docs --- doc/api/v2/data.rst | 6 ++-- doc/api/v2/run_logic.rst | 8 ++++- python/paddle/v2/dataset/cifar.py | 48 +++++++++++++++++++++++++-- python/paddle/v2/dataset/conll05.py | 11 +++--- python/paddle/v2/dataset/movielens.py | 4 +++ 5 files changed, 66 insertions(+), 11 deletions(-) diff --git a/doc/api/v2/data.rst b/doc/api/v2/data.rst index b042320bc2..7fd71e743b 100644 --- a/doc/api/v2/data.rst +++ b/doc/api/v2/data.rst @@ -1,6 +1,6 @@ -======== -Datasets -======== +================================== +Data Reader Interface and DataSets +================================== DataTypes diff --git a/doc/api/v2/run_logic.rst b/doc/api/v2/run_logic.rst index c383e87c8c..9088e30b09 100644 --- a/doc/api/v2/run_logic.rst +++ b/doc/api/v2/run_logic.rst @@ -26,6 +26,12 @@ Event Inference ========= -.. autofunction:: paddle.v2.infer +.. automodule:: paddle.v2.inference :members: Inference :noindex: + +.. autofunction:: paddle.v2.infer + :members: + :noindex: + + diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py index 3a8b98b8f0..d8554d4d8e 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/v2/dataset/cifar.py @@ -12,9 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html +CIFAR dataset. + +This module will download dataset from https://www.cs.toronto.edu/~kriz/cifar.html and +parse train set and test set into paddle reader creators. + +The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 +images per class. There are 50000 training images and 10000 test images. + +The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes containing +600 images each. 
There are 500 training images and 100 testing images per class. -TODO(yuyang18): Complete the comments. """ import cPickle @@ -54,20 +62,56 @@ def reader_creator(filename, sub_name): def train100(): + """ + CIFAR-100 train set creator. + + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 99]. + + :return: Train reader creator + :rtype: callable + """ return reader_creator( download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train') def test100(): + """ + CIFAR-100 test set creator. + + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 99]. + + :return: Test reader creator. + :rtype: callable + """ return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test') def train10(): + """ + CIFAR-10 train set creator. + + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 9]. + + :return: Train reader creator + :rtype: callable + """ return reader_creator( download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch') def test10(): + """ + CIFAR-10 test set creator. + + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 9]. + + :return: Test reader creator. + :rtype: callable + """ return reader_creator( download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch') diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py index f1b0ce16f2..854b20f0c3 100644 --- a/python/paddle/v2/dataset/conll05.py +++ b/python/paddle/v2/dataset/conll05.py @@ -11,11 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -import tarfile -import gzip -import itertools -from common import download """ Conll 2005 dataset. Paddle semantic role labeling Book and demo use this dataset as an example. 
Because Conll 2005 is not free in public, the default @@ -25,6 +20,12 @@ URL and MD5 to their Conll dataset. TODO(yuyang18): Complete comments. """ +import tarfile +import gzip +import itertools +from common import download + + __all__ = ['test, get_dict', 'get_embedding'] DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index e148ddeca0..e304c986ba 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -14,6 +14,10 @@ """ Movielens 1-M dataset. +GroupLens Research collected and made available rating data sets from the +MovieLens web site (http://movielens.org). Movielens 1-M dataset contains 1 million +ratings from 6000 users on 4000 movies. + TODO(yuyang18): Complete comments. """ -- GitLab