add some dataset docs

9f417f12 · qijun · 0690a9fb · 9f417f12 · 9f417f12 · 9f417f12
5 changed file
--- a/doc/api/v2/data.rst
+++ b/doc/api/v2/data.rst
-========
-Datasets
-========
+==================================
+Data Reader Interface and DataSets
+==================================


 DataTypes

--- a/doc/api/v2/run_logic.rst
+++ b/doc/api/v2/run_logic.rst
@@ -26,6 +26,12 @@ Event
 Inference
 =========

-..  autofunction:: paddle.v2.infer
+..  automodule:: paddle.v2.inference
    :members: Inference
    :noindex:
+
+..  autofunction:: paddle.v2.infer
+    :members:
+    :noindex:
+
+
--- a/python/paddle/v2/dataset/cifar.py
+++ b/python/paddle/v2/dataset/cifar.py
@@ -12,9 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
+CIFAR dataset.
+
+This module will download the dataset from https://www.cs.toronto.edu/~kriz/cifar.html and
+parse the train set and test set into paddle reader creators.
+
+The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 
+images per class. There are 50000 training images and 10000 test images.
+
+The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes containing 
+600 images each. There are 500 training images and 100 testing images per class. 

-TODO(yuyang18): Complete the comments.
 """

 import cPickle
@@ -54,20 +62,56 @@ def reader_creator(filename, sub_name):


 def train100():
+    """
+    CIFAR-100 train set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 99].
+
+    :return: Train reader creator
+    :rtype: callable
+    """
    return reader_creator(
        download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train')


 def test100():
+    """
+    CIFAR-100 test set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 99].
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
    return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test')


 def train10():
+    """
+    CIFAR-10 train set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 9].
+
+    :return: Train reader creator
+    :rtype: callable
+    """
    return reader_creator(
        download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch')


 def test10():
+    """
+    CIFAR-10 test set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 9].
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
    return reader_creator(
        download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch')


--- a/python/paddle/v2/dataset/conll05.py
+++ b/python/paddle/v2/dataset/conll05.py
@@ -11,11 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-import tarfile
-import gzip
-import itertools
-from common import download
 """
 Conll 2005 dataset.  Paddle semantic role labeling Book and demo use this
 dataset as an example. Because Conll 2005 is not free in public, the default
@@ -25,6 +20,12 @@ URL and MD5 to their Conll dataset.
 TODO(yuyang18): Complete comments.
 """

+import tarfile
+import gzip
+import itertools
+from common import download
+
+
 __all__ = ['test, get_dict', 'get_embedding']

 DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'

--- a/python/paddle/v2/dataset/movielens.py
+++ b/python/paddle/v2/dataset/movielens.py
@@ -14,6 +14,10 @@
 """
 Movielens 1-M dataset.

+GroupLens Research collected and made available rating data sets from the 
+MovieLens web site (http://movielens.org). Movielens 1-M dataset contains 1 million 
+ratings from 6000 users on 4000 movies. 
+
 TODO(yuyang18): Complete comments.
 """