diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py
index 95a0eeeef5d65565a3bf6e4737dbe5b15b5e8f9a..41fda1e8f24cdef13d8ab3645862814100a1cd4c 100644
--- a/python/paddle/v2/dataset/cifar.py
+++ b/python/paddle/v2/dataset/cifar.py
@@ -66,12 +66,12 @@ def reader_creator(filename, sub_name):
 
 def train100():
     """
-    CIFAR-100 train set creator.
+    CIFAR-100 training set creator.
 
     It returns a reader creator, each sample in the reader is image pixels in
     [0, 1] and label in [0, 99].
 
-    :return: Train reader creator
+    :return: Training reader creator
     :rtype: callable
     """
     return reader_creator(
@@ -93,12 +93,12 @@ def test100():
 
 def train10():
     """
-    CIFAR-10 train set creator.
+    CIFAR-10 training set creator.
 
     It returns a reader creator, each sample in the reader is image pixels in
     [0, 1] and label in [0, 9].
 
-    :return: Train reader creator
+    :return: Training reader creator
     :rtype: callable
     """
     return reader_creator(
diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py
index 2da12dc8fd51b9b497678abbf50b8d9628c3bcb0..12d648bf6557ed6e437320e56a80294abac29f18 100644
--- a/python/paddle/v2/dataset/conll05.py
+++ b/python/paddle/v2/dataset/conll05.py
@@ -199,12 +199,12 @@ def test():
     """
     Conll05 test set creator.
 
-    Because the train dataset is not free, the test dataset is used for
+    Because the training dataset is not free, the test dataset is used for
     training. It returns a reader creator, each sample in the reader is nine
     features, including sentence sequence, predicate, predicate context,
     predicate context flag and tagged sequence.
 
-    :return: Train reader creator
+    :return: Training reader creator
     :rtype: callable
     """
     word_dict, verb_dict, label_dict = get_dict()
diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py
index cc07b53ef4b437a850dc5ed6d2a58edda59c7e34..5dc5abfe53d90ec3adc9a27a49ed086953146497 100644
--- a/python/paddle/v2/dataset/imdb.py
+++ b/python/paddle/v2/dataset/imdb.py
@@ -121,14 +121,14 @@ def reader_creator(pos_pattern, neg_pattern, word_idx, buffer_size):
 
 def train(word_idx):
     """
-    IMDB train set creator.
+    IMDB training set creator.
 
     It returns a reader creator, each sample in the reader is an zero-based ID
     sequence and label in [0, 1].
 
     :param word_idx: word dictionary
     :type word_idx: dict
-    :return: Train reader creator
+    :return: Training reader creator
     :rtype: callable
     """
     return reader_creator(
diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py
index ddf0dbce22158612d052b835ee018780165040a8..41ca27e23632bea7e410f9d91920bbc539d38279 100644
--- a/python/paddle/v2/dataset/imikolov.py
+++ b/python/paddle/v2/dataset/imikolov.py
@@ -15,8 +15,8 @@
 imikolov's simple dataset.
 
 This module will download dataset from
-http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse train/test set into paddle
-reader creators.
+http://www.fit.vutbr.cz/~imikolov/rnnlm/ and parse training set and test set
+into paddle reader creators.
 """
 import paddle.v2.dataset.common
 import collections
@@ -91,7 +91,7 @@ def reader_creator(filename, word_idx, n):
 
 def train(word_idx, n):
     """
-    imikolov train set creator.
+    imikolov training set creator.
 
     It returns a reader creator, each sample in the reader is a word ID
     tuple.
@@ -100,7 +100,7 @@ def train(word_idx, n):
     :type word_idx: dict
     :param n: sliding window size
     :type n: int
-    :return: Train reader creator
+    :return: Training reader creator
     :rtype: callable
     """
     return reader_creator('./simple-examples/data/ptb.train.txt', word_idx, n)
@@ -117,7 +117,7 @@ def test(word_idx, n):
     :type word_idx: dict
     :param n: sliding window size
     :type n: int
-    :return: Train reader creator
+    :return: Test reader creator
     :rtype: callable
     """
     return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n)
diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py
index 2d6b3e376bec1dd507b8819fecf058aaa07129e0..c1347d3c66da858104858bfb6739d84051322146 100644
--- a/python/paddle/v2/dataset/mnist.py
+++ b/python/paddle/v2/dataset/mnist.py
@@ -15,7 +15,7 @@
 MNIST dataset.
 
 This module will download dataset from http://yann.lecun.com/exdb/mnist/ and
-parse train/test set into paddle reader creators.
+parse training set and test set into paddle reader creators.
 """
 import paddle.v2.dataset.common
 import subprocess
@@ -76,12 +76,12 @@ def reader_creator(image_filename, label_filename, buffer_size):
 
 def train():
     """
-    MNIST train set creator.
+    MNIST training set creator.
 
     It returns a reader creator, each sample in the reader is image pixels in
     [0, 1] and label in [0, 9].
 
-    :return: Train reader creator
+    :return: Training reader creator
     :rtype: callable
     """
     return reader_creator(
diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py
index d6b57ec58559e505d092edac540276ecbc989996..837a85912663826f0483aff4f6a38f3945375d82 100644
--- a/python/paddle/v2/dataset/movielens.py
+++ b/python/paddle/v2/dataset/movielens.py
@@ -17,8 +17,8 @@ Movielens 1-M dataset.
 Movielens 1-M dataset contains 1 million ratings from 6000 users on 4000
 movies, which was collected by GroupLens Research. This module will download
 Movielens 1-M dataset from
-http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse train/test
-set into paddle reader creators.
+http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse training
+set and test set into paddle reader creators.
 
 """
 
diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py
index 0eeb6d5affd8c280fb74edc82cf24bf418ca8ef9..4dd34e7383fe2a290fcf61474914183a383e2b9c 100644
--- a/python/paddle/v2/dataset/sentiment.py
+++ b/python/paddle/v2/dataset/sentiment.py
@@ -113,7 +113,7 @@ def reader_creator(data):
 
 def train():
     """
-    Default train set reader creator
+    Default training set reader creator
     """
     data_set = load_sentiment_data()
     return reader_creator(data_set[0:NUM_TRAINING_INSTANCES])
diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py
index 5da4df898473f90715a3d89d9b02bca46c675d51..3469fd9ce12dd4d934004f90286979b73048a5c8 100644
--- a/python/paddle/v2/dataset/uci_housing.py
+++ b/python/paddle/v2/dataset/uci_housing.py
@@ -16,7 +16,7 @@ UCI Housing dataset.
 
 This module will download dataset from
 https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and
-parse train/test set into paddle reader creators.
+parse training set and test set into paddle reader creators.
 """
 
 import numpy as np
@@ -73,12 +73,12 @@ def load_data(filename, feature_num=14, ratio=0.8):
 
 def train():
     """
-    UCI_HOUSING train set creator.
+    UCI_HOUSING training set creator.
 
     It returns a reader creator, each sample in the reader is features after
     normalization and price number.
 
-    :return: Train reader creator
+    :return: Training reader creator
     :rtype: callable
     """
     global UCI_TRAIN_DATA
diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py
index 690bb32aee26ccbd823d20af9ea3fca869cbff94..d8c68184874e3cded719ed3ed701d28bdc2f8cf8 100644
--- a/python/paddle/v2/dataset/wmt14.py
+++ b/python/paddle/v2/dataset/wmt14.py
@@ -16,7 +16,7 @@ WMT14 dataset.
 The original WMT14 dataset is too large and a small set of data for set is
 provided. This module will download dataset from
 http://paddlepaddle.cdn.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz and
-parse train/test set into paddle reader creators.
+parse training set and test set into paddle reader creators.
 """
 import tarfile
 
@@ -105,13 +105,13 @@ def reader_creator(tar_file, file_name, dict_size):
 
 def train(dict_size):
     """
-    WMT14 train set creator.
+    WMT14 training set creator.
 
     It returns a reader creator, each sample in the reader is source language
     word ID sequence, target language word ID sequence and next word ID
     sequence.
 
-    :return: Train reader creator
+    :return: Training reader creator
     :rtype: callable
     """
     return reader_creator(
@@ -126,7 +126,7 @@ def test(dict_size):
     word ID sequence, target language word ID sequence and next word ID
     sequence.
 
-    :return: Train reader creator
+    :return: Test reader creator
     :rtype: callable
     """
     return reader_creator(
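
Note for reviewers: the changes above are docstring-only; the reader-creator contract itself is unchanged. As a sanity check of the documented contract, here is a minimal usage sketch that consumes one of these creators, using paddle.v2.dataset.mnist.train() as the example. It assumes paddle (v2 API) is installed and that the MNIST files can be downloaded on first use; the call-then-iterate pattern follows the convention shared by these dataset modules and is not part of this diff.

    # Minimal usage sketch (not part of the diff): peek at a few samples from
    # the MNIST training reader creator. Assumes paddle v2 is installed and
    # the dataset files can be downloaded on first use.
    import paddle.v2.dataset.mnist as mnist

    reader = mnist.train()  # training set creator: returns a callable reader
    for i, (image, label) in enumerate(reader()):
        # per the docstring above: image pixels and a label in [0, 9]
        print("sample %d: %d pixels, label %d" % (i, len(image), label))
        if i == 2:  # stop after the first three samples
            break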