diff --git a/doc/api/v2/config/optimizer.rst b/doc/api/v2/config/optimizer.rst
index ec6ba0aa46239f3806ca950e8863b953d0c4150b..b32373fdef52a7aa9d64b12cda3f76cb2abf351b 100644
--- a/doc/api/v2/config/optimizer.rst
+++ b/doc/api/v2/config/optimizer.rst
@@ -1,5 +1,3 @@
-.. _api_v2.optimizer:
-
 ==========
 Optimizer
 ==========
diff --git a/doc/api/v2/data.rst b/doc/api/v2/data.rst
index b042320bc2922a1ddfa06b5d8479ac9134ae9d89..fef87c4fbdb452771ecdb361c6eeae5b32bcee14 100644
--- a/doc/api/v2/data.rst
+++ b/doc/api/v2/data.rst
@@ -1,6 +1,6 @@
-========
-Datasets
-========
+==================================
+Data Reader Interface and DataSets
+==================================
 
 
 DataTypes
@@ -49,7 +49,6 @@ mnist
    :members:
    :noindex:
 
-
 cifar
 +++++
@@ -61,7 +60,7 @@ conll05
 +++++++
 
 .. automodule:: paddle.v2.dataset.conll05
-   :members:
+   :members: get_dict,get_embedding,test
    :noindex:
 
 imdb
@@ -85,6 +84,12 @@ movielens
    :members:
    :noindex:
 
+.. autoclass:: paddle.v2.dataset.movielens.MovieInfo
+   :noindex:
+
+.. autoclass:: paddle.v2.dataset.movielens.UserInfo
+   :noindex:
+
 sentiment
 +++++++++
@@ -102,7 +107,7 @@ uci_housing
 wmt14
 +++++
 
-.. automodule:: paddle.v2.dataset.uci_housing
+.. automodule:: paddle.v2.dataset.wmt14
    :members:
    :noindex:
 
diff --git a/doc/api/v2/run_logic.rst b/doc/api/v2/run_logic.rst
index 94921e1a7b9c0a95931136bfb65d2560dba8b8ee..5c97651f6536d89d2b5926d4b2907a547aa86b55 100644
--- a/doc/api/v2/run_logic.rst
+++ b/doc/api/v2/run_logic.rst
@@ -6,18 +6,21 @@ Parameters
 ==========
 
 .. automodule:: paddle.v2.parameters
+    :members: Parameters
     :noindex:
 
 Trainer
 =======
 
 .. automodule:: paddle.v2.trainer
+    :members: SGD
    :noindex:
 
 Event
 =====
 
 .. automodule:: paddle.v2.event
+    :members:
    :noindex:
 
 Inference
@@ -25,3 +28,4 @@ Inference
 
 .. autofunction:: paddle.v2.infer
    :noindex:
+
\ No newline at end of file
diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py
index ca3e44e5a0187da33654f4955197196b150da196..2698251b9e15046eb14f71c3f5b0546ecbb4a5dd 100644
--- a/python/paddle/v2/data_feeder.py
+++ b/python/paddle/v2/data_feeder.py
@@ -67,7 +67,7 @@ class DataFeeder(DataProviderConverter):
         #      [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ],  # first sample
         #      [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ]   # second sample
         # ]
-        arg = feeder(minibatch_data)
+        arg = feeder.convert(minibatch_data)
 
     .. note::
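For context, a minimal sketch of how the corrected `feeder.convert` call is used. The input names, shapes and `feeding` map below are made up for illustration; only `DataFeeder` and the `paddle.v2.data_type` helpers come from the diff above:

```python
import paddle.v2 as paddle
from paddle.v2.data_feeder import DataFeeder

# hypothetical network with one dense input and one integer label
data_types = [('image', paddle.data_type.dense_vector(4)),
              ('label', paddle.data_type.integer_value(10))]
feeder = DataFeeder(data_types=data_types, feeding={'image': 0, 'label': 1})

minibatch_data = [([1.0, 2.0, 3.0, 4.0], 5),
                  ([4.0, 3.0, 2.0, 1.0], 7)]
arg = feeder.convert(minibatch_data)  # swig Arguments object fed to paddle
```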
diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py
index 3a8b98b8f045b0eb58be69649486cbd0a571f118..41fda1e8f24cdef13d8ab3645862814100a1cd4c 100644
--- a/python/paddle/v2/dataset/cifar.py
+++ b/python/paddle/v2/dataset/cifar.py
@@ -12,9 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
+CIFAR dataset.
+
+This module will download the dataset from
+https://www.cs.toronto.edu/~kriz/cifar.html and parse the train/test set
+into paddle reader creators.
+
+The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes,
+with 6000 images per class. There are 50000 training images and 10000 test
+images.
+
+The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes
+containing 600 images each. There are 500 training images and 100 testing
+images per class.
 
-TODO(yuyang18): Complete the comments.
 """
 
 import cPickle
@@ -54,20 +65,56 @@ def reader_creator(filename, sub_name):
 
 
 def train100():
+    """
+    CIFAR-100 training set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 99].
+
+    :return: Training reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train')
 
 
 def test100():
+    """
+    CIFAR-100 test set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 99].
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
    return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test')
 
 
 def train10():
+    """
+    CIFAR-10 training set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 9].
+
+    :return: Training reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch')
 
 
 def test10():
+    """
+    CIFAR-10 test set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 9].
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch')
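A short usage sketch for the reader creators documented above. The per-sample layout follows the docstrings; the dataset is assumed to be downloaded on first use:

```python
import paddle.v2.dataset.cifar as cifar

reader = cifar.train10()   # reader creator: calling it yields samples
for i, (image, label) in enumerate(reader()):
    # image: flat array of pixel values in [0, 1]; label: int in [0, 9]
    print i, len(image), label
    if i >= 2:
        break
```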
diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py
index f1b0ce16f21ad13d4564242c2359355236093032..12d648bf6557ed6e437320e56a80294abac29f18 100644
--- a/python/paddle/v2/dataset/conll05.py
+++ b/python/paddle/v2/dataset/conll05.py
@@ -11,19 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Conll05 dataset.
+Paddle semantic role labeling Book and demo use this dataset as an example.
+Because Conll05 is not free to the public, the default download URL is the
+test set of Conll05 (which is public). Users can change the URL and MD5 to
+their own Conll dataset. A pre-trained word vector model based on the
+Wikipedia corpus is used to initialize the SRL model.
+"""
 
 import tarfile
 import gzip
 import itertools
 from common import download
-"""
-Conll 2005 dataset. Paddle semantic role labeling Book and demo use this
-dataset as an example. Because Conll 2005 is not free in public, the default
-downloaded URL is test set of Conll 2005 (which is public). Users can change
-URL and MD5 to their Conll dataset.
-
-TODO(yuyang18): Complete comments.
-"""
 
 __all__ = ['test', 'get_dict', 'get_embedding']
@@ -179,6 +179,9 @@ def reader_creator(corpus_reader,
 
 
 def get_dict():
+    """
+    Get the word, verb and label dictionaries of the Wikipedia corpus.
+    """
     word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5))
     verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5))
     label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5))
@@ -186,10 +189,24 @@ def get_dict():
 
 
 def get_embedding():
+    """
+    Get the trained word vector based on the Wikipedia corpus.
+    """
     return download(EMB_URL, 'conll05st', EMB_MD5)
 
 
 def test():
+    """
+    Conll05 test set creator.
+
+    Because the training dataset is not free, the test dataset is used for
+    training. It returns a reader creator; each sample in the reader has nine
+    features, including sentence sequence, predicate, predicate context,
+    predicate context flag and tagged sequence.
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
     word_dict, verb_dict, label_dict = get_dict()
     reader = corpus_reader(
         download(DATA_URL, 'conll05st', DATA_MD5),
diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py
index 9a7ccff4d5cd2563053adb0aae95fc6d10ad2a50..5dc5abfe53d90ec3adc9a27a49ed086953146497 100644
--- a/python/paddle/v2/dataset/imdb.py
+++ b/python/paddle/v2/dataset/imdb.py
@@ -12,9 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
+IMDB dataset.
 
-TODO(yuyang18): Complete comments.
+This module downloads the IMDB dataset from
+http://ai.stanford.edu/%7Eamaas/data/sentiment/. The dataset contains a set
+of 25,000 highly polar movie reviews for training, and 25,000 for testing.
+In addition, this module provides an API for building the word dictionary.
 """
 
 import paddle.v2.dataset.common
@@ -31,8 +34,11 @@ URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz'
 MD5 = '7c2ac02c03563afcf9b574c7e56c153a'
 
 
-# Read files that match pattern.  Tokenize and yield each file.
 def tokenize(pattern):
+    """
+    Read files that match the given pattern. Tokenize and yield each file.
+    """
+
     with tarfile.open(paddle.v2.dataset.common.download(URL, 'imdb',
                                                         MD5)) as tarf:
         # Note that we should use tarfile.next(), which does
@@ -49,6 +55,10 @@ def tokenize(pattern):
 
 
 def build_dict(pattern, cutoff):
+    """
+    Build a word dictionary from the corpus. Keys of the dictionary are words,
+    and values are zero-based IDs of these words.
+    """
     word_freq = collections.defaultdict(int)
     for doc in tokenize(pattern):
         for word in doc:
@@ -110,18 +120,46 @@ def reader_creator(pos_pattern, neg_pattern, word_idx, buffer_size):
 
 
 def train(word_idx):
+    """
+    IMDB training set creator.
+
+    It returns a reader creator, each sample in the reader is a zero-based ID
+    sequence and label in [0, 1].
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :return: Training reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         re.compile("aclImdb/train/pos/.*\.txt$"),
         re.compile("aclImdb/train/neg/.*\.txt$"), word_idx, 1000)
 
 
 def test(word_idx):
+    """
+    IMDB test set creator.
+
+    It returns a reader creator, each sample in the reader is a zero-based ID
+    sequence and label in [0, 1].
+
+    :param word_idx: word dictionary
+    :type word_idx: dict
+    :return: Test reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         re.compile("aclImdb/test/pos/.*\.txt$"),
         re.compile("aclImdb/test/neg/.*\.txt$"), word_idx, 1000)
 
 
 def word_dict():
+    """
+    Build a word dictionary from the corpus.
+
+    :return: Word dictionary
+    :rtype: dict
+    """
     return build_dict(
         re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)
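The IMDB readers compose in the same way; a minimal sketch using only the functions documented above:

```python
import paddle.v2.dataset.imdb as imdb

word_idx = imdb.word_dict()          # {word: zero-based id}, built once
train_reader = imdb.train(word_idx)  # reader creator

ids, label = next(train_reader())    # ids: word-ID sequence, label: 0 or 1
```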
""" import paddle.v2.dataset.common import collections @@ -40,6 +42,10 @@ def word_count(f, word_freq=None): def build_dict(): + """ + Build a word dictionary from the corpus, Keys of the dictionary are words, + and values are zero-based IDs of these words. + """ train_filename = './simple-examples/data/ptb.train.txt' test_filename = './simple-examples/data/ptb.valid.txt' with tarfile.open( @@ -84,10 +90,36 @@ def reader_creator(filename, word_idx, n): def train(word_idx, n): + """ + imikolov training set creator. + + It returns a reader creator, each sample in the reader is a word ID + tuple. + + :param word_idx: word dictionary + :type word_idx: dict + :param n: sliding window size + :type n: int + :return: Training reader creator + :rtype: callable + """ return reader_creator('./simple-examples/data/ptb.train.txt', word_idx, n) def test(word_idx, n): + """ + imikolov test set creator. + + It returns a reader creator, each sample in the reader is a word ID + tuple. + + :param word_idx: word dictionary + :type word_idx: dict + :param n: sliding window size + :type n: int + :return: Test reader creator + :rtype: callable + """ return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n) diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index 48a39b5493a8004d6eb034498a797af9c662bd19..c1347d3c66da858104858bfb6739d84051322146 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -15,7 +15,7 @@ MNIST dataset. This module will download dataset from http://yann.lecun.com/exdb/mnist/ and -parse train set and test set into paddle reader creators. +parse training set and test set into paddle reader creators. """ import paddle.v2.dataset.common import subprocess @@ -76,12 +76,12 @@ def reader_creator(image_filename, label_filename, buffer_size): def train(): """ - MNIST train set creator. + MNIST training set creator. It returns a reader creator, each sample in the reader is image pixels in [0, 1] and label in [0, 9]. - :return: Train reader creator + :return: Training reader creator :rtype: callable """ return reader_creator( diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index e148ddeca0370cd76128a31ce3a4d488e9737d98..837a85912663826f0483aff4f6a38f3945375d82 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -14,7 +14,12 @@ """ Movielens 1-M dataset. -TODO(yuyang18): Complete comments. +Movielens 1-M dataset contains 1 million ratings from 6000 users on 4000 +movies, which was collected by GroupLens Research. This module will download +Movielens 1-M dataset from +http://files.grouplens.org/datasets/movielens/ml-1m.zip and parse training +set and test set into paddle reader creators. + """ import zipfile @@ -35,12 +40,19 @@ MD5 = 'c4d9eecfca2ab87c1945afe126590906' class MovieInfo(object): + """ + Movie id, title and categories information are stored in MovieInfo. + """ + def __init__(self, index, categories, title): self.index = int(index) self.categories = categories self.title = title def value(self): + """ + Get information from a movie. + """ return [ self.index, [CATEGORIES_DICT[c] for c in self.categories], [MOVIE_TITLE_DICT[w.lower()] for w in self.title.split()] @@ -55,6 +67,10 @@ class MovieInfo(object): class UserInfo(object): + """ + User id, gender, age, and job information are stored in UserInfo. 
+ """ + def __init__(self, index, gender, age, job_id): self.index = int(index) self.is_male = gender == 'M' @@ -62,6 +78,9 @@ class UserInfo(object): self.job_id = int(job_id) def value(self): + """ + Get information from a user. + """ return [self.index, 0 if self.is_male else 1, self.age, self.job_id] def __str__(self): @@ -148,6 +167,9 @@ test = functools.partial(__reader_creator__, is_test=True) def get_movie_title_dict(): + """ + Get movie title dictionary. + """ __initialize_meta_info__() return MOVIE_TITLE_DICT @@ -160,11 +182,17 @@ def __max_index_info__(a, b): def max_movie_id(): + """ + Get the maximum value of movie id. + """ __initialize_meta_info__() return reduce(__max_index_info__, MOVIE_INFO.viewvalues()).index def max_user_id(): + """ + Get the maximum value of user id. + """ __initialize_meta_info__() return reduce(__max_index_info__, USER_INFO.viewvalues()).index @@ -177,21 +205,33 @@ def __max_job_id_impl__(a, b): def max_job_id(): + """ + Get the maximum value of job id. + """ __initialize_meta_info__() return reduce(__max_job_id_impl__, USER_INFO.viewvalues()).job_id def movie_categories(): + """ + Get movie categoriges dictionary. + """ __initialize_meta_info__() return CATEGORIES_DICT def user_info(): + """ + Get user info dictionary. + """ __initialize_meta_info__() return USER_INFO def movie_info(): + """ + Get movie info dictionary. + """ __initialize_meta_info__() return MOVIE_INFO diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py index 0eeb6d5affd8c280fb74edc82cf24bf418ca8ef9..4dd34e7383fe2a290fcf61474914183a383e2b9c 100644 --- a/python/paddle/v2/dataset/sentiment.py +++ b/python/paddle/v2/dataset/sentiment.py @@ -113,7 +113,7 @@ def reader_creator(data): def train(): """ - Default train set reader creator + Default training set reader creator """ data_set = load_sentiment_data() return reader_creator(data_set[0:NUM_TRAINING_INSTANCES]) diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py index dab8620441c966b19d8218025f8d8fa5b40d1c2c..3469fd9ce12dd4d934004f90286979b73048a5c8 100644 --- a/python/paddle/v2/dataset/uci_housing.py +++ b/python/paddle/v2/dataset/uci_housing.py @@ -14,7 +14,9 @@ """ UCI Housing dataset. -TODO(yuyang18): Complete comments. +This module will download dataset from +https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and +parse training set and test set into paddle reader creators. """ import numpy as np @@ -70,6 +72,15 @@ def load_data(filename, feature_num=14, ratio=0.8): def train(): + """ + UCI_HOUSING training set creator. + + It returns a reader creator, each sample in the reader is features after + normalization and price number. + + :return: Training reader creator + :rtype: callable + """ global UCI_TRAIN_DATA load_data(download(URL, 'uci_housing', MD5)) @@ -81,6 +92,15 @@ def train(): def test(): + """ + UCI_HOUSING test set creator. + + It returns a reader creator, each sample in the reader is features after + normalization and price number. 
diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py
index dab8620441c966b19d8218025f8d8fa5b40d1c2c..3469fd9ce12dd4d934004f90286979b73048a5c8 100644
--- a/python/paddle/v2/dataset/uci_housing.py
+++ b/python/paddle/v2/dataset/uci_housing.py
@@ -14,7 +14,9 @@
 """
 UCI Housing dataset.
 
-TODO(yuyang18): Complete comments.
+This module will download the dataset from
+https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and
+parse the training set and test set into paddle reader creators.
 """
 
 import numpy as np
@@ -70,6 +72,15 @@ def load_data(filename, feature_num=14, ratio=0.8):
 
 
 def train():
+    """
+    UCI_HOUSING training set creator.
+
+    It returns a reader creator, each sample in the reader is the features
+    after normalization and the corresponding house price.
+
+    :return: Training reader creator.
+    :rtype: callable
+    """
     global UCI_TRAIN_DATA
     load_data(download(URL, 'uci_housing', MD5))
 
@@ -81,6 +92,15 @@ def train():
 
 
 def test():
+    """
+    UCI_HOUSING test set creator.
+
+    It returns a reader creator, each sample in the reader is the features
+    after normalization and the corresponding house price.
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
     global UCI_TEST_DATA
     load_data(download(URL, 'uci_housing', MD5))
diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py
index 23ca8036281b16447403c1bfcec5e11f839ab94e..0902f87741c342b237439081703081b467dc6f35 100644
--- a/python/paddle/v2/dataset/wmt14.py
+++ b/python/paddle/v2/dataset/wmt14.py
@@ -12,7 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-wmt14 dataset
+WMT14 dataset.
+
+The original WMT14 dataset is too large, so a shrunken version is provided
+instead. This module will download the dataset from
+http://paddlepaddle.cdn.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz and
+parse the training set and test set into paddle reader creators.
 """
 import tarfile
 import gzip
@@ -99,11 +104,31 @@ def reader_creator(tar_file, file_name, dict_size):
 
 
 def train(dict_size):
+    """
+    WMT14 training set creator.
+
+    It returns a reader creator, each sample in the reader is source language
+    word ID sequence, target language word ID sequence and next word ID
+    sequence.
+
+    :param dict_size: size of the word dictionary
+    :type dict_size: int
+    :return: Training reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'train/train', dict_size)
 
 
 def test(dict_size):
+    """
+    WMT14 test set creator.
+
+    It returns a reader creator, each sample in the reader is source language
+    word ID sequence, target language word ID sequence and next word ID
+    sequence.
+
+    :param dict_size: size of the word dictionary
+    :type dict_size: int
+    :return: Test reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'test/test', dict_size)
diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py
index 1ad52b8baa411269d29732685871a875df5185cc..fd6050fa339d280ad54e40128ea6bae25132c873 100644
--- a/python/paddle/v2/event.py
+++ b/python/paddle/v2/event.py
@@ -1,14 +1,13 @@
 """
-All training events.
+Testing and training events.
 
 There are:
 
+* TestResult
 * BeginIteration
 * EndIteration
 * BeginPass
 * EndPass
-
-TODO(yuyang18): Complete it!
 """
 import py_paddle.swig_paddle as api
diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py
index 3210bf0642cda53cded52d7c38a8770e36290c19..c178336303f53769863063922868cd2a22e4b957 100644
--- a/python/paddle/v2/inference.py
+++ b/python/paddle/v2/inference.py
@@ -9,6 +9,17 @@ __all__ = ['infer']
 
 
 class Inference(object):
+    """
+    Inference combines a neural network's output and parameters together
+    to do inference.
+
+    :param output_layer: The neural network to run inference on.
+    :type output_layer: paddle.v2.config_base.Layer or a sequence of
+                        paddle.v2.config_base.Layer
+    :param parameters: The parameters dictionary.
+    :type parameters: paddle.v2.parameters.Parameters
+    """
+
     def __init__(self, output_layer, parameters):
         topo = topology.Topology(output_layer)
         gm = api.GradientMachine.createFromConfigProto(
diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py
index 1a01d95c205c0626374e1814a170ce2d58f23a60..feefd7d758ba09f5d8f818ca1b12b00c5f0e9797 100644
--- a/python/paddle/v2/optimizer.py
+++ b/python/paddle/v2/optimizer.py
@@ -47,6 +47,35 @@ class Optimizer(object):
 
 
 class Momentum(Optimizer):
+    """
+    SGD optimizer with momentum.
+
+    SGD is an optimization method that tries to find a neural network that
+    minimizes its "cost/error" by iteration. In paddle's implementation the
+    SGD optimizer is synchronized: all gradients of a mini-batch are
+    calculated and reduced into one gradient before the optimize operation
+    is applied.
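The events listed in `event.py` are consumed through an event handler. A minimal sketch following the paddle.v2 book examples; the event attributes used here (`pass_id`, `batch_id`, `cost`) are assumed from those examples:

```python
import paddle.v2 as paddle

def event_handler(event):
    if isinstance(event, paddle.event.EndIteration):
        if event.batch_id % 100 == 0:
            print "pass %d, batch %d, cost %f" % (
                event.pass_id, event.batch_id, event.cost)
    if isinstance(event, paddle.event.EndPass):
        print "pass %d finished" % event.pass_id
```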
+
+    The neural network considers the learning problem of minimizing an
+    objective function that has the form of a sum
+
+    .. math::
+
+        Q(w) = \\sum_{i}^{n} Q_i(w)
+
+    The value of the function Q is sometimes the cost of the neural network
+    (the Mean Square Error between prediction and label, for example). The
+    function Q is parametrised by w, the weights/biases of the neural
+    network, which are what is to be learned. The i is the i-th observation
+    in the (training) data.
+
+    So, the SGD method optimizes the weights by
+
+    .. math::
+
+        w = w - \\eta \\nabla Q(w) = w - \\eta \\sum_{i}^{n} \\nabla Q_i(w)
+
+    where :math:`\\eta` is the learning rate and :math:`n` is the batch size.
+    """
+
     def __init__(self, momentum=None, sparse=False, **kwargs):
         learning_method = v1_optimizers.MomentumOptimizer(
             momentum=momentum, sparse=sparse)
@@ -55,6 +84,26 @@ class Momentum(Optimizer):
 
 
 class Adam(Optimizer):
+    """
+    Adam optimizer.
+
+    For details, please refer to `Adam: A Method for Stochastic Optimization
+    <https://arxiv.org/abs/1412.6980>`_.
+
+    .. math::
+
+        m(w, t) & = \\beta_1 m(w, t-1) + (1 - \\beta_1) \\nabla Q_i(w) \\\\
+        v(w, t) & = \\beta_2 v(w, t-1) + (1 - \\beta_2)(\\nabla Q_i(w)) ^2 \\\\
+        w & = w - \\frac{\\eta m(w, t)}{\\sqrt{v(w,t) + \\epsilon}}
+
+    :param beta1: the :math:`\\beta_1` in the equation.
+    :type beta1: float
+    :param beta2: the :math:`\\beta_2` in the equation.
+    :type beta2: float
+    :param epsilon: the :math:`\\epsilon` in the equation. It is used to
+                    prevent division by zero.
+    :type epsilon: float
+    """
+
     def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, **kwargs):
         learning_method = v1_optimizers.AdamOptimizer(
             beta1=beta1, beta2=beta2, epsilon=epsilon)
@@ -62,6 +111,24 @@ class Adam(Optimizer):
 
 
 class Adamax(Optimizer):
+    """
+    Adamax optimizer.
+
+    For details, please refer to `Adam: A Method for Stochastic Optimization
+    <https://arxiv.org/abs/1412.6980>`_.
+
+    .. math::
+
+        m_t & = \\beta_1 * m_{t-1} + (1-\\beta_1)* \\nabla Q_i(w) \\\\
+        u_t & = max(\\beta_2*u_{t-1}, abs(\\nabla Q_i(w))) \\\\
+        w_t & = w_{t-1} - (\\eta/(1-\\beta_1^t))*m_t/u_t
+
+    :param beta1: the :math:`\\beta_1` in the equation.
+    :type beta1: float
+    :param beta2: the :math:`\\beta_2` in the equation.
+    :type beta2: float
+    """
+
     def __init__(self, beta1=0.9, beta2=0.999, **kwargs):
         learning_method = v1_optimizers.AdamaxOptimizer(
             beta1=beta1, beta2=beta2)
@@ -69,12 +136,40 @@ class Adamax(Optimizer):
 
 
 class AdaGrad(Optimizer):
+    """
+    AdaGrad (for ADAptive GRAdient algorithm) optimizer.
+
+    For details, please refer to `Adaptive Subgradient Methods for
+    Online Learning and Stochastic Optimization
+    <http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf>`_.
+
+    .. math::
+
+        G &= \\sum_{\\tau=1}^{t} g_{\\tau} g_{\\tau}^T \\\\
+        w & = w - \\eta diag(G)^{-\\frac{1}{2}} \\circ g
+    """
+
     def __init__(self, **kwargs):
         learning_method = v1_optimizers.AdaGradOptimizer()
         super(AdaGrad, self).__init__(learning_method=learning_method, **kwargs)
 
 
 class DecayedAdaGrad(Optimizer):
+    """
+    AdaGrad method with a decayed sum of squared gradients. The equations of
+    this method are as follows:
+
+    .. math::
+
+        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
+        learning\\_rate &= 1/\\sqrt{E(g_t^2) + \\epsilon}
+
+    :param rho: the :math:`\\rho` parameter in the equation.
+    :type rho: float
+    :param epsilon: the :math:`\\epsilon` parameter in the equation.
+    :type epsilon: float
+    """
+
     def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
         learning_method = v1_optimizers.DecayedAdaGradOptimizer(
             rho=rho, epsilon=epsilon)
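An illustrative NumPy sketch of the Adam update documented above, matching the docstring equations; this is not Paddle's implementation, and the function name is made up for the example:

```python
import numpy as np

def adam_step(w, grad, m, v, eta=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
    # first and second moment estimates, as in the docstring equations
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad ** 2
    # parameter update; eps keeps the denominator away from zero
    w = w - eta * m / np.sqrt(v + eps)
    return w, m, v
```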
@@ -83,6 +178,24 @@
 
 
 class AdaDelta(Optimizer):
+    """
+    AdaDelta method. For details, please refer to
+    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD
+    <https://arxiv.org/abs/1212.5701>`_.
+
+    .. math::
+
+        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
+        learning\\_rate &= sqrt( ( E(dx_{t-1}^2) + \\epsilon ) / ( \\
+                          E(g_t^2) + \\epsilon ) ) \\\\
+        E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2
+
+    :param rho: the :math:`\\rho` in the equation.
+    :type rho: float
+    :param epsilon: the :math:`\\epsilon` in the equation.
+    :type epsilon: float
+    """
+
     def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
         learning_method = v1_optimizers.AdaDeltaOptimizer(
             rho=rho, epsilon=epsilon)
@@ -91,6 +204,24 @@ class AdaDelta(Optimizer):
 
 
 class RMSProp(Optimizer):
+    """
+    RMSProp (for Root Mean Square Propagation) optimizer. For details,
+    please refer to this `slide
+    <http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf>`_.
+
+    The equations of this method are as follows:
+
+    .. math::
+
+        v(w, t) & = \\rho v(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\
+        w & = w - \\frac{\\eta} {\\sqrt{v(w,t) + \\epsilon}} \\nabla Q_{i}(w)
+
+    :param rho: the :math:`\\rho` in the equation. The forgetting factor.
+    :type rho: float
+    :param epsilon: the :math:`\\epsilon` in the equation.
+    :type epsilon: float
+    """
+
     def __init__(self, rho=0.95, epsilon=1e-6, **kwargs):
         learning_method = v1_optimizers.RMSPropOptimizer(
             rho=rho, epsilon=epsilon)
diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py
index f5797a86c2b71502a7791453ff86c6a486c9f185..68b4967cc031dfa2dd164d822aff97585f923e48 100644
--- a/python/paddle/v2/trainer.py
+++ b/python/paddle/v2/trainer.py
@@ -1,3 +1,6 @@
+"""
+Trainer module.
+"""
 import collections
 
 import py_paddle.swig_paddle as api
@@ -9,10 +12,6 @@ from . import optimizer as v2_optimizer
 from . import parameters as v2_parameters
 
 __all__ = ['SGD']
-"""
-Trainer package
-
-TODO(yuyang18): Complete comments.
-"""
 
 
 def default_event_handler(event):
@@ -29,7 +28,8 @@ def default_event_handler(event):
 class SGD(object):
     """
     Simple SGD Trainer.
-    TODO(yuyang18): Complete comments
+    The SGD trainer combines a data reader, a network topology and an update
+    equation together to train/test a neural network.
 
     :param update_equation: The optimizer object.
     :type update_equation: paddle.v2.optimizer.Optimizer
@@ -74,7 +74,9 @@ class SGD(object):
         """
         Training method. Will train num_passes of input data.
 
-        :param reader:
+        :param reader: A reader that reads and yields data items. Usually we
+                       use a batched reader to do mini-batch training.
+        :type reader: collections.Iterable
         :param num_passes: The total train passes.
         :param event_handler: Event handler. A method will be invoked when event
                               occurred.
@@ -132,6 +134,16 @@ class SGD(object):
         self.__gradient_machine__.finish()
 
     def test(self, reader, feeding=None):
+        """
+        Testing method. Will test input data.
+
+        :param reader: A reader that reads and yields data items.
+        :type reader: collections.Iterable
+        :param feeding: Feeding is a map of neural network input name and the
+                        array index that reader returns.
+        :type feeding: dict
+        """
         feeder = DataFeeder(self.__data_types__, feeding)
         evaluator = self.__gradient_machine__.makeEvaluator()
         out_args = api.Arguments.createArguments(0)
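Putting the pieces together, a sketch of how the optimizer, trainer, reader and event handler combine, following the paddle.v2 book examples; `cost`, `parameters` and `event_handler` are assumed to be defined in your own model code:

```python
import paddle.v2 as paddle

optimizer = paddle.optimizer.Momentum(momentum=0.9, learning_rate=1e-3)
trainer = paddle.trainer.SGD(cost=cost,
                             parameters=parameters,
                             update_equation=optimizer)
trainer.train(
    reader=paddle.batch(paddle.dataset.uci_housing.train(), batch_size=32),
    num_passes=10,
    event_handler=event_handler)
trainer.test(reader=paddle.batch(paddle.dataset.uci_housing.test(),
                                 batch_size=32))
```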