diff --git a/doc/demo/embedding_model/index.md b/doc/demo/embedding_model/index.md index 45992ad856e65d317ca4e81ac78bb5b903175393..06f3ff1f009e470cdb9687658613a76acbb79751 100644 --- a/doc/demo/embedding_model/index.md +++ b/doc/demo/embedding_model/index.md @@ -93,7 +93,7 @@ where `train.sh` is almost the same as `demo/seqToseq/translation/train.sh`, the - `--init_model_path`: path of the initialization model, here is `data/paraphrase_model` - `--load_missing_parameter_strategy`: operations when model file is missing, here use a normal distibution to initialize the other parameters except for the embedding layer -For users who want to understand the dataset format, model architecture and training procedure in detail, please refer to [Text generation Tutorial](text_generation.md). +For users who want to understand the dataset format, model architecture and training procedure in detail, please refer to [Text generation Tutorial](../text_generation/text_generation.md). ## Optional Function ## ### Embedding Parameters Observation diff --git a/doc/demo/rec/ml_regression.rst b/doc/demo/rec/ml_regression.rst index 472f585e6889e85d60d1fe8a64ee0adca1dfcbc8..4917f873a934dc93e8618627473dbe99644b10b5 100644 --- a/doc/demo/rec/ml_regression.rst +++ b/doc/demo/rec/ml_regression.rst @@ -291,7 +291,7 @@ It just start a paddle training process, write the log to `log.txt`, then print it on screen. Each command line argument in :code:`run.sh`, please refer to the `command line -arguments `_ page. The short description of these arguments is shown as follow. +arguments <../../ui/index.html#command-line-argument>`_ page. The short description of these arguments is shown as follow. * config\: Tell paddle which file is neural network configuration. * save_dir\: Tell paddle save model into './output' @@ -303,8 +303,6 @@ arguments `_ page. The short description of these arguments is shown as fol * dot_period\: Print a :code:`.` after train :code:`dot_period` batches. 
* num_passes\: Train at most :code:`num_passes`. - - If training process starts successfully, the output likes follow: .. code-block:: text diff --git a/doc_cn/demo/index.rst b/doc_cn/demo/index.rst index 4c948dadae2d8db7d97877b34f851935de21eb7a..df4c0d4ba347480b01d20ae270a9c7010228524d 100644 --- a/doc_cn/demo/index.rst +++ b/doc_cn/demo/index.rst @@ -4,23 +4,23 @@ 图像 '''' -* `图像分类 `_ +* `图像分类 <../../doc/demo/image_classification/index.html>`_ 自然语言处理 '''''''''''' -* `情感分析 `_ -* `文本生成 `_ -* `词性标注 `_ +* `情感分析 <../../doc/demo/sentiment_analysis/index.html>`_ +* `文本生成 <../../doc/demo/text_generation/index.html>`_ +* `词性标注 <../../doc/demo/semantic_role_labeling/index.html>`_ 推荐 '''' -* `MovieLens数据集 `_ -* `MovieLens评分回归 `_ +* `MovieLens数据集 <../../doc/demo/rec/ml_dataset.html>`_ +* `MovieLens评分回归 <../../doc/demo/rec/ml_regression.html>`_ 常用模型 '''''''' -* `ImageNet: ResNet `_ -* `Embedding: Chinese Word `_ +* `ImageNet: ResNet <../../doc/demo/imagenet_model/resnet_model.html>`_ +* `Embedding: Chinese Word <../../doc/demo/embedding_model/index.html>`_ diff --git a/python/paddle/trainer_config_helpers/data_sources.py b/python/paddle/trainer_config_helpers/data_sources.py index 8f3dcb96a931da21958733b7e790338aa2ca0577..8ada3903dc06befb82e47ba36a34e13865f6484b 100644 --- a/python/paddle/trainer_config_helpers/data_sources.py +++ b/python/paddle/trainer_config_helpers/data_sources.py @@ -14,10 +14,6 @@ """ Data Sources are helpers to define paddle training data or testing data. 
-There are several data attributes will be used by paddle: - -- Data ProviderType\: such as Python, Protobuf -- Data File list\: a single file that contains all data file paths """ from paddle.trainer.config_parser import * from .utils import deprecated @@ -27,8 +23,7 @@ try: except ImportError: import pickle -__all__ = ['define_py_data_sources', - 'define_py_data_sources2'] +__all__ = ['define_py_data_sources2'] def define_py_data_source(file_list, cls, module, @@ -50,11 +45,8 @@ def define_py_data_source(file_list, cls, module, define_py_data_source("train.list", TrainData, "data_provider", "process", args={"dictionary": dict_name}) - The related data provider can refer to - `here `__. - :param data_cls: - :param file_list: file list name. + :param file_list: file list name, which contains all data file paths :type file_list: basestring :param cls: Train or Test Class. :type cls: TrainData or TestData @@ -105,27 +97,10 @@ def define_py_data_source(file_list, cls, module, def define_py_data_sources(train_list, test_list, module, obj, args=None, train_async=False, data_cls=PyData): """ - Define python Train/Test data sources in one method. If train/test use - the same Data Provider configuration, module/obj/args contain one argument, - otherwise contain a list or tuple of arguments. For example\: - - .. code-block:: python - - define_py_data_sources("train.list", "test.list", module="data_provider" - obj="process", args={"dictionary": dict_name}) - - Or. - - .. code-block:: python + The annotation is almost the same as define_py_data_sources2, except that + it can specify train_async and data_cls. - define_py_data_sources("train.list", "test.list", module="data_provider" - obj=["process_train", "process_test"], - args=[{"dictionary": dict_train}, {"dictionary": dict_test}]) - - The related data provider can refer to - `here `__. - - :param data_cls: + :param data_cls: :param train_list: Train list name. 
:type train_list: basestring :param test_list: Test list name. @@ -183,6 +158,43 @@ def define_py_data_sources(train_list, test_list, module, obj, args=None, def define_py_data_sources2(train_list, test_list, module, obj, args=None): + """ + Define python Train/Test data sources in one method. If train/test use + the same Data Provider configuration, module/obj/args contain one argument, + otherwise contain a list or tuple of arguments. For example\: + + .. code-block:: python + + define_py_data_sources2(train_list="train.list", + test_list="test.list", + module="data_provider", + # if train/test use different configurations, + # obj=["process_train", "process_test"] + obj="process", + args={"dictionary": dict_name}) + + The related data provider can refer to + `here <../../data_provider/pydataprovider2.html#dataprovider-for-the-sequential-model>`__. + + :param train_list: Train list name. + :type train_list: basestring + :param test_list: Test list name. + :type test_list: basestring + :param module: python module name. If train and test are different, then + pass a tuple or list to this argument. + :type module: basestring or tuple or list + :param obj: python object name. May be a function name if using + PyDataProviderWrapper. If train and test are different, then pass + a tuple or list to this argument. + :type obj: basestring or tuple or list + :param args: The best practice is using dict() to pass arguments into + DataProvider, and use :code:`@init_hook_wrapper` to receive + arguments. If train and test are different, then pass a tuple + or list to this argument. + :type args: string or picklable object or list or tuple. + :return: None + :rtype: None + """ define_py_data_sources(train_list=train_list, test_list=test_list, module=module,