diff --git a/demo/semantic_role_labeling/dataprovider.py b/demo/semantic_role_labeling/dataprovider.py index ca7346b3db97e8f0907ab233982a5eb422d9514b..2ef25c42c1794c410fe85fd497a6ed9d2295dca9 100644 --- a/demo/semantic_role_labeling/dataprovider.py +++ b/demo/semantic_role_labeling/dataprovider.py @@ -22,13 +22,13 @@ def hook(settings, word_dict, label_dict, **kwargs): settings.label_dict = label_dict #all inputs are integral and sequential type settings.slots = [ - integer_value(len(word_dict), seq_type=SequenceType.SEQUENCE), - integer_value(len(word_dict), seq_type=SequenceType.SEQUENCE), - integer_value(len(word_dict), seq_type=SequenceType.SEQUENCE), - integer_value(len(word_dict), seq_type=SequenceType.SEQUENCE), - integer_value(len(word_dict), seq_type=SequenceType.SEQUENCE), - integer_value(2, seq_type=SequenceType.SEQUENCE), - integer_value(len(label_dict), seq_type=SequenceType.SEQUENCE)] + integer_value_sequence(len(word_dict)), + integer_value_sequence(len(word_dict)), + integer_value_sequence(len(word_dict)), + integer_value_sequence(len(word_dict)), + integer_value_sequence(len(word_dict)), + integer_value_sequence(2), + integer_value_sequence(len(label_dict))] @provider(init_hook=hook) diff --git a/demo/sentiment/dataprovider.py b/demo/sentiment/dataprovider.py index c325d33485c872fa7c36c774869e9062ca461139..9a9fd81f030cb1d2a10a5000fd1d12810d12112b 100755 --- a/demo/sentiment/dataprovider.py +++ b/demo/sentiment/dataprovider.py @@ -17,7 +17,7 @@ from paddle.trainer.PyDataProvider2 import * def hook(settings, dictionary, **kwargs): settings.word_dict = dictionary settings.input_types = [ - integer_value(len(settings.word_dict), seq_type=SequenceType.SEQUENCE), + integer_value_sequence(len(settings.word_dict)), integer_value(2)] settings.logger.info('dict len : %d' % (len(settings.word_dict))) diff --git a/demo/seqToseq/dataprovider.py b/demo/seqToseq/dataprovider.py index 
a646667977d3ebf762b8b00a2c143eafa89c2a03..df19db109ed223c7515c3ebf2cb1918f41163930 100755 --- a/demo/seqToseq/dataprovider.py +++ b/demo/seqToseq/dataprovider.py @@ -30,22 +30,15 @@ def hook(settings, src_dict, trg_dict, file_list, **kwargs): if settings.job_mode: settings.trg_dict = trg_dict settings.slots = [ - integer_value( - len(settings.src_dict), - seq_type=SequenceType.SEQUENCE), integer_value( - len(settings.trg_dict), - seq_type=SequenceType.SEQUENCE), integer_value( - len(settings.trg_dict), - seq_type=SequenceType.SEQUENCE) + integer_value_sequence(len(settings.src_dict)), + integer_value_sequence(len(settings.trg_dict)), + integer_value_sequence(len(settings.trg_dict)) ] settings.logger.info("trg dict len : %d" % (len(settings.trg_dict))) else: settings.slots = [ - integer_value( - len(settings.src_dict), - seq_type=SequenceType.SEQUENCE), integer_value( - len(open(file_list[0], "r").readlines()), - seq_type=SequenceType.SEQUENCE) + integer_value_sequence(len(settings.src_dict)), + integer_value_sequence(len(open(file_list[0], "r").readlines())) ] diff --git a/doc/demo/quick_start/NetRNN_en.png b/doc/demo/quick_start/NetRNN_en.png index 8c11b8ae0c3d32c48fcca6de47ea12ffbbaa106a..3ebdf820ef702858fd1e0ad09ad55c9f2c31635b 100644 Binary files a/doc/demo/quick_start/NetRNN_en.png and b/doc/demo/quick_start/NetRNN_en.png differ diff --git a/doc/demo/quick_start/index_en.md b/doc/demo/quick_start/index_en.md index 3a2c39d11bf1434f8ce77a84be255a4bcc81dd59..e0f819283f4d2bee816dc6711b059e51e3234f05 100644 --- a/doc/demo/quick_start/index_en.md +++ b/doc/demo/quick_start/index_en.md @@ -225,7 +225,7 @@ Performance summary: You can refer to the training and testing scripts later. In
### Word Embedding Model -In order to use the word embedding model, you need to change the data provider a little bit to make the input words as a sequence of word IDs. The revised data provider is listed below. You only need to change initializer() for the type of the first input. It is changed from sparse_binary_vector to sequence of intergers. process() remains the same. This data provider can also be used for later sequence models. +In order to use the word embedding model, you need to change the data provider a little bit to make the input words as a sequence of word IDs. The revised data provider `dataprovider_emb.py` is listed below. You only need to change initializer() for the type of the first input. It is changed from sparse_binary_vector to sequence of integers. process() remains the same. This data provider can also be used for later sequence models. ```python def initializer(settings, dictionary, **kwargs): @@ -260,7 +260,7 @@ avg = pooling_layer(input=emb, pooling_type=AvgPooling()) The other parts of the model are the same as logistic regression network. -The performance is summarized in the following table:: +The performance is summarized in the following table:
@@ -400,7 +400,7 @@ If you want to install the remote training platform, which enables distributed t You can use the trained model to perform prediction on the dataset with no labels. You can also evaluate the model on dataset with labels to obtain its test accuracy.
![](./PipelineTest_en.png)
-The test script (test.sh) is listed below. PaddlePaddle can evaluate a model on the data with labels specified in `test.list`. +The test script is listed below. PaddlePaddle can evaluate a model on the data with labels specified in `test.list`. ```bash paddle train \ @@ -497,11 +497,12 @@ The scripts of data downloading, network configurations, and training scrips are ## Appendix ### Command Line Argument -* --config:network architecture path. -* --save_dir:model save directory. -* --log_period:the logging period per batch. -* --num_passes:number of training passes. One pass means the training would go over the whole training dataset once.* --config_args:Other configuration arguments. -* --init_model_path:The path of the initial model parameter. +* \--config:network architecture path. +* \--save_dir:model save directory. +* \--log_period:the logging period per batch. +* \--num_passes:number of training passes. One pass means the training would go over the whole training dataset once. +* \--config_args:Other configuration arguments. +* \--init_model_path:The path of the initial model parameter. By default, the trainer will save model every pass. You can also specify `saving_period_by_batches` to set the frequency of batch saving. You can use `show_parameter_stats_period` to print the statistics of the parameters, which are very useful for tuning parameters. 
Other command line arguments can be found in command line argument documentation。 diff --git a/doc/demo/semantic_role_labeling/index.md b/doc/demo/semantic_role_labeling/index.md index 53c817a485b3cc6d8942729eb0f1fff3d7b01adc..58b1b8abbc3961995f034e929265a0ab75d9b723 100644 --- a/doc/demo/semantic_role_labeling/index.md +++ b/doc/demo/semantic_role_labeling/index.md @@ -71,15 +71,14 @@ def hook(settings, word_dict, label_dict, **kwargs): settings.word_dict = word_dict settings.label_dict = label_dict #all inputs are integral and sequential type - settings.slots = [ - integer_value(len(word_dict), seq_type=SequenceType.SEQUENCE), - integer_value(len(word_dict), seq_type=SequenceType.SEQUENCE), - integer_value(len(word_dict), seq_type=SequenceType.SEQUENCE), - integer_value(len(word_dict), seq_type=SequenceType.SEQUENCE), - integer_value(len(word_dict), seq_type=SequenceType.SEQUENCE), - integer_value(2, seq_type=SequenceType.SEQUENCE), - integer_value(len(label_dict), seq_type=SequenceType.SEQUENCE)]``` - + settings.slots = [ + integer_value_sequence(len(word_dict)), + integer_value_sequence(len(word_dict)), + integer_value_sequence(len(word_dict)), + integer_value_sequence(len(word_dict)), + integer_value_sequence(len(word_dict)), + integer_value_sequence(2), + integer_value_sequence(len(label_dict))] ``` The corresponding data iterator is as following: ``` diff --git a/doc/layer.md b/doc/layer.md new file mode 100644 index 0000000000000000000000000000000000000000..45f2e2bad542ff5c29c89201b356728cf7ca8c1c --- /dev/null +++ b/doc/layer.md @@ -0,0 +1,4 @@ +# Layer Documents + +* [Layer Source Code Document](source/gserver/layers/index.rst) +* [Layer Python API Document](ui/api/trainer_config_helpers/layers_index.rst) diff --git a/doc/source/gserver/layers/layer.rst b/doc/source/gserver/layers/layer.rst index 0406c75e4eb62aa201e0d5d4eb6976f6a31d0e2a..807b22ca140ee71208a96e2877b9c5636620b165 100644 --- a/doc/source/gserver/layers/layer.rst +++ 
b/doc/source/gserver/layers/layer.rst @@ -510,11 +510,24 @@ NCELayer .. doxygenclass:: paddle::NCELayer :members: +Validation Layers +----------------- + ValidationLayer ---------------- +``````````````` .. doxygenclass:: paddle::ValidationLayer :members: +AucValidation +````````````` +.. doxygenclass:: paddle::AucValidation + :members: + +PnpairValidation +```````````````` +.. doxygenclass:: paddle::PnpairValidation + :members: + Check Layers ============ diff --git a/doc/ui/api/trainer_config_helpers/activations_index.rst b/doc/ui/api/trainer_config_helpers/activations_index.rst new file mode 100644 index 0000000000000000000000000000000000000000..1c0b71ab77eec62859c1d7615f6ebe637f3108ac --- /dev/null +++ b/doc/ui/api/trainer_config_helpers/activations_index.rst @@ -0,0 +1,7 @@ +Activations +=========== + +.. toctree:: + :maxdepth: 3 + + activations.rst diff --git a/doc_cn/demo/quick_start/index.md b/doc_cn/demo/quick_start/index.md index 0027075ea045f45f4fd5588ae6a11549e4b30fe1..84d796320f7ab675f529d7d0bda843711b688c67 100644 --- a/doc_cn/demo/quick_start/index.md +++ b/doc_cn/demo/quick_start/index.md @@ -207,17 +207,16 @@ classification_cost(input=output, label=label) ### 词向量模型(Word Vector) -embeding模型需要稍微改变数据提供的脚本,即`dataprovider_emb.py`,词向量模型、 -卷积模型、时序模型均使用该脚 -- 文本输入类型定义为整数类型integer_value -- 设置文本输入类型seq_type为SequenceType.SEQUENCE +embedding模型需要稍微改变数据提供的脚本,即`dataprovider_emb.py`,词向量模型、 +卷积模型、时序模型均使用该脚本。其中文本输入类型定义为整数时序类型integer_value_sequence。 ``` def initializer(settings, dictionary, **kwargs): settings.word_dict = dictionary settings.input_types = [ # Define the type of the first input as sequence of integer. 
- integer_value(len(dictionary), seq_type=SequenceType.SEQUENCE), + # The values of the integers range from 0 to len(dictionary)-1 + integer_value_sequence(len(dictionary)), # Define the second input for label id integer_value(2)] @@ -479,12 +478,12 @@ else: ## 附录(Appendix) ### 命令行参数(Command Line Argument) -* --config:网络配置 -* --save_dir:模型存储路径 -* --log_period:每隔多少batch打印一次日志 -* --num_passes:训练轮次,一个pass表示过一遍所有训练样本 -* --config_args:命令指定的参数会传入网络配置中。 -* --init_model_path:指定初始化模型路径,可用在测试或训练时指定初始化模型。 +* \--config:网络配置 +* \--save_dir:模型存储路径 +* \--log_period:每隔多少batch打印一次日志 +* \--num_passes:训练轮次,一个pass表示过一遍所有训练样本 +* \--config_args:命令指定的参数会传入网络配置中。 +* \--init_model_path:指定初始化模型路径,可用在测试或训练时指定初始化模型。 默认一个pass保存一次模型,也可以通过saving_period_by_batches设置每隔多少batch保存一次模型。 可以通过show_parameter_stats_period设置打印参数信息等。