提交 69285d97 编写于 作者: jiangfeng08's avatar jiangfeng08

Merge branch 'develop' of https://github.com/baidu/Paddle into MY_COOL_STUFF_BRANCH

......@@ -169,5 +169,4 @@ add_subdirectory(paddle)
add_subdirectory(python)
if(WITH_DOC)
add_subdirectory(doc)
add_subdirectory(doc_cn)
endif()
......@@ -72,6 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
${source}
${destination}
COMMENT "Generating sphinx documentation: ${builder}"
COMMAND ln -s ${destination}/index_*.html ${destination}/index.html
)
set_property(
......
......@@ -206,5 +206,5 @@ function(create_resources res_file output)
# Convert hex data for C compatibility
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
# Append data to output file
file(APPEND ${output} "const unsigned char ${filename}[] = {${filedata}};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
file(APPEND ${output} "const unsigned char ${filename}[] = {${filedata}0};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
endfunction()
......@@ -7,25 +7,50 @@ if(NOT DEFINED SPHINX_THEME_DIR)
endif()
# configured documentation tools and intermediate build results
set(BINARY_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build")
set(BINARY_BUILD_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_build")
# Sphinx cache with pickled ReST documents
set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
# HTML output directory
set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
# HTML output directory
set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
"${BINARY_BUILD_DIR}/conf.py"
"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.en.in"
"${BINARY_BUILD_DIR_EN}/conf.py"
@ONLY)
sphinx_add_target(paddle_docs
html
${BINARY_BUILD_DIR}
${SPHINX_CACHE_DIR}
${BINARY_BUILD_DIR_EN}
${SPHINX_CACHE_DIR_EN}
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR})
${SPHINX_HTML_DIR_EN})
add_dependencies(paddle_docs
gen_proto_py)
# ---- Chinese ("cn") Sphinx documentation target ----
# All intermediate and final artifacts live under
# ${CMAKE_CURRENT_BINARY_DIR}/cn/.
#
# Directory that receives the configured conf.py for the Chinese build
set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build")
# Sphinx cache with pickled ReST documents
set(SPHINX_CACHE_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_doctrees")
# HTML output directory
set(SPHINX_HTML_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/html")
# Generate conf.py from the Chinese template. @ONLY restricts substitution to
# @VAR@ references, so any literal ${...} text in the template is left as-is.
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.cn.in"
"${BINARY_BUILD_DIR_CN}/conf.py"
@ONLY)
# Arguments, in order: target name, builder, conf dir, cache dir,
# source dir, destination dir.
sphinx_add_target(paddle_docs_cn
html
${BINARY_BUILD_DIR_CN}
${SPHINX_CACHE_DIR_CN}
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_CN})
# Build gen_proto_py before the docs target.
# NOTE(review): presumably the docs import the generated protobuf Python
# modules -- confirm against the gen_proto_py target definition.
add_dependencies(paddle_docs_cn
gen_proto_py)
......@@ -15,23 +15,23 @@ MNIST的使用场景
MNIST是一个包含有70,000张灰度图片的数字分类数据集。样例数据 ``mnist_train.txt`` 如下:
.. literalinclude:: mnist_train.txt
.. literalinclude:: src/mnist_train.txt
其中每行数据代表一张图片,行内使用 ``;`` 分成两部分。第一部分是图片的标签,为0-9中的一个数字;第二部分是28*28的图片像素灰度值。 对应的 ``train.list`` 即为这个数据文件的名字:
.. literalinclude:: train.list
.. literalinclude:: src/train.list
dataprovider的使用
++++++++++++++++++
.. literalinclude:: mnist_provider.dict.py
.. literalinclude:: src/mnist_provider.dict.py
- 首先,引入PaddlePaddle的PyDataProvider2包。
- 其次,定义一个Python的 `Decorator <http://www.learnpython.org/en/Decorators>`_ `@provider`_ 。用于将下一行的数据输入函数标记成一个PyDataProvider2,同时设置它的input_types属性。
- `input_types`_:设置这个PyDataProvider2返回什么样的数据。本例根据网络配置中 ``data_layer`` 的名字,显式指定返回的是一个28*28维的稠密浮点数向量和一个[0-9]的10维整数标签。
.. literalinclude:: mnist_config.py
.. literalinclude:: src/mnist_config.py
:lines: 9-10
- 注意:如果用户不显式指定返回数据的对应关系,那么PaddlePaddle会根据layer的声明顺序,来确定对应关系。但这个关系可能不正确,所以推荐使用显式指定的方式来设置input_types。
......@@ -53,7 +53,7 @@ dataprovider的使用
在网络配置里,只需要一行代码就可以调用这个PyDataProvider2,如,
.. literalinclude:: mnist_config.py
.. literalinclude:: src/mnist_config.py
:lines: 1-7
训练数据是 ``train.list`` ,没有测试数据,调用的PyDataProvider2是 ``mnist_provider`` 模块中的 ``process`` 函数。
......@@ -80,7 +80,7 @@ dataprovider的使用
本例采用英文情感分类的数据,即将一段英文文本数据,分类成正面情绪和负面情绪两类(用0和1表示)。样例数据 ``sentimental_train.txt`` 如下:
.. literalinclude:: sentimental_train.txt
.. literalinclude:: src/sentimental_train.txt
dataprovider的使用
++++++++++++++++++
......@@ -90,7 +90,7 @@ dataprovider的使用
- 其中 ``input_types`` 和在 `@provider`_ 中配置的效果一致。本例中的输入特征是词ID的序列,因此使用 ``integer_value_sequence`` 类型来设置。
- 将 ``dictionary`` 存入settings对象,在 ``process`` 函数中使用。 dictionary是从网络配置中传入的dict对象,即一个将单词字符串映射到单词ID的字典。
.. literalinclude:: sentimental_provider.py
.. literalinclude:: src/sentimental_provider.py
网络配置中的调用
++++++++++++++++
......@@ -100,7 +100,7 @@ dataprovider的使用
* 在配置中需要读取外部字典。
* 在声明DataProvider的时候传入dictionary作为参数。
.. literalinclude:: sentimental_config.py
.. literalinclude:: src/sentimental_config.py
:emphasize-lines: 12-14
参考(Reference)
......
.. _api_pydataprovider:
.. _api_pydataprovider2:
PyDataProvider2
===============
......@@ -24,18 +24,18 @@ of 28 x 28 pixels.
A small part of the original data as an example is shown as below:
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_train.txt
.. literalinclude:: src/mnist_train.txt
Each line of the data contains two parts, separated by :code:`;`. The first part is
label of an image. The second part contains 28x28 pixel float values.
Just write path of the above data into train.list. It looks like this:
.. literalinclude:: ../../../doc_cn/ui/data_provider/train.list
.. literalinclude:: src/train.list
The corresponding dataprovider is shown as below:
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_provider.py
.. literalinclude:: src/mnist_provider.dict.py
The first line imports PyDataProvider2 package.
The main function is the process function, that has two parameters.
......@@ -74,7 +74,7 @@ sample by using keywords :code:`yield`.
Only a few lines of code need to be added into the training configuration file,
you can take this as an example.
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_config.py
.. literalinclude:: src/mnist_config.py
Here we specify training data by :code:`train.list`, and no testing data is specified.
The method which actually provides data is :code:`process`.
......@@ -83,7 +83,7 @@ User also can use another style to provide data, which defines the
:code:`data_layer`'s name explicitly when `yield`. For example,
the :code:`dataprovider` is shown as below.
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_provider.dict.py
.. literalinclude:: src/mnist_provider.dict.py
:linenos:
If the user didn't give the :code:`data_layer`'s name, PaddlePaddle will use
......@@ -104,6 +104,8 @@ And PaddlePadle will do all of the rest things\:
Is this cool?
.. _api_pydataprovider2_sequential_model:
DataProvider for the sequential model
-------------------------------------
A sequence model takes sequences as its input. A sequence is made up of several
......@@ -119,11 +121,11 @@ negative sentiment (marked by 0 and 1 respectively).
A small part of the original data as an example can be found in the path below:
.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_train.txt
.. literalinclude:: src/sentimental_train.txt
The corresponding data provider can be found in the path below:
.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_provider.py
.. literalinclude:: src/sentimental_provider.py
This data provider for sequential model is a little more complex than that
for the MNIST dataset.
......@@ -141,7 +143,7 @@ initialized. The :code:`on_init` function has the following parameters:
To pass these parameters into DataProvider, the following lines should be added
into trainer configuration file.
.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_config.py
.. literalinclude:: src/sentimental_config.py
The definition is basically the same as the MNIST example, except:
* Load dictionary in this configuration
......
API
===
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_cn.rst
data_provider/pydataprovider2_cn.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_cn.rst
......@@ -7,7 +7,7 @@ DataProvider API
.. toctree::
:maxdepth: 1
data_provider/index_en.rst
data_provider/dataprovider_en.rst
data_provider/pydataprovider2_en.rst
.. _api_trainer_config:
......
......@@ -34,7 +34,7 @@ PaddlePaddle使用swig对常用的预测接口进行了封装,通过编译会
如下是一段使用mnist model来实现手写识别的预测代码。完整的代码见 ``src_root/doc/ui/predict/predict_sample.py`` 。mnist model可以通过 ``src_root\demo\mnist`` 目录下的demo训练出来。
.. literalinclude:: ../../../doc/ui/predict/predict_sample.py
.. literalinclude:: src/predict_sample.py
:language: python
:lines: 15-18,121-136
......
......@@ -13,7 +13,7 @@ Here is a sample python script that shows the typical prediction process for the
MNIST classification problem. A complete sample code could be found at
:code:`src_root/doc/ui/predict/predict_sample.py`.
.. literalinclude:: ./predict_sample.py
.. literalinclude:: src/predict_sample.py
:language: python
:lines: 15-18,90-100,101-104
......@@ -23,7 +23,7 @@ python's :code:`help()` function. Let's walk through the above python script:
* At the beginning, use :code:`swig_paddle.initPaddle()` to initialize
PaddlePaddle with command line arguments, for more about command line arguments
see `Command Line Arguments <../cmd_argument/detail_introduction.html>`_.
see :ref:`cmd_detail_introduction` .
* Parse the configuration file that is used in training with :code:`parse_config()`.
Because data to predict with always have no label, and output of prediction work
normally is the output layer rather than the cost layer, so you should modify
......@@ -36,7 +36,7 @@ python's :code:`help()` function. Let's walk through the above python script:
- Note: As swig_paddle can only accept C++ matrices, we offer a utility
class DataProviderConverter that can accept the same input data with
PyDataProvider2, for more information please refer to document
of `PyDataProvider2 <../data_provider/pydataprovider2.html>`_.
of :ref:`api_pydataprovider2` .
* Do the prediction with :code:`forwardTest()`, which takes the converted
input data and outputs the activations of the output layer.
......
.. _api_trainer_config_helpers_layers:
======
Layers
======
......
......@@ -62,7 +62,7 @@ source_suffix = ['.rst', '.md', '.Rmd']
source_encoding = 'utf-8'
# The master toctree document.
master_doc = 'index'
master_doc = 'index_cn'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
......@@ -79,7 +79,7 @@ language = 'zh_CN'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']
exclude_patterns = ['_build', '**/*_en*', '*_en*']
# The reST default role (used for this markup: `text`) to use for all
# documents.
......
......@@ -63,7 +63,7 @@ source_suffix = ['.rst', '.md', '.Rmd']
source_encoding = 'utf-8'
# The master toctree document.
master_doc = 'index'
master_doc = 'index_en'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
......@@ -80,7 +80,7 @@ language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']
exclude_patterns = ['_build', '**/*_cn*', '*_cn*']
# The reST default role (used for this markup: `text`) to use for all
# documents.
......
####################
PaddlePaddle常见问题
FAQ
####################
.. contents::
......@@ -33,10 +33,9 @@ PyDataProvider使用的是异步加载,同时在内存里直接随即选取数
个内存池实际上决定了shuffle的粒度。所以,如果将这个内存池减小,又要保证数据是随机的,
那么最好将数据文件在每次读取之前做一次shuffle。可能的代码为
.. literalinclude:: reduce_min_pool_size.py
.. literalinclude:: src/reduce_min_pool_size.py
这样做可以极大的减少内存占用,并且可能会加速训练过程,详细文档参考 `这里
<../ui/data_provider/pydataprovider2.html#provider>`_ 。
这样做可以极大的减少内存占用,并且可能会加速训练过程,详细文档参考 `这里 <../ui/data_provider/pydataprovider2.html#provider>`_ 。
神经元激活内存
++++++++++++++
......@@ -76,7 +75,7 @@ PaddlePaddle支持非常多的优化算法(Optimizer),不同的优化算法需
使用 :code:`pydataprovider` 时,可以减少缓存池的大小,同时设置内存缓存功能,即可以极大的加速数据载入流程。
:code:`DataProvider` 缓存池的减小,和之前减小通过减小缓存池来减小内存占用的原理一致。
.. literalinclude:: reduce_min_pool_size.py
.. literalinclude:: src/reduce_min_pool_size.py
同时 :code:`@provider` 接口有一个 :code:`cache` 参数来控制缓存方法,将其设置成 :code:`CacheType.CACHE_PASS_IN_MEM` 的话,会将第一个 :code:`pass` (过完所有训练数据即为一个pass)生成的数据缓存在内存里,在之后的 :code:`pass` 中,不会再从 :code:`python` 端读取数据,而是直接从内存的缓存里读取数据。这也会极大减少数据读入的耗时。
......@@ -90,11 +89,11 @@ PaddlePaddle支持Sparse的训练,sparse训练需要训练特征是 :code:`spa
使用一个词前两个词和后两个词,来预测这个中间的词。这个任务的DataProvider为\:
.. literalinclude:: word2vec_dataprovider.py
.. literalinclude:: src/word2vec_dataprovider.py
这个任务的配置为\:
.. literalinclude:: word2vec_config.py
.. literalinclude:: src/word2vec_config.py
更多关于sparse训练的内容请参考 `sparse训练的文档 <TBD>`_
......@@ -158,7 +157,7 @@ PaddlePaddle的参数使用名字 :code:`name` 作为参数的ID,相同名字
这里 :code:`hidden_a` 和 :code:`hidden_b` 使用了同样的parameter和bias。并且softmax层的两个输入也使用了同样的参数 :code:`softmax_param`。
7. *-cp27mu-linux_x86_64.whl is not a supported wheel on this platform.
-----------------------------------------------------------------------
---------------------------------------------------------------------------
出现这个问题的主要原因是,系统编译wheel包的时候,使用的 :code:`wheel` 包是最新的,
而系统中的 :code:`pip` 包比较老。具体的解决方法是,更新 :code:`pip` 包并重新编译PaddlePaddle。
......@@ -220,7 +219,7 @@ PaddlePaddle的参数使用名字 :code:`name` 作为参数的ID,相同名字
10. CMake源码编译, 找到的PythonLibs和PythonInterp版本不一致
----------------------------------------------------------
----------------------------------------------------------------
这是目前CMake寻找Python的逻辑存在缺陷,如果系统安装了多个Python版本,CMake找到的Python库和Python解释器版本可能有不一致现象,导致编译PaddlePaddle失败。正确的解决方法是,
用户强制指定特定的Python版本,具体操作如下:
......
......@@ -58,6 +58,7 @@ PaddlePaddle是源于百度的一个深度学习平台。这份简短的介绍
cost = regression_cost(input= ȳ, label=y)
outputs(cost)
这段简短的配置展示了PaddlePaddle的基本用法:
- 第一部分定义了数据输入。一般情况下,PaddlePaddle先从一个文件列表里获得数据文件地址,然后交给用户自定义的函数(例如上面的 `process` 函数)进行读入和预处理从而得到真实输入。本文中由于输入数据是随机生成的不需要读输入文件,所以放一个空列表(`empty.list`)即可。
......
......@@ -99,11 +99,3 @@ In PaddlePaddle, training is just to get a collection of model parameters, which
Although it starts from a random guess, you can see that the value of ``w`` changes quickly towards 2 and ``b`` changes quickly towards 0.3. In the end, the predicted line is almost identical to the real answer.
There, you have recovered the underlying pattern between ``X`` and ``Y`` only from observed data.
5. Where to Go from Here
-------------------------
- `Install and Build <../build_and_install/index.html>`_
- `Tutorials <../demo/quick_start/index_en.html>`_
- `Example and Demo <../demo/index.html>`_
......@@ -111,7 +111,24 @@ cuda相关的Driver和设备映射进container中,脚本类似于
简单的含有ssh的Dockerfile如下:
.. literalinclude:: paddle_ssh.Dockerfile
.. code-block:: bash
FROM paddledev/paddle:cpu-latest
MAINTAINER PaddlePaddle dev team <paddle-dev@baidu.com>
RUN apt-get update
RUN apt-get install -y openssh-server
RUN mkdir /var/run/sshd
RUN echo 'root:root' | chpasswd
RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
EXPOSE 22
CMD ["/usr/sbin/sshd", "-D"]
使用该Dockerfile构建出镜像,然后运行这个container即可。相关命令为\:
......
......@@ -17,7 +17,7 @@ CPU-only one and a CUDA GPU one. We do so by configuring
`dockerhub.com <https://hub.docker.com/r/paddledev/paddle/>`_
automatically runs the following commands:
.. code-block:: base
.. code-block:: bash
docker build -t paddle:cpu -f paddle/scripts/docker/Dockerfile .
docker build -t paddle:gpu -f paddle/scripts/docker/Dockerfile.gpu .
......
......@@ -9,8 +9,8 @@ PaddlePaddle提供数个预编译的二进制来进行安装,包括Docker镜
.. toctree::
:maxdepth: 1
install/docker_install.rst
install/ubuntu_install.rst
docker_install_cn.rst
ubuntu_install_cn.rst
......@@ -24,4 +24,4 @@ PaddlePaddle提供数个预编译的二进制来进行安装,包括Docker镜
.. toctree::
:maxdepth: 1
cmake/index.rst
cmake/build_from_source_cn.rst
\ No newline at end of file
......@@ -38,7 +38,20 @@ PaddlePaddle提供了ubuntu 14.04 deb安装包。
安装完成后,可以使用命令 :code:`paddle version` 查看安装后的paddle 版本:
.. literalinclude:: paddle_version.txt
.. code-block:: shell
PaddlePaddle 0.8.0b1, compiled with
with_avx: ON
with_gpu: OFF
with_double: OFF
with_python: ON
with_rdma: OFF
with_glog: ON
with_gflags: ON
with_metric_learning:
with_timer: OFF
with_predict_sdk:
可能遇到的问题
--------------
......
GET STARTED
============
.. toctree::
:maxdepth: 2
build_and_install/index_cn.rst
basic_usage/index_cn.rst
```eval_rst
.. _cmd_detail_introduction:
```
# Detail Description
## Common
......
```eval_rst
.. _cmd_line_index_en:
.. _cmd_line_index:
```
# How to Set Command-line Parameters
......
......@@ -47,7 +47,7 @@ DataProvider是PaddlePaddle系统的数据提供器,将用户的原始数据
一个简单的训练配置文件为:
.. literalinclude:: trainer_config.py
.. literalinclude:: src/trainer_config.py
:linenos:
文件开头 ``from paddle.trainer_config_helpers import *`` ,是因为PaddlePaddle配置文件与C++模块通信的最基础协议是protobuf,为了避免用户直接写复杂的protobuf string,我们为用户定义Python接口来配置网络,该Python代码可以生成protobuf包,这就是 `trainer_config_helpers`_ 的作用。因此,在文件的开始,需要import这些函数。 这个包里面包含了模型配置需要的各个模块。
......@@ -114,7 +114,7 @@ PaddlePaddle 可以使用 ``mixed layer`` 配置出非常复杂的网络,甚
PaddlePaddle多机采用经典的 Parameter Server 架构对多个节点的 trainer 进行同步。多机训练的经典拓扑结构如下\:
.. graphviz:: pserver_topology.dot
.. graphviz:: src/pserver_topology.dot
图中每个灰色方块是一台机器,在每个机器中,先使用命令 ``paddle pserver`` 启动一个pserver进程,并指定端口号,可能的参数是\:
......
How to Configure Deep Models
============================
.. toctree::
:maxdepth: 1
rnn/recurrent_group_cn.md
rnn/hierarchical_layer_cn.rst
rnn/hrnn_rnn_api_compare_cn.rst
rnn/hrnn_demo_cn.rst
......@@ -24,18 +24,18 @@
- 本例中的原始数据一共有10个样本。每个样本由两部分组成,一个label(此处都为2)和一个已经分词后的句子。这个数据也被单层RNN网络直接使用。
.. literalinclude:: ../../../paddle/gserver/tests/Sequence/tour_train_wdseg
.. literalinclude:: ../../../../paddle/gserver/tests/Sequence/tour_train_wdseg
:language: text
- 双层序列数据一共有4个样本。 每个样本间用空行分开,整体数据和原始数据完全一样。但对于双层序列的LSTM来说,第一个样本同时encode两条数据成两个向量。这四条数据同时处理的句子数量为\ :code:`[2, 3, 2, 3]`\ 。
.. literalinclude:: ../../../paddle/gserver/tests/Sequence/tour_train_wdseg.nest
.. literalinclude:: ../../../../paddle/gserver/tests/Sequence/tour_train_wdseg.nest
:language: text
其次,对于两种不同的输入数据类型,不同DataProvider对比如下(`sequenceGen.py <https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/gserver/tests/sequenceGen.py>`_)\:
.. literalinclude:: ../../../paddle/gserver/tests/sequenceGen.py
.. literalinclude:: ../../../../paddle/gserver/tests/sequenceGen.py
:language: python
:lines: 21-39
:linenos:
......@@ -43,10 +43,11 @@
- 这是普通的单层时间序列的DataProvider代码,其说明如下:
* DataProvider共返回两个数据,分别是words和label。即上述代码中的第19行。
- words是原始数据中的每一句话,所对应的词表index数组。它是integer_value_sequence类型的,即整数数组。words即为这个数据中的单层时间序列。
- label是原始数据中对于每一句话的分类标签,它是integer_value类型的。
.. literalinclude:: ../../../paddle/gserver/tests/sequenceGen.py
.. literalinclude:: ../../../../paddle/gserver/tests/sequenceGen.py
:language: python
:lines: 42-71
:linenos:
......@@ -63,7 +64,7 @@
首先,我们看一下单层RNN的配置。代码中9-15行(高亮部分)即为单层RNN序列的使用代码。这里使用了PaddlePaddle预定义好的RNN处理函数。在这个函数中,RNN对于每一个时间步通过了一个LSTM网络。
.. literalinclude:: ../../../paddle/gserver/tests/sequence_layer_group.conf
.. literalinclude:: ../../../../paddle/gserver/tests/sequence_layer_group.conf
:language: python
:lines: 38-63
:linenos:
......@@ -84,7 +85,7 @@
* 至此,\ :code:`lstm_last`\ 便和单层RNN配置中的\ :code:`lstm_last`\ 具有相同的结果了。
.. literalinclude:: ../../../paddle/gserver/tests/sequence_nest_layer_group.conf
.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_layer_group.conf
:language: python
:lines: 38-64
:linenos:
......@@ -106,7 +107,7 @@
- 单层RNN:过了一个很简单的recurrent_group。每一个时间步,当前的输入y和上一个时间步的输出rnn_state做了一个全链接。
.. literalinclude:: ../../../paddle/gserver/tests/sequence_rnn.conf
.. literalinclude:: ../../../../paddle/gserver/tests/sequence_rnn.conf
:language: python
:lines: 36-48
......@@ -115,7 +116,7 @@
- 内层inner_step的recurrent_group和单层序列的几乎一样。除了boot_layer=outer_mem,表示将外层的outer_mem作为内层memory的初始状态。外层outer_step中,outer_mem是一个子句的最后一个向量,即整个双层group是将前一个子句的最后一个向量,作为下一个子句memory的初始状态。
- 从输入数据上看,单双层序列的句子是一样的,只是双层序列将其又做了子序列划分。因此双层序列的配置中,必须将前一个子句的最后一个元素,作为boot_layer传给下一个子句的memory,才能保证和单层序列的配置中“每个时间步都用了上一个时间步的输出结果”一致。
.. literalinclude:: ../../../paddle/gserver/tests/sequence_nest_rnn.conf
.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_rnn.conf
:language: python
:lines: 39-66
......@@ -151,14 +152,14 @@
* 单层RNN\:
.. literalinclude:: ../../../paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py
.. literalinclude:: ../../../../paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py
:language: python
:lines: 42-59
:linenos:
* 双层RNN\ \:
.. literalinclude:: ../../../paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py
.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py
:language: python
:lines: 41-80
:linenos:
......@@ -181,11 +182,11 @@ Memory
Memory是PaddlePaddle实现RNN时候使用的一个概念。RNN即时间递归神经网络,通常要求时间步之间具有一些依赖性,即当前时间步下的神经网络依赖前一个时间步神经网络中某一个神经元输出。如下图所示。
.. graphviz:: glossary_rnn.dot
.. graphviz:: src/glossary_rnn.dot
上图中虚线的连接,即是跨越时间步的网络连接。PaddlePaddle在实现RNN的时候,将这种跨越时间步的连接用一个特殊的神经网络单元实现。这个神经网络单元就叫Memory。Memory可以缓存上一个时刻某一个神经元的输出,然后在下一个时间步输入给另一个神经元。使用Memory的RNN实现便如下图所示。
.. graphviz:: glossary_rnn_with_memory.dot
.. graphviz:: src/glossary_rnn_with_memory.dot
使用这种方式,PaddlePaddle可以比较简单的判断哪些输出是应该跨越时间步的,哪些不是。
......
......@@ -30,7 +30,7 @@ Then at the :code:`process` function, each :code:`yield` function will return th
yield src_ids, trg_ids, trg_ids_next
For more details description of how to write a data provider, please refer to `PyDataProvider2 <../../ui/data_provider/index.html>`_. The full data provider file is located at :code:`demo/seqToseq/dataprovider.py`.
For a more detailed description of how to write a data provider, please refer to :ref:`api_pydataprovider2` . The full data provider file is located at :code:`demo/seqToseq/dataprovider.py`.
===============================================
Configure Recurrent Neural Network Architecture
......@@ -42,7 +42,7 @@ Simple Gated Recurrent Neural Network
A recurrent neural network processes a sequence sequentially, one time step at a time. An example of the architecture of LSTM is listed below.
.. image:: ../../../tutorials/sentiment_analysis/bi_lstm.jpg
.. image:: ../../../tutorials/sentiment_analysis/src/bi_lstm.jpg
:align: center
Generally speaking, a recurrent network performs the following operations from :math:`t=1` to :math:`t=T`, or reversely from :math:`t=T` to :math:`t=1`.
......@@ -106,7 +106,7 @@ We will use the sequence to sequence model with attention as an example to demon
In this model, the source sequence :math:`S = \{s_1, \dots, s_T\}` is encoded with a bidirectional gated recurrent neural network. The hidden states of the bidirectional gated recurrent neural network :math:`H_S = \{H_1, \dots, H_T\}` are called the *encoder vector*. The decoder is a gated recurrent neural network. When decoding each token :math:`y_t`, the gated recurrent neural network generates a set of weights :math:`W_S^t = \{W_1^t, \dots, W_T^t\}`, which are used to compute a weighted sum of the encoder vector. The weighted sum of the encoder vector is utilized to condition the generation of the token :math:`y_t`.
The encoder part of the model is listed below. It calls :code:`grumemory` to represent gated recurrent neural network. It is the recommended way of using recurrent neural network if the network architecture is simple, because it is faster than :code:`recurrent_group`. We have implemented most of the commonly used recurrent neural network architectures, you can refer to `Layers <../../ui/api/trainer_config_helpers/layers_index.html>`_ for more details.
The encoder part of the model is listed below. It calls :code:`grumemory` to represent gated recurrent neural network. It is the recommended way of using recurrent neural network if the network architecture is simple, because it is faster than :code:`recurrent_group`. We have implemented most of the commonly used recurrent neural network architectures, you can refer to :ref:`api_trainer_config_helpers_layers` for more details.
We also project the encoder vector to :code:`decoder_size` dimensional space, get the first instance of the backward recurrent network, and project it to :code:`decoder_size` dimensional space:
......@@ -246,6 +246,6 @@ The code is listed below:
outputs(beam_gen)
Notice that this generation technique is only useful for decoder like generation process. If you are working on sequence tagging tasks, please refer to `Semantic Role Labeling Demo <../../demo/semantic_role_labeling/index.html>`_ for more details.
Notice that this generation technique is only useful for decoder like generation process. If you are working on sequence tagging tasks, please refer to :ref:`semantic_role_labeling` for more details.
The full configuration file is located at :code:`demo/seqToseq/seqToseq_net.py`.
HOW TO
=======
Usage
-------
.. toctree::
:maxdepth: 1
concepts/use_concepts_cn.rst
cluster/k8s/paddle_on_k8s_cn.md
cluster/k8s/distributed_training_on_k8s_cn.md
Development
------------
.. toctree::
:maxdepth: 1
write_docs/index_cn.rst
deep_model/index_cn.rst
Optimization
-------------
.. toctree::
:maxdepth: 1
......@@ -51,7 +51,7 @@ In this tutorial, we will focus on nvprof and nvvp.
:code:`test_GpuProfiler` from :code:`paddle/math/tests` directory will be used to evaluate
above profilers.
.. literalinclude:: ../../paddle/math/tests/test_GpuProfiler.cpp
.. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 111-124
:linenos:
......@@ -77,7 +77,7 @@ As a simple example, consider the following:
1. Add :code:`REGISTER_TIMER_INFO` and :code:`printAllStatus` functions (see the emphasize-lines).
.. literalinclude:: ../../paddle/math/tests/test_GpuProfiler.cpp
.. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 111-124
:emphasize-lines: 8-10,13
......@@ -124,7 +124,7 @@ To use this command line profiler **nvprof**, you can simply issue the following
1. Add :code:`REGISTER_GPU_PROFILER` function (see the emphasize-lines).
.. literalinclude:: ../../paddle/math/tests/test_GpuProfiler.cpp
.. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 111-124
:emphasize-lines: 6-7
......
PaddlePaddle 文档
======================
.. toctree::
:maxdepth: 1
getstarted/index_cn.rst
tutorials/index_cn.md
howto/index_cn.rst
api/index_cn.rst
faq/index_cn.rst
......@@ -9,3 +9,4 @@ PaddlePaddle Documentation
howto/index_en.rst
api/index_en.rst
about/index_en.rst
\ No newline at end of file
......@@ -93,7 +93,7 @@ where `train.sh` is almost the same as `demo/seqToseq/translation/train.sh`, the
- `--init_model_path`: path of the initialization model, here is `data/paraphrase_model`
- `--load_missing_parameter_strategy`: operations when model file is missing, here use a normal distribution to initialize the other parameters except for the embedding layer
For users who want to understand the dataset format, model architecture and training procedure in detail, please refer to [Text generation Tutorial](../text_generation/text_generation.md).
For users who want to understand the dataset format, model architecture and training procedure in detail, please refer to [Text generation Tutorial](../text_generation/index_en.md).
## Optional Function ##
### Embedding Parameters Observation
......
# Model Zoo - ImageNet #
[ImageNet](http://www.image-net.org/) 是通用物体分类领域一个众所周知的数据库。本教程提供了一个用于ImageNet上的卷积分类网络模型。
## ResNet 介绍
论文 [Deep Residual Learning for Image Recognition](http://arxiv.org/abs/1512.03385) 中提出的ResNet网络结构在2015年ImageNet大规模视觉识别竞赛(ILSVRC 2015)的分类任务中赢得了第一名。他们提出残差学习的框架来简化网络的训练,所构建网络结构的深度比之前使用的网络有大幅度的提高。下图展示的是基于残差的连接方式。左图构造网络模块的方式被用于34层的网络中,而右图的瓶颈连接模块用于50层,101层和152层的网络结构中。
<center>![resnet_block](./resnet_block.jpg)</center>
<center>图 1. ResNet 网络模块</center>
本教程中我们给出了三个ResNet模型,这些模型都是由原作者提供的模型<https://github.com/KaimingHe/deep-residual-networks>转换过来的。我们使用PaddlePaddle在ILSVRC的验证集共50,000幅图像上测试了模型的分类错误率,其中输入图像的颜色通道顺序为**BGR**,保持宽高比缩放到短边为256,只截取中心方形的图像区域。分类错误率和模型大小由下表给出。
<center>
<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
<colgroup>
<col class="left" />
<col class="left" />
<col class="left" />
</colgroup>
<thead>
<tr>
<th scope="col" class="left">ResNet</th>
<th scope="col" class="left">Top-1</th>
<th scope="col" class="left">Model Size</th>
</tr>
</thead>
<tbody>
<tr>
<td class="left">ResNet-50</td>
<td class="left">24.9%</td>
<td class="left">99M</td>
</tr>
<tr>
<td class="left">ResNet-101</td>
<td class="left">23.7%</td>
<td class="left">173M</td>
</tr>
<tr>
<td class="left">ResNet-152</td>
<td class="left">23.2%</td>
<td class="left">234M</td>
</tr>
</tbody>
</table></center>
<br>
## ResNet 模型
50层,101层和152层的网络配置文件可参照```demo/model_zoo/resnet/resnet.py```。你也可以通过在命令行参数中增加一个参数如```--config_args=layer_num=50```来指定网络层的数目。
### 网络可视化
你可以通过执行下面的命令来得到ResNet网络的结构可视化图。该脚本会生成一个dot文件,然后可以转换为图片。需要安装graphviz来转换dot文件为图片。
```
cd demo/model_zoo/resnet
./net_diagram.sh
```
### 模型下载
```
cd demo/model_zoo/resnet
./get_model.sh
```
你可以执行上述命令来下载所有的模型和均值文件,如果下载成功,这些文件将会被保存在```demo/model_zoo/resnet/model```路径下。
```
mean_meta_224 resnet_101 resnet_152 resnet_50
```
* resnet_50: 50层网络模型。
* resnet_101: 101层网络模型。
* resnet_152: 152层网络模型。
* mean\_meta\_224: 均值图像文件,图像大小为3 x 224 x 224,颜色通道顺序为**BGR**。你也可以使用这三个值: 103.939, 116.779, 123.68。
### 参数信息
* **卷积层权重**
由于每个卷积层后面连接的是batch normalization层,因此该层中没有偏置(bias)参数,并且只有一个权重。
形状: `(Co, ky, kx, Ci)`
* Co: 输出特征图的通道数目
* ky: 滤波器核在垂直方向上的尺寸
* kx: 滤波器核在水平方向上的尺寸
* Ci: 输入特征图的通道数目
二维矩阵: (Co * ky * kx, Ci), 行优先次序存储。
* **全连接层权重**
二维矩阵: (输入层尺寸, 本层尺寸), 行优先次序存储。
* **[Batch Normalization](<http://arxiv.org/abs/1502.03167>) 层权重**
本层有四个参数,实际上只有.w0和.wbias是需要学习的参数,另外两个分别是滑动均值和方差。在测试阶段它们将会被加载到模型中。下表展示了batch normalization层的参数。
<center>
<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
<colgroup>
<col class="left" />
<col class="left" />
<col class="left" />
</colgroup>
<thead>
<tr>
<th scope="col" class="left">参数名</th>
<th scope="col" class="left">尺寸</th>
<th scope="col" class="left">含义</th>
</tr>
</thead>
<tbody>
<tr>
<td class="left">_res2_1_branch1_bn.w0</td>
<td class="left">256</td>
<td class="left">gamma, 缩放参数</td>
</tr>
<tr>
<td class="left">_res2_1_branch1_bn.w1</td>
<td class="left">256</td>
<td class="left">特征图均值</td>
</tr>
<tr>
<td class="left">_res2_1_branch1_bn.w2</td>
<td class="left">256</td>
<td class="left">特征图方差</td>
</tr>
<tr>
<td class="left">_res2_1_branch1_bn.wbias</td>
<td class="left">256</td>
<td class="left">beta, 偏置参数</td>
</tr>
</tbody>
</table></center>
<br>
### 参数读取
使用者可以使用下面的Python脚本来读取参数值:
```
import sys
import numpy as np
def load(file_name):
with open(file_name, 'rb') as f:
f.read(16) # skip header for float type.
return np.fromfile(f, dtype=np.float32)
if __name__=='__main__':
weight = load(sys.argv[1])
```
或者直接使用下面的shell命令:
```
od -j 16 -f _res2_1_branch1_bn.w0
```
## 特征提取
我们提供了C++和Python接口来提取特征。下面的例子使用了`demo/model_zoo/resnet/example`中的数据,详细地展示了整个特征提取的过程。
### C++接口
首先,在配置文件中的`define_py_data_sources2`里指定图像数据列表,具体请参照示例`demo/model_zoo/resnet/resnet.py`
```
train_list = 'train.list' if not is_test else None
# mean.meta is mean file of ImageNet dataset.
# mean.meta size : 3 x 224 x 224.
# If you use three mean value, set like:
# "mean_value:103.939,116.779,123.68;"
args={
'mean_meta': "model/mean_meta_224/mean.meta",
'image_size': 224, 'crop_size': 224,
'color': True,'swap_channel:': [2, 1, 0]}
define_py_data_sources2(train_list,
'example/test.list',
module="example.image_list_provider",
obj="processData",
args=args)
```
第二步,在`resnet.py`文件中指定要提取特征的网络层的名字。例如,
```
Outputs("res5_3_branch2c_conv", "res5_3_branch2c_bn")
```
第三步,在`extract_fea_c++.sh`文件中指定模型路径和输出的目录,然后执行下面的命令。
```
cd demo/model_zoo/resnet
./extract_fea_c++.sh
```
如果执行成功,特征将会存到`fea_output/rank-00000`文件中,如下所示。同时你可以使用`load_feature.py`文件中的`load_feature_c`接口来加载该文件。
```
-0.115318 -0.108358 ... -0.087884;-1.27664 ... -1.11516 -2.59123;
-0.126383 -0.116248 ... -0.00534909;-1.42593 ... -1.04501 -1.40769;
```
* 每行存储的是一个样本的特征。其中,第一行存的是图像`example/dog.jpg`的特征,第二行存的是图像`example/cat.jpg`的特征。
* 不同层的特征由分号`;`隔开,并且它们的顺序与`Outputs()`中指定的层顺序一致。这里,左边是`res5_3_branch2c_conv`层的特征,右边是`res5_3_branch2c_bn`层特征。
### Python接口
示例`demo/model_zoo/resnet/classify.py`中展示了如何使用Python来提取特征。下面的例子同样使用了`./example/test.list`中的数据。执行的命令如下:
```
cd demo/model_zoo/resnet
./extract_fea_py.sh
```
extract_fea_py.sh:
```
python classify.py \
--job=extract \
--conf=resnet.py\
--use_gpu=1 \
--mean=model/mean_meta_224/mean.meta \
--model=model/resnet_50 \
--data=./example/test.list \
--output_layer="res5_3_branch2c_conv,res5_3_branch2c_bn" \
--output_dir=features
```
* \--job=extract: 指定工作模式来提取特征。
* \--conf=resnet.py: 网络配置文件。
* \--use_gpu=1: 指定是否使用GPU。
* \--model=model/resnet_50: 模型路径。
* \--data=./example/test.list: 数据列表。
* \--output_layer="xxx,xxx": 指定提取特征的层。
* \--output_dir=features: 输出目录。
如果运行成功,你将会看到特征存储在`features/batch_0`文件中,该文件是由cPickle产生的。你可以使用`load_feature.py`中的`load_feature_py`接口来打开该文件,它将返回如下的字典:
```
{
'cat.jpg': {'res5_3_branch2c_conv': array([[-0.12638293, -0.116248 , -0.11883899, ..., -0.00895038, 0.01994277, -0.00534909]], dtype=float32), 'res5_3_branch2c_bn': array([[-1.42593431, -1.28918779, -1.32414699, ..., -1.45933616, -1.04501402, -1.40769434]], dtype=float32)},
'dog.jpg': {'res5_3_branch2c_conv': array([[-0.11531784, -0.10835785, -0.08809858, ...,0.0055237, 0.01505112, -0.08788397]], dtype=float32), 'res5_3_branch2c_bn': array([[-1.27663755, -1.18272924, -0.90937918, ..., -1.25178063, -1.11515927, -2.59122872]], dtype=float32)}
}
```
仔细观察,这些特征值与上述使用C++接口提取的结果是一致的。
## 预测
`classify.py`文件也可以用于对样本进行预测。我们提供了一个示例脚本`predict.sh`,它使用50层的ResNet模型来对`example/test.list`中的数据进行预测。
```
cd demo/model_zoo/resnet
./predict.sh
```
predict.sh调用了`classify.py`:
```
python classify.py \
--job=predict \
--conf=resnet.py\
--multi_crop \
--model=model/resnet_50 \
--use_gpu=1 \
--data=./example/test.list
```
* \--job=predict: 指定工作模式进行预测。
* \--conf=resnet.py: 网络配置文件。
* \--multi_crop: 使用10个裁剪图像块,预测概率取平均。
* \--use_gpu=1: 指定是否使用GPU。
* \--model=model/resnet_50: 模型路径。
* \--data=./example/test.list: 数据列表。
如果运行成功,你将会看到如下结果,其中156和282是这些图像的分类标签。
```
Label of example/dog.jpg is: 156
Label of example/cat.jpg is: 282
```
......@@ -52,7 +52,7 @@ See ```demo/model_zoo/resnet/resnet.py```. This config contains network of 50, 1
### Network Visualization
You can get a diagram of ResNet network by running the following commands. The script generates dot file and then converts dot file to PNG file, which uses installed draw_dot tool in our server. If you can not access the server, just install graphviz to convert dot file.
You can get a diagram of ResNet network by running the following commands. The script generates a dot file and then converts it to a PNG file; graphviz must be installed for the conversion.
```
cd demo/model_zoo/resnet
......@@ -138,7 +138,7 @@ There are four parameters in this layer. In fact, only .w0 and .wbias are the le
### Parameter Observation
Users who want to observe the parameters can use python to read:
Users who want to observe the parameters can use Python to read:
```
import sys
......@@ -209,7 +209,7 @@ If successful, features are saved in `fea_output/rank-00000` as follows. And you
### Python Interface
`demo/model_zoo/resnet/classify.py` is an example to show how to use python to extract features. Following example still uses data of `./example/test.list`. Command is as follows:
`demo/model_zoo/resnet/classify.py` is an example to show how to use Python to extract features. Following example still uses data of `./example/test.list`. Command is as follows:
```
cd demo/model_zoo/resnet
......@@ -238,8 +238,6 @@ python classify.py \
* \--output_layer="xxx,xxx": specify layers to extract features.
* \--output_dir=features: output directory.
Note, since the convolution layer in these ResNet models is suitable for the cudnn implementation which only support GPU. It not support CPU mode because of compatibility issue and we will fix later.
If run successfully, you will see features saved in `features/batch_0`, this file is produced with cPickle. You can use `load_feature_py` interface in `load_feature.py` to open the file, and it returns a dictionary as follows:
```
......
# TUTORIALS
There are several examples and demos here.
## Quick Start
* [Quick Start](quick_start/index_cn.rst)
## Image
* TBD
## NLP
* [Sentiment Analysis](sentiment_analysis/index_cn.md)
* [Semantic Role Labeling](semantic_role_labeling/index_cn.rst)
## Recommendation
* TBD
## Model Zoo
* TBD
# TUTORIALS
There are serveral examples and demos here.
There are several examples and demos here.
## [Quick Start](quick_start/index_en.md)
## Quick Start
* [Quick Start](quick_start/index_en.md)
## Image
......
......@@ -21,7 +21,7 @@ PaddlePaddle快速入门教程
使用PaddlePaddle, 每一个任务流程都可以被划分为如下五个步骤。
.. image:: Pipeline.jpg
.. image:: src/Pipeline_cn.jpg
:align: center
:scale: 80%
......@@ -99,7 +99,7 @@ Python脚本读取数据
本小节我们将介绍模型网络结构。
.. image:: PipelineNetwork.jpg
.. image:: src/PipelineNetwork_cn.jpg
:align: center
:scale: 80%
......@@ -112,7 +112,7 @@ Python脚本读取数据
具体流程如下:
.. image:: NetLR.jpg
.. image:: src/NetLR_cn.jpg
:align: center
:scale: 80%
......@@ -176,7 +176,7 @@ embedding模型需要稍微改变提供数据的Python脚本,即 ``dataprovide
该模型依然使用逻辑回归分类网络的框架, 只是将句子用连续向量表示替换为用稀疏向量表示, 即对第三步进行替换。句子表示的计算更新为两步:
.. image:: NetContinuous.jpg
.. image:: src/NetContinuous_cn.jpg
:align: center
:scale: 80%
......@@ -207,7 +207,7 @@ embedding模型需要稍微改变提供数据的Python脚本,即 ``dataprovide
卷积网络是一种特殊的从词向量表示到句子表示的方法, 也就是将词向量模型进一步演化为三个新步骤。
.. image:: NetConv.jpg
.. image:: src/NetConv_cn.jpg
:align: center
:scale: 80%
......@@ -238,7 +238,7 @@ embedding模型需要稍微改变提供数据的Python脚本,即 ``dataprovide
时序模型
----------
.. image:: NetRNN.jpg
.. image:: src/NetRNN_cn.jpg
:align: center
:scale: 80%
......@@ -284,7 +284,7 @@ Momentum, RMSProp,AdaDelta,AdaGrad,ADAM,Adamax等,这里采用Adam优
在数据加载和网络配置完成之后, 我们就可以训练模型了。
.. image:: PipelineTrain.jpg
.. image:: src/PipelineTrain_cn.jpg
:align: center
:scale: 80%
......@@ -294,7 +294,7 @@ Momentum, RMSProp,AdaDelta,AdaGrad,ADAM,Adamax等,这里采用Adam优
./train.sh
``train.sh``中包含了训练模型的基本命令。训练时所需设置的主要参数如下:
``train.sh`` 中包含了训练模型的基本命令。训练时所需设置的主要参数如下:
.. code-block:: bash
......@@ -312,7 +312,7 @@ Momentum, RMSProp,AdaDelta,AdaGrad,ADAM,Adamax等,这里采用Adam优
当模型训练好了之后,我们就可以进行预测了。
.. image:: PipelineTest.jpg
.. image:: src/PipelineTest_cn.jpg
:align: center
:scale: 80%
......
......@@ -32,7 +32,7 @@ The monitor breaks down two months after purchase.
the classifier should output “negative“.
To build your text classification system, your code will need to perform five steps:
<center> ![](./Pipeline_en.jpg) </center>
<center> ![](./src/Pipeline_en.jpg) </center>
- Preprocess data into a standardized format.
- Provide data to the learning model.
......@@ -160,14 +160,14 @@ You can refer to the following link for more detailed examples and data formats:
## Network Architecture
You will describe four kinds of network architectures in this section.
<center> ![](./PipelineNetwork_en.jpg) </center>
<center> ![](./src/PipelineNetwork_en.jpg) </center>
First, you will build a logistic regression model. Later, you will also get chance to build other more powerful network architectures.
For more detailed documentation, you could refer to: <a href = "../../api/trainer_config_helpers/layers.html">layer documentation</a>. All configuration files are in `demo/quick_start` directory.
### Logistic Regression
The architecture is illustrated in the following picture:
<center> ![](./NetLR_en.png) </center>
<center> ![](./src/NetLR_en.png) </center>
- You need define the data for text features. The size of the data layer is the number of words in the dictionary.
......@@ -240,7 +240,7 @@ def process(settings, file_name):
```
This model is very similar to the framework of logistic regression, but it uses word embedding vectors instead of a sparse vectors to represent words.
<center> ![](./NetContinuous_en.png) </center>
<center> ![](./src/NetContinuous_en.png) </center>
- It can look up the dense word embedding vector in the dictionary (its words embedding vector is `word_dim`). The input is a sequence of N words, the output is N word_dim dimensional vectors.
......@@ -283,7 +283,7 @@ The performance is summarized in the following table:
### Convolutional Neural Network Model
Convolutional neural network converts a sequence of word embeddings into a sentence representation using temporal convolutions. You will transform the fully connected layer of the word embedding model to 3 new sub-steps.
<center> ![](./NetConv_en.png) </center>
<center> ![](./src/NetConv_en.png) </center>
Text convolution has 3 steps:
......@@ -324,7 +324,7 @@ The performance is summarized in the following table:
<br>
### Recurrent Model
<center> ![](./NetRNN_en.png) </center>
<center> ![](./src/NetRNN_en.png) </center>
You can use Recurrent neural network as our time sequence model, including simple RNN model, GRU model, and LSTM model.
......@@ -378,7 +378,7 @@ settings(batch_size=128,
## Training Model
After completing data preparation and network architecture specification, you will run the training script.
<center> ![](./PipelineTrain_en.png) </center>
<center> ![](./src/PipelineTrain_en.png) </center>
Training script: our training script is in `train.sh` file. The training arguments are listed below:
......@@ -395,7 +395,7 @@ We do not provide examples on how to train on clusters here. If you want to trai
## Inference
You can use the trained model to perform prediction on the dataset with no labels. You can also evaluate the model on dataset with labels to obtain its test accuracy.
<center> ![](./PipelineTest_en.png) </center>
<center> ![](./src/PipelineTest_en.png) </center>
The test script is listed below. PaddlePaddle can evaluate a model on the data with labels specified in `test.list`.
......
```eval_rst
.. _demo_ml_dataset_en:
.. _demo_ml_dataset:
```
# MovieLens Dataset
......
......@@ -16,7 +16,7 @@ Data Preparation
````````````````
Download and extract dataset
''''''''''''''''''''''''''''
We use :ref:`demo_ml_dataset_en` here.
We use :ref:`demo_ml_dataset` here.
To download and unzip the dataset, simply run the following commands.
.. code-block:: bash
......@@ -264,7 +264,7 @@ In this :code:`dataprovider.py`, we should set\:
* use_seq\: Whether this :code:`dataprovider.py` in sequence mode or not.
* process\: Return each sample of data to :code:`paddle`.
The data provider details document see :ref:`api_pydataprovider`.
The data provider details document see :ref:`api_pydataprovider2`.
Train
`````
......@@ -280,7 +280,7 @@ The run.sh is shown as follow:
It just start a paddle training process, write the log to `log.txt`,
then print it on screen.
Each command line argument in :code:`run.sh`, please refer to the :ref:`cmd_line_index_en` page. The short description of these arguments is shown as follow.
Each command line argument in :code:`run.sh`, please refer to the :ref:`cmd_line_index` page. The short description of these arguments is shown as follow.
* config\: Tell paddle which file is neural network configuration.
* save_dir\: Tell paddle save model into './output'
......
......@@ -149,7 +149,7 @@ paddle train \
训练后,模型将保存在目录`output`中。 我们的训练曲线如下:
<center>
![pic](./curve.jpg)
![pic](./src/curve.jpg)
</center>
### 测试
......
```eval_rst
.. _semantic_role_labeling:
```
# Semantic Role labeling Tutorial #
Semantic role labeling (SRL) is a form of shallow semantic parsing whose goal is to discover the predicate-argument structure of each predicate in a given input sentence. SRL is useful as an intermediate step in a wide range of natural language processing tasks, such as information extraction, automatic document categorization and question answering. An example is as follows [1]:
......@@ -41,13 +45,13 @@ Unlike Bidirectional-LSTM that used in Sentiment Analysis demo, the DB-LSTM ado
The following figure shows a temporal expanded 2-layer DB-LSTM network.
<center>
![pic](./network_arch.png)
![pic](./src/network_arch.png)
</center>
### Features
Two input features play an essential role in this pipeline: predicate (pred) and argument (argu). Two other features: predicate context (ctx-p) and region mark (mr) are also adopted. Because a single predicate word can not exactly describe the predicate information, especially when the same words appear more than one times in a sentence. With the predicate context, the ambiguity can be largely eliminated. Similarly, we use region mark m<sub>r</sub> = 1 to denote the argument position if it locates in the predicate context region, or m<sub>r</sub> = 0 if does not. These four simple features are all we need for our SRL system. Features of one sample with context size set to 1 is showed as following[2]:
<center>
![pic](./feature.jpg)
![pic](./src/feature.jpg)
</center>
In this sample, the corresponding labelled sentence is:
......@@ -148,7 +152,7 @@ paddle train \
After training, the models will be saved in directory `output`. Our training curve is as following:
<center>
![pic](./curve.jpg)
![pic](./src/curve.jpg)
</center>
### Run testing
......
# 语义角色标注教程 #
语义角色标注(Semantic role labeling, SRL)是浅语义解析的一种形式,其目的是在给定的输入句子中发现每个谓词的谓词参数结构。 SRL作为很多自然语言处理任务中的中间步骤是很有用的,如信息提取、文档自动分类和问答。 实例如下 [1]:
[ <sub>A0</sub> 他 ] [ <sub>AM-MOD</sub> 将 ][ <sub>AM-NEG</sub> 不会 ] [ <sub>V</sub> 接受] [ <sub>A1</sub> 任何东西 ] 从 [<sub>A2</sub> 那些他写的东西中 ]。
- V: 动词
- A0: 接受者
- A1: 接受的东西
- A2: 从……接受
- A3: 属性
- AM-MOD: 情态动词
- AM-NEG: 否定
给定动词“接受”,句子中的大部分将会扮演某些语义角色。这里,标签方案来自 Penn Proposition Bank。
到目前为止,大多数成功的SRL系统是建立在某种形式的解析结果之上的,其中在语法结构上使用了预先定义的特征模板。 本教程将介绍使用深度双向长短期记忆(DB-LSTM)模型[2]的端到端系统来解决SRL任务,这在很大程度上优于先前的最先进的系统。 这个系统将SRL任务视为序列标记问题。
## 数据描述
相关论文[2]采用 CoNLL-2005&2012 共享任务中设置的数据进行训练和测试。根据数据许可证,演示采用 CoNLL-2005 的测试数据集,可以在网站上找到。
用户只需执行以下命令就可以下载并处理原始数据:
```bash
cd data
./get_data.sh
```
`data` 目录会出现如下几个新的文件:
```bash
conll05st-release:the test data set of CoNll-2005 shared task
test.wsj.words:the Wall Street Journal data sentences
test.wsj.props: the propositional arguments
feature: the extracted features from data set
```
## 训练
### DB-LSTM
请参阅情感分析的演示以了解有关长短期记忆单元的更多信息。
与在 Sentiment Analysis 演示中使用的 Bidirectional-LSTM 不同,DB-LSTM 采用另一种方法来堆叠LSTM层。首先,标准LSTM以正向处理该序列。该 LSTM 层的输入和输出作为下一个 LSTM 层的输入,并被反向处理。这两个标准 LSTM 层组成一对 LSTM。然后我们堆叠一对对的 LSTM 层后得到深度 LSTM 模型。
下图展示了时间扩展的2层 DB-LSTM 网络。
<center>
![pic](./network_arch.png)
</center>
### 特征
两个输入特性在这个管道中起着至关重要的作用:predicate(pred)和argument(argu)。 还采用了两个其他特征:谓词上下文(ctx-p)和区域标记(mr)。 因为单个谓词不能精确地描述谓词信息,特别是当相同的词在句子中出现多于一次时。 使用谓词上下文,可以在很大程度上消除歧义。类似地,如果它位于谓词上下文区域中,则使用区域标记 m<sub>r</sub> = 1 来表示参数位置,反之则 m<sub>r</sub> = 0。这四个简单的特征是我们的SRL系统所需要的。上下文大小设置为1的一个样本的特征如下[2]所示:
<center>
![pic](./feature.jpg)
</center>
在这个示例中,相应的标记句子是:
[ <sub>A1</sub> A record date ] has [ <sub>AM-NEG</sub> n't ] been [ <sub>V</sub> set ] .
在演示中, 我们采用上面的特征模板, 包括: `argument`, `predicate`, `ctx-p (p=-1,0,1)`, `mark` 并使用 `B/I/O` 方案来标记每个参数。这些特征和标签存储在 `feature` 文件中, 用`\t`分割。
### 数据提供
`dataprovider.py` 是一个包装数据的 Python 文件。 函数 `hook()` 定义了网络的数据槽。八个特征和标签都是索引槽。
```
def hook(settings, word_dict, label_dict, **kwargs):
settings.word_dict = word_dict
settings.label_dict = label_dict
#all inputs are integral and sequential type
settings.slots = [
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(predicate_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(2),
integer_value_sequence(len(label_dict))]
```
相应的数据迭代器如下:
```
@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line in fdata:
sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
line.strip().split('\t')
words = sentence.split()
sen_len = len(words)
word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len
ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
label_list = label.split()
label_slot = [settings.label_dict.get(w) for w in label_list]
yield word_slot, predicate_slot, ctx_n2_slot, ctx_n1_slot, \
ctx_0_slot, ctx_p1_slot, ctx_p2_slot, mark_slot, label_slot
```
函数 `process` 产出有8个特征和标签的9个表。
### 神经网络配置
`db_lstm.py` 是在训练过程中加载字典并定义数据提供程序模块和网络架构的神经网络配置文件。
九个 `data_layer` 从数据提供程序加载实例。八个特征分别转换为嵌入,并由`mixed_layer`混合。 深度双向LSTM层提取softmax层的特征。目标函数是标签的交叉熵。
### 训练
训练的脚本是 `train.sh`,用户只需执行:
```bash
./train.sh
```
`train.sh` 中的内容:
```
paddle train \
--config=./db_lstm.py \
--use_gpu=0 \
--log_period=5000 \
--trainer_count=1 \
--show_parameter_stats_period=5000 \
--save_dir=./output \
--num_passes=10000 \
--average_test_period=10000000 \
--init_model_path=./data \
--load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
```
- \--config=./db_lstm.py : 网络配置文件
- \--use_gpu=false: 使用 CPU 训练(如果已安装 PaddlePaddle GPU版本并想使用 GPU 训练可以设置为true,目前 crf_layer 不支持 GPU)
- \--log_period=5000: 每5000批(batch)输出日志
- \--trainer_count=1: 设置线程数(或 GPU 数)
- \--show_parameter_stats_period=5000: 每5000批显示参数统计
- \--save_dir=./output: 模型输出路径
- \--num_passes=10000: 设置通过数,一次通过意味着PaddlePaddle训练数据集中的所有样本一次
- \--average_test_period=10000000: 每个 average_test_period 批次对平均参数进行测试
- \--init_model_path=./data: 参数初始化路径
- \--load_missing_parameter_strategy=rand: 随机初始不存在的参数
- \--test_all_data_in_one_period=1: 在一个周期内测试所有数据
训练后,模型将保存在目录`output`中。 我们的训练曲线如下:
<center>
![pic](./curve.jpg)
</center>
### 测试
测试脚本是 `test.sh`, 执行:
```bash
./test.sh
```
`test.sh` 的主要部分:
```
paddle train \
--config=./db_lstm.py \
--model_list=$model_list \
--job=test \
--config_args=is_test=1 \
```
- \--config=./db_lstm.py: 网络配置文件
- \--model_list=$model_list.list: 模型列表文件
- \--job=test: 指示测试任务
- \--config_args=is_test=1: 指示测试任务的标记
- \--test_all_data_in_one_period=1: 在一个周期内测试所有数据
### 预测
预测脚本是 `predict.sh`,用户只需执行:
```bash
./predict.sh
```
`predict.sh`中,用户应该提供网络配置文件,模型路径,标签文件,字典文件,特征文件。
```
python predict.py
-c $config_file \
-w $best_model_path \
-l $label_file \
-p $predicate_dict_file \
-d $dict_file \
-i $input_file \
-o $output_file
```
`predict.py` 是主要的可执行python脚本,其中包括函数:加载模型,加载数据,数据预测。网络模型将输出标签的概率分布。 在演示中,我们使用最大概率的标签作为结果。用户还可以根据概率分布矩阵实现集束搜索或维特比解码。
预测后,结果保存在 `predict.res` 中。
## 引用
[1] Martha Palmer, Dan Gildea, and Paul Kingsbury. The Proposition Bank: An Annotated Corpus of Semantic Roles , Computational Linguistics, 31(1), 2005.
[2] Zhou, Jie, and Wei Xu. "End-to-end learning of semantic role labeling using recurrent neural networks." Proceedings of the Annual Meeting of the Association for Computational Linguistics. 2015.
......@@ -109,7 +109,7 @@ dataset
在这步任务中,我们使用了循环神经网络(RNN)的 LSTM 架构来训练情感分析模型。 引入LSTM模型主要是为了克服消失梯度的问题。 LSTM网络类似于具有隐藏层的标准循环神经网络, 但是隐藏层中的每个普通节点被一个记忆单元替换。 每个记忆单元包含四个主要的元素: 输入门, 具有自循环连接的神经元,忘记门和输出门。 更多的细节可以在文献中找到[4]。 LSTM架构的最大优点是它可以在长时间间隔内记忆信息,而没有短时记忆的损失。在有新的单词来临的每一个时间步骤内,存储在记忆单元区块的历史信息被更新用来迭代地学习单词以合理的序列呈现。
<center>![LSTM](../../../doc/demo/sentiment_analysis/lstm.png)</center>
<center>![LSTM](src/lstm.png)</center>
<center>图表 1. LSTM [3]</center>
情感分析是自然语言理解中最典型的问题之一。 它的目的是预测在一个序列中表达的情感态度。 通常,仅仅是一些关键词,如形容词和副词,在预测序列或段落的情感中起主要作用。然而有些评论上下文非常长,例如 IMDB的数据集。 我们之所以使用LSTM来执行这个任务是因为其改进的设计并且具有门机制。 首先,它能够从词级到具有可变上下文长度的上下文级别来总结表示。 第二,它可以在句子级别利用可扩展的上下文, 而大多数方法只是利用n-gram级别的知识。第三,它直接学习段落表示,而不是组合上下文级别信息。
......@@ -120,13 +120,13 @@ dataset
图2是双向LSTM网络,后面连全连接层和softmax层。
<center>![BiLSTM](../../../doc/demo/sentiment_analysis/bi_lstm.jpg)</center>
<center>![BiLSTM](src/bi_lstm.jpg)</center>
<center>图 2. Bidirectional-LSTM </center>
#### Stacked-LSTM
图3是三层LSTM结构。图的底部是word embedding(对文档处理后形成的单词向量)。 接下来,连接三个LSTM隐藏层,并且第二个是反向LSTM。然后提取隐藏LSTM层的所有时间步长的最大词向量作为整个序列的表示。 最后,使用具有softmax激活的全连接前馈层来执行分类任务。 更多内容可查看参考文献 [5]。
<center>![StackedLSTM](../../../doc/demo/sentiment_analysis/stacked_lstm.jpg)</center>
<center>![StackedLSTM](src/stacked_lstm.jpg)</center>
<center>图 3. Stacked-LSTM for sentiment analysis </center>
**配置**
......
# Sphinx build configuration for the Chinese documentation tree.

# Theme name: keep whatever was already defined, otherwise use "default".
if(NOT DEFINED SPHINX_THEME)
  set(SPHINX_THEME "default")
endif()

# Theme directory: set() is called with no value here, so no theme
# directory is configured unless one was defined beforehand.
if(NOT DEFINED SPHINX_THEME_DIR)
  set(SPHINX_THEME_DIR)
endif()

# Final HTML output location.
set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")

# Sphinx doctree cache (pickled ReST documents).
set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")

# Directory holding the configured conf.py and intermediate build results.
set(BINARY_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build")

# Instantiate conf.py from its template; @ONLY restricts substitution to
# @VAR@ references so any ${...} text in the template is left untouched.
configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
  "${BINARY_BUILD_DIR}/conf.py"
  @ONLY
)

# Register the HTML documentation target.
# Argument order: target, builder, conf dir, cache dir, source dir, output dir.
sphinx_add_target(
  paddle_docs_cn
  html
  "${BINARY_BUILD_DIR}"
  "${SPHINX_CACHE_DIR}"
  "${CMAKE_CURRENT_SOURCE_DIR}"
  "${SPHINX_HTML_DIR}"
)

# Make sure the gen_proto_py target is built before the docs target.
add_dependencies(paddle_docs_cn gen_proto_py)
使用cmake编译PaddlePaddle
=========================
.. toctree::
install_deps.rst
compile_options.rst
make_and_install.rst
安装编译PaddlePaddle需要的依赖
==============================
参见 `安装编译依赖 <../../../doc/build/build_from_source.html#install-dependencies>`_
make和make install
==================
参见 `make和make install <../../../doc/build/build_from_source.html#build-and-install>`_
# PaddlePaddle CPU image with an OpenSSH server added on top.
FROM paddledev/paddle:cpu-latest
MAINTAINER PaddlePaddle dev team <paddle-dev@baidu.com>

# Refresh the package index and install the SSH server in the SAME layer.
# With two separate RUN instructions the "apt-get update" layer can be
# served from the build cache, leaving the install step to run against a
# stale package index.
RUN apt-get update && \
    apt-get install -y openssh-server

# Create /var/run/sshd; -p keeps the build from failing if the directory
# already exists.
RUN mkdir -p /var/run/sshd

# Set the root password to "root".
# NOTE(review): a fixed root password is only acceptable for throwaway
# demo containers -- confirm this image is never exposed publicly.
RUN echo 'root:root' | chpasswd

# Rewrite any existing PermitRootLogin setting to "yes".
RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config
# Comment out "UsePAM yes" in the sshd configuration.
RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config

EXPOSE 22

# Run sshd with -D as the container's main process.
CMD ["/usr/sbin/sshd", "-D"]
PaddlePaddle 0.8.0b1, compiled with
with_avx: ON
with_gpu: OFF
with_double: OFF
with_python: ON
with_rdma: OFF
with_glog: ON
with_gflags: ON
with_metric_learning:
with_timer: OFF
with_predict_sdk:
集群训练
========
* `集群训练 <../../doc/cluster/index.html>`_
.. toctree::
:maxdepth: 2
:glob:
集群训练(对内) <internal/index.md>
使用示例
========
图像
''''
* `图像分类 <../../doc/demo/image_classification/index.html>`_
自然语言处理
''''''''''''
* `情感分析 <sentiment_analysis/index.html>`_
* `文本生成 <../../doc/demo/text_generation/index.html>`_
* `语义角色标注 <../../doc/demo/semantic_role_labeling/index.html>`_
推荐
''''
* `MovieLens数据集 <../../doc/demo/rec/ml_dataset.html>`_
* `MovieLens评分回归 <../../doc/demo/rec/ml_regression.html>`_
常用模型
''''''''
* `ImageNet: ResNet <../../doc/demo/imagenet_model/resnet_model.html>`_
* `Embedding: Chinese Word <../../doc/demo/embedding_model/index.html>`_
# PaddlePaddle快速入门教程
我们以文本分类问题作为背景,介绍PaddlePaddle使用流程和常用的网络基础单元的配置方法。
## 安装(Install)
首先请参考<a href = "../../build_and_install/index.html">安装教程</a>安装PaddlePaddle。
## 使用概述(Overview)
**文本分类问题**:对于给定的一条文本, 我们从提前给定的类别集合中选择其所属类
别。比如通过用户对电子商务网站评论,评估产品的质量:
- 这个显示器很棒! (好评)
- 用了两个月之后这个显示器屏幕碎了。(差评)
每一个任务流程都可以分为如下5个基础部分。
<center> ![](./Pipeline.jpg) </center>
1. 数据格式准备
- 每行保存一条样本,类别Id 和文本信息用Tab间隔, 文本中的单词用空格分隔(如果不切词,则字与字之间用空格分隔),例如:```类别Id ‘\t’ 这 个 显 示 器 很 棒 !```
2. 数据向模型传送
- PaddlePaddle可以读取Python写的传输数据脚本,所有字符都将转换为连续整数表示的Id传给模型
3. 网络结构(由易到难展示4种不同的网络配置)
- 逻辑回归模型
- 词向量模型
- 卷积模型
- 时序模型
- 优化算法
4. 训练模型
5. 预测
## 数据格式准备(Data Preparation)
在本问题中,我们使用[Amazon电子产品评论数据](http://jmcauley.ucsd.edu/data/amazon/)
将评论分为好评(正样本)和差评(负样本)两类。[源码](https://github.com/PaddlePaddle/Paddle)`demo/quick_start`里提供了下载已经预处理数据的脚本(如果想从最原始的数据处理,可以使用脚本 `./demo/quick_start/data/proc_from_raw_data/get_data.sh`)。
```bash
cd demo/quick_start
./data/get_data.sh
```
## 数据向模型传送(Transfer Data to Model)
### Python数据加载脚本(Data Provider Script)
下面dataprovider_bow.py文件给出了完整例子,主要包括两部分:
* initializer: 定义文本信息、类别Id的数据类型。
* process: yield文本信息和类别Id,和initializer里定义顺序一致。
```python
from paddle.trainer.PyDataProvider2 import *
# id of the word not in dictionary
UNK_IDX = 0
# initializer is called by the framework during initialization.
# It allows the user to describe the data types and setup the
# necessary data structure for later use.
# `settings` is an object. initializer need to properly fill settings.input_types.
# initializer can also store other data structures needed to be used at process().
# In this example, dictionary is stored in settings.
# `dictionay` and `kwargs` are arguments passed from trainer_config.lr.py
def initializer(settings, dictionary, **kwargs):
# Put the word dictionary into settings
settings.word_dict = dictionary
# setting.input_types specifies what the data types the data provider
# generates.
settings.input_types = [
# The first input is a sparse_binary_vector,
# which means each dimension of the vector is either 0 or 1. It is the
# bag-of-words (BOW) representation of the texts.
sparse_binary_vector(len(dictionary)),
# The second input is an integer. It represents the category id of the
# sample. 2 means there are two labels in the dataset.
# (1 for positive and 0 for negative)
integer_value(2)]
# Delaring a data provider. It has an initializer 'data_initialzer'.
# It will cache the generated data of the first pass in memory, so that
# during later pass, no on-the-fly data generation will be needed.
# `setting` is the same object used by initializer()
# `file_name` is the name of a file listed train_list or test_list file given
# to define_py_data_sources2(). See trainer_config.lr.py.
@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
# Open the input data file.
with open(file_name, 'r') as f:
# Read each line.
for line in f:
# Each line contains the label and text of the comment, separated by \t.
label, comment = line.strip().split('\t')
# Split the words into a list.
words = comment.split()
# convert the words into a list of ids by looking them up in word_dict.
word_vector = [settings.word_dict.get(w, UNK_IDX) for w in words]
# Return the features for the current comment. The first is a list
# of ids representing a 0-1 binary sparse vector of the text,
# the second is the integer id of the label.
yield word_vector, int(label)
```
### 配置中的数据加载定义(Data Provider in Configure)
在模型配置中利用`define_py_data_sources2`加载数据:
```python
from paddle.trainer_config_helpers import *
dict_file = "data/dict.txt"
word_dict = dict()
with open(dict_file, 'r') as f:
for i, line in enumerate(f):
w = line.strip().split()[0]
word_dict[w] = i
# define the data sources for the model.
# We need to use different process for training and prediction.
# For training, the input data includes both word IDs and labels.
# For prediction, the input data only includs word Ids.
define_py_data_sources2(train_list='data/train.list',
test_list='data/test.list',
module="dataprovider_bow",
obj="process",
args={"dictionary": word_dict})
```
* data/train.list,data/test.list: 指定训练、测试数据
* module="dataprovider_bow": 数据处理Python文件名
* obj="process": 指定生成数据的函数
* args={"dictionary": word_dict}: 额外的参数,这里指定词典
更详细数据格式和用例请参考<a href = "../../ui/data_provider/pydataprovider2.html">
PyDataProvider2</a>
## 网络结构(Network Architecture)
本节我们将专注于网络结构的介绍。
<center> ![](./PipelineNetwork.jpg) </center>
我们将以基本的逻辑回归网络作为起点,并逐渐展示更加深入的功能。更详细的网络配置
链接请参考<a href = "../../../doc/layer.html">Layer文档</a>
所有配置在[源码](https://github.com/PaddlePaddle/Paddle)`demo/quick_start`目录,首先列举逻辑回归网络。
### 逻辑回归模型(Logistic Regression)
流程如下:
<center> ![](./NetLR.jpg) </center>
- 获取利用one-hot vector表示的每个单词,维度是词典大小
```python
word = data_layer(name="word", size=word_dim)
```
- 获取该条样本类别Id,维度是类别个数。
```python
label = data_layer(name="label", size=label_dim)
```
- 利用逻辑回归模型对该向量进行分类,同时会计算分类准确率
```python
# Define a fully connected layer with logistic activation (also called softmax activation).
output = fc_layer(input=word,
size=label_dim,
act_type=SoftmaxActivation())
# Define cross-entropy classification loss and error.
classification_cost(input=output, label=label)
```
- input: 除过data层,每个层都有一个或多个input,多个input以list方式输入
- size: 该层神经元个数
- act_type: 激活函数类型
效果总结:我们将在后面介绍训练和预测的流程的脚本。在此为方便对比不同网络结构,
我们随时总结了各个网络的复杂度和效果。
<html>
<center>
<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
<thead>
<tr>
<th scope="col" class="left">网络名称</th>
<th scope="col" class="left">参数数量</th>
<th scope="col" class="left">错误率</th>
</tr>
</thead>
<tbody>
<tr>
<td class="left">逻辑回归</td>
<td class="left">252 KB</td>
<td class="left">8.652%</td>
</tr>
</tbody>
</table></center>
</html>
<br>
### 词向量模型(Word Vector)
embedding模型需要稍微改变数据提供的脚本,即`dataprovider_emb.py`,词向量模型、
卷积模型、时序模型均使用该脚本。其中文本输入类型定义为整数时序类型integer_value_sequence。
```
def initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [
# Define the type of the first input as sequence of integer.
# The value of the integers range from 0 to len(dictrionary)-1
integer_value_sequence(len(dictionary)),
# Define the second input for label id
integer_value(2)]
@provider(init_hook=initializer)
def process(settings, file_name):
...
# omitted, it is same as the data provider for LR model
```
该模型依然是使用逻辑回归分类网络的框架, 只是将句子利用连续向量表示替换稀疏
向量表示, 即对第3步进行替换。句子表示的计算更新为2步:
<center> ![](./NetContinuous.jpg) </center>
- 利用单词Id查找对应的该单词的连续表示向量(维度为word_dim), 输入N个单词,输出为N个word_dim维度向量
```python
emb = embedding_layer(input=word, size=word_dim)
```
- 将该句话包含的所有单词向量求平均得到句子的表示
```python
avg = pooling_layer(input=emb, pooling_type=AvgPooling())
```
其它部分和逻辑回归网络结构一致。
效果总结:
<html>
<center>
<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
<thead>
<tr>
<th scope="col" class="left">网络名称</th>
<th scope="col" class="left">参数数量</th>
<th scope="col" class="left">错误率</th>
</tr>
</thead>
<tbody>
<tr>
<td class="left">词向量模型</td>
<td class="left">15 MB</td>
<td class="left">8.484%</td>
</tr>
</tbody>
</table></center>
</html>
<br>
### 卷积模型(Convolution)
卷积网络是一种特殊的从词向量表示到句子表示的方法, 也就是将词向量模型的步
骤3-2进行进一步演化, 变为3个新的子步骤。
<center> ![](./NetConv.jpg) </center>
文本卷积分为三个步骤:
1. 获取每个单词左右各k个近邻, 拼接成一个新的向量表示;
2. 对该表示进行非线性变换 (例如Sigmoid变换), 成为维度为hidden_dim的新的向量;
3. 在每个维度上取出在该句话新的向量集合上该维度的最大值作为最后的句子表示向量。 这3个子步骤可配置为:
```python
text_conv = sequence_conv_pool(input=emb,
context_start=k,
context_len=2 * k + 1)
```
效果总结:
<html>
<center>
<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
<thead>
<tr>
<th scope="col" class="left">网络名称</th>
<th scope="col" class="left">参数数量</th>
<th scope="col" class="left">错误率</th>
</tr>
</thead>
<tbody>
<tr>
<td class="left">卷积模型</td>
<td class="left">16 MB</td>
<td class="left">5.628%</td>
</tr>
</tbody>
</table></center>
</html>
<br>
### 时序模型(Time Sequence)
<center> ![](./NetRNN.jpg) </center>
时序模型即为RNN模型, 包括简单的RNN模型、GRU模型、LSTM模型等。
- GRU模型配置:
```python
gru = simple_gru(input=emb, size=gru_size)
```
- LSTM模型配置:
```python
lstm = simple_lstm(input=emb, size=lstm_size)
```
针对本问题,我们采用单层LSTM模型,并使用了Dropout,效果总结:
<html>
<center>
<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
<thead>
<tr>
<th scope="col" class="left">网络名称</th>
<th scope="col" class="left">参数数量</th>
<th scope="col" class="left">错误率</th>
</tr>
</thead>
<tbody>
<tr>
<td class="left">时序模型</td>
<td class="left">16 MB</td>
<td class="left">4.812%</td>
</tr>
</tbody>
</table></center>
</html>
<br>
## 优化算法(Optimization Algorithm)
<a href = "../../../doc/ui/trainer_config_helpers_api.html#module-paddle.trainer_config_helpers.optimizers">优化算法</a>包括
Momentum, RMSProp,AdaDelta,AdaGrad,ADAM,Adamax等,这里采用Adam优化方法,加了L2正则和梯度截断。
```python
settings(batch_size=128,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25)
```
## 训练模型(Training Model)
在完成了数据和网络结构搭建之后, 我们进入到训练部分。
<center> ![](./PipelineTrain.jpg) </center>
训练脚本:我们将训练的命令行保存在了 `train.sh`文件中。训练时所需设置的主要参数如下:
```bash
paddle train \
--config=trainer_config.py \
--log_period=20 \
--save_dir=./output \
--num_passes=15 \
--use_gpu=false
```
这里没有介绍多机分布式训练,可以参考<a href = "../../cluster/index.html">分布式训练</a>的demo学习如何进行多机训练。
## 预测(Prediction)
可以使用训练好的模型评估带有label的验证集,也可以预测没有label的测试集。
<center> ![](./PipelineTest.jpg) </center>
测试脚本如下,将会测试配置文件中test.list指定的数据。
```bash
paddle train \
--use_gpu=false \
--job=test \
--init_model_path=./output/pass-0000x
```
可以参考<a href = "../../ui/predict/swig_py_paddle.html">Python API预测</a>
教程,或其他<a href = "../../demo/index.html">demo</a>的Python预测过程。也可以通过如下方式预测。
预测脚本(`predict.sh`):
```bash
model="output/pass-00003"
paddle train \
--config=trainer_config.lstm.py \
--use_gpu=false \
--job=test \
--init_model_path=$model \
--config_args=is_predict=1 \
--predict_output_dir=.
mv rank-00000 result.txt
```
这里以`output/pass-00003`为例进行预测,用户可以根据训练log选择test结果最好的模型来预测。与训练网络配置不同的是:无需label相关的层,指定outputs输出概率层(softmax输出),
指定batch_size=1,数据传输无需label数据,预测数据指定test_list的位置。
预测结果以文本的形式保存在`result.txt`中,一行为一个样本,格式如下:
```
预测ID;ID为0的概率 ID为1的概率
预测ID;ID为0的概率 ID为1的概率
```
```
is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
obj = 'process' if not is_predict else 'process_pre'
batch_size = 128 if not is_predict else 1
if is_predict:
maxid = maxid_layer(output)
outputs([maxid,output])
else:
label = data_layer(name="label", size=2)
cls = classification_cost(input=output, label=label)
outputs(cls)
```
## 总体效果总结(Summary)
这些流程中的数据下载、网络配置、训练脚本在`/demo/quick_start`目录,我们在此总
结上述网络结构在Amazon-Elec测试集(25k)上的效果:
<center>
<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
<thead>
<tr>
<th scope="col" class="left">网络名称</th>
<th scope="col" class="left">参数数量</th>
<th scope="col" class="left">错误率</th>
<th scope="col" class="left">配置文件</th>
</tr>
</thead>
<tbody>
<tr>
<td class="left">逻辑回归模型</td>
<td class="left"> 252KB </td>
<td class="left">8.652%</td>
<td class="left">trainer_config.lr.py</td>
</tr>
<tr>
<td class="left">词向量模型</td>
<td class="left"> 15MB </td>
<td class="left"> 8.484%</td>
<td class="left">trainer_config.emb.py</td>
</tr>
<tr>
<td class="left">卷积模型</td>
<td class="left"> 16MB </td>
<td class="left"> 5.628%</td>
<td class="left">trainer_config.cnn.py</td>
</tr>
<tr>
<td class="left">时序模型</td>
<td class="left"> 16MB </td>
<td class="left"> 4.812%</td>
<td class="left">trainer_config.lstm.py</td>
</tr>
</tbody>
</table>
</center>
<br>
## 附录(Appendix)
### 命令行参数(Command Line Argument)
* \--config:网络配置
* \--save_dir:模型存储路径
* \--log_period:每隔多少batch打印一次日志
* \--num_passes:训练轮次,一个pass表示过一遍所有训练样本
* \--config_args:命令指定的参数会传入网络配置中。
* \--init_model_path:指定初始化模型路径,可用在测试或训练时指定初始化模型。
默认一个pass保存一次模型,也可以通过saving_period_by_batches设置每隔多少batch保存一次模型。
可以通过show_parameter_stats_period设置打印参数信息等。
其他参数请参考<a href = "../../ui/index.html#command-line-argument">命令行参数文档</a>
### 输出日志(Log)
```
TrainerInternal.cpp:160] Batch=20 samples=2560 AvgCost=0.628761 CurrentCost=0.628761 Eval: classification_error_evaluator=0.304297 CurrentEval: classification_error_evaluator=0.304297
```
模型训练会看到这样的日志,详细的参数解释如下面表格:
<center>
<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
<thead>
<tr>
<th scope="col" class="left">名称</th>
<th scope="col" class="left">解释</th>
</tr>
</thead>
<tbody>
<tr>
<td class="left">Batch=20</td>
<td class="left"> 表示过了20个batch </td>
</tr>
<tr>
<td class="left">samples=2560</td>
<td class="left"> 表示过了2560个样本 </td>
</tr>
<tr>
<td class="left">AvgCost</td>
<td class="left"> 每个pass的第0个batch到当前batch所有样本的平均cost </td>
</tr>
<tr>
<td class="left">CurrentCost</td>
<td class="left"> 当前log_period个batch所有样本的平均cost </td>
</tr>
<tr>
<td class="left">Eval: classification_error_evaluator</td>
<td class="left"> 每个pass的第0个batch到当前batch所有样本的平均分类错误率 </td>
</tr>
<tr>
<td class="left">CurrentEval: classification_error_evaluator</td>
<td class="left"> 当前log_period个batch所有样本的平均分类错误率 </td>
</tr>
</tbody>
</table>
</center>
<br>
情感分析教程
===========================
.. toctree::
:maxdepth: 3
:glob:
Training Locally <sentiment_analysis.md>
\ No newline at end of file
构建PaddlePaddle的Docker Image
==============================
PaddlePaddle的Docker Image构建源码放置在 ``${源码根目录}/paddle/scripts/docker/`` 目录下。该目录有三类文件:
- Dockerfile:Docker Image的描述文件,包括构建步骤、各种参数和维护人员等。
- 一共维护了12个Dockerfile,Dockerfile.m4是它们的模板。
- PaddlePaddle中所有的Image都基于ubuntu 14.04。
- build.sh:Docker Image的构建脚本,使用方式见下一小节。
- generate.sh:通过Dockerfile.m4模板生成不同的Dockerfile。
使用脚本构建Docker Image
------------------------
进入源码目录,执行 ``docker build`` 命令,即可在本地编译出PaddlePaddle的镜像。简单的使用样例为
.. code-block:: bash
cd ${源码根目录}/paddle/scripts/docker/
docker build --build-arg LOWEST_DL_SPEED=50K \
--build-arg WITH_GPU=ON \
--tag paddle_gpu:latest .
其中,``--build-arg`` 传入的配置参数包括:
- LOWEST\_DL\_SPEED\: 在多线程下载过程中,设置下载线程的最低速度。
- 默认单位是Bytes,但可以传入10K、10M、或10G等这样的单位。
- 如果小于这个速度,那么这个线程将会关闭。当所有的线程都关闭了,那么下载进程将会重启。
- WITH\_GPU\: ON or OFF,是否开启GPU功能。注意,
- **编译** PaddlePaddle的GPU版本 **不一定** 要在具有GPU的机器上进行。
- **运行** PaddlePaddle的GPU版本 **一定** 要在具有GPU的机器上运行。
注意:所有Image的构建在Docker 1.12版本测试通过, 低于1.12的版本并没有测试。原因是旧版本可能缺乏 ``--build-arg`` 参数,从而不能在运行编译命令的时候接受参数。
PaddlePaddle文档
================
使用指南
--------
* `介绍 <introduction/index.html>`_
* `快速入门 <demo/quick_start/index.html>`_
* `基本使用概念 <concepts/use_concepts.html>`_
* `编译与安装 <build_and_install/index.html>`_
* `用户接口 <ui/index.html>`_
* `使用示例 <demo/index.html>`_
* `模型配置 <../doc/ui/api/trainer_config_helpers/index.html>`_
* `集群训练 <cluster/index.html>`_
开发指南
--------
* `新写Layer <../doc/dev/new_layer/index.html>`_
* `如何贡献文档 <howto/how_to_write_docs/index.html>`_
* `如何构建Docker Image <howto/build_docker_image.html>`_
算法教程
--------
* `Recurrent Group教程 <algorithm/rnn/rnn-tutorial.html>`_
* `单层RNN示例 <../doc/algorithm/rnn/rnn.html>`_
* :ref:`algo_hrnn_rnn_api_compare`
* `支持双层序列作为输入的Layer <algorithm/rnn/hierarchical-layer.html>`_
常见问题
--------
* `常见问题 <faq/index.html>`_
命令
====
安装好PaddlePaddle后,在命令行直接敲击 ``paddle`` 或 ``paddle --help`` 会显示如下一些命令。
* ``train`` Start a paddle_trainer
启动一个PaddlePaddle训练进程。 ``paddle train`` 可以通过命令行参数 ``-local=true`` 启动一个单机的训练进程;也可以和 ``paddle pserver`` 一起使用启动多机的分布式训练进程。
* ``pserver`` Start a paddle_pserver_main
在多机分布式训练下启动PaddlePaddle的parameter server进程。
* ``version`` Print paddle version
用于打印当前PaddlePaddle的版本和编译选项相关信息。常见的输出格式如下:1)第一行说明了PaddlePaddle的版本信息;2)第二行开始说明了一些主要的编译选项,具体意义可以参考 `编译参数选项文件 <../../build_and_install/cmake/compile_options.html>`_ 。
.. literalinclude:: paddle_version.txt
* ``merge_model`` Start a paddle_merge_model
用于将PaddlePaddle的模型参数文件和模型配置文件打包成一个文件,方便做部署分发。
* ``dump_config`` Dump the trainer config as proto string
用于将PaddlePaddle的模型配置文件以proto string的格式打印出来。
* ``make_diagram``
使用graphviz对PaddlePaddle的模型配置文件进行绘制。
\ No newline at end of file
PaddlePaddle 0.8.0b, compiled with
with_avx: ON
with_gpu: ON
with_double: OFF
with_python: ON
with_rdma: OFF
with_glog: ON
with_gflags: ON
with_metric_learning: OFF
with_timer: OFF
with_predict_sdk: OFF
########
用户接口
########
数据提供
========
.. toctree::
:maxdepth: 1
data_provider/dataprovider.rst
data_provider/pydataprovider2.rst
命令及命令行参数
================
.. toctree::
:maxdepth: 1
cmd/index.rst
* `参数用例 <../../doc/ui/cmd_argument/use_case.html>`_
* `参数分类 <../../doc/ui/cmd_argument/argument_outline.html>`_
* `参数描述 <../../doc/ui/cmd_argument/detail_introduction.html>`_
预测
=======
.. toctree::
:maxdepth: 1
predict/swig_py_paddle.rst
......@@ -12,6 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// clang-format off
// clang-format 4.X and clang-format 3.8+ format the following lines
// differently, so clang-format is disabled for them.
#include "hl_cuda.h"
#include <cuda_profiler_api.h>
#include <string.h>
......@@ -23,6 +26,7 @@ limitations under the License. */
#include "hl_dso_loader.h"
#include "hl_thread.ph"
#include "paddle/utils/Logging.h"
// clang-format on
namespace dynload {
......
......@@ -14,8 +14,8 @@ limitations under the License. */
#pragma once
#include <memory>
#include <random>
#include "paddle/utils/TypeDefs.h"
namespace paddle {
......@@ -32,6 +32,17 @@ class MultinomialSampler {
public:
MultinomialSampler(const real* prob, int size);
//! Factory that builds a sampler from double-precision probabilities
//! (protobuf always uses double for these values).
//! @param prob pointer to the probabilities; the non-double branch reads
//!             prob[0] .. prob[size - 1].
//! @param size number of entries in prob.
//! @return a sampler allocated with `new`; the caller takes ownership.
static MultinomialSampler* create(const double* prob, int size) {
#ifdef PADDLE_TYPE_DOUBLE
// Presumably `real` is double in this configuration, so prob can be handed
// to the constructor unchanged -- TODO confirm.
return new MultinomialSampler(prob, size);
#else
// Convert the double inputs element by element into a temporary real buffer.
std::unique_ptr<real[]> tmp(new real[size]);
std::copy(prob, prob + size, tmp.get());
// NOTE(review): tmp is freed when this function returns, so this relies on the
// constructor not keeping the pointer it is given -- confirm against the
// constructor implementation.
return new MultinomialSampler(tmp.get(), size);
#endif
}
/**
* @brief Generate a random sample.
* @param g is a random number engine. See <random>.
......
......@@ -99,8 +99,8 @@ public:
if (config_.neg_sampling_dist_size()) {
CHECK_EQ(numClasses_, config_.neg_sampling_dist_size());
sampler_.reset(new MultinomialSampler(config_.neg_sampling_dist().data(),
numClasses_));
sampler_.reset(MultinomialSampler::create(
config_.neg_sampling_dist().data(), numClasses_));
}
return true;
......
......@@ -31,7 +31,6 @@ bool WarpCTCLayer::init(const LayerMap& layerMap,
CHECK_EQ(numClasses_, inputLayers_[0]->getSize());
blank_ = config_.blank();
CHECK_GE(blank_, 0UL);
CHECK_LT(blank_, numClasses_);
normByTimes_ = config_.norm_by_times();
......
......@@ -408,7 +408,7 @@ public:
LOG(FATAL) << "Not implemented";
}
virtual void addBias(Matrix& b, real scale, bool sharedBias) {
void addBias(Matrix& b, real scale, bool sharedBias) {
if (!sharedBias) {
addBias(b, scale);
} else {
......@@ -425,7 +425,7 @@ public:
LOG(FATAL) << "Not implemented";
}
virtual void collectBias(Matrix& a, real scale, bool sharedBias) {
void collectBias(Matrix& a, real scale, bool sharedBias) {
if (!sharedBias) {
collectBias(a, scale);
} else {
......
......@@ -16,12 +16,10 @@ add_simple_unittest(test_CpuGpuVector)
add_simple_unittest(test_Allocator)
if(WITH_GPU)
if(COMPILER_SUPPORT_CXX11)
CUDA_ADD_EXECUTABLE(test_Tensor test_Tensor.cu)
link_paddle_test(test_Tensor)
CUDA_ADD_EXECUTABLE(test_lazyAssign test_lazyAssign.cu)
link_paddle_test(test_lazyAssign)
endif()
else()
compile_cu_as_cpp(test_Tensor.cu)
add_unittest(test_Tensor test_Tensor.cu)
......
......@@ -25,24 +25,17 @@ P_DEFINE_int32(parallel_thread_num, 1, "Thread number for parameter send");
namespace paddle {
template <class T>
void copyToRepeatedField(google::protobuf::RepeatedField<T>* dest,
const T* src,
template <typename T1, typename T2>
void copyToRepeatedField(google::protobuf::RepeatedField<T1>* dest,
const T2* src,
size_t size) {
dest->Clear();
dest->Reserve(size);
for (size_t i = 0; i < size; ++i) {
dest->AddAlreadyReserved(src[i]);
}
}
template <class T>
void copyToRepeatedField(const std::vector<T>& src,
google::protobuf::RepeatedField<T>* dest) {
copyToRepeatedField(dest, &src[0], src.size());
}
ParameterClient2::ParameterClient2(bool separate, int port, int numPorts)
: BaseClient(separate, numPorts), port_(port) {
#ifndef PADDLE_DISABLE_TIMER
......@@ -618,6 +611,8 @@ void PreparedOperations::addOperationHelper(Operation* op, CpuMatrixPtr mat) {
pmat.mutable_values(), mat->getData(), pmat.num_cols() * pmat.num_rows());
}
// Binary operation passed to std::transform in doOperation: adds the double
// value b to the real value a; the sum is converted to real by the return type.
static inline real addTwo(real a, double b) { return a + b; }
void ParameterClient2::doOperation(PreparedOperations& ops,
bool waitForGradient,
bool sendBackGradient,
......@@ -682,8 +677,11 @@ void ParameterClient2::doOperation(PreparedOperations& ops,
CpuVectorPtr rvec = resultVectors[i];
if (!rvec) continue;
CHECK_EQ(rvec->getSize(), (size_t)vec.dim());
CpuVector avec(rvec->getSize(), const_cast<real*>(vec.values().data()));
rvec->add(avec);
std::transform(rvec->getData(),
rvec->getData() + rvec->getSize(),
vec.values().data(),
rvec->getData(),
addTwo);
}
CHECK_EQ(resultMatrices.size(), (size_t)result.matrices_size());
......@@ -693,11 +691,12 @@ void ParameterClient2::doOperation(PreparedOperations& ops,
if (!rmat) continue;
CHECK_EQ(rmat->getHeight(), (size_t)mat.num_rows());
CHECK_EQ(rmat->getWidth(), (size_t)mat.num_cols());
CpuMatrixPtr amat =
std::make_shared<CpuMatrix>(const_cast<real*>(mat.values().data()),
rmat->getHeight(),
rmat->getWidth());
rmat->add(*amat);
std::transform(rmat->getData(),
rmat->getData() + rmat->getElementCnt(),
mat.values().data(),
rmat->getData(),
addTwo);
}
}
}
......
......@@ -6,25 +6,6 @@ set(proto_filenames
ParameterService.proto
TrainerConfig.proto)
set(real_proto_files)
# TODO(yuyang18): Some internal proto will also be depended on.
# Find a way to automatically calculate all depends.
foreach(filename ${proto_filenames})
set(PROTOBUF_3_FLAGS "")
if (PROTOBUF_3)
set(PROTOBUF_3_FLAGS "-Dproto3")
endif()
add_custom_command(OUTPUT ${filename}
COMMAND ${M4_EXECUTABLE} -Dreal=${ACCURACY} ${PROTOBUF_3_FLAGS} -I '${INTERNAL_PROTO_PATH}'
${PROJ_ROOT}/proto/${filename}.m4 > ${filename}
DEPENDS ${PROJ_ROOT}/proto/${filename}.m4
COMMENT "Generate ${filename}")
endforeach()
add_custom_target(proto_accuracy ALL
DEPENDS ${proto_filenames})
set(PROTO_GEN)
set(PROTO_GEN_PY)
......@@ -39,9 +20,8 @@ foreach(filename ${proto_filenames})
add_custom_command(OUTPUT ${CUR_PROTO_GEN}
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
--cpp_out ${CMAKE_CURRENT_BINARY_DIR}
--proto_path ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/${filename}
DEPENDS proto_accuracy
${PROJ_ROOT}/proto/${filename}.m4)
--proto_path ${PROJ_ROOT}/proto ${PROJ_ROOT}/proto/${filename}
DEPENDS ${filename})
set(CUR_PROTO_GEN_PY
${PROJ_ROOT}/paddle/python/paddle/proto/${base_filename}_pb2.py)
......@@ -50,9 +30,8 @@ foreach(filename ${proto_filenames})
${PROTO_GEN_PY})
add_custom_command(OUTPUT ${CUR_PROTO_GEN_PY}
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --python_out ${PROJ_ROOT}/python/paddle/proto
--proto_path ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/${filename}
DEPENDS proto_accuracy
${PROJ_ROOT}/proto/${filename}.m4)
--proto_path ${PROJ_ROOT}/proto ${PROJ_ROOT}/proto/${filename}
DEPENDS ${filename})
endforeach()
include_directories(${CMAKE_CURRENT_BINARY_DIR}/proto)
......@@ -61,5 +40,4 @@ add_custom_target(gen_proto_cpp ALL DEPENDS ${PROTO_GEN})
add_custom_target(gen_proto_py ALL DEPENDS ${PROTO_GEN_PY})
add_library(paddle_proto STATIC
${PROTO_GEN})
add_dependencies(paddle_proto proto_accuracy)
target_include_directories(paddle_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
......@@ -11,11 +11,11 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
ifdef(`proto3', `syntax = "proto2";')
syntax = "proto2";
package paddle;
sinclude(`DataConfigExt.proto.m4')
message FileGroupConf {
optional uint32 queue_capacity = 1 [default = 1];
// how many files to load for a load file thread
......@@ -26,7 +26,7 @@ message FileGroupConf {
};
message DataConfig {
sinclude(`DataConfigInter.proto.m4')
required string type = 1;
// name of a text file which contains a list of file names at each line
......@@ -51,11 +51,11 @@ sinclude(`DataConfigInter.proto.m4')
/// Note the field number 17, 18 and 19 have been deprecated.
// a list of values which will be used to create additional one dimensional real
// a list of values which will be used to create additional one dimensional float
// values slots. These one dimensional slots can be used as the weight input
// for cost layers.
// Currently this is only supported by ProtoDataProvider.
repeated real constant_slots = 20;
repeated double constant_slots = 20;
// for PyDataProvider.
// Specify the load data script module name, object name and user args
......@@ -80,6 +80,6 @@ sinclude(`DataConfigInter.proto.m4')
optional bool is_main_data = 26 [default = true];
// the usage ratio of instances. Setting to 1.0 means the use of all instances.
optional real usage_ratio = 27 [default = 1.0];
optional double usage_ratio = 27 [default = 1.0];
};
......@@ -11,7 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
ifdef(`proto3', `syntax = "proto2";')
syntax = "proto2";
package paddle;
......
......@@ -11,7 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
ifdef(`proto3', `syntax = "proto2";')
syntax = "proto2";
import "ParameterConfig.proto";
......@@ -20,7 +20,7 @@ package paddle;
/**
* Various structs for the configuration of a neural network
*/
sinclude(`ModelConfigExt.proto.m4')
message ExternalConfig {
repeated string layer_names = 1;
......@@ -146,8 +146,8 @@ message NormConfig {
// the parameters for normalization
// u = u / (1+scale*sum(u^2 in window))^pow
required real scale = 4;
required real pow = 5;
required double scale = 4;
required double pow = 5;
// The size of output feature map.
required uint32 output_x = 6;
......@@ -223,7 +223,7 @@ message OperatorConfig {
required uint64 output_size = 4;
// For DotMulOperator
optional real dotmul_scale = 5 [default = 1.0];
optional double dotmul_scale = 5 [default = 1.0];
// For ConvOperator
optional ConvConfig conv_conf = 6;
......@@ -266,7 +266,7 @@ message LayerInputConfig {
}
message LayerConfig {
sinclude(`ModelConfigLayer.proto.m4')
required string name = 1;
required string type = 2;
optional uint64 size = 3;
......@@ -293,7 +293,7 @@ sinclude(`ModelConfigLayer.proto.m4')
optional uint32 partial_sum = 9;
// for dropout
optional real drop_rate = 10;
optional double drop_rate = 10;
// for HierarchicalSoftmaxLayer and NCELayer
// the number of classes
......@@ -317,17 +317,17 @@ sinclude(`ModelConfigLayer.proto.m4')
// For NCELayer
// The distribution for generating the random negative labels.
// A uniform distribution will be used if not provided
repeated real neg_sampling_dist = 17 [packed = true];
repeated double neg_sampling_dist = 17 [packed = true];
// For MaxLayer
// default: output VALUE of MaxLayer. set this flag to true for output INDEX
// INDEX will be put in Argument::value as real values.
// INDEX will be put in Argument::value as double values.
optional bool output_max_index = 19 [default = false];
/// The field number 20 has been deprecated.
// For self-normalized estimation
optional real softmax_selfnorm_alpha = 21 [default = 0.1];
optional double softmax_selfnorm_alpha = 21 [default = 0.1];
/// The field numbers 22 and 23 have been deprecated.
......@@ -338,14 +338,14 @@ sinclude(`ModelConfigLayer.proto.m4')
optional bool norm_by_times = 25;
// for CostLayers
optional real coeff = 26 [default = 1.0];
optional double coeff = 26 [default = 1.0];
// for AverageLayer
// can be set to: 'average', 'sum' or 'squarerootn'
optional string average_strategy = 27;
// for error clipping
optional real error_clipping_threshold = 28 [default = 0.0];
optional double error_clipping_threshold = 28 [default = 0.0];
// for operators used by mixed layer
repeated OperatorConfig operator_confs = 29;
......@@ -355,11 +355,11 @@ sinclude(`ModelConfigLayer.proto.m4')
optional int32 max_sort_size = 31;
// for SlopeInterceptLayer
optional real slope = 32;
optional real intercept = 33;
optional double slope = 32;
optional double intercept = 33;
// for CosSimVecMatLayer and CosSimLayer
optional real cos_scale = 34;
optional double cos_scale = 34;
// for DataNormLayer
// can be set to: 'z-score', 'min-max' or 'decimal-scaling'
......@@ -394,7 +394,7 @@ sinclude(`ModelConfigLayer.proto.m4')
// if number of the selected columns is less than
// sample number * selective_fc output size * selective_fc_full_mul_ratio
// sparse multiplication is used, otherwise, using full multiplication.
optional real selective_fc_full_mul_ratio = 44 [default = 0.02];
optional double selective_fc_full_mul_ratio = 44 [default = 0.02];
// to indicate how many threads selective_fc uses to accelerate
// the plain_mul period
......@@ -406,7 +406,7 @@ sinclude(`ModelConfigLayer.proto.m4')
optional bool use_global_stats = 46;
// use to compute moving mean and variance.
optional real moving_average_fraction = 47 [default = 0.9];
optional double moving_average_fraction = 47 [default = 0.9];
// bias size
optional uint32 bias_size = 48 [default = 0];
......@@ -438,7 +438,7 @@ message EvaluatorConfig {
// Used by PrecisionRecallEvaluator and ClassificationErrorEvaluator
// For multi binary labels: true if output > classification_threshold
optional real classification_threshold = 6 [default = 0.5];
optional double classification_threshold = 6 [default = 0.5];
// The positive label. -1 means average precision and recall
optional int32 positive_label = 7 [default = -1];
......
......@@ -11,7 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
ifdef(`proto3', `syntax = "proto2";')
syntax = "proto2";
package paddle;
......@@ -32,14 +32,14 @@ message ParameterUpdaterHookConfig {
message ParameterConfig {
required string name = 1;
required uint64 size = 2;
optional real learning_rate = 3 [default = 1.0];
optional real momentum = 4 [default = 0.0];
optional real initial_mean = 5 [default = 0.0];
optional real initial_std = 6 [default = 0.01];
optional double learning_rate = 3 [default = 1.0];
optional double momentum = 4 [default = 0.0];
optional double initial_mean = 5 [default = 0.0];
optional double initial_std = 6 [default = 0.01];
// use L2-regularization if decay_rate set and decay_rate_l1 not set
optional real decay_rate = 7 [default = 0.0];
optional double decay_rate = 7 [default = 0.0];
// use L1-regularization if decay_rate_l1 set
optional real decay_rate_l1 = 8 [default = 0.0];
optional double decay_rate_l1 = 8 [default = 0.0];
// dims of Parameter, e.g. dims[0] as height, dims[1] as width..
repeated uint64 dims = 9;
// the gpu device which the parameter in.
......@@ -60,7 +60,7 @@ message ParameterConfig {
// sparse remote update or not
optional bool sparse_remote_update = 16 [default = false];
// gradient clipping threshold, no clipping by default
optional real gradient_clipping_threshold = 17 [default = 0.0];
optional double gradient_clipping_threshold = 17 [default = 0.0];
// static parameters are fixed when training
optional bool is_static = 18 [default = false];
// para_id should NOT be set by config_parser. It is for
......
......@@ -11,7 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
ifdef(`proto3', `syntax = "proto2";')
syntax = "proto2";
import "ParameterConfig.proto";
import "TrainerConfig.proto";
......@@ -73,7 +73,7 @@ message SendParameterRequest {
optional int64 num_samples = 4;
// cost will be used to calculate global objective value
optional real cost = 5;
optional double cost = 5;
required BatchStatus batch_status = 6;
......@@ -245,13 +245,13 @@ enum MatrixVectorOperation {
message ProtoVector {
required int64 dim = 1;
repeated real values = 2 [packed = true];
repeated double values = 2 [packed = true];
}
message ProtoMatrix {
required int64 num_rows = 1;
required int64 num_cols = 2;
repeated real values = 3 [packed = true];
repeated double values = 3 [packed = true];
}
message Operation {
......@@ -263,7 +263,7 @@ message Operation {
// matrix handles created on the pserver
repeated int64 pmatrices = 3; // A, B, C
repeated real scalars = 4; // a, b, c
repeated double scalars = 4; // a, b, c
repeated ProtoVector vectors = 5; // x, y, z
repeated ProtoMatrix matrices = 6; // X, Y, Z
}
......@@ -272,7 +272,7 @@ message OperationResult {
// error message. Empty if success
optional string return_message = 1;
//
repeated real scalars = 2; // d, e, f
repeated double scalars = 2; // d, e, f
repeated ProtoVector vectors = 3; // p, q, r
repeated ProtoMatrix matrices = 4; // P, Q, R
}
......
......@@ -11,7 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
ifdef(`proto3', `syntax = "proto2";')
syntax = "proto2";
import "DataConfig.proto";
import "ModelConfig.proto";
......@@ -24,9 +24,9 @@ message OptimizationConfig {
optional int32 num_batches_per_send_parameter = 5 [default = 1];
optional int32 num_batches_per_get_parameter = 6 [default = 1];
required real learning_rate = 7;
optional real learning_rate_decay_a = 8 [default = 0];
optional real learning_rate_decay_b = 9 [default = 0];
required double learning_rate = 7;
optional double learning_rate_decay_a = 8 [default = 0];
optional double learning_rate_decay_b = 9 [default = 0];
optional string learning_rate_schedule = 27 [default = "constant"];
// learning rate will be scaled according to learning_rate_schedule
// 1), constant:
......@@ -49,14 +49,14 @@ message OptimizationConfig {
// owlqn related
// L1-regularization
optional real l1weight = 10 [default = 0.1];
optional double l1weight = 10 [default = 0.1];
// L2-regularization
optional real l2weight = 11 [default = 0];
optional double l2weight = 11 [default = 0];
// "c1" in wolfe condition: if (newobj <= oldobj + c1 * origDirDeriv * step)
// then accept the step
optional real c1 = 12 [default = 0.0001];
optional double c1 = 12 [default = 0.0001];
// multiply the step with "backoff", when wolfe condition doesn't satisfy
optional real backoff = 13 [default = 0.5];
optional double backoff = 13 [default = 0.5];
// how many "s"s and "y"s are kept in owlqn
optional int32 owlqn_steps = 14 [default = 10];
// accept the step if encountered "max_backoff" times of "reduce the step"
......@@ -82,15 +82,15 @@ message OptimizationConfig {
// default learning method("momentum") use global decayed learning rate with momentum.
// "adagrad", "adadelta" and "rmsprop" can set momentum too.
optional string learning_method = 23 [default = "momentum"];
optional real ada_epsilon = 24 [default = 1e-6];
optional real ada_rou = 26 [default = 0.95];
optional double ada_epsilon = 24 [default = 1e-6];
optional double ada_rou = 26 [default = 0.95];
// Force to do average in cpu in order to save gpu memory usage
optional bool do_average_in_cpu = 25 [default = false];
// delta add rate in pserver, used while num_batches_per_send_parameter>1
// will be divided by #machines automatically.
optional real delta_add_rate = 28 [default = 1.0];
optional double delta_add_rate = 28 [default = 1.0];
// We split a large size into smaller mini-batches, whose sizes are
// determined by mini_batch_size. It only takes effect when there is
......@@ -108,14 +108,14 @@ message OptimizationConfig {
// shrink sparse parameter value
// only works if parameter is remote sparse update and has L1 decay rate
optional real shrink_parameter_value = 32 [default = 0];
optional double shrink_parameter_value = 32 [default = 0];
////////////////////////////
// Options Adam Optimizer //
////////////////////////////
optional real adam_beta1 = 33 [default = 0.9];
optional real adam_beta2 = 34 [default = 0.999];
optional real adam_epsilon = 35 [default = 1e-8];
optional double adam_beta1 = 33 [default = 0.9];
optional double adam_beta2 = 34 [default = 0.999];
optional double adam_epsilon = 35 [default = 1e-8];
// arguments for learning rate scheduler
// Format: num1:rate1,num2:rate2,...,numK:rateK
......@@ -127,7 +127,7 @@ message OptimizationConfig {
// for async sgd gradient commit control.
// when async_lagged_grad_discard_ratio * num_gradient_servers commit passed,
// current async gradient will be discard silently.
optional real async_lagged_grad_discard_ratio = 37 [default = 1.5];
optional double async_lagged_grad_discard_ratio = 37 [default = 1.5];
};
message TrainerConfig {
......
......@@ -203,6 +203,26 @@ class CheckWrapper(object):
callback(each)
class CheckInputTypeWrapper(object):
    """Generator wrapper that validates the container type of each sample.

    When the wrapped generator yields a dict, ``input_types`` must also be a
    dict. Any other combination is passed through unchanged.

    :param generator: callable ``generator(obj, filename)`` returning an
                      iterable of samples.
    :param input_types: the declared input types (dict or other container).
    :param logger: object with an ``error(msg)`` method used to report a
                   mismatch before the exception is raised.
    """

    def __init__(self, generator, input_types, logger):
        self.generator = generator
        self.input_types = input_types
        self.logger = logger

    def __call__(self, obj, filename):
        """Yield every sample of the wrapped generator after checking it.

        :raises AssertionError: if a sample is a dict while ``input_types``
                                is not a dict.
        """
        for items in self.generator(obj, filename):
            # dict type is required for input_types when item is dict type.
            # An explicit check is used instead of an ``assert`` statement so
            # the validation is not stripped when Python runs with -O.
            if isinstance(items, dict) and \
                    not isinstance(self.input_types, dict):
                message = "%s type is required for input type but got %s" % (
                    repr(type(items)), repr(type(self.input_types)))
                self.logger.error(message)
                raise AssertionError(message)
            yield items
def provider(input_types=None,
should_shuffle=None,
pool_size=-1,
......@@ -355,6 +375,9 @@ def provider(input_types=None,
if use_dynamic_order:
self.generator = InputOrderWrapper(self.generator,
self.input_order)
else:
self.generator = CheckInputTypeWrapper(
self.generator, self.slots, self.logger)
if self.check:
self.generator = CheckWrapper(self.generator, self.slots,
check_fail_continue,
......
......@@ -186,8 +186,7 @@ def define_py_data_sources2(train_list, test_list, module, obj, args=None):
obj="process",
args={"dictionary": dict_name})
The related data provider can refer to
`here <../../data_provider/pydataprovider2.html#dataprovider-for-the-sequential-model>`__.
The related data provider can refer to :ref:`api_pydataprovider2_sequential_model` .
:param train_list: Train list name.
:type train_list: basestring
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册