Commit 9795e016
Authored on Sep 20, 2016 by liaogang

    Merge remote-tracking branch 'upstream/master'

Parents: 2daa05c0, 2c5a6ac0
Showing 34 changed files with 880 additions and 297 deletions (+880 / -297)
Changed files (34):

  CMakeLists.txt                                                    (+2 / -2)
  cmake/util.cmake                                                  (+2 / -3)
  demo/seqToseq/seqToseq_net.py                                     (+9 / -13)
  doc/build/contribute_to_paddle.md                                 (+24 / -6)
  doc/ui/api/trainer_config_helpers/activations.rst                 (+7 / -0)
  doc/ui/data_provider/pydataprovider2.rst                          (+17 / -20)
  doc_cn/ui/data_provider/mnist_config.py                           (+2 / -0)
  doc_cn/ui/data_provider/mnist_provider.dict.py                    (+25 / -0)
  doc_cn/ui/data_provider/pydataprovider2.rst                       (+69 / -2)
  paddle/gserver/dataproviders/DataProvider.cpp                     (+7 / -3)
  paddle/gserver/dataproviders/DataProvider.h                       (+31 / -8)
  paddle/gserver/dataproviders/MultiDataProvider.cpp                (+7 / -3)
  paddle/gserver/dataproviders/MultiDataProvider.h                  (+3 / -1)
  paddle/gserver/dataproviders/PyDataProvider2.cpp                  (+81 / -22)
  paddle/gserver/layers/CRFLayer.cpp                                (+1 / -1)
  paddle/gserver/layers/CostLayer.cpp                               (+1 / -5)
  paddle/gserver/tests/rnn_data_provider.py                         (+6 / -2)
  paddle/gserver/tests/sequenceGen.py                               (+12 / -6)
  paddle/gserver/tests/test_PyDataProvider2.cpp                     (+118 / -0)
  paddle/gserver/tests/test_PyDataProvider2.py                      (+44 / -1)
  paddle/trainer/Trainer.cpp                                        (+2 / -2)
  paddle/trainer/tests/.gitignore                                   (+1 / -0)
  paddle/trainer/tests/sample_trainer_rnn_gen.conf                  (+42 / -85)
  paddle/utils/PythonUtil.h                                         (+21 / -1)
  proto/ModelConfig.proto.m4                                        (+1 / -1)
  proto/ParameterConfig.proto.m4                                    (+4 / -4)
  python/CMakeLists.txt                                             (+9 / -1)
  python/paddle/trainer/PyDataProvider2.py                          (+176 / -7)
  python/paddle/trainer/config_parser.py                            (+84 / -41)
  python/paddle/trainer_config_helpers/activations.py               (+10 / -1)
  python/paddle/trainer_config_helpers/evaluators.py                (+20 / -17)
  python/paddle/trainer_config_helpers/layers.py                    (+37 / -37)
  python/paddle/trainer_config_helpers/optimizers.py                (+1 / -1)
  python/paddle/trainer_config_helpers/tests/layers_test_config.py  (+4 / -1)
CMakeLists.txt

@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8)
 project(paddle CXX C)
 set(PADDLE_MAJOR_VERSION 0)
 set(PADDLE_MINOR_VERSION 8)
-set(PADDLE_PATCH_VERSION 0b0)
+set(PADDLE_PATCH_VERSION 0b1)
 set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})

 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
@@ -15,7 +15,7 @@ find_package(Protobuf REQUIRED)
 find_package(PythonLibs 2.7 REQUIRED)
 find_package(PythonInterp 2.7 REQUIRED)
 find_package(ZLIB REQUIRED)
-find_package(NumPy)
+find_package(NumPy REQUIRED)
 find_package(Threads REQUIRED)
 find_package(Glog)
 find_package(Gflags QUIET)
cmake/util.cmake

@@ -104,10 +104,9 @@ function(link_paddle_exe TARGET_NAME)
         ${PROTOBUF_LIBRARY}
         ${CMAKE_THREAD_LIBS_INIT}
         ${CBLAS_LIBS}
-        ${INTERAL_LIBS}
         ${ZLIB_LIBRARIES}
-        ${CMAKE_DL_LIBS})
+        ${INTERAL_LIBS}
+        ${CMAKE_DL_LIBS})

     if(WITH_PYTHON)
         target_link_libraries(${TARGET_NAME}
demo/seqToseq/seqToseq_net.py

@@ -128,12 +128,16 @@ def gru_encoder_decoder(data_conf,
         return out

     decoder_group_name = "decoder_group"
+    group_inputs = [StaticInput(input=encoded_vector, is_seq=True),
+                    StaticInput(input=encoded_proj, is_seq=True)]
+
     if not is_generating:
         trg_embedding = embedding_layer(
             input=data_layer(name='target_language_word',
                              size=target_dict_dim),
             size=word_vector_dim,
             param_attr=ParamAttr(name='_target_language_embedding'))
+        group_inputs.append(trg_embedding)

         # For decoder equipped with attention mechanism, in training,
         # target embeding (the groudtruth) is the data input,
@@ -142,22 +146,13 @@ def gru_encoder_decoder(data_conf,
         # for the recurrent_group.
         decoder = recurrent_group(name=decoder_group_name,
                                   step=gru_decoder_with_attention,
-                                  input=[StaticInput(input=encoded_vector,
-                                                     is_seq=True),
-                                         StaticInput(input=encoded_proj,
-                                                     is_seq=True),
-                                         trg_embedding])
+                                  input=group_inputs)

         lbl = data_layer(name='target_language_next_word',
                          size=target_dict_dim)
-        cost = classification_cost(input=decoder, label=lbl, )
+        cost = classification_cost(input=decoder, label=lbl)
         outputs(cost)
     else:
-        gen_inputs = [StaticInput(input=encoded_vector,
-                                  is_seq=True),
-                      StaticInput(input=encoded_proj,
-                                  is_seq=True), ]
         # In generation, the decoder predicts a next target word based on
         # the encoded source sequence and the last generated target word.
@@ -171,10 +166,11 @@ def gru_encoder_decoder(data_conf,
             size=target_dict_dim,
             embedding_name='_target_language_embedding',
             embedding_size=word_vector_dim)
-        gen_inputs.append(trg_embedding)
+        group_inputs.append(trg_embedding)
+
         beam_gen = beam_search(name=decoder_group_name,
                                step=gru_decoder_with_attention,
-                               input=gen_inputs,
+                               input=group_inputs,
                                id_input=data_layer(name="sent_id",
                                                    size=1),
                                dict_file=trg_dict_path,
doc/build/contribute_to_paddle.md

@@ -25,9 +25,12 @@ repo or just head straight to the command line:
 ```shell
 # Clone your fork to your local machine
-git clone git@github.com:USERNAME/Paddle.git
+git clone https://github.com/USERNAME/Paddle.git
 ```
+Then you can start to develop by making a local developement branch
+```shell
+git checkout -b MY_COOL_STUFF_BRANCH origin/master
+```
-Then you can start to develop.

 ## Commit
@@ -45,7 +48,7 @@ are the details if any.
 ## Keeping Fork Up to Date

-Before pull your request, you shold sync you code from the latest PaddlePaddle.
+Before pull your request, you should sync your code from the latest PaddlePaddle.
 To do this, you'll need to add a remote at first:

 ```shell
@@ -60,8 +63,7 @@ git remote -v
 Update your fork with the latest upstream changes:

 ```shell
-git fetch upstream
-git pull upstream master
+git pull --rebase upstream HEAD
 ```

 If there are no unique commits locally, git will simply perform a fast-forward.
@@ -74,10 +76,26 @@ Now, your local master branch is up-to-date with everything modified upstream.
 ```shell
 # push to your repository in Github
-git push origin master
+git push origin HEAD
 ```

 ## Pull Request

 Go to the page for your fork on GitHub, select your development branch,
 and click the **pull request button**.
+
+## Update your pull request with the lastest version
+
+During the code review, your pull request may become stale because new commits in
+baidu/Paddle. GitHub allows autmotic update if there is no conflict. You can do this
+by clicking the "Update Branch" button in your pull request page. However, in the case
+of conflict, you need to do the update manually. You need to do the following on
+your local repository:
+```shell
+git checkout MY_COOL_STUFF_BRANCH
+git pull --rebase upstream HEAD
+# You may need to resolve the conflict according to the git prompt.
+# Make and test your code.
+git push -f origin HEAD
+```
+Now your Pull Request is updated with the latest version.
doc/ui/api/trainer_config_helpers/activations.rst

@@ -12,6 +12,13 @@ AbsActivation
     :members: AbsActivation
     :noindex:

+ExpActivation
+===============
+
+..  automodule:: paddle.trainer_config_helpers.activations
+    :members: ExpActivation
+    :noindex:
+
 IdentityActivation
 ==================
doc/ui/data_provider/pydataprovider2.rst

@@ -24,7 +24,7 @@ A small part of the original data as an example is shown as below:
 .. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_train.txt

-Each line of the data contains two parts, separated by ';'. The first part is
+Each line of the data contains two parts, separated by :code:`;`. The first part is
 label of an image. The second part contains 28x28 pixel float values.

 Just write path of the above data into train.list. It looks like this:
@@ -74,7 +74,20 @@ you can take this as an example.
 .. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_config.py

-Here we specify training data by 'train.list', and no testing data is specified.
+Here we specify training data by :code:`train.list`, and no testing data is specified.
 The method which actually provide data is :code:`process`.

+User also can use another style to provide data, which defines the
+:code:`data_layer`'s name explicitly when `yield`. For example,
+the :code:`dataprovider` is shown as below.
+
+..  literalinclude:: ../../../doc_cn/ui/data_provider/mnist_provider.dict.py
+    :linenos:
+
+If user did't give the :code:`data_layer`'s name, PaddlePaddle will use
+the order of :code:`data_layer` definition roughly to determine which feature to
+which :code:`data_layer`. This order may be not correct, so TO DEFINE THE
+:code:`data_layer`'s NAMES EXPLICITLY IS THE RECOMMANDED WAY TO PROVIDER DATA.
+
 Now, this simple example of using PyDataProvider is finished.
 The only thing that the user should know is how to generte **one sample** from
@@ -93,7 +106,7 @@ DataProvider for the sequential model
 -------------------------------------

 A sequence model takes sequences as its input. A sequence is made up of several
 timesteps. The so-called timestep, is not necessary to have something to do
-with 'time'. It can also be explained to that the order of data are taken into
+with time. It can also be explained to that the order of data are taken into
 consideration into model design and training.

 For example, the sentence can be interpreted as a kind of sequence data in NLP
 tasks.
@@ -155,23 +168,7 @@ Reference
 @provider
 +++++++++

-'@provider' is a Python `Decorator`_, it can construct a PyDataProvider in
-PaddlePaddle from a user defined function. Its parameters are:
-
-* `input_types`_ defines format of the data input.
-* should_shuffle defines whether to shuffle data or not. By default, it is set
-  true during training, and false during testing.
-* pool_size is the memory pool size (in sample number) in DataProvider.
-  -1 means no limit.
-* can_over_batch_size defines whether PaddlePaddle can store little more
-  samples than pool_size. It is better to set True to avoid some deadlocks.
-* calc_batch_size is a function define how to calculate batch size. This is
-  usefull in sequential model, that defines batch size is counted upon sequence
-  or token. By default, each sample or sequence counts to 1 when calculating
-  batch size.
-* cache is a data cache strategy, see `cache`_.
-* Init_hook function is invoked once the data provider is initialized,
-  see `init_hook`_.
+..  autofunction:: paddle.trainer.PyDataProvider2.provider

 input_types
 +++++++++++
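The dict-based style the documentation above describes can be summarized in a short sketch (a minimal example assuming the PyDataProvider2 decorator API this commit introduces; the 'pixel' and 'label' keys must match data_layer names in the trainer config, as in the mnist_config.py change below):

    from paddle.trainer.PyDataProvider2 import provider, dense_vector, integer_value

    # Yielding a dict keyed by data_layer names frees PaddlePaddle from
    # guessing the feature-to-layer mapping from declaration order.
    @provider(input_types=[dense_vector(784), integer_value(10)])
    def process(settings, filename):
        with open(filename) as f:
            for line in f:
                label, pixels = line.split(';')
                yield {"pixel": [float(v) for v in pixels.split()],
                       "label": int(label)}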
doc_cn/ui/data_provider/mnist_config.py

@@ -4,3 +4,5 @@ define_py_data_sources2(train_list='train.list',
                         test_list=None,
                         module='mnist_provider',
                         obj='process')
+img = data_layer(name='pixel', size=784)
+label = data_layer(name='label', size=10)
doc_cn/ui/data_provider/mnist_provider.dict.py (new file, mode 100644)

from paddle.trainer.PyDataProvider2 import *


# Define a py data provider
@provider(input_types=[dense_vector(28 * 28),
                       integer_value(10)])
def process(settings, filename):  # settings is not used currently.
    f = open(filename, 'r')  # open one of training file

    for line in f:  # read each line
        label, pixel = line.split(';')

        # get features and label
        pixels_str = pixel.split(' ')

        pixels_float = []
        for each_pixel_str in pixels_str:
            pixels_float.append(float(each_pixel_str))

        # give data to paddle.
        yield {"pixel": pixels_float, 'label': int(label)}

    f.close()  # close file
doc_cn/ui/data_provider/pydataprovider2.rst

@@ -56,6 +56,14 @@
 This says the training data is 'train.list' and that no test data is given. The
 DataProvider referenced is the 'process' function in the 'mnist_provider' module.

+Based on the :code:`data_layer` names in the model config file, the user can also
+specify the mapping of the returned data explicitly. For example:
+
+..  literalinclude:: mnist_provider.dict.py
+    :linenos:
+
+If the user does not specify the mapping of the returned data, PaddlePaddle
+roughly determines it from the declaration order of the layers. That mapping may
+be wrong, so explicitly specifying the mapping between return values and data
+layers is recommended.
+
 This concludes the simple PyDataProvider example. To send data to PaddlePaddle,
 the user only needs to know how to read **one** sample from **one file**; the
 PaddlePaddle process handles the rest
@@ -119,11 +127,13 @@
 @provider
 +++++++++

-'@provider' is a Python `Decorator`_; it marks a function as a PyDataProvider. Its parameters are:
+:code:`@provider` is a Python `Decorator`_; it marks a function as a PyDataProvider. Its parameters are:

 * `input_types`_ is the input data format; see `input_types`_ for the available formats.
-* should_shuffle sets whether this DataProvider shuffles data. If unset, it shuffles during training and does not shuffle during testing by default
+* should_shuffle sets whether this DataProvider shuffles data. If unset, it shuffles during training and does not shuffle during testing by default.
+* min_pool_size is the minimum number of samples the DataProvider keeps in memory; it is also the shuffle granularity PaddlePaddle can guarantee. Setting it to -1 reads all data into memory first.
 * pool_size is the number of samples the DataProvider keeps in memory. Setting it to -1 means there is no limit on how much is buffered.
 * can_over_batch_size says whether Paddle may buffer slightly more than pool_size samples. Allowing this avoids many deadlock problems, so setting it to True is generally recommended
@@ -131,6 +141,11 @@
   counts as one batch size, but sometimes, to balance the computation, one sample can count as several batch sizes
 * cache is the data caching strategy; see `cache`_
 * init_hook is the function called at initialization time; see `init_hook`_
+* use_dynamic_order: if true, a dict may be returned whose keys are data_layer names and whose values are the feature values; a list or tuple may also be returned. If false, only a list or tuple may be returned.
+* check: if set to true, the data is validated against input_types.
+* check_fail_continue: if set to true, data that fails the check is discarded and training continues. Has no effect when check is false.

 input_types
 +++++++++++
@@ -190,3 +205,55 @@
 * CacheType.NO_CACHE caches no data; every pass reads the data from the Python side
 * CacheType.CACHE_PASS_IN_MEM reads the data from the Python side on the first pass; the remaining passes read it directly from memory.
+
+Notes
+--------
+
+Possible memory leaks
+++++++++++++++++++++++
+
+PaddlePaddle passes every line of train.list to the process function, producing one generator per line. If train.list contains 100 training files, 100 generators are created. This by itself is not a serious problem.
+
+However, if every training sample is a separate file and there are very many samples, a great number of generators are produced. A generator that has never been called takes almost no memory, but once called it holds on to its current context, and that context can be very large. Moreover, a generator must be called at least twice before it is known to have stopped, so even when process contains a single yield, the same generator must be randomly selected twice before that memory is released.
+
+.. code-block:: python
+
+    def func():
+        yield 0
+
+    f = func()  # create the generator
+    tmp = next(f)  # the first call returns 0
+    tmp = next(f)  # only the second call raises StopIteration
+
+Calling the generators in order avoids the problem. The recommended best practice is therefore not to put every sample into train.list, but to put the sample paths into another text file and write that file's path into train.list, or else to keep as few variable references as possible in the Python generator's context. For example
+
+.. code-block:: python
+
+    def real_process(fn):
+        # ... read from fn
+        return result  # when the function returns, Python can release the
+                       # references held by its local variables.
+
+    def process(fn):
+        yield real_process(fn)
+
+This is a logical issue in how PyDataProvider reads data and basically cannot be fixed as a whole.
+
+Running out of memory
+++++++++++++++++++++++
+
+PyDataProvider2 uses as much memory as it can get. On machines with little memory it is recommended to set the :code:`pool_size` variable; it should be larger than the training batch size and, as long as memory allows, the larger the better.
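A hedged sketch of the new validation parameters listed above (hypothetical provider; only the decorator arguments are taken from this commit):

    from paddle.trainer.PyDataProvider2 import provider, integer_value

    # check=True validates every yielded sample against input_types;
    # check_fail_continue=True drops invalid samples instead of aborting.
    @provider(input_types=[integer_value(10)],
              check=True,
              check_fail_continue=True)
    def process(settings, filename):
        for line in open(filename):
            yield int(line)  # values >= 10 fail the check and are dropped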
paddle/gserver/dataproviders/DataProvider.cpp

@@ -149,9 +149,13 @@ void DoubleBuffer::startAsyncLoad() {
   taskReadySem_.post();
 }

-ClassRegistrar<DataProvider, DataConfig, bool> DataProvider::registrar_;
-DataProvider* DataProvider::create(const DataConfig& config, bool useGpu) {
-  return registrar_.createByType(config.type(), config, useGpu);
+ClassRegistrar<DataProvider, DataConfig, ModelConfig, bool>
+    DataProvider::registrar_;
+
+DataProvider* DataProvider::create(const DataConfig& config,
+                                   const ModelConfig& modelConfig,
+                                   bool useGpu) {
+  return registrar_.createByType(config.type(), config, modelConfig, useGpu);
 }

 REGISTER_DATA_PROVIDER(simple, SimpleDataProvider);
paddle/gserver/dataproviders/DataProvider.h

@@ -39,15 +39,30 @@ limitations under the License. */
 #include "paddle/parameter/Argument.h"

 namespace paddle {
 /**
  * @def REGISTER_DATA_PROVIDER
- * @brief Macro for registering a data provider
+ * @brief Macro for registering a data provider. The class type should contain
+ *        a consturctor with parameter (DataConfig, bool).
  */
-#define REGISTER_DATA_PROVIDER(__type_name, __class_name) \
-  static InitFunction __reg_type_##__type_name([]() { \
-    DataProvider::registrar_.registerClass<__class_name>(#__type_name); \
-  })
+#define REGISTER_DATA_PROVIDER(__type_name, __class_name)\
+  static InitFunction __reg_type_##__type_name([]() {\
+    DataProvider::registrar_.registerClass(\
+        #__type_name, \
+        [](DataConfig conf, ModelConfig, bool useGpu) -> DataProvider* { \
+          DataProvider* dp = new __class_name (conf, useGpu);\
+          return dp;\
+    });\
+})
+
+/**
+ * @def REGISTER_DATA_PROVIDER_EX
+ * @brief Macro for registering a data provider, which contains a constructor
+ *        with parameter (DataConfig, ModelConfig, bool).
+ */
+#define REGISTER_DATA_PROVIDER_EX(__type_name, __class_name) \
+  static InitFunction __reg_type_##__type_name([] { \
+    DataProvider::registrar_.registerClass<__class_name>(#__type_name); \
+  })

 class DataBatch;
 class BufferBatch;
@@ -285,10 +300,18 @@ protected:
  */
 class DataProvider {
 public:
-  static ClassRegistrar<DataProvider, DataConfig, bool> registrar_;
+  static ClassRegistrar<DataProvider, DataConfig, ModelConfig, bool>
+      registrar_;
   static DataProvider* create(const DataConfig& config,
+                              const ModelConfig& modelConfig,
                               bool useGpu = FLAGS_use_gpu);

+  /**
+   * @brief create only used for unittest.
+   */
+  inline static DataProvider* create(const DataConfig& config, bool useGpu) {
+    return create(config, ModelConfig(), useGpu);
+  }
+
   DataProvider(const DataConfig& config, bool useGpu)
       : config_(config),
         skipShuffle_(false),
@@ -336,13 +359,13 @@ public:
    * @note return -1 to indicate unlimited number of samples.
    */
   virtual int64_t getSize() = 0;

   /**
    * @brief Get next batch training samples internally
    * @param[in]  size      size of training samples to get
    * @param[out] batch     a batch of training samples
    * @return actual size of obtained training samples
    */
   virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch) = 0;

 protected:
paddle/gserver/dataproviders/MultiDataProvider.cpp

@@ -22,7 +22,9 @@ namespace paddle {

 using namespace std;

-MultiDataProvider::MultiDataProvider(const DataConfig& config, bool useGpu)
+MultiDataProvider::MultiDataProvider(const DataConfig& config,
+                                     const ModelConfig& modelConfig,
+                                     bool useGpu)
     : DataProvider(config, useGpu) {
   bool atLeastOneMainDataFlag = false;
   totalDataRatio_ = 0;
@@ -58,7 +60,9 @@ MultiDataProvider::MultiDataProvider(const DataConfig& config, bool useGpu)
       subConfig.set_async_load_data(false);
     }
     subDataProviders_[i] =
-        std::unique_ptr<DataProvider>(DataProvider::create(subConfig, useGpu_));
+        std::unique_ptr<DataProvider>(DataProvider::create(subConfig,
+                                                           modelConfig,
+                                                           useGpu_));
   }
 }
@@ -116,6 +120,6 @@ int64_t MultiDataProvider::getNextBatchInternal(int64_t size,
   return batch->getSize();
 }

-REGISTER_DATA_PROVIDER(multi, MultiDataProvider);
+REGISTER_DATA_PROVIDER_EX(multi, MultiDataProvider);

 }  // namespace paddle
paddle/gserver/dataproviders/MultiDataProvider.h

@@ -24,7 +24,9 @@ protected:
   std::vector<std::unique_ptr<DataProvider>> subDataProviders_;

 public:
-  MultiDataProvider(const DataConfig& config, bool useGpu);
+  MultiDataProvider(const DataConfig& config,
+                    const ModelConfig& modelConfig,
+                    bool useGpu);
   ~MultiDataProvider() {}
   virtual void reset();
   virtual void shuffle();
paddle/gserver/dataproviders/PyDataProvider2.cpp

@@ -24,6 +24,27 @@ limitations under the License. */
 namespace paddle {

+namespace unittest {
+
+static std::unique_ptr<std::function<void(size_t /*poolActualSize */)>>
+    OnPoolFilled;
+
+namespace pydp2 {
+
+void setOnPoolFilledHook(const std::function<void(size_t)>& callback) {
+  OnPoolFilled.reset(new std::function<void(size_t)>());
+  *OnPoolFilled = callback;
+}
+
+void clearOnPoolFilledHook() {
+  OnPoolFilled.reset();
+}
+
+}  // namespace pydp2
+}  // namespace unittest
+
 /**
  * Slot type
  */
@@ -179,6 +200,7 @@ public:
    * Ctor
    */
   PyDataProvider2(const DataConfig& config,
+                  const ModelConfig& modelConfig,
                   bool useGpu)
       : DataProvider(config, useGpu),
         callingContextCreated_(2) {
     auto& args = config.load_data_args();
@@ -192,6 +214,12 @@ public:
     py::DictHelper kwargsDict(kwargs);
     kwargsDict.setBool("is_train", !config.for_test());
+    std::vector<std::string> inputs;
+    inputs.reserve(modelConfig.input_layer_names().size());
+    std::copy(modelConfig.input_layer_names().begin(),
+              modelConfig.input_layer_names().end(),
+              std::back_inserter(inputs));
+    kwargsDict.setStringList("input_order", inputs);

     // kwargs is keyword arguemts to create object.
     this->createPyDataObj(config.load_data_module(),
@@ -199,7 +227,7 @@ public:
                           config.files(),
                           std::move(kwargs));
     DBG << "Instance " << instance_.get() << " loaded.";
-    this->readPyFields();
+    this->readPyFields(config.for_test());
     DBG << "Py Field Done";
   }
@@ -253,14 +281,28 @@ private:
     CHECK_PY(instance_) << "Cannot Create instance";
   }

-  void readPyFields() {
+  void readPyFields(bool testing) {
     py::ObjectHelper self(this->instance_);
-    this->skipShuffle_ = !self.getBoolAttr("should_shuffle");
+    bool ok;
+    this->skipShuffle_ = !self.getBoolAttr("should_shuffle",
+                                           &ok /*isBoolType*/);
+    if (!ok) {
+      this->skipShuffle_ = testing;  // shuffle when is training, skip shuffle
+                                     // when is testing.
+    }
+    DBG << "Provider Skip Shuffle " << this->skipShuffle_;
+
     this->poolSize_ = self.getIntAttr<size_t>("pool_size", &ok);
     if (!ok) {
       this->poolSize_ = -1UL;
     }
+    this->minPoolSize_ = self.getIntAttr<size_t>("min_pool_size", &ok);
+    if (!ok) {
+      this->minPoolSize_ = -1UL;
+    }
+    this->minPoolSize_ = std::min(this->poolSize_, this->minPoolSize_);
+
     this->canOverBatchSize_ = self.getBoolAttr("can_over_batch_size");
     calcBatchSize_.reset(self.getAttr("calc_batch_size"));
@@ -307,7 +349,6 @@ private:
   }

   void loadThread() {
-    callingContexts_.reserve(fileLists_.size());
     DBG << "Creating context";
     for (auto& filename : fileLists_) {
       PyGuard g;
@@ -332,7 +373,14 @@ private:
       bool atEnd;
       data = py::iterNext(callingContexts_[cid], &atEnd);
       if (atEnd || data == nullptr) {
-        callingContexts_.erase(callingContexts_.begin() + cid);
+        if (cid != 0) {
+          std::swap(callingContexts_[cid], callingContexts_[0]);
+          cid = 0;
+        }
+        {
+          PyGuard g;
+          callingContexts_.pop_front();
+        }
+        this->pullCV_.notify_all();
         continue;
       }
@@ -354,11 +402,7 @@ private:
     if (this->loadThread_){  // wait poolActualSize < poolSize;
       std::unique_lock<std::mutex> l(mtx_);
       pushCV_.wait(l, [this, additionalBatchSize] {
-        if (this->canOverBatchSize_) {
-          return this->poolActualSize_ < poolSize_;
-        } else {
-          return this->poolActualSize_ + additionalBatchSize < poolSize_;
-        }
+        return this->poolActualSize_ < poolSize_;
       });
     }
@@ -402,7 +446,7 @@ private:
 private:
   std::unique_ptr<std::thread> loadThread_;
   std::atomic<bool> exit_;
-  std::vector<PyObjectPtr> callingContexts_;
+  std::deque<PyObjectPtr> callingContexts_;
   std::deque<PyObjectPtr> dataPool_;
   size_t poolActualSize_;
   std::condition_variable pushCV_;
@@ -413,6 +457,7 @@ private:
   PyObjectPtr instance_;
   size_t poolSize_;
+  size_t minPoolSize_;
   bool canOverBatchSize_;
   PyObjectPtr calcBatchSize_;
   PyObjectPtr generator_;
@@ -478,8 +523,13 @@ public:
       // data pool ready.
       std::unique_lock<std::mutex> l(mtx_);
       pullCV_.wait(l, [this, &size] {
-        return this->poolActualSize_ >= size || callingContexts_.empty();
+        return this->poolActualSize_ >= std::max(size, this->minPoolSize_)
+            || callingContexts_.empty();
       });
+
+      if (unittest::OnPoolFilled) {
+        (*unittest::OnPoolFilled)(this->poolActualSize_);
+      }
     }
     std::deque<PyObjectPtr> data;
     size_t bsize = 0;
@@ -495,7 +545,8 @@ public:
     std::deque<PyObjectPtr>& pool = *poolPtr;

     while (bsize < size && !pool.empty()) {
-      {  // move data from pool to data
+      {
+        // move data from pool to data
         std::lock_guard<std::mutex> guard(mtx_);
         if (skipShuffle_) {
           size_t i = 0;
@@ -505,14 +556,13 @@ public:
         } else {  // when shuffle, use swap to drop only last pool element.
           size_t i = ThreadLocalRand::rand() % pool.size();
           CHECK(pool[i] != nullptr);
-          if (i != pool.size() - 1) {
-            std::swap(pool[i], pool.back());
+          if (i != 0) {
+            std::swap(pool[i], pool.front());
           }
-          data.emplace_back(std::move(pool.back()));
-          pool.pop_back();
+          data.emplace_back(std::move(pool.front()));
+          pool.pop_front();
         }
-      }
-      {
+
         if (calcBatchSize_) {  // custom calc batch size.
           PyGuard guard;
           Py_INCREF(data.back().get());
@@ -521,8 +571,17 @@ public:
           calcBatchSize.getArgs().set(0, data.back());
           PyObjectPtr customBatchSize(calcBatchSize());
           bool ok;
-          bsize += py::castInt<size_t>(customBatchSize.get(), &ok);
+          size_t tmp = py::castInt<size_t>(customBatchSize.get(), &ok);
           CHECK(ok) << "calc_batch_size must return int";
+
+          if (bsize + tmp > size && !canOverBatchSize_) {
+            // Put data back.
+            pool.push_front(std::move(data.back()));
+            data.pop_back();
+            break;
+          } else {
+            bsize += tmp;
+          }
         } else {
           bsize += 1;
         }
@@ -598,7 +657,6 @@ public:
     } else {
       *batch = cpuBatch;
     }
     return bsize;
   }
 };
@@ -606,7 +664,8 @@ public:
 std::unordered_set<uintptr_t> PyDataProvider2::gModuleClsPtrs_;
 PyObjectPtr PyDataProvider2::zeroTuple_(PyTuple_New(0));

-REGISTER_DATA_PROVIDER(py2, PyDataProvider2);
+REGISTER_DATA_PROVIDER_EX(py2, PyDataProvider2);

 /**
  * Scanner for dense slot.
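To illustrate the min_pool_size logic added above from the user's side (the provider now waits until the pool holds at least max(batch size, min_pool_size) samples before serving a batch), a sketch under the same PyDataProvider2 API:

    from paddle.trainer.PyDataProvider2 import provider, integer_value

    # min_pool_size is the shuffle granularity PaddlePaddle can guarantee:
    # batches are drawn at random from an in-memory pool kept at least this
    # large until the data source is exhausted.
    @provider(input_types=[integer_value(100)],
              pool_size=5000,      # buffer at most 5000 samples
              min_pool_size=1000)  # keep at least 1000 samples to pick from
    def process(settings, filename):
        for line in open(filename):
            yield int(line)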
paddle/gserver/layers/CRFLayer.cpp

@@ -31,7 +31,7 @@ bool CRFLayer::init(const LayerMap& layerMap,
   }

   // coeff only affect bp, keep consistent with CostLayer
-  coeff_ = config_.has_coeff() ? config_.coeff() : real(1.0);
+  coeff_ = config_.coeff();
   if (inputLayers_.size() == 3) {
     weightLayer_ = inputLayers_[2];
   }
paddle/gserver/layers/CostLayer.cpp

@@ -26,11 +26,7 @@ namespace paddle {
 bool CostLayer::init(const LayerMap& layerMap,
                      const ParameterMap& parameterMap) {
   bool ret = Layer::init(layerMap, parameterMap);
-  if (config_.has_coeff()) {
-    coeff_ = config_.coeff();  // coeff only affact bp
-  } else {
-    coeff_ = real(1.0);
-  }
+  coeff_ = config_.coeff();
   if (!ret) return ret;
   CHECK_GE(inputLayers_.size(), 2UL);
   CHECK_LE(inputLayers_.size(), 3UL);
paddle/gserver/tests/rnn_data_provider.py

@@ -19,14 +19,18 @@ data = [
     [[[0, 2], [2, 5], [0, 1, 2]], 1],
 ]

+
 @provider(input_types=[integer_value_sub_sequence(10),
-                       integer_value(2)])
+                       integer_value(2)],
+          should_shuffle=False)
 def process_subseq(settings, file_name):
     for d in data:
         yield d

+
 @provider(input_types=[integer_value_sequence(10),
-                       integer_value(2)])
+                       integer_value(2)],
+          should_shuffle=False)
 def process_seq(settings, file_name):
     for d in data:
         seq = []
paddle/gserver/tests/sequenceGen.py

@@ -17,22 +17,26 @@ import sys
 from paddle.trainer.PyDataProvider2 import *


 def hook(settings, dict_file, **kwargs):
     settings.word_dict = dict_file
-    settings.input_types = [integer_value_sequence(len(settings.word_dict)),
-                            integer_value_sequence(3)]
+    settings.input_types = [integer_value_sequence(len(settings.word_dict)),
+                            integer_value_sequence(3)]
     settings.logger.info('dict len : %d' % (len(settings.word_dict)))

-@provider(init_hook=hook)
+
+@provider(init_hook=hook, should_shuffle=False)
 def process(settings, file_name):
     with open(file_name, 'r') as fdata:
         for line in fdata:
             label, comment = line.strip().split('\t')
             label = int(''.join(label.split()))
             words = comment.split()
-            word_slot = [settings.word_dict[w] for w in words if w in settings.word_dict]
+            word_slot = [settings.word_dict[w] for w in words
+                         if w in settings.word_dict]
             yield word_slot, [label]

+
 ## for hierarchical sequence network
 def hook2(settings, dict_file, **kwargs):
     settings.word_dict = dict_file
@@ -40,17 +44,19 @@ def hook2(settings, dict_file, **kwargs):
                             integer_value_sub_sequence(3)]
     settings.logger.info('dict len : %d' % (len(settings.word_dict)))

-@provider(init_hook=hook2)
+
+@provider(init_hook=hook2, should_shuffle=False)
 def process2(settings, file_name):
     with open(file_name) as fdata:
         label_list = []
         word_slot_list = []
         for line in fdata:
             if (len(line)) > 1:
-                label, comment = line.strip().split('\t')
+                label, comment = line.strip().split('\t')
                 label = int(''.join(label.split()))
                 words = comment.split()
-                word_slot = [settings.word_dict[w] for w in words if w in settings.word_dict]
+                word_slot = [settings.word_dict[w] for w in words
+                             if w in settings.word_dict]
                 label_list.append([label])
                 word_slot_list.append(word_slot)
             else:
paddle/gserver/tests/test_PyDataProvider2.cpp

@@ -20,6 +20,18 @@ limitations under the License. */
 #include "paddle/gserver/dataproviders/DataProvider.h"

 P_DEFINE_string(train_list, "unittest.list", "file list for unittest");

+namespace paddle {
+namespace unittest {
+namespace pydp2 {
+extern void setOnPoolFilledHook(const std::function<void(size_t)>& func);
+extern void clearOnPoolFilledHook();
+}  // namespace pydp2
+}  // namespace unittest
+}  // namespace paddle
+
 const paddle::real epsilon = 1e-5;

 static inline int64_t readDataBatch(
@@ -235,6 +247,112 @@ TEST(PyDataProvider2, index_sub_seq) {
   }
 }

+TEST(PyDataProvider2, min_pool_size) {
+  paddle::DataConfig config;
+  config.set_type("py2");
+  config.set_files(FLAGS_train_list.c_str());
+  config.set_load_data_module("test_PyDataProvider2");
+  config.set_load_data_object("test_min_pool_size");
+  config.set_load_data_args("");
+  size_t totalData = 1 << 14;
+  constexpr size_t batchSize = 100;
+  constexpr size_t minPoolSize = 1000;
+  paddle::DataBatch batch;
+  std::unique_ptr<paddle::DataProvider> provider(
+        paddle::DataProvider::create(config, false));
+  provider->reset();
+
+  paddle::unittest::pydp2::setOnPoolFilledHook([&](size_t poolSize) {
+    if (totalData > batchSize) {
+      CHECK_GE(poolSize, std::min(totalData - batchSize, minPoolSize));
+    }
+  });
+  while (true) {
+    size_t realBatchSize = provider->getNextBatchInternal(batchSize, &batch);
+    if (realBatchSize) {
+      totalData -= realBatchSize;
+    } else {
+      break;
+    }
+  }
+  paddle::unittest::pydp2::clearOnPoolFilledHook();
+}
+
+TEST(PyDataProvider2, can_over_batch_size) {
+  paddle::DataConfig config;
+  config.set_type("py2");
+  config.set_files(FLAGS_train_list.c_str());
+  config.set_load_data_module("test_PyDataProvider2");
+  config.set_load_data_object("test_can_over_batch_size");
+  config.set_load_data_args("");
+  paddle::DataBatch batch;
+  std::unique_ptr<paddle::DataProvider> provider(
+        paddle::DataProvider::create(config, false));
+  provider->reset();
+  constexpr size_t batchSize = 100;
+  while (true) {
+    size_t realBatchSize = provider->getNextBatchInternal(batchSize, &batch);
+    if (realBatchSize) {
+      CHECK_LE(realBatchSize, batchSize);
+    } else {
+      break;
+    }
+  }
+}
+
+TEST(PyDataProvider2, input_order) {
+  paddle::DataConfig config;
+  config.set_type("py2");
+  config.set_files(FLAGS_train_list.c_str());
+  config.set_load_data_module("test_PyDataProvider2");
+  config.set_load_data_object("test_input_order");
+  config.set_load_data_args("");
+  paddle::ModelConfig modelConfig;
+  *modelConfig.add_input_layer_names() = "input1";
+  *modelConfig.add_input_layer_names() = "input2";
+  paddle::DataBatch batch;
+  std::unique_ptr<paddle::DataProvider> provider(
+        paddle::DataProvider::create(config, modelConfig, false));
+  provider->reset();
+  constexpr size_t batchSize = 100;
+  while (true) {
+    size_t realBatchSize = provider->getNextBatchInternal(batchSize, &batch);
+    if (!realBatchSize) {
+      break;
+    }
+    ASSERT_EQ(batch.getStreams().size(), 2);
+    for (size_t i = 0; i < realBatchSize; ++i) {
+      ASSERT_EQ(batch.getStream(0).ids->getData()[i], 0);
+      ASSERT_EQ(batch.getStream(1).ids->getData()[i], 1);
+    }
+  }
+}
+
+TEST(PyDataProvider2, test_check) {
+  paddle::DataConfig config;
+  config.set_type("py2");
+  config.set_files(FLAGS_train_list.c_str());
+  config.set_load_data_module("test_PyDataProvider2");
+  config.set_load_data_object("test_check");
+  config.set_load_data_args("");
+  paddle::DataBatch batch;
+  std::unique_ptr<paddle::DataProvider> provider(
+        paddle::DataProvider::create(config, false));
+  provider->reset();
+  while (true) {
+    size_t realBatchSize = provider->getNextBatchInternal(100, &batch);
+    if (!realBatchSize) {
+      break;
+    } else {
+      auto& ivec = batch.getStream(0).ids;
+      for (size_t i = 0; i < ivec->getSize(); ++i) {
+        CHECK_LT(ivec->getData()[i], 10);
+      }
+    }
+  }
+}
+
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
   paddle::initMain(argc, argv);
paddle/gserver/tests/test_PyDataProvider2.py

@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import random
+
 from paddle.trainer.PyDataProvider2 import *
@@ -39,7 +41,8 @@ def test_init_hook(setting, filename):

 @provider(
-    input_types=[sparse_binary_vector(30000, seq_type=SequenceType.NO_SEQUENCE)])
+    input_types=[sparse_binary_vector(30000,
+                                      seq_type=SequenceType.NO_SEQUENCE)])
 def test_sparse_non_value_no_seq(setting, filename):
     for i in xrange(200):
         yield [(i + 1) * (j + 1) for j in xrange(10)]
@@ -66,3 +69,43 @@ def test_index_sub_seq(setting, filename):
     for i in xrange(200):
         yield list(gen_sub_seq(i))
+
+
+@provider(input_types=[index_slot(100)], min_pool_size=1000)
+def test_min_pool_size(setting, filename):
+    for _ in xrange(1 << 14):
+        yield random.randint(0, 100 - 1)
+
+
+@provider(input_types=[index_slot(100, seq_type=SequenceType.SEQUENCE)],
+          can_over_batch_size=False,
+          calc_batch_size=lambda x: len(x[0]))
+def test_can_over_batch_size(setting, filename):
+    for _ in xrange(1 << 10):
+        seq_len = random.randint(0, 99)
+        yield [random.randint(0, 100 - 1) for _ in xrange(seq_len)]
+
+
+@provider(input_types=[index_slot(10), index_slot(10)])
+def test_input_order(setting, filename):
+    for _ in xrange(1000):
+        yield {'input1': 0, 'input2': 1}
+
+
+@provider(input_types=[index_slot(10)],
+          check=True,
+          check_fail_continue=True,
+          should_shuffle="123")  # also test should shuffle
+def test_check(settings, filename):
+    yield_good_value = False
+
+    while not yield_good_value:
+        for _ in xrange(10000):
+            i = random.randint(0, 100)
+            if i < 10:
+                yield_good_value = True
+                yield i
paddle/trainer/Trainer.cpp

@@ -194,7 +194,7 @@ void Trainer::init(const std::shared_ptr<TrainerConfigHelper> &config,
   dataProvider_ = dataProvider;
   if (!dataProvider_ && config_->hasDataConfig()) {
-    dataProvider_.reset(DataProvider::create(*config_, gpuData));
+    dataProvider_.reset(DataProvider::create(*config_, *config_, gpuData));
   }
   if (dataProvider_) {
     evaluator_.reset(trainerInternal_.getGradientMachine()->makeEvaluator());
@@ -212,7 +212,7 @@ void Trainer::init(const std::shared_ptr<TrainerConfigHelper> &config,
   testDataProvider_ = testDataProvider;
   if (!testDataProvider_ && config_->hasTestDataConfig()) {
     testDataProvider_.reset(
-        DataProvider::create(config_->getTestDataConfig(), gpuData));
+        DataProvider::create(config_->getTestDataConfig(), *config_, gpuData));
   }
   if (testDataProvider_) {
     tester_.reset(new Tester(config_, createTesterConfig(),
paddle/trainer/tests/.gitignore (+1 line)

dump_text.test
test_pydata_provider_wrapper.json
*proto.bin
paddle/trainer/tests/sample_trainer_rnn_gen.conf

@@ -13,96 +13,53 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.

+import math
+from paddle.trainer_config_helpers import *

-beam_search = get_config_arg('beam_search', bool, False)
-
-model_type("recurrent_nn")
-
-Settings(learning_rate=0, batch_size=15, algorithm='sgd')
-
-Inputs("sent_id", "dummy_data_input")
-Outputs("predict_word")
+settings(batch_size=15, learning_rate=0)

 num_words = 5

+beam_flag = get_config_arg('beam_search', bool, False)
+
-DataLayer(name="sent_id", size=1, )
+sent_id = data_layer(name="sent_id", size=1)

 # This layer has no actual use, but only to decide batch_size in generation.
 # When generating, at least one Memory in RecurrentLayer MUST have a boot layer.
-DataLayer(name="dummy_data_input", size=2, )
+dummy_data = data_layer(name="dummy_data_input", size=2)

-if beam_search:
-    RecurrentLayerGroupBegin("decoding_layer_group",
-                             in_links=[],
-                             out_links=["predict_word"],
-                             generator=Generator(max_num_frames=10,
-                                                 beam_size=2,
-                                                 num_results_per_sample=2, ))
-else:
-    RecurrentLayerGroupBegin("decoding_layer_group",
-                             in_links=[],
-                             out_links=["predict_word"],
-                             generator=Generator(max_num_frames=10, ))
-
-dummy_memory = Memory(name="dummy_memory", size=2,
-                      boot_layer="dummy_data_input")
-MixedLayer(name="dummy_memory", size=2, bias=False,
-           inputs=[IdentityProjection(dummy_memory)], )
-
-state_memory = Memory(name="state", size=num_words,
-                      #boot_bias=True,
-                      #boot_bias_active_type = "tanh",
-                      )
-
-predict_word_memory = Memory(name="predict_word", size=num_words,
-                             boot_with_const_id=0, )
-
-MixedLayer(name="word_embedding",
-           size=num_words,  # word embedding dim is the same as num_words in this test.
-           bias=False,
-           inputs=TableProjection(predict_word_memory,
-                                  initial_std=1,
-                                  learning_rate=0,
-                                  parameter_name="wordvec"))
-
-Layer(  # simplified RNN for testing
-    name="state",
-    type="mixed",
-    size=num_words,
-    bias=False,
-    inputs=[FullMatrixProjection("word_embedding",
-                                 parameter_name="transtable")])
-
-Layer(name="output",
-      type="mixed",
-      size=num_words,
-      active_type="exponential",
-      bias=False,
-      inputs=TransposedFullMatrixProjection("state",
-                                            initial_std=1,
-                                            learning_rate=0,
-                                            parameter_name="wordvec"), )
-
-Layer(name="predict_word", type="maxid", inputs=["output"], )
-
-Layer(name="eos_check", type="eos_id", eos_id=num_words - 1,
-      inputs=["predict_word"], )
-RecurrentLayerGroupEnd("decoding_layer_group")
-
-Evaluator(name="answer_printer",
-          type="seq_text_printer",
-          dict_file="./trainer/tests/test_gen_dict.txt",
-          result_file="./trainer/tests/dump_text.test",
-          inputs=["sent_id", "predict_word", ], )
+gen_inputs = [StaticInput(input=dummy_data, size=2),
+              GeneratedInput(size=num_words,
+                             embedding_name="wordvec",
+                             embedding_size=num_words)]
+
+def step(dummy_memory, predict_word):
+    # simplified RNN for testing
+    with mixed_layer(size=num_words) as layer:
+        layer += full_matrix_projection(input=predict_word,
+                                        param_attr=ParamAttr(name="transtable"))
+    with mixed_layer(size=num_words, act=ExpActivation()) as out:
+        out += trans_full_matrix_projection(input=layer,
+                                            param_attr=ParamAttr(name="wordvec"))
+    return out
+
+beam_gen = beam_search(name="rnn_gen",
+                       step=step,
+                       input=gen_inputs,
+                       id_input=sent_id,
+                       dict_file="./trainer/tests/test_gen_dict.txt",
+                       result_file="./trainer/tests/dump_text.test",
+                       bos_id=0,
+                       eos_id=num_words - 1,
+                       beam_size=2 if beam_flag else 1,
+                       num_results_per_sample=2 if beam_flag else 1,
+                       max_length=10)
+
+#outputs(beam_gen)
+# In this config, as dummy_data_input doesn't work on beam_gen (we can find dummy_memory
+# is read-only memory, and isn't used by other layers of step), we show the Inputs and Outputs
+# as follows. Note that "__beam_search_predict__" is the default output name of beam_search.
+Inputs("sent_id", "dummy_data_input")
+Outputs("__beam_search_predict__")
paddle/utils/PythonUtil.h

@@ -183,10 +183,21 @@ public:
   /**
    * Get bool attribute.
    * @param field
+   * @param [out] isBoolType return true if attribute is bool type. If the
+   *                         attribute is not bool type, then an implicit
+   *                         conversion will happens, and will return the
+   *                         conversion result.
+   *
+   *                         Such as, if the attribute is 1, then the return
+   *                         value of function will be true, but the isBoolType
+   *                         will return false.
    * @return
    */
-  bool getBoolAttr(const std::string& field) const {
+  bool getBoolAttr(const std::string& field, bool* isBoolType = nullptr) const {
     PyObjectPtr tmp(getAttr(field));
+    if (isBoolType) {
+      *isBoolType = PyBool_Check(tmp.get());
+    }
     return PyObject_IsTrue(tmp.get());
   }
@@ -266,6 +277,15 @@ public:
     this->set(key, PyBool_FromLong(b));
   }

+  void setStringList(const std::string& key,
+                     const std::vector<std::string>& items) {
+    auto* list = PyList_New(items.size());
+    for (size_t i = 0; i < items.size(); ++i) {
+      PyList_SetItem(list, i, PyString_FromString(items[i].c_str()));
+    }
+    this->set(key, list);
+  }
+
 private:
   inline void checkDict() {
     CHECK(PyDict_Check(this->dict_));
proto/ModelConfig.proto.m4

@@ -299,7 +299,7 @@ sinclude(`ModelConfigLayer.proto.m4')
   optional bool norm_by_times = 25;

   // for CostLayers
-  optional real coeff = 26;
+  optional real coeff = 26 [default = 1.0];

   // for AverageLayer
   // can be set to: 'average', 'sum' or 'squarerootn'
proto/ParameterConfig.proto.m4

@@ -31,8 +31,8 @@ message ParameterUpdaterHookConfig {
 message ParameterConfig {
   required string name = 1;
   required uint64 size = 2;
-  required real learning_rate = 3;
-  required real momentum = 4;
+  optional real learning_rate = 3 [default = 1.0];
+  optional real momentum = 4 [default = 0.0];
   optional real initial_mean = 5 [default = 0.0];
   optional real initial_std = 6 [default = 0.01];
   // use L2-regularization if decay_rate set and decay_rate_l1 not set
@@ -54,8 +54,8 @@ message ParameterConfig {
   optional int32 num_batches_regularization = 13 [default = 1];
   // if is_sparse is true, para is sparse, else para is dense
   optional bool is_sparse = 14 [default = false];
-  // if para is sparse, format should be "csc" or "csr"
-  optional string format = 15 [default = "csr"];
+  // if para is sparse, format should be "csc" or "csr", empty means is not sparse
+  optional string format = 15 [default = ""];
   // sparse remote update or not
   optional bool sparse_remote_update = 16 [default = false];
   // gradient clipping threshold, no clipping by default
python/CMakeLists.txt

 set(OUTPUT_DIR
     "${CMAKE_CURRENT_BINARY_DIR}/build")

+file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py)
+file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py)
+file(GLOB UTILS_PY_FILES . ./paddle/utils/*.py)
+
+set(PY_FILES paddle/__init__.py
+             ${TRAINER_PY_FILES}
+             ${HELPERS_PY_FILES}
+             ${UTILS_PY_FILES})
+
 set(PADDLE_INTERNAL_PACKAGE "")
 if(PADDLE_WITH_INTERNAL)
@@ -13,7 +21,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
 add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
     COMMAND ${PYTHON_EXECUTABLE} setup.py bdist_wheel
     COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp
-    DEPENDS gen_proto_py)
+    DEPENDS gen_proto_py ${PY_FILES})

 add_custom_target(paddle_python ALL DEPENDS
     ${OUTPUT_DIR}/.timestamp)
python/paddle/trainer/PyDataProvider2.py

@@ -14,6 +14,13 @@
 import cPickle
 import logging
+import collections
+import functools
+import itertools

 logging.basicConfig(format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]"
                            " %(message)s")

 class SequenceType(object):
@@ -68,30 +75,39 @@ sparse_binary_vector = sparse_non_value_slot
 sparse_vector = sparse_value_slot
 integer_value = index_slot

+
 def dense_vector_sequence(dim):
     return dense_vector(dim, seq_type=SequenceType.SEQUENCE)

+
 def dense_vector_sub_sequence(dim):
     return dense_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)

+
 def sparse_binary_vector_sequence(dim):
     return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE)

+
 def sparse_binary_vector_sub_sequence(dim):
     return sparse_binary_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)

+
 def sparse_vector_sequence(dim):
     return sparse_vector(dim, seq_type=SequenceType.SEQUENCE)

+
 def sparse_vector_sub_sequence(dim):
     return sparse_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)

+
 def integer_value_sequence(dim):
     return integer_value(dim, seq_type=SequenceType.SEQUENCE)

+
 def integer_value_sub_sequence(dim):
     return integer_value(dim, seq_type=SequenceType.SUB_SEQUENCE)

+
 def integer_sequence(dim):
     return index_slot(dim, seq_type=SequenceType.SEQUENCE)
@@ -102,13 +118,97 @@ class SingleSlotWrapper(object):
     def __call__(self, obj, filename):
         for item in self.generator(obj, filename):
-            yield [item]
+            if isinstance(item, dict):
+                yield item
+            else:
+                yield [item]


-def provider(input_types=None, should_shuffle=True, pool_size=-1,
+class InputOrderWrapper(object):
+    def __init__(self, generator, input_order):
+        self.generator = generator
+        self.input_order = input_order
+
+    def __call__(self, obj, filename):
+        for item in self.generator(obj, filename):
+            if isinstance(item, dict):
+                yield [item.get(input_name, None) for input_name in
+                       self.input_order]
+            else:
+                yield item
+
+
+class CheckWrapper(object):
+    def __init__(self, generator, input_types, check_fail_continue, logger):
+        self.generator = generator
+        self.input_types = input_types
+        self.check_fail_continue = check_fail_continue
+        self.logger = logger
+
+    def __call__(self, obj, filename):
+        for items in self.generator(obj, filename):
+            try:
+                assert len(items) == len(self.input_types)
+                assert len(filter(lambda x: x is None, items)) == 0
+                for item, input_type in itertools.izip(items, self.input_types):
+                    callback = functools.partial(CheckWrapper.loop_callback,
+                                                 input_type)
+
+                    for _ in xrange(input_type.seq_type):
+                        callback = functools.partial(CheckWrapper.loop_check,
+                                                     callback)
+                    callback(item)
+
+                yield items
+            except AssertionError as e:
+                self.logger.warning(
+                    "Item (%s) is not fit the input type with error %s"
+                    % (repr(item), repr(e)))
+
+                if self.check_fail_continue:
+                    continue
+                else:
+                    raise
+
+    @staticmethod
+    def loop_callback(input_type, each):
+        assert isinstance(input_type, InputType)
+        if input_type.type == DataType.Dense:
+            assert isinstance(each, collections.Sequence)
+            for d in each:
+                assert isinstance(d, float)
+            assert len(each, input_type.dim)
+        elif input_type.type == DataType.Index:
+            assert isinstance(each, int)
+            assert each < input_type.dim
+        elif input_type.type == DataType.SparseNonValue \
+                or input_type.type == DataType.SparseValue:
+            assert isinstance(each, collections.Sequence)
+            sparse_id = set()
+            for k in each:
+                if input_type.type == DataType.SparseValue:
+                    k, v = k
+                    assert isinstance(v, float)
+                assert isinstance(k, int)
+                assert k < input_type.dim
+                sparse_id.add(k)
+            assert len(sparse_id) == len(each)
+        else:
+            raise RuntimeError("Not support input type")
+
+    @staticmethod
+    def loop_check(callback, item):
+        for each in item:
+            callback(each)
+
+
+def provider(input_types=None, should_shuffle=None, pool_size=-1,
+             min_pool_size=-1,
              can_over_batch_size=True,
              calc_batch_size=None,
              cache=CacheType.NO_CACHE,
+             check=False, check_fail_continue=False,
+             use_dynamic_order=True,
              init_hook=None, **kwargs):
     """
     Provider decorator. Use it to make a function into PyDataProvider2 object.
@@ -130,30 +230,63 @@ def provider(input_types=None, should_shuffle=True, pool_size=-1,
     :param input_types: Specify the input types, can also be set in init_hook.
                         It is a list of InputType object. For example, input_types= \
                         [dense_vector(9), integer_value(2)].
-    :param should_shuffle: True if data should shuffle.
+    :type input_types: list|tuple
+
+    :param should_shuffle: True if data should shuffle. Pass None means shuffle
+                           when is training and not to shuffle when is testing.
     :type should_shuffle: bool
+
     :param pool_size: Max number of sample in data pool.
     :type pool_size: int
+
+    :param min_pool_size: Set minimal sample in data pool. The PaddlePaddle will
+                          random pick sample in pool. So the min_pool_size
+                          effect the randomize of data.
+    :type min_pool_size: int
+
     :param can_over_batch_size: True if paddle can return a mini-batch larger
                                 than batch size in settings. It is useful when
                                 custom calculate one sample's batch_size.
+
                                 It is very danger to set it to false and use
                                 calc_batch_size together. Default is false.
+    :type can_over_batch_size: bool
+
     :param calc_batch_size: a method to calculate each sample's batch size.
                             Default each sample's batch size is 1. But to you
                             can customize each sample's batch size.
+    :type calc_batch_size: callable
+
     :param cache: Cache strategy of Data Provider. Default is CacheType.NO_CACHE
+    :type cache: int
+
     :param init_hook: Initialize hook. Useful when data provider need load some
                       external data like dictionary. The parameter is
                       (settings, file_list, \*\*kwargs).
-                      - settings\: Is the global settings. User can set
-                        settings.input_types here.
-                      - file_list\: All file names for passed to data provider.
-                      - kwargs: Other keyword arguments passed from
+                      - settings. It is the global settings object. User can set
+                        settings.input_types here.
+                      - file_list. All file names for passed to data provider.
+                      - is_train. Is this data provider used for training or not.
+                      - kwargs. Other keyword arguments passed from
                         trainer_config's args parameter.
+    :type init_hook: callable
+
+    :param check: Check the yield data format is as same as input_types. Enable
+                  this will make data provide process slow but it is very useful
+                  for debug. Default is disabled.
+    :type check: bool
+
+    :param check_fail_continue: Continue train or not when check failed. Just
+                                drop the wrong format data when it is True. Has
+                                no effect when check set to False.
+    :type check_fail_continue: bool
+
+    :param use_dynamic_order: Allow provider to yield a dictionary object, whose
+                              key is a input data layer name, and value is the
+                              feature value. The tuples are still allowed when
+                              use_dynmaic_order is True.
+    :type use_dynamic_order: bool
     """

     def __wrapper__(generator):
@@ -168,12 +301,38 @@ def provider(input_types=None, should_shuffle=True, pool_size=-1,
                 self.slots = kwargs['slots']
             self.slots = input_types
             self.should_shuffle = should_shuffle
+
+            true_table = [1, 't', 'true', 'on']
+            false_table = [0, 'f', 'false', 'off']
+            if not isinstance(self.should_shuffle, bool) and \
+                            self.should_shuffle is not None:
+
+                if isinstance(self.should_shuffle, basestring):
+                    self.should_shuffle = self.should_shuffle.lower()
+
+                if self.should_shuffle in true_table:
+                    self.should_shuffle = True
+                elif self.should_shuffle in false_table:
+                    self.should_shuffle = False
+                else:
+                    self.logger.warning(
+                        "Could not recognize should_shuffle (%s), "
+                        "just use default value of should_shuffle."
+                        " Please set should_shuffle to bool value or "
+                        "something in %s" % (
+                            repr(self.should_shuffle),
+                            repr(true_table + false_table)))
+                    self.should_shuffle = None
+
             self.pool_size = pool_size
             self.can_over_batch_size = can_over_batch_size
             self.calc_batch_size = calc_batch_size
             self.file_list = file_list
             self.generator = generator
             self.cache = cache
+            self.min_pool_size = min_pool_size
+            self.input_order = kwargs['input_order']
+            self.check = check
             if init_hook is not None:
                 init_hook(self, file_list=file_list, **kwargs)
             if self.input_types is not None:
@@ -184,6 +343,15 @@ def provider(input_types=None, should_shuffle=True, pool_size=-1,
             if len(self.slots) == 1:
                 self.generator = SingleSlotWrapper(self.generator)

+            if use_dynamic_order:
+                self.generator = InputOrderWrapper(self.generator,
+                                                   self.input_order)
+
+            if self.check:
+                self.generator = CheckWrapper(self.generator,
+                                              self.slots,
+                                              check_fail_continue,
+                                              self.logger)
+
         return DataProvider

     return __wrapper__
@@ -196,3 +364,4 @@ def deserialize_args(args):
     :return:
     """
     return cPickle.loads(args)
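The should_shuffle coercion above accepts more than booleans; a small usage sketch (values taken from the true_table/false_table in this diff):

    from paddle.trainer.PyDataProvider2 import provider, integer_value

    # should_shuffle=None (the new default) shuffles during training only.
    # 1/'t'/'true'/'on' coerce to True; 0/'f'/'false'/'off' coerce to False;
    # anything else logs a warning and falls back to the default behaviour.
    @provider(input_types=[integer_value(10)], should_shuffle='off')
    def process(settings, filename):
        for line in open(filename):
            yield int(line)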
python/paddle/trainer/config_parser.py
浏览文件 @
9795e016
...
...
@@ -114,15 +114,15 @@ g_layer_type_map = {}
# Initialize global variables. We use this function so that we can
# call parse_config() multiple times
def
init_config_environment
(
g_default_momentum
=
0.
,
g_default_decay_rate
=
0.
,
g_default_momentum
=
None
,
g_default_decay_rate
=
None
,
g_default_initial_mean
=
0.
,
g_default_initial_std
=
0.01
,
g_default_num_batches_regularization
=
1
,
g_default_num_batches_regularization
=
None
,
g_default_initial_strategy
=
0
,
g_default_initial_smart
=
False
,
g_default_gradient_clipping_threshold
=
0.
,
g_default_device
=
-
1
,
g_default_gradient_clipping_threshold
=
None
,
g_default_device
=
None
,
g_default_update_hooks
=
None
,
g_default_compact_func
=
None
,
...
...
@@ -1099,12 +1099,12 @@ def Evaluator(
inputs
,
chunk_scheme
=
None
,
num_chunk_types
=
None
,
classification_threshold
=
0.5
,
positive_label
=
-
1
,
dict_file
=
""
,
result_file
=
""
,
num_results
=
1
,
delimited
=
Tru
e
,
classification_threshold
=
None
,
positive_label
=
None
,
dict_file
=
None
,
result_file
=
None
,
num_results
=
None
,
delimited
=
Non
e
,
):
evaluator
=
g_config
.
model_config
.
evaluators
.
add
()
evaluator
.
type
=
type
...
...
@@ -1120,12 +1120,19 @@ def Evaluator(
evaluator
.
num_chunk_types
=
num_chunk_types
g_current_submodel
.
evaluator_names
.
append
(
evaluator
.
name
)
evaluator
.
classification_threshold
=
classification_threshold
evaluator
.
positive_label
=
positive_label
evaluator
.
dict_file
=
dict_file
evaluator
.
result_file
=
result_file
evaluator
.
num_results
=
num_results
evaluator
.
delimited
=
delimited
if
classification_threshold
is
not
None
:
evaluator
.
classification_threshold
=
classification_threshold
if
positive_label
is
not
None
:
evaluator
.
positive_label
=
positive_label
if
dict_file
is
not
None
:
evaluator
.
dict_file
=
dict_file
if
result_file
is
not
None
:
evaluator
.
result_file
=
result_file
if
num_results
is
not
None
:
evaluator
.
num_results
=
num_results
if
delimited
is
not
None
:
evaluator
.
delimited
=
delimited
class
LayerBase
(
object
):
def
__init__
(
...
...
@@ -1137,7 +1144,7 @@ class LayerBase(object):
device
=
None
,
active_type
=
""
,
drop_rate
=
0.
,
coeff
=
1.
):
coeff
=
None
):
config_assert
(
'@'
not
in
name
,
"layer name: %s contain special character @"
%
name
)
global
g_current_submodel
...
...
@@ -1155,10 +1162,12 @@ class LayerBase(object):
             self.inputs = [self.inputs]
         self.config = g_config.model_config.layers.add()
         assert isinstance(self.config, LayerConfig)
         self.config.name = name
         self.config.type = type
         self.config.active_type = active_type
-        self.config.coeff = coeff
+        if coeff is not None:
+            self.config.coeff = float(coeff)
         if size != 0:
             self.config.size = size
         if drop_rate != 0:
...
@@ -1166,7 +1175,7 @@ class LayerBase(object):
         if device is not None:
             self.config.device = device
-        else:
+        elif g_default_device is not None:
             self.config.device = g_default_device
         for input_index in xrange(len(self.inputs)):
...
@@ -1236,10 +1245,12 @@ class LayerBase(object):
         if bias.parameter_name is None:
             bias.parameter_name = gen_bias_parameter_name(self.config.name)
         if bias.parameter_name not in g_parameter_map:
             assert isinstance(self.config, LayerConfig)
             Parameter(
                 bias.parameter_name,
                 size,
-                self.config.device,
+                self.config.device if self.config.HasField('device') else None,
                 dims,
                 bias.learning_rate,
                 bias.momentum,
...
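`HasField` is the generated proto2 accessor that distinguishes a field that was never assigned from one assigned its default. A standalone sketch of why the guard matters (field name taken from the diff):

    cfg = LayerConfig()                # generated protobuf message
    assert not cfg.HasField('device')  # never assigned: no device preference
    cfg.device = -1                    # assign explicitly, even a "default" value
    assert cfg.HasField('device')      # now distinguishable from unset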
@@ -1265,7 +1276,7 @@ class LayerBase(object):
             input_index,
             size,
             dims=None,
-            sparse=False,
+            sparse=None,
             format="csr"):
         if dims is None:
             # TODO(yuyang18): print warning and callstack here!
...
@@ -1293,7 +1304,7 @@ class LayerBase(object):
             Parameter(
                 input_config.parameter_name,
                 size,
-                self.config.device,
+                self.config.device if self.config.HasField("device") else None,
                 dims,
                 input_config.learning_rate,
                 input_config.momentum,
...
@@ -1353,6 +1364,8 @@ class FCLayer(LayerBase):
             if sparse:
                 psize = self.inputs[input_index].nnz
+            else:
+                sparse = None
             self.create_input_parameter(input_index, psize, dims, sparse, format)
         self.create_bias_parameter(bias, self.config.size)
...
@@ -2430,7 +2443,6 @@ class MixedLayer(LayerBase):
         config_assert(inputs, 'inputs cannot be empty')
         super(MixedLayer, self).__init__(name, 'mixed', size,
                                          inputs=inputs, **xargs)
         operator_input_index = []
         for operator in self.operators:
             operator_conf = operator.operator_conf
...
@@ -2445,21 +2457,31 @@ class MixedLayer(LayerBase):
             input_layer = self.get_input_layer(input_index)
             operator_conf.input_sizes.append(input_layer.size)
             operator_input_index.append(input_index)
             if self.config.size == 0:
                 size = operator.calc_output_size(operator_conf.input_sizes)
                 if size != 0:
                     self.set_layer_size(size)
+            else:
+                size = operator.calc_output_size(operator_conf.input_sizes)
+                if size != 0:
+                    config_assert(size == self.config.size,
+                                  "different inputs have different size: %s vs. %s" %
+                                  (size, self.config.size))
         for input_index in xrange(len(self.inputs)):
             input_layer = self.get_input_layer(input_index)
             input = self.inputs[input_index]
             if input_index not in operator_input_index:
                 config_assert(isinstance(input, Projection),
                               "input should be projection or operation")
             if self.config.size == 0 and isinstance(input, Projection):
                 size = input.calc_output_size(input_layer)
                 if size != 0:
                     self.set_layer_size(size)
+            elif isinstance(input, Projection):
+                sz = input.calc_output_size(input_layer)
+                if sz != 0:
+                    config_assert(sz == self.config.size,
+                                  "different inputs have different size: %s vs. %s" %
+                                  (sz, self.config.size))
         config_assert(size != 0, "size is not set")
         for input_index in xrange(len(self.inputs)):
...
@@ -2827,27 +2849,44 @@ def Parameter(
     para = g_config.model_config.parameters.add()
     para.name = name
     para.size = size
-    para.device = device
-    para.dims.extend(dims);
-    para.learning_rate = default(learning_rate, 1.)
-    para.momentum = default(momentum, g_default_momentum)
+    if device is not None:
+        para.device = int(device)
+    para.dims.extend(dims)
+    if learning_rate is not None:
+        para.learning_rate = float(learning_rate)
+    momentum = default(momentum, g_default_momentum)
+    if momentum is not None:
+        para.momentum = float(momentum)
     config_assert(not momentum or not decay_rate_l1,
                   "momentum and decay_rate_l1 cannot both be non-zero")
-    para.decay_rate = default(decay_rate, g_default_decay_rate)
+    decay_rate = default(decay_rate, g_default_decay_rate)
+    if decay_rate is not None:
+        para.decay_rate = decay_rate
     if decay_rate_l1 is not None:
         para.decay_rate_l1 = decay_rate_l1
     para.initial_std = default(initial_std, g_default_initial_std)
     para.initial_mean = default(initial_mean, g_default_initial_mean)
-    para.num_batches_regularization = default(
-        num_batches_regularization, g_default_num_batches_regularization)
+    num_batches_regularization = default(
+        num_batches_regularization, g_default_num_batches_regularization)
+    if num_batches_regularization is not None:
+        para.num_batches_regularization = int(num_batches_regularization)
     if sparse_remote_update is not None:
         para.sparse_remote_update = sparse_remote_update
         if sparse_remote_update:
             g_config.opt_config.use_sparse_remote_updater = True
     if sparse_update is not None:
         para.sparse_update = sparse_update
-    para.gradient_clipping_threshold = default(
-        gradient_clipping_threshold, g_default_gradient_clipping_threshold);
+    gradient_clipping_threshold = default(
+        gradient_clipping_threshold, g_default_gradient_clipping_threshold)
+    if gradient_clipping_threshold is not None:
+        para.gradient_clipping_threshold = gradient_clipping_threshold
     para.initial_strategy = default(initial_strategy, g_default_initial_strategy)
     para.initial_smart = default(initial_smart, g_default_initial_smart)
     if para.initial_smart:
...
@@ -2860,15 +2899,19 @@ def Parameter(
         para.initial_std = 1. / math.sqrt(para.size)
     if g_default_compact_func is not None:
         sparse, format, need_compact = g_default_compact_func(para.name)
-    para.is_sparse = default(sparse, False)
-    para.format = default(format, "")
-    para.need_compact = default(need_compact, False)
+    if sparse is not None:
+        para.is_sparse = sparse
+    if format is not None:
+        para.format = format
+    if need_compact is not None:
+        para.need_compact = need_compact
     if is_static is not None:
         para.is_static = is_static
     config_assert(not para.sparse_remote_update or not para.is_static,
                   "sparse_remote_update and is_static cannot both be true")
-    para.is_shared = default(is_shared, False)
+    if is_shared is not None:
+        para.is_shared = is_shared
     update_hooks = default(update_hooks, g_default_update_hooks)
...
python/paddle/trainer_config_helpers/activations.py
View file @ 9795e016
...
@@ -14,7 +14,7 @@
 __all__ = ["TanhActivation", "SigmoidActivation",
            "SoftmaxActivation", "IdentityActivation", "LinearActivation",
-           'SequenceSoftmaxActivation',
+           'SequenceSoftmaxActivation', 'ExpActivation',
            "ReluActivation", "BReluActivation", "SoftReluActivation",
            "STanhActivation", "AbsActivation", "SquareActivation",
            "BaseActivation"]
...
@@ -185,3 +185,12 @@ class SquareActivation(BaseActivation):
     """
     def __init__(self):
         BaseActivation.__init__(self, 'square', False)
+
+
+class ExpActivation(BaseActivation):
+    """
+    Exponential Activation.
+
+    .. math::
+       f(z) = e^z.
+    """
+    def __init__(self):
+        BaseActivation.__init__(self, 'exponential', False)
python/paddle/trainer_config_helpers/evaluators.py
View file @ 9795e016
...
@@ -65,12 +65,12 @@ def evaluator_base(
         name=None,
         chunk_scheme=None,
         num_chunk_types=None,
-        classification_threshold=0.5,
-        positive_label=-1,
-        dict_file="",
-        result_file="",
-        num_results=1,
-        delimited=True):
+        classification_threshold=None,
+        positive_label=None,
+        dict_file=None,
+        result_file=None,
+        num_results=None,
+        delimited=None):
     """
     Evaluator will evaluate the network status while training/testing.
...
@@ -105,9 +105,10 @@ def evaluator_base(
     :type weight: LayerOutput.
     """
     # inputs type assertions.
-    assert isinstance(classification_threshold, float)
-    assert isinstance(positive_label, int)
-    assert isinstance(num_results, int)
+    assert classification_threshold is None or \
+        isinstance(classification_threshold, float)
+    assert positive_label is None or isinstance(positive_label, int)
+    assert num_results is None or isinstance(num_results, int)
     if not isinstance(input, list):
         input = [input]
...
@@ -136,7 +137,7 @@ def classification_error_evaluator(
         label,
         name=None,
         weight=None,
-        threshold=0.5):
+        threshold=None):
     """
     Classification Error Evaluator. It will print error rate for classification.
...
@@ -253,7 +254,7 @@ def pnpair_evaluator(
 def precision_recall_evaluator(
         input,
         label,
-        positive_label=-1,
+        positive_label=None,
         weight=None,
         name=None,
         ):
...
@@ -494,7 +495,7 @@ def gradient_printer_evaluator(
 @wrap_name_default()
 def maxid_printer_evaluator(
         input,
-        num_results=1,
+        num_results=None,
         name=None,
         ):
     """
...
@@ -518,13 +519,14 @@ def maxid_printer_evaluator(
     """
     evaluator_base(name=name,
                    type="max_id_printer",
-                   input=input)
+                   input=input,
+                   num_results=num_results)


 @evaluator(EvaluatorAttribute.FOR_PRINT)
 @wrap_name_default()
 def maxframe_printer_evaluator(
         input,
-        num_results=1,
+        num_results=None,
         name=None,
         ):
     """
...
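Before this fix, `num_results` was accepted but silently dropped; it is now forwarded to `evaluator_base`. Usage is unchanged, e.g. this sketch (layer name assumed):

    # Print the top-5 ids produced by a maxid layer during training/testing.
    maxid_printer_evaluator(input=maxid_out, num_results=5)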
@@ -556,9 +558,9 @@ def maxframe_printer_evaluator(
 @wrap_name_default()
 def seqtext_printer_evaluator(
         input,
-        dict_file="",
-        result_file="",
-        delimited=True,
+        result_file,
+        dict_file=None,
+        delimited=None,
         name=None,
         ):
     """
...
@@ -616,6 +618,7 @@ def seqtext_printer_evaluator(
     :param name: Evaluator name.
     :type name: None|basestring
     """
+    assert isinstance(result_file, basestring)
     evaluator_base(name=name,
                    type="seq_text_printer",
                    input=input,
...
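With `result_file` promoted to a required argument and asserted to be a string, a call now looks like this sketch (layer and paths illustrative):

    # gen_out is assumed to be the layer whose word ids should be printed.
    seqtext_printer_evaluator(input=gen_out,
                              result_file="./gen_result.txt",
                              dict_file="./word_dict.txt")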
python/paddle/trainer_config_helpers/layers.py
View file @ 9795e016
...
@@ -28,7 +28,7 @@ except ImportError:
 import copy

 __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
-           "identity_projection", "dotmul_projection",
+           "identity_projection", "dotmul_projection", "dotmul_operator",
            "table_projection", "mixed_layer", "data_layer", "embedding_layer",
            "fc_layer", "grumemory", "pooling_layer", "lstmemory",
            "last_seq", "first_seq",
...
@@ -389,7 +389,7 @@ def identity_projection(input, offset=None):
 @wrap_param_attr_default()
 def dotmul_projection(input, param_attr=None, scale=1):
     """
-    1. DotMulProjection if input is a layer.
+    DotMulProjection with a layer as input.
     It performs element-wise multiplication with weight.

     .. math::
...
@@ -403,48 +403,45 @@ def dotmul_projection(input, param_attr=None, scale=1):
        proj = dotmul_projection(input=layer)

-    2. DotMulOperator if input is a list or tuple.
-    It takes two inputs, performs element-wise multiplication:
-
-    .. math::
-       out.row[i] += scale * (in1.row[i] .* in2.row[i])
-
-    where :math:`.*` means element-wise multiplication, and
-    scale is a config scalar, its default value is one.
-
-    The example usage is:
-
-    .. code-block:: python
-
-       op = dotmul_projection(input=[layer1, layer2],
-                              scale=2.0)
-
     :param input: Input layer.
-    :type input: LayerOutput|list|tuple
+    :type input: LayerOutput
     :param param_attr: Parameter config, None if use default.
     :type param_attr: ParameterAttribute
-    :param scale: config scalar, default value is one.
-    :type scale: float
-    :return: A DotMulProjection or DotMulOperator Object.
-    :rtype: DotMulProjection or DotMulOperator
+    :return: A DotMulProjection Object.
+    :rtype: DotMulProjection
     """
-    if isinstance(input, LayerOutput):
-        proj = DotMulProjection(input_layer_name=input.name,
-                                size=input.size,
-                                **param_attr.attr)
-        proj.origin = input
-        proj.origin.projection = "dot_mul"
-        return proj
-    else:
-        assert isinstance(input, list) or isinstance(input, tuple)
-        assert len(input) == 2
-        assert param_attr is None
-        op = DotMulOperator(input_layer_name=[x.name for x in input],
-                            scale=scale)
-        op.origin = input
-        op.origin.operator = "dot_mul"
-        return op
+    proj = DotMulProjection(input_layer_name=input.name,
+                            size=input.size,
+                            **param_attr.attr)
+    proj.origin = input
+    return proj
+
+
+def dotmul_operator(x, y, scale=1):
+    """
+    DotMulOperator takes two inputs and performs element-wise multiplication:
+
+    .. math::
+       out.row[i] += scale * (in1.row[i] .* in2.row[i])
+
+    where :math:`.*` means element-wise multiplication, and
+    scale is a config scalar, its default value is one.
+
+    The example usage is:
+
+    .. code-block:: python
+
+       op = dotmul_operator(x, y, scale=1)
+
+    :param input: Input layer
+    :type input: LayerOutput
+    :param scale: config scalar, default value is one.
+    :type scale: float
+    :return: A DotMulOperator Object.
+    :rtype: DotMulOperator
+    """
+    assert isinstance(x, LayerOutput)
+    assert isinstance(y, LayerOutput)
+    op = DotMulOperator(input_layer_names=[x.name, y.name],
+                        scale=scale)
+    op.origin = [x, y]
+    return op


 @wrap_bias_attr_default(['padding_attr'])
 def context_projection(input, context_len, context_start=None,
...
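The two-input case thus moves out of `dotmul_projection` into the explicit `dotmul_operator`. A sketch of the split API, assuming two equally sized layers (note that `mixed_layer` can now infer its size, per the `size=0` default further below):

    a = data_layer(name='a', size=128)
    b = data_layer(name='b', size=128)
    # Element-wise product of a and b, plus a dot-mul projection of a.
    m = mixed_layer(input=[dotmul_operator(x=a, y=b),
                           dotmul_projection(input=a)])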
@@ -539,7 +536,10 @@ class MixedLayerType(LayerOutput):
         if not self.finalized:
             assert isinstance(other, Projection) or isinstance(other, Operator)
             self.inputs.append(other)
-            self.parents.append(other.origin)
+            if isinstance(other, Projection):
+                self.parents.append(other.origin)
+            else:
+                self.parents.extend(other.origin)
             return self
         else:
             raise MixedLayerType.AddToSealedMixedLayerException()
...
@@ -565,7 +565,7 @@ class MixedLayerType(LayerOutput):
 @wrap_act_default(act=LinearActivation())
 @wrap_bias_attr_default(has_bias=False)
 @layer_support(ERROR_CLIPPING, DROPOUT)
-def mixed_layer(size, input=None, name=None, act=None, bias_attr=False,
+def mixed_layer(size=0, input=None, name=None, act=None, bias_attr=False,
                 layer_attr=None):
     """
     Mixed Layer. A mixed layer will add all inputs together, then activate.
...
python/paddle/trainer_config_helpers/optimizers.py
View file @ 9795e016
...
@@ -79,7 +79,7 @@ class MomentumOptimizer(BaseSGDOptimizer):
         'learning_method': 'momentum'
     }

-    def __init__(self, momentum=1e-3):
+    def __init__(self, momentum=None):
         self.momentum = momentum
...
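With the default now `None`, constructing the optimizer without arguments no longer forces `momentum=1e-3`; the global default applies instead. A minimal usage sketch (values illustrative):

    settings(batch_size=128,
             learning_rate=1e-3,
             learning_method=MomentumOptimizer(momentum=0.9))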
python/paddle/trainer_config_helpers/tests/layers_test_config.py
View file @ 9795e016
...
@@ -38,8 +38,11 @@ print_layer(input=[out])

 outputs(classification_cost(out, data_layer(name="label", size=num_classes)))

+dotmul = mixed_layer(input=[dotmul_operator(x=x1, y=y1),
+                            dotmul_projection(input=y1)])
+
 # for ctc
-tmp = fc_layer(input=x1,
+tmp = fc_layer(input=[x1, dotmul],
                size=num_classes + 1,
                act=SoftmaxActivation())
 ctc = ctc_layer(input=tmp,
...