提交 9795e016 编写于 作者: L liaogang

Merge remote-tracking branch 'upstream/master'

...@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8) ...@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8)
project(paddle CXX C) project(paddle CXX C)
set(PADDLE_MAJOR_VERSION 0) set(PADDLE_MAJOR_VERSION 0)
set(PADDLE_MINOR_VERSION 8) set(PADDLE_MINOR_VERSION 8)
set(PADDLE_PATCH_VERSION 0b0) set(PADDLE_PATCH_VERSION 0b1)
set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION}) set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
...@@ -15,7 +15,7 @@ find_package(Protobuf REQUIRED) ...@@ -15,7 +15,7 @@ find_package(Protobuf REQUIRED)
find_package(PythonLibs 2.7 REQUIRED) find_package(PythonLibs 2.7 REQUIRED)
find_package(PythonInterp 2.7 REQUIRED) find_package(PythonInterp 2.7 REQUIRED)
find_package(ZLIB REQUIRED) find_package(ZLIB REQUIRED)
find_package(NumPy) find_package(NumPy REQUIRED)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(Glog) find_package(Glog)
find_package(Gflags QUIET) find_package(Gflags QUIET)
......
...@@ -104,10 +104,9 @@ function(link_paddle_exe TARGET_NAME) ...@@ -104,10 +104,9 @@ function(link_paddle_exe TARGET_NAME)
${PROTOBUF_LIBRARY} ${PROTOBUF_LIBRARY}
${CMAKE_THREAD_LIBS_INIT} ${CMAKE_THREAD_LIBS_INIT}
${CBLAS_LIBS} ${CBLAS_LIBS}
${INTERAL_LIBS}
${ZLIB_LIBRARIES} ${ZLIB_LIBRARIES}
${CMAKE_DL_LIBS} ${INTERAL_LIBS}
) ${CMAKE_DL_LIBS})
if(WITH_PYTHON) if(WITH_PYTHON)
target_link_libraries(${TARGET_NAME} target_link_libraries(${TARGET_NAME}
......
...@@ -128,12 +128,16 @@ def gru_encoder_decoder(data_conf, ...@@ -128,12 +128,16 @@ def gru_encoder_decoder(data_conf,
return out return out
decoder_group_name = "decoder_group" decoder_group_name = "decoder_group"
group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
StaticInput(input=encoded_proj,is_seq=True)]
if not is_generating: if not is_generating:
trg_embedding = embedding_layer( trg_embedding = embedding_layer(
input=data_layer(name='target_language_word', input=data_layer(name='target_language_word',
size=target_dict_dim), size=target_dict_dim),
size=word_vector_dim, size=word_vector_dim,
param_attr=ParamAttr(name='_target_language_embedding')) param_attr=ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training, # For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input, # target embeding (the groudtruth) is the data input,
...@@ -142,22 +146,13 @@ def gru_encoder_decoder(data_conf, ...@@ -142,22 +146,13 @@ def gru_encoder_decoder(data_conf,
# for the recurrent_group. # for the recurrent_group.
decoder = recurrent_group(name=decoder_group_name, decoder = recurrent_group(name=decoder_group_name,
step=gru_decoder_with_attention, step=gru_decoder_with_attention,
input=[ input=group_inputs)
StaticInput(input=encoded_vector,
is_seq=True),
StaticInput(input=encoded_proj,
is_seq=True), trg_embedding
])
lbl = data_layer(name='target_language_next_word', lbl = data_layer(name='target_language_next_word',
size=target_dict_dim) size=target_dict_dim)
cost = classification_cost(input=decoder, label=lbl, ) cost = classification_cost(input=decoder, label=lbl)
outputs(cost) outputs(cost)
else: else:
gen_inputs = [StaticInput(input=encoded_vector,
is_seq=True),
StaticInput(input=encoded_proj,
is_seq=True), ]
# In generation, the decoder predicts a next target word based on # In generation, the decoder predicts a next target word based on
# the encoded source sequence and the last generated target word. # the encoded source sequence and the last generated target word.
...@@ -171,10 +166,11 @@ def gru_encoder_decoder(data_conf, ...@@ -171,10 +166,11 @@ def gru_encoder_decoder(data_conf,
size=target_dict_dim, size=target_dict_dim,
embedding_name='_target_language_embedding', embedding_name='_target_language_embedding',
embedding_size=word_vector_dim) embedding_size=word_vector_dim)
gen_inputs.append(trg_embedding) group_inputs.append(trg_embedding)
beam_gen = beam_search(name=decoder_group_name, beam_gen = beam_search(name=decoder_group_name,
step=gru_decoder_with_attention, step=gru_decoder_with_attention,
input=gen_inputs, input=group_inputs,
id_input=data_layer(name="sent_id", id_input=data_layer(name="sent_id",
size=1), size=1),
dict_file=trg_dict_path, dict_file=trg_dict_path,
......
...@@ -25,9 +25,12 @@ repo or just head straight to the command line: ...@@ -25,9 +25,12 @@ repo or just head straight to the command line:
```shell ```shell
# Clone your fork to your local machine # Clone your fork to your local machine
git clone git@github.com:USERNAME/Paddle.git git clone https://github.com/USERNAME/Paddle.git
```
Then you can start to develop by making a local development branch:
```shell
git checkout -b MY_COOL_STUFF_BRANCH origin/master
``` ```
Then you can start to develop.
## Commit ## Commit
...@@ -45,7 +48,7 @@ are the details if any. ...@@ -45,7 +48,7 @@ are the details if any.
## Keeping Fork Up to Date ## Keeping Fork Up to Date
Before pull your request, you shold sync you code from the latest PaddlePaddle. Before pull your request, you should sync your code from the latest PaddlePaddle.
To do this, you'll need to add a remote at first: To do this, you'll need to add a remote at first:
```shell ```shell
...@@ -60,8 +63,7 @@ git remote -v ...@@ -60,8 +63,7 @@ git remote -v
Update your fork with the latest upstream changes: Update your fork with the latest upstream changes:
```shell ```shell
git fetch upstream git pull --rebase upstream HEAD
git pull upstream master
``` ```
If there are no unique commits locally, git will simply perform a fast-forward. If there are no unique commits locally, git will simply perform a fast-forward.
...@@ -74,10 +76,26 @@ Now, your local master branch is up-to-date with everything modified upstream. ...@@ -74,10 +76,26 @@ Now, your local master branch is up-to-date with everything modified upstream.
```shell ```shell
# push to your repository in Github # push to your repository in Github
git push origin master git push origin HEAD
``` ```
## Pull Request ## Pull Request
Go to the page for your fork on GitHub, select your development branch, Go to the page for your fork on GitHub, select your development branch,
and click the **pull request button**. and click the **pull request button**.
## Update your pull request with the latest version
During the code review, your pull request may become stale because of new commits in
baidu/Paddle. GitHub allows an automatic update if there is no conflict. You can do this
by clicking the "Update Branch" button on your pull request page. However, in the case
of a conflict, you need to do the update manually. You need to do the following in
your local repository:
```shell
git checkout MY_COOL_STUFF_BRANCH
git pull --rebase upstream HEAD
# You may need to resolve the conflict according to the git prompt.
# Make and test your code.
git push -f origin HEAD
```
Now your Pull Request is updated with the latest version.
...@@ -12,6 +12,13 @@ AbsActivation ...@@ -12,6 +12,13 @@ AbsActivation
:members: AbsActivation :members: AbsActivation
:noindex: :noindex:
ExpActivation
===============
.. automodule:: paddle.trainer_config_helpers.activations
:members: ExpActivation
:noindex:
IdentityActivation IdentityActivation
================== ==================
......
...@@ -24,7 +24,7 @@ A small part of the original data as an example is shown as below: ...@@ -24,7 +24,7 @@ A small part of the original data as an example is shown as below:
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_train.txt .. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_train.txt
Each line of the data contains two parts, separated by ';'. The first part is Each line of the data contains two parts, separated by :code:`;`. The first part is
label of an image. The second part contains 28x28 pixel float values. label of an image. The second part contains 28x28 pixel float values.
Just write path of the above data into train.list. It looks like this: Just write path of the above data into train.list. It looks like this:
...@@ -74,7 +74,20 @@ you can take this as an example. ...@@ -74,7 +74,20 @@ you can take this as an example.
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_config.py .. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_config.py
Here we specify training data by 'train.list', and no testing data is specified. Here we specify training data by :code:`train.list`, and no testing data is specified.
The method which actually provides data is :code:`process`.
Users can also use another style to provide data, which specifies the
:code:`data_layer`'s name explicitly in each :code:`yield`. For example,
the :code:`dataprovider` is shown below.
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_provider.dict.py
:linenos:
If the user doesn't give the :code:`data_layer`'s name, PaddlePaddle will roughly use
the order of the :code:`data_layer` definitions to determine which feature goes to
which :code:`data_layer`. This order may not be correct, so DEFINING THE
:code:`data_layer`'s NAMES EXPLICITLY IS THE RECOMMENDED WAY TO PROVIDE DATA.
Now, this simple example of using PyDataProvider is finished. Now, this simple example of using PyDataProvider is finished.
The only thing that the user should know is how to generte **one sample** from The only thing that the user should know is how to generte **one sample** from
...@@ -93,7 +106,7 @@ DataProvider for the sequential model ...@@ -93,7 +106,7 @@ DataProvider for the sequential model
------------------------------------- -------------------------------------
A sequence model takes sequences as its input. A sequence is made up of several A sequence model takes sequences as its input. A sequence is made up of several
timesteps. The so-called timestep, is not necessary to have something to do timesteps. The so-called timestep, is not necessary to have something to do
with 'time'. It can also be explained to that the order of data are taken into with time. It can also be explained to that the order of data are taken into
consideration into model design and training. consideration into model design and training.
For example, the sentence can be interpreted as a kind of sequence data in NLP For example, the sentence can be interpreted as a kind of sequence data in NLP
tasks. tasks.
...@@ -155,23 +168,7 @@ Reference ...@@ -155,23 +168,7 @@ Reference
@provider @provider
+++++++++ +++++++++
'@provider' is a Python `Decorator`_, it can construct a PyDataProvider in .. autofunction:: paddle.trainer.PyDataProvider2.provider
PaddlePaddle from a user defined function. Its parameters are:
* `input_types`_ defines format of the data input.
* should_shuffle defines whether to shuffle data or not. By default, it is set
true during training, and false during testing.
* pool_size is the memory pool size (in sample number) in DataProvider.
-1 means no limit.
* can_over_batch_size defines whether PaddlePaddle can store little more
samples than pool_size. It is better to set True to avoid some deadlocks.
* calc_batch_size is a function define how to calculate batch size. This is
usefull in sequential model, that defines batch size is counted upon sequence
or token. By default, each sample or sequence counts to 1 when calculating
batch size.
* cache is a data cache strategy, see `cache`_.
* Init_hook function is invoked once the data provider is initialized,
see `init_hook`_.
input_types input_types
+++++++++++ +++++++++++
......
...@@ -4,3 +4,5 @@ define_py_data_sources2(train_list='train.list', ...@@ -4,3 +4,5 @@ define_py_data_sources2(train_list='train.list',
test_list=None, test_list=None,
module='mnist_provider', module='mnist_provider',
obj='process') obj='process')
img = data_layer(name='pixel', size=784)
label = data_layer(name='label', size=10)
from paddle.trainer.PyDataProvider2 import *
# Define a py data provider
@provider(input_types=[
    dense_vector(28 * 28),
    integer_value(10)
])
def process(settings, filename):  # settings is not used currently.
    """Yield one sample per line of ``filename``.

    Each line holds two parts separated by ``;``: the label, then the
    whitespace-separated pixel values.

    :param settings: provider settings object; not used by this function.
    :param filename: path of one training file to read.
    :return: a generator of dicts with key ``"pixel"`` mapped to a list of
             floats and key ``'label'`` mapped to an int.
    """
    # `with` guarantees the file is closed even when a line fails to parse
    # or the consumer stops iterating before the file is exhausted.
    with open(filename, 'r') as f:  # open one of training file
        for line in f:  # read each line
            label, pixel = line.split(';')
            # Split on any whitespace run so a trailing space or the line's
            # newline never produces an empty token that float() rejects.
            pixels_float = [float(each_pixel_str)
                            for each_pixel_str in pixel.split()]
            # give data to paddle.
            yield {"pixel": pixels_float, 'label': int(label)}
...@@ -56,6 +56,14 @@ process函数调用多次 :code:`yield` 即可。 :code:`yield` 是Python的一 ...@@ -56,6 +56,14 @@ process函数调用多次 :code:`yield` 即可。 :code:`yield` 是Python的一
这里说明了训练数据是 'train.list',而没有测试数据。引用的DataProvider是 'mnist_provider' 这里说明了训练数据是 'train.list',而没有测试数据。引用的DataProvider是 'mnist_provider'
这个模块中的 'process' 函数。 这个模块中的 'process' 函数。
同时,根据模型配置文件中 :code:`data_layer` 的名字,用户也可以显式指定返回的数据对应关系。例如:
.. literalinclude:: mnist_provider.dict.py
:linenos:
如果用户不指定返回数据的对应关系,那么PaddlePaddle会粗略的根据layer的声明顺序,
来确定对应关系。这个对应关系可能不正确。所以推荐使用显式指定返回值和数据对应关系。
至此,简单的PyDataProvider样例就说明完毕了。对于用户来说,讲数据发送给PaddlePaddle,仅仅需要 至此,简单的PyDataProvider样例就说明完毕了。对于用户来说,讲数据发送给PaddlePaddle,仅仅需要
知道如何从 **一个文件** 里面读取 **一条** 样本。而PaddlePaddle进程帮助用户做了 知道如何从 **一个文件** 里面读取 **一条** 样本。而PaddlePaddle进程帮助用户做了
...@@ -119,11 +127,13 @@ DataProvider创建的时候执行。这个初始化函数具有如下参数: ...@@ -119,11 +127,13 @@ DataProvider创建的时候执行。这个初始化函数具有如下参数:
@provider @provider
+++++++++ +++++++++
'@provider'是一个Python的 `Decorator`_ ,他可以将某一个函数标记成一个PyDataProvider。它包含的参数有: :code:`@provider` 是一个Python的 `Decorator`_ ,他可以将某一个函数标记成一个PyDataProvider。它包含的参数有:
* `input_types`_ 是数据输入格式。具体有哪些格式,参考 `input_types`_ 。 * `input_types`_ 是数据输入格式。具体有哪些格式,参考 `input_types`_ 。
* should_shuffle 是个DataProvider是不是要做shuffle,如果不设置的话,训练的时候默认shuffle, * should_shuffle 是个DataProvider是不是要做shuffle,如果不设置的话,训练的时候默认shuffle,
测试的时候默认不shuffle 测试的时候默认不shuffle。
* min_pool_size 是设置DataProvider在内存中最小暂存的数据条数。这个也是PaddlePaddle所能够保证的shuffle粒度。
设置成-1的话,会预先读取全部数据到内存中。
* pool_size 是设置DataProvider在内存中暂存的数据条数。设置成-1的话,即不在乎内存暂存多少条数据。 * pool_size 是设置DataProvider在内存中暂存的数据条数。设置成-1的话,即不在乎内存暂存多少条数据。
* can_over_batch_size 表示是否允许Paddle暂存略微多余pool_size的数据。这样做可以避免很多死锁问题。 * can_over_batch_size 表示是否允许Paddle暂存略微多余pool_size的数据。这样做可以避免很多死锁问题。
一般推荐设置成True 一般推荐设置成True
...@@ -131,6 +141,11 @@ DataProvider创建的时候执行。这个初始化函数具有如下参数: ...@@ -131,6 +141,11 @@ DataProvider创建的时候执行。这个初始化函数具有如下参数:
是一个batch size,但是有时为了计算均衡性,可以将一条数据设置成多个batch size 是一个batch size,但是有时为了计算均衡性,可以将一条数据设置成多个batch size
* cache 是数据缓存的策略,参考 `cache`_ * cache 是数据缓存的策略,参考 `cache`_
* init_hook 是初始化时调用的函数,参考 `init_hook`_ * init_hook 是初始化时调用的函数,参考 `init_hook`_
* use_dynamic_order 如果是true的话,可以返回一个dict,key是data_layer的名字,value是特征值。同时,也可以
返回一个list或者tuple。如果是false的话,只能够返回list或者tuple
* check 设置成true的话,会根据input_types检查数据的合法性。
* check_fail_continue 如果设置成true的话,即使在check中数据不合法,也会扔掉这条数据,继续训练。如果
check是false的话,没有作用。
input_types input_types
+++++++++++ +++++++++++
...@@ -190,3 +205,55 @@ DataProvider提供了两种简单的Cache策略。他们是 ...@@ -190,3 +205,55 @@ DataProvider提供了两种简单的Cache策略。他们是
* CacheType.NO_CACHE 不缓存任何数据,每次都会从python端读取数据 * CacheType.NO_CACHE 不缓存任何数据,每次都会从python端读取数据
* CacheType.CACHE_PASS_IN_MEM 第一个pass会从python端读取数据,剩下的pass会直接从内存里 * CacheType.CACHE_PASS_IN_MEM 第一个pass会从python端读取数据,剩下的pass会直接从内存里
读取数据。 读取数据。
注意事项
--------
可能的内存泄露问题
++++++++++++++++++
PaddlePaddle将train.list中的每一行,都传递给process函数,从而生成多个generator。
即如果train.list中,有100个训练文件,即会生成100个generator。这个本身不是一个很
严重的问题。
但是,如果在训练时,每一条训练数据都是一个文件,并且,训练数据非常多的情况下,就
会生成多个generator。每个generator在没有调用的时候,是几乎不占内存的。但是,当调
用过一次的时候,generator便会存下当前的上下文(Context)。而这个Context可能会非常
大。并且,generator至少调用两次才会知道是否停止。所以,即使在process里面只会有一
个yield,也需要两次随机选择到同样的generator的时候,才会释放该段内存。
.. code-block:: python
def func():
yield 0
f = func() # 创建generator
tmp = next(f) # 调用一次,返回0
tmp = next(f) # 调用第二次的时候,才会Stop Iteration
而如果按顺序调用这些generator就不会出现这个问题。
所以最佳实践推荐不要将每一个样本都放入train.list。而是将样本的地址放入另一个文本
文件,train.list写入那个文本文件的地址。 或者在python generator的上下文中尽量留
下非常少的变量引用。例如
.. code-block:: python
def real_process(fn):
# ... read from fn
return result # 当函数返回的时候,python可以解除掉内部变量的引用。
def process(fn):
yield real_process(fn)
这个问题是PyDataProvider读数据时候的逻辑问题,基本上不能整体修正。
内存不够用的情况
++++++++++++++++
PyDataProvider2会尽量使用内存。所以如果对于内存比较小的机器,推荐设置
:code:`pool_size` 变量,而这个变量推荐大于训练的batch size,并且在内存足够
的情况下越大越好。
...@@ -149,9 +149,13 @@ void DoubleBuffer::startAsyncLoad() { ...@@ -149,9 +149,13 @@ void DoubleBuffer::startAsyncLoad() {
taskReadySem_.post(); taskReadySem_.post();
} }
ClassRegistrar<DataProvider, DataConfig, bool> DataProvider::registrar_; ClassRegistrar<DataProvider, DataConfig, ModelConfig, bool>
DataProvider* DataProvider::create(const DataConfig& config, bool useGpu) { DataProvider::registrar_;
return registrar_.createByType(config.type(), config, useGpu);
DataProvider* DataProvider::create(const DataConfig& config,
const ModelConfig& modelConfig,
bool useGpu) {
return registrar_.createByType(config.type(), config, modelConfig, useGpu);
} }
REGISTER_DATA_PROVIDER(simple, SimpleDataProvider); REGISTER_DATA_PROVIDER(simple, SimpleDataProvider);
......
...@@ -39,15 +39,30 @@ limitations under the License. */ ...@@ -39,15 +39,30 @@ limitations under the License. */
#include "paddle/parameter/Argument.h" #include "paddle/parameter/Argument.h"
namespace paddle { namespace paddle {
/** /**
* @def REGISTER_DATA_PROVIDER * @def REGISTER_DATA_PROVIDER
* @brief Macro for registering a data provider * @brief Macro for registering a data provider. The class type should contain
 * a constructor with parameters (DataConfig, bool).
*/ */
#define REGISTER_DATA_PROVIDER(__type_name, __class_name) \ #define REGISTER_DATA_PROVIDER(__type_name, __class_name)\
static InitFunction __reg_type_##__type_name([]() { \ static InitFunction __reg_type_##__type_name([]() {\
DataProvider::registrar_.registerClass<__class_name>(#__type_name); \ DataProvider::registrar_.registerClass(\
}) #__type_name, \
[](DataConfig conf, ModelConfig, bool useGpu) -> DataProvider* { \
DataProvider* dp = new __class_name (conf, useGpu);\
return dp;\
});\
})
/**
 * @def REGISTER_DATA_PROVIDER_EX
 * @brief Macro for registering a data provider whose class has a constructor
 * taking (DataConfig, ModelConfig, bool).
 *
 * Expands to a static InitFunction object named __reg_type_<__type_name>
 * whose callback registers __class_name in DataProvider::registrar_ under
 * the stringified __type_name.
 *
 * @param __type_name identifier used (stringified) as the registration key.
 * @param __class_name class passed as the template argument of registerClass.
 */
#define REGISTER_DATA_PROVIDER_EX(__type_name, __class_name) \
static InitFunction __reg_type_##__type_name([] { \
DataProvider::registrar_.registerClass<__class_name>(#__type_name); \
})
class DataBatch; class DataBatch;
class BufferBatch; class BufferBatch;
...@@ -285,10 +300,18 @@ protected: ...@@ -285,10 +300,18 @@ protected:
*/ */
class DataProvider { class DataProvider {
public: public:
static ClassRegistrar<DataProvider, DataConfig, bool> registrar_; static ClassRegistrar<DataProvider, DataConfig, ModelConfig, bool> registrar_;
static DataProvider* create(const DataConfig& config, static DataProvider* create(const DataConfig& config,
const ModelConfig& modelConfig,
bool useGpu = FLAGS_use_gpu); bool useGpu = FLAGS_use_gpu);
/**
 * @brief Convenience overload of create() that is only used for unittest.
 *
 * Forwards to the three-argument create() with a default-constructed
 * ModelConfig.
 *
 * @param config data provider configuration, forwarded unchanged.
 * @param useGpu forwarded unchanged.
 * @return the pointer returned by the three-argument create().
 */
inline static DataProvider* create(const DataConfig &config, bool useGpu) {
return create(config, ModelConfig(), useGpu);
}
DataProvider(const DataConfig& config, bool useGpu) DataProvider(const DataConfig& config, bool useGpu)
: config_(config), : config_(config),
skipShuffle_(false), skipShuffle_(false),
...@@ -336,13 +359,13 @@ public: ...@@ -336,13 +359,13 @@ public:
* @note return -1 to indicate unlimited number of samples. * @note return -1 to indicate unlimited number of samples.
*/ */
virtual int64_t getSize() = 0; virtual int64_t getSize() = 0;
/** /**
* @brief Get next batch training samples internally * @brief Get next batch training samples internally
* @param[in] size size of training samples to get * @param[in] size size of training samples to get
* @param[out] batch a batch of training samples * @param[out] batch a batch of training samples
* @return actual size of obtained training samples * @return actual size of obtained training samples
*/ */
virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch) = 0; virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch) = 0;
protected: protected:
......
...@@ -22,7 +22,9 @@ namespace paddle { ...@@ -22,7 +22,9 @@ namespace paddle {
using namespace std; using namespace std;
MultiDataProvider::MultiDataProvider(const DataConfig& config, bool useGpu) MultiDataProvider::MultiDataProvider(const DataConfig& config,
const ModelConfig& modelConfig,
bool useGpu)
: DataProvider(config, useGpu) { : DataProvider(config, useGpu) {
bool atLeastOneMainDataFlag = false; bool atLeastOneMainDataFlag = false;
totalDataRatio_ = 0; totalDataRatio_ = 0;
...@@ -58,7 +60,9 @@ MultiDataProvider::MultiDataProvider(const DataConfig& config, bool useGpu) ...@@ -58,7 +60,9 @@ MultiDataProvider::MultiDataProvider(const DataConfig& config, bool useGpu)
subConfig.set_async_load_data(false); subConfig.set_async_load_data(false);
} }
subDataProviders_[i] = subDataProviders_[i] =
std::unique_ptr<DataProvider>(DataProvider::create(subConfig, useGpu_)); std::unique_ptr<DataProvider>(DataProvider::create(subConfig,
modelConfig,
useGpu_));
} }
} }
...@@ -116,6 +120,6 @@ int64_t MultiDataProvider::getNextBatchInternal(int64_t size, ...@@ -116,6 +120,6 @@ int64_t MultiDataProvider::getNextBatchInternal(int64_t size,
return batch->getSize(); return batch->getSize();
} }
REGISTER_DATA_PROVIDER(multi, MultiDataProvider); REGISTER_DATA_PROVIDER_EX(multi, MultiDataProvider);
} // namespace paddle } // namespace paddle
...@@ -24,7 +24,9 @@ protected: ...@@ -24,7 +24,9 @@ protected:
std::vector<std::unique_ptr<DataProvider>> subDataProviders_; std::vector<std::unique_ptr<DataProvider>> subDataProviders_;
public: public:
MultiDataProvider(const DataConfig& config, bool useGpu); MultiDataProvider(const DataConfig& config,
const ModelConfig& modelConfig,
bool useGpu);
~MultiDataProvider() {} ~MultiDataProvider() {}
virtual void reset(); virtual void reset();
virtual void shuffle(); virtual void shuffle();
......
...@@ -24,6 +24,27 @@ limitations under the License. */ ...@@ -24,6 +24,27 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace unittest {

/// Test-only callback slot. Holds no callback (null) until a hook is
/// installed; when set, the callback receives the pool's actual size.
static std::unique_ptr<std::function<void(size_t /*poolActualSize */)>>
    OnPoolFilled;

namespace pydp2 {

/// Install `callback` as the pool-filled hook, replacing any previous hook.
void setOnPoolFilledHook(const std::function<void(size_t)>& callback) {
  OnPoolFilled.reset(new std::function<void(size_t)>(callback));
}

/// Remove the currently installed hook, if any.
void clearOnPoolFilledHook() {
  OnPoolFilled = nullptr;
}

}  // namespace pydp2
}  // namespace unittest
/** /**
* Slot type * Slot type
*/ */
...@@ -179,6 +200,7 @@ public: ...@@ -179,6 +200,7 @@ public:
* Ctor * Ctor
*/ */
PyDataProvider2(const DataConfig& config, PyDataProvider2(const DataConfig& config,
const ModelConfig& modelConfig,
bool useGpu) bool useGpu)
:DataProvider(config, useGpu), callingContextCreated_(2) { :DataProvider(config, useGpu), callingContextCreated_(2) {
auto& args = config.load_data_args(); auto& args = config.load_data_args();
...@@ -192,6 +214,12 @@ public: ...@@ -192,6 +214,12 @@ public:
py::DictHelper kwargsDict(kwargs); py::DictHelper kwargsDict(kwargs);
kwargsDict.setBool("is_train", !config.for_test()); kwargsDict.setBool("is_train", !config.for_test());
std::vector<std::string> inputs;
inputs.reserve(modelConfig.input_layer_names().size());
std::copy(modelConfig.input_layer_names().begin(),
modelConfig.input_layer_names().end(),
std::back_inserter(inputs));
kwargsDict.setStringList("input_order", inputs);
// kwargs is keyword arguemts to create object. // kwargs is keyword arguemts to create object.
this->createPyDataObj(config.load_data_module(), this->createPyDataObj(config.load_data_module(),
...@@ -199,7 +227,7 @@ public: ...@@ -199,7 +227,7 @@ public:
config.files(), config.files(),
std::move(kwargs)); std::move(kwargs));
DBG << "Instance " << instance_.get() << " loaded."; DBG << "Instance " << instance_.get() << " loaded.";
this->readPyFields(); this->readPyFields(config.for_test());
DBG << "Py Field Done"; DBG << "Py Field Done";
} }
...@@ -253,14 +281,28 @@ private: ...@@ -253,14 +281,28 @@ private:
CHECK_PY(instance_) << "Cannot Create instance"; CHECK_PY(instance_) << "Cannot Create instance";
} }
void readPyFields() { void readPyFields(bool testing) {
py::ObjectHelper self(this->instance_); py::ObjectHelper self(this->instance_);
this->skipShuffle_ = !self.getBoolAttr("should_shuffle");
bool ok; bool ok;
this->skipShuffle_ = !self.getBoolAttr("should_shuffle",
&ok /*isBoolType*/);
if (!ok) {
this->skipShuffle_ = testing; // shuffle when is training, skip shuffle
// when is testing.
}
DBG << "Provider Skip Shuffle " << this->skipShuffle_;
this->poolSize_ = self.getIntAttr<size_t>("pool_size", &ok); this->poolSize_ = self.getIntAttr<size_t>("pool_size", &ok);
if (!ok) { if (!ok) {
this->poolSize_ = -1UL; this->poolSize_ = -1UL;
} }
this->minPoolSize_ = self.getIntAttr<size_t>("min_pool_size", &ok);
if (!ok) {
this->minPoolSize_ = -1UL;
}
this->minPoolSize_ = std::min(this->poolSize_, this->minPoolSize_);
this->canOverBatchSize_ = self.getBoolAttr("can_over_batch_size"); this->canOverBatchSize_ = self.getBoolAttr("can_over_batch_size");
calcBatchSize_.reset(self.getAttr("calc_batch_size")); calcBatchSize_.reset(self.getAttr("calc_batch_size"));
...@@ -307,7 +349,6 @@ private: ...@@ -307,7 +349,6 @@ private:
} }
void loadThread() { void loadThread() {
callingContexts_.reserve(fileLists_.size());
DBG << "Creating context"; DBG << "Creating context";
for (auto& filename : fileLists_) { for (auto& filename : fileLists_) {
PyGuard g; PyGuard g;
...@@ -332,7 +373,14 @@ private: ...@@ -332,7 +373,14 @@ private:
bool atEnd; bool atEnd;
data = py::iterNext(callingContexts_[cid], &atEnd); data = py::iterNext(callingContexts_[cid], &atEnd);
if (atEnd || data == nullptr) { if (atEnd || data == nullptr) {
callingContexts_.erase(callingContexts_.begin() + cid); if (cid != 0) {
std::swap(callingContexts_[cid], callingContexts_[0]);
cid = 0;
}
{
PyGuard g;
callingContexts_.pop_front();
}
this->pullCV_.notify_all(); this->pullCV_.notify_all();
continue; continue;
} }
...@@ -354,11 +402,7 @@ private: ...@@ -354,11 +402,7 @@ private:
if (this->loadThread_){ // wait poolActualSize < poolSize; if (this->loadThread_){ // wait poolActualSize < poolSize;
std::unique_lock<std::mutex> l(mtx_); std::unique_lock<std::mutex> l(mtx_);
pushCV_.wait(l, [this, additionalBatchSize] { pushCV_.wait(l, [this, additionalBatchSize] {
if (this->canOverBatchSize_) { return this->poolActualSize_ < poolSize_;
return this->poolActualSize_ < poolSize_;
} else {
return this->poolActualSize_ + additionalBatchSize < poolSize_;
}
}); });
} }
...@@ -402,7 +446,7 @@ private: ...@@ -402,7 +446,7 @@ private:
private: private:
std::unique_ptr<std::thread> loadThread_; std::unique_ptr<std::thread> loadThread_;
std::atomic<bool> exit_; std::atomic<bool> exit_;
std::vector<PyObjectPtr> callingContexts_; std::deque<PyObjectPtr> callingContexts_;
std::deque<PyObjectPtr> dataPool_; std::deque<PyObjectPtr> dataPool_;
size_t poolActualSize_; size_t poolActualSize_;
std::condition_variable pushCV_; std::condition_variable pushCV_;
...@@ -413,6 +457,7 @@ private: ...@@ -413,6 +457,7 @@ private:
PyObjectPtr instance_; PyObjectPtr instance_;
size_t poolSize_; size_t poolSize_;
size_t minPoolSize_;
bool canOverBatchSize_; bool canOverBatchSize_;
PyObjectPtr calcBatchSize_; PyObjectPtr calcBatchSize_;
PyObjectPtr generator_; PyObjectPtr generator_;
...@@ -478,8 +523,13 @@ public: ...@@ -478,8 +523,13 @@ public:
// data pool ready. // data pool ready.
std::unique_lock<std::mutex> l(mtx_); std::unique_lock<std::mutex> l(mtx_);
pullCV_.wait(l, [this, &size] { pullCV_.wait(l, [this, &size] {
return this->poolActualSize_ >= size || callingContexts_.empty(); return this->poolActualSize_ >= std::max(size, this->minPoolSize_)
|| callingContexts_.empty();
}); });
if (unittest::OnPoolFilled) {
(*unittest::OnPoolFilled)(this->poolActualSize_);
}
} }
std::deque<PyObjectPtr> data; std::deque<PyObjectPtr> data;
size_t bsize = 0; size_t bsize = 0;
...@@ -495,7 +545,8 @@ public: ...@@ -495,7 +545,8 @@ public:
std::deque<PyObjectPtr>& pool = *poolPtr; std::deque<PyObjectPtr>& pool = *poolPtr;
while (bsize < size && !pool.empty()) { while (bsize < size && !pool.empty()) {
{ // move data from pool to data {
// move data from pool to data
std::lock_guard<std::mutex> guard(mtx_); std::lock_guard<std::mutex> guard(mtx_);
if (skipShuffle_) { if (skipShuffle_) {
size_t i = 0; size_t i = 0;
...@@ -505,14 +556,13 @@ public: ...@@ -505,14 +556,13 @@ public:
} else { // when shuffle, use swap to drop only last pool element. } else { // when shuffle, use swap to drop only last pool element.
size_t i = ThreadLocalRand::rand() % pool.size(); size_t i = ThreadLocalRand::rand() % pool.size();
CHECK(pool[i] != nullptr); CHECK(pool[i] != nullptr);
if (i != pool.size() - 1) { if (i != 0) {
std::swap(pool[i], pool.back()); std::swap(pool[i], pool.front());
} }
data.emplace_back(std::move(pool.back())); data.emplace_back(std::move(pool.front()));
pool.pop_back(); pool.pop_front();
} }
}
{
if (calcBatchSize_) { // custom calc batch size. if (calcBatchSize_) { // custom calc batch size.
PyGuard guard; PyGuard guard;
Py_INCREF(data.back().get()); Py_INCREF(data.back().get());
...@@ -521,8 +571,17 @@ public: ...@@ -521,8 +571,17 @@ public:
calcBatchSize.getArgs().set(0, data.back()); calcBatchSize.getArgs().set(0, data.back());
PyObjectPtr customBatchSize(calcBatchSize()); PyObjectPtr customBatchSize(calcBatchSize());
bool ok; bool ok;
bsize += py::castInt<size_t>(customBatchSize.get(), &ok); size_t tmp = py::castInt<size_t>(customBatchSize.get(), &ok);
CHECK(ok) << "calc_batch_size must return int"; CHECK(ok) << "calc_batch_size must return int";
if (bsize + tmp > size && !canOverBatchSize_) {
// Put data back.
pool.push_front(std::move(data.back()));
data.pop_back();
break;
} else {
bsize += tmp;
}
} else { } else {
bsize += 1; bsize += 1;
} }
...@@ -598,7 +657,6 @@ public: ...@@ -598,7 +657,6 @@ public:
} else { } else {
*batch = cpuBatch; *batch = cpuBatch;
} }
return bsize; return bsize;
} }
}; };
...@@ -606,7 +664,8 @@ public: ...@@ -606,7 +664,8 @@ public:
std::unordered_set<uintptr_t > PyDataProvider2::gModuleClsPtrs_; std::unordered_set<uintptr_t > PyDataProvider2::gModuleClsPtrs_;
PyObjectPtr PyDataProvider2::zeroTuple_(PyTuple_New(0)); PyObjectPtr PyDataProvider2::zeroTuple_(PyTuple_New(0));
REGISTER_DATA_PROVIDER(py2, PyDataProvider2); REGISTER_DATA_PROVIDER_EX(py2, PyDataProvider2);
/** /**
* Scanner for dense slot. * Scanner for dense slot.
......
...@@ -31,7 +31,7 @@ bool CRFLayer::init(const LayerMap& layerMap, ...@@ -31,7 +31,7 @@ bool CRFLayer::init(const LayerMap& layerMap,
} }
// coeff only affect bp, keep consistent with CostLayer // coeff only affect bp, keep consistent with CostLayer
coeff_ = config_.has_coeff() ? config_.coeff() : real(1.0); coeff_ = config_.coeff();
if (inputLayers_.size() == 3) { if (inputLayers_.size() == 3) {
weightLayer_ = inputLayers_[2]; weightLayer_ = inputLayers_[2];
} }
......
...@@ -26,11 +26,7 @@ namespace paddle { ...@@ -26,11 +26,7 @@ namespace paddle {
bool CostLayer::init(const LayerMap& layerMap, bool CostLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) { const ParameterMap& parameterMap) {
bool ret = Layer::init(layerMap, parameterMap); bool ret = Layer::init(layerMap, parameterMap);
if (config_.has_coeff()) { coeff_ = config_.coeff();
coeff_ = config_.coeff(); // coeff only affact bp
} else {
coeff_ = real(1.0);
}
if (!ret) return ret; if (!ret) return ret;
CHECK_GE(inputLayers_.size(), 2UL); CHECK_GE(inputLayers_.size(), 2UL);
CHECK_LE(inputLayers_.size(), 3UL); CHECK_LE(inputLayers_.size(), 3UL);
......
...@@ -19,14 +19,18 @@ data = [ ...@@ -19,14 +19,18 @@ data = [
[[[0, 2], [2, 5], [0, 1, 2]], 1], [[[0, 2], [2, 5], [0, 1, 2]], 1],
] ]
@provider(input_types=[integer_value_sub_sequence(10), @provider(input_types=[integer_value_sub_sequence(10),
integer_value(2)]) integer_value(2)],
should_shuffle=False)
def process_subseq(settings, file_name): def process_subseq(settings, file_name):
for d in data: for d in data:
yield d yield d
@provider(input_types=[integer_value_sequence(10), @provider(input_types=[integer_value_sequence(10),
integer_value(2)]) integer_value(2)],
should_shuffle=False)
def process_seq(settings, file_name): def process_seq(settings, file_name):
for d in data: for d in data:
seq = [] seq = []
......
...@@ -17,22 +17,26 @@ import sys ...@@ -17,22 +17,26 @@ import sys
from paddle.trainer.PyDataProvider2 import * from paddle.trainer.PyDataProvider2 import *
def hook(settings, dict_file, **kwargs): def hook(settings, dict_file, **kwargs):
settings.word_dict = dict_file settings.word_dict = dict_file
settings.input_types = [integer_value_sequence(len(settings.word_dict)), settings.input_types = [integer_value_sequence(len(settings.word_dict)),
integer_value_sequence(3)] integer_value_sequence(3)]
settings.logger.info('dict len : %d' % (len(settings.word_dict))) settings.logger.info('dict len : %d' % (len(settings.word_dict)))
@provider(init_hook=hook)
@provider(init_hook=hook, should_shuffle=False)
def process(settings, file_name): def process(settings, file_name):
with open(file_name, 'r') as fdata: with open(file_name, 'r') as fdata:
for line in fdata: for line in fdata:
label, comment = line.strip().split('\t') label, comment = line.strip().split('\t')
label = int(''.join(label.split())) label = int(''.join(label.split()))
words = comment.split() words = comment.split()
word_slot = [settings.word_dict[w] for w in words if w in settings.word_dict] word_slot = [settings.word_dict[w] for w in words if
w in settings.word_dict]
yield word_slot, [label] yield word_slot, [label]
## for hierarchical sequence network ## for hierarchical sequence network
def hook2(settings, dict_file, **kwargs): def hook2(settings, dict_file, **kwargs):
settings.word_dict = dict_file settings.word_dict = dict_file
...@@ -40,17 +44,19 @@ def hook2(settings, dict_file, **kwargs): ...@@ -40,17 +44,19 @@ def hook2(settings, dict_file, **kwargs):
integer_value_sub_sequence(3)] integer_value_sub_sequence(3)]
settings.logger.info('dict len : %d' % (len(settings.word_dict))) settings.logger.info('dict len : %d' % (len(settings.word_dict)))
@provider(init_hook=hook2)
@provider(init_hook=hook2, should_shuffle=False)
def process2(settings, file_name): def process2(settings, file_name):
with open(file_name) as fdata: with open(file_name) as fdata:
label_list = [] label_list = []
word_slot_list = [] word_slot_list = []
for line in fdata: for line in fdata:
if (len(line)) > 1: if (len(line)) > 1:
label,comment = line.strip().split('\t') label, comment = line.strip().split('\t')
label = int(''.join(label.split())) label = int(''.join(label.split()))
words = comment.split() words = comment.split()
word_slot = [settings.word_dict[w] for w in words if w in settings.word_dict] word_slot = [settings.word_dict[w] for w in words if
w in settings.word_dict]
label_list.append([label]) label_list.append([label])
word_slot_list.append(word_slot) word_slot_list.append(word_slot)
else: else:
......
...@@ -20,6 +20,18 @@ limitations under the License. */ ...@@ -20,6 +20,18 @@ limitations under the License. */
#include "paddle/gserver/dataproviders/DataProvider.h" #include "paddle/gserver/dataproviders/DataProvider.h"
P_DEFINE_string(train_list, "unittest.list", "file list for unittest"); P_DEFINE_string(train_list, "unittest.list", "file list for unittest");
// Forward declarations of unit-test hooks in paddle::unittest::pydp2.
// They are only declared here; the definitions are not visible in this file.
namespace paddle {
namespace unittest {
namespace pydp2 {
// Registers a callback that receives a size_t; the min_pool_size test in this
// file passes one that checks the reported pool size.
// NOTE(review): when exactly the callback fires is decided by the definition
// -- confirm there.
extern void setOnPoolFilledHook(const std::function<void(size_t)>& func);
// Presumably removes the callback installed above -- confirm against the
// definition.
extern void clearOnPoolFilledHook();
} // namespace pydp2
} // namespace unittest
} // namespace paddle
const paddle::real epsilon = 1e-5; const paddle::real epsilon = 1e-5;
static inline int64_t readDataBatch( static inline int64_t readDataBatch(
...@@ -235,6 +247,112 @@ TEST(PyDataProvider2, index_sub_seq) { ...@@ -235,6 +247,112 @@ TEST(PyDataProvider2, index_sub_seq) {
} }
} }
// Drains the "test_min_pool_size" provider in batches of 100 and, through the
// pool-filled hook, checks that the reported pool size never drops below
// min(remaining samples - batch size, 1000) while more than one batch of data
// is still outstanding.
TEST(PyDataProvider2, min_pool_size) {
  paddle::DataConfig config;
  config.set_type("py2");
  config.set_files(FLAGS_train_list.c_str());
  config.set_load_data_module("test_PyDataProvider2");
  config.set_load_data_object("test_min_pool_size");
  config.set_load_data_args("");

  // Number of samples not yet consumed; the Python provider yields 1 << 14.
  size_t totalData = 1 << 14;
  constexpr size_t batchSize = 100;
  constexpr size_t minPoolSize = 1000;

  paddle::DataBatch batch;
  std::unique_ptr<paddle::DataProvider> provider(
      paddle::DataProvider::create(config, false));
  provider->reset();

  paddle::unittest::pydp2::setOnPoolFilledHook([&](size_t poolSize) {
    // Nothing to verify once at most one batch is left.
    if (totalData <= batchSize) {
      return;
    }
    CHECK_GE(poolSize, std::min(totalData - batchSize, minPoolSize));
  });

  for (;;) {
    const size_t fetched = provider->getNextBatchInternal(batchSize, &batch);
    if (fetched == 0) {
      break;  // provider exhausted
    }
    totalData -= fetched;
  }

  paddle::unittest::pydp2::clearOnPoolFilledHook();
}
// Reads every batch from the "test_can_over_batch_size" provider and checks
// that no returned batch is larger than the requested batch size.
TEST(PyDataProvider2, can_over_batch_size) {
  paddle::DataConfig config;
  config.set_type("py2");
  config.set_files(FLAGS_train_list.c_str());
  config.set_load_data_module("test_PyDataProvider2");
  config.set_load_data_object("test_can_over_batch_size");
  config.set_load_data_args("");

  paddle::DataBatch batch;
  std::unique_ptr<paddle::DataProvider> provider(
      paddle::DataProvider::create(config, false));
  provider->reset();

  constexpr size_t batchSize = 100;
  for (;;) {
    const size_t fetched = provider->getNextBatchInternal(batchSize, &batch);
    if (fetched == 0) {
      break;  // provider exhausted
    }
    CHECK_LE(fetched, batchSize);
  }
}
// Verifies that samples yielded as dictionaries are mapped onto the streams in
// the order of the model's input layer names: the Python provider yields
// {'input1': 0, 'input2': 1}, so stream 0 must hold only 0s and stream 1 only
// 1s.
TEST(PyDataProvider2, input_order) {
  paddle::DataConfig config;
  config.set_type("py2");
  config.set_files(FLAGS_train_list.c_str());
  config.set_load_data_module("test_PyDataProvider2");
  config.set_load_data_object("test_input_order");
  config.set_load_data_args("");

  // The order of these names defines the expected stream order.
  paddle::ModelConfig modelConfig;
  *modelConfig.add_input_layer_names() = "input1";
  *modelConfig.add_input_layer_names() = "input2";

  paddle::DataBatch batch;
  std::unique_ptr<paddle::DataProvider> provider(
      paddle::DataProvider::create(config, modelConfig, false));
  provider->reset();

  constexpr size_t batchSize = 100;
  while (true) {
    size_t realBatchSize = provider->getNextBatchInternal(batchSize, &batch);
    if (!realBatchSize) {
      break;
    }
    // Unsigned literal: avoids a signed/unsigned comparison inside gtest's
    // comparison templates.
    ASSERT_EQ(batch.getStreams().size(), 2UL);
    for (size_t i = 0; i < realBatchSize; ++i) {
      ASSERT_EQ(batch.getStream(0).ids->getData()[i], 0);
      ASSERT_EQ(batch.getStream(1).ids->getData()[i], 1);
    }
  }
}
// Reads every batch from the "test_check" provider and checks that each id in
// stream 0 is below 10.
TEST(PyDataProvider2, test_check) {
  paddle::DataConfig config;
  config.set_type("py2");
  config.set_files(FLAGS_train_list.c_str());
  config.set_load_data_module("test_PyDataProvider2");
  config.set_load_data_object("test_check");
  config.set_load_data_args("");

  paddle::DataBatch batch;
  std::unique_ptr<paddle::DataProvider> provider(
      paddle::DataProvider::create(config, false));
  provider->reset();

  for (;;) {
    const size_t fetched = provider->getNextBatchInternal(100, &batch);
    if (fetched == 0) {
      break;  // provider exhausted
    }
    auto& ids = batch.getStream(0).ids;
    for (size_t pos = 0; pos < ids->getSize(); ++pos) {
      CHECK_LT(ids->getData()[pos], 10);
    }
  }
}
int main(int argc, char** argv) { int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
paddle::initMain(argc, argv); paddle::initMain(argc, argv);
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import random
from paddle.trainer.PyDataProvider2 import * from paddle.trainer.PyDataProvider2 import *
...@@ -39,7 +41,8 @@ def test_init_hook(setting, filename): ...@@ -39,7 +41,8 @@ def test_init_hook(setting, filename):
@provider( @provider(
input_types=[sparse_binary_vector(30000, seq_type=SequenceType.NO_SEQUENCE)]) input_types=[
sparse_binary_vector(30000, seq_type=SequenceType.NO_SEQUENCE)])
def test_sparse_non_value_no_seq(setting, filename): def test_sparse_non_value_no_seq(setting, filename):
for i in xrange(200): for i in xrange(200):
yield [(i + 1) * (j + 1) for j in xrange(10)] yield [(i + 1) * (j + 1) for j in xrange(10)]
...@@ -66,3 +69,43 @@ def test_index_sub_seq(setting, filename): ...@@ -66,3 +69,43 @@ def test_index_sub_seq(setting, filename):
for i in xrange(200): for i in xrange(200):
yield list(gen_sub_seq(i)) yield list(gen_sub_seq(i))
@provider(input_types=[index_slot(100)], min_pool_size=1000)
def test_min_pool_size(setting, filename):
    """Yield 1 << 14 random indices, each drawn from [0, 99]."""
    produced = 0
    total = 1 << 14
    while produced < total:
        yield random.randint(0, 100 - 1)
        produced += 1
@provider(input_types=[index_slot(100, seq_type=SequenceType.SEQUENCE)],
          can_over_batch_size=False,
          calc_batch_size=lambda x: len(x[0]))
def test_can_over_batch_size(setting, filename):
    """
    Yield 1 << 10 index sequences of random length (0 to 99), each element
    drawn from [0, 99]. The batch size of a sample is its sequence length.
    """
    remaining = 1 << 10
    while remaining > 0:
        remaining -= 1
        # Draw the length first, then the elements, one random call each.
        length = random.randint(0, 99)
        sequence = []
        for _ in xrange(length):
            sequence.append(random.randint(0, 100 - 1))
        yield sequence
@provider(input_types=[index_slot(10), index_slot(10)])
def test_input_order(setting, filename):
    """Yield 1000 dictionary samples keyed by input layer name."""
    emitted = 0
    while emitted < 1000:
        yield dict(input1=0, input2=1)
        emitted += 1
@provider(input_types=[index_slot(10)],
          check=True,
          check_fail_continue=True,
          should_shuffle="123")  # non-boolean on purpose: also tests should_shuffle handling
def test_check(settings, filename):
    """
    Yield random integers from [0, 100] in rounds of 10000, repeating rounds
    until a round has produced at least one value below 10. Values of 10 or
    more do not fit index_slot(10).
    """
    seen_valid = False
    while True:
        for _ in xrange(10000):
            value = random.randint(0, 100)
            seen_valid = seen_valid or value < 10
            yield value
        if seen_valid:
            break
...@@ -194,7 +194,7 @@ void Trainer::init(const std::shared_ptr<TrainerConfigHelper> &config, ...@@ -194,7 +194,7 @@ void Trainer::init(const std::shared_ptr<TrainerConfigHelper> &config,
dataProvider_ = dataProvider; dataProvider_ = dataProvider;
if (!dataProvider_ && config_->hasDataConfig()) { if (!dataProvider_ && config_->hasDataConfig()) {
dataProvider_.reset(DataProvider::create(*config_, gpuData)); dataProvider_.reset(DataProvider::create(*config_, *config_, gpuData));
} }
if (dataProvider_) { if (dataProvider_) {
evaluator_.reset(trainerInternal_.getGradientMachine()->makeEvaluator()); evaluator_.reset(trainerInternal_.getGradientMachine()->makeEvaluator());
...@@ -212,7 +212,7 @@ void Trainer::init(const std::shared_ptr<TrainerConfigHelper> &config, ...@@ -212,7 +212,7 @@ void Trainer::init(const std::shared_ptr<TrainerConfigHelper> &config,
testDataProvider_ = testDataProvider; testDataProvider_ = testDataProvider;
if (!testDataProvider_ && config_->hasTestDataConfig()) { if (!testDataProvider_ && config_->hasTestDataConfig()) {
testDataProvider_.reset( testDataProvider_.reset(
DataProvider::create(config_->getTestDataConfig(), gpuData)); DataProvider::create(config_->getTestDataConfig(), *config_, gpuData));
} }
if (testDataProvider_) { if (testDataProvider_) {
tester_.reset(new Tester(config_, createTesterConfig(), tester_.reset(new Tester(config_, createTesterConfig(),
......
dump_text.test dump_text.test
test_pydata_provider_wrapper.json test_pydata_provider_wrapper.json
*proto.bin
...@@ -13,96 +13,53 @@ ...@@ -13,96 +13,53 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
import math from paddle.trainer_config_helpers import *
beam_search = get_config_arg('beam_search', bool, False) settings(batch_size=15, learning_rate=0)
model_type("recurrent_nn")
Settings(learning_rate=0, batch_size=15, algorithm='sgd')
Inputs("sent_id", "dummy_data_input")
Outputs("predict_word")
num_words = 5 num_words = 5
beam_flag = get_config_arg('beam_search', bool, False)
DataLayer(name="sent_id", size=1, ) sent_id = data_layer(name="sent_id", size=1)
# This layer has no actual use, but only to decide batch_size in generation. # This layer has no actual use, but only to decide batch_size in generation.
# When generating, at least one Memory in RecurrentLayer MUST have a boot layer. # When generating, at least one Memory in RecurrentLayer MUST have a boot layer.
DataLayer(name="dummy_data_input", size=2, ) dummy_data = data_layer(name="dummy_data_input", size=2)
if beam_search: gen_inputs = [StaticInput(input=dummy_data, size=2),
RecurrentLayerGroupBegin("decoding_layer_group", GeneratedInput(size=num_words,
in_links=[], embedding_name="wordvec",
out_links=["predict_word"], embedding_size=num_words)]
generator=Generator(max_num_frames=10,
beam_size=2, def step(dummy_memory, predict_word):
num_results_per_sample=2, ))
else: # simplified RNN for testing
RecurrentLayerGroupBegin("decoding_layer_group", with mixed_layer(size=num_words) as layer:
in_links=[], layer += full_matrix_projection(input=predict_word,
out_links=["predict_word"], param_attr=ParamAttr(name="transtable"))
generator=Generator(max_num_frames=10, ))
dummy_memory = Memory(name="dummy_memory", with mixed_layer(size=num_words, act=ExpActivation()) as out:
size=2, out += trans_full_matrix_projection(input=layer,
boot_layer="dummy_data_input") param_attr=ParamAttr(name="wordvec"))
MixedLayer(name="dummy_memory",
size=2, return out
bias=False,
inputs=[IdentityProjection(dummy_memory)], ) beam_gen = beam_search(name="rnn_gen",
state_memory = Memory(name="state", step=step,
size=num_words, input=gen_inputs,
#boot_bias=True, id_input=sent_id,
#boot_bias_active_type = "tanh", dict_file="./trainer/tests/test_gen_dict.txt",
) result_file="./trainer/tests/dump_text.test",
bos_id=0,
predict_word_memory = Memory(name="predict_word", eos_id=num_words-1,
size=num_words, beam_size=2 if beam_flag else 1,
boot_with_const_id=0, ) num_results_per_sample=2 if beam_flag else 1,
max_length=10)
MixedLayer(
name = "word_embedding", #outputs(beam_gen)
size = num_words, # word embedding dim is the same as num_words in this test. # In this config, as dummy_data_input doesn't work on beam_gen (we can find dummy_memory
bias = False, # is read-only memory, and isn't used by other layers of step), we show the Inputs and Outputs
inputs = TableProjection(predict_word_memory, # as follows. Note that "__beam_search_predict__" is the default output name of beam_search.
initial_std=1, Inputs("sent_id","dummy_data_input")
learning_rate=0, Outputs("__beam_search_predict__")
parameter_name="wordvec"))
Layer( # simplified RNN for testing
name="state",
type="mixed",
size=num_words,
bias=False,
inputs=[FullMatrixProjection("word_embedding",
parameter_name="transtable")])
Layer(name="output",
type="mixed",
size=num_words,
active_type="exponential",
bias=False,
inputs=TransposedFullMatrixProjection("state",
initial_std=1,
learning_rate=0,
parameter_name="wordvec"), )
Layer(name="predict_word", type="maxid", inputs=["output"], )
Layer(name="eos_check",
type="eos_id",
eos_id=num_words - 1,
inputs=["predict_word"], )
RecurrentLayerGroupEnd("decoding_layer_group")
Evaluator(name="answer_printer",
type="seq_text_printer",
dict_file="./trainer/tests/test_gen_dict.txt",
result_file="./trainer/tests/dump_text.test",
inputs=[
"sent_id",
"predict_word",
], )
...@@ -183,10 +183,21 @@ public: ...@@ -183,10 +183,21 @@ public:
/** /**
* Get bool attribute. * Get bool attribute.
* @param field * @param field
* @param [out] isBoolType set to true if the attribute is of bool type. If
*                         the attribute is not of bool type, an implicit
*                         conversion happens and the conversion result is
*                         returned.
*
*                         For example, if the attribute is 1, the function
*                         returns true, but isBoolType is set to false.
* @return * @return
*/ */
bool getBoolAttr(const std::string& field) const { bool getBoolAttr(const std::string& field, bool* isBoolType = nullptr) const {
PyObjectPtr tmp(getAttr(field)); PyObjectPtr tmp(getAttr(field));
if (isBoolType) {
*isBoolType = PyBool_Check(tmp.get());
}
return PyObject_IsTrue(tmp.get()); return PyObject_IsTrue(tmp.get());
} }
...@@ -266,6 +277,15 @@ public: ...@@ -266,6 +277,15 @@ public:
this->set(key, PyBool_FromLong(b)); this->set(key, PyBool_FromLong(b));
} }
void setStringList(const std::string& key,
const std::vector<std::string>& items) {
auto * list = PyList_New(items.size());
for (size_t i=0; i < items.size(); ++i) {
PyList_SetItem(list, i, PyString_FromString(items[i].c_str()));
}
this->set(key, list);
}
private: private:
inline void checkDict() { inline void checkDict() {
CHECK(PyDict_Check(this->dict_)); CHECK(PyDict_Check(this->dict_));
......
...@@ -299,7 +299,7 @@ sinclude(`ModelConfigLayer.proto.m4') ...@@ -299,7 +299,7 @@ sinclude(`ModelConfigLayer.proto.m4')
optional bool norm_by_times = 25; optional bool norm_by_times = 25;
// for CostLayers // for CostLayers
optional real coeff = 26; optional real coeff = 26 [default = 1.0];
// for AverageLayer // for AverageLayer
// can be set to: 'average', 'sum' or 'squarerootn' // can be set to: 'average', 'sum' or 'squarerootn'
......
...@@ -31,8 +31,8 @@ message ParameterUpdaterHookConfig { ...@@ -31,8 +31,8 @@ message ParameterUpdaterHookConfig {
message ParameterConfig { message ParameterConfig {
required string name = 1; required string name = 1;
required uint64 size = 2; required uint64 size = 2;
required real learning_rate = 3; optional real learning_rate = 3 [default = 1.0];
required real momentum = 4; optional real momentum = 4 [default = 0.0];
optional real initial_mean = 5 [default = 0.0]; optional real initial_mean = 5 [default = 0.0];
optional real initial_std = 6 [default = 0.01]; optional real initial_std = 6 [default = 0.01];
// use L2-regularization if decay_rate set and decay_rate_l1 not set // use L2-regularization if decay_rate set and decay_rate_l1 not set
...@@ -54,8 +54,8 @@ message ParameterConfig { ...@@ -54,8 +54,8 @@ message ParameterConfig {
optional int32 num_batches_regularization = 13 [default = 1]; optional int32 num_batches_regularization = 13 [default = 1];
// if is_sparse is true, para is sparse, else para is dense // if is_sparse is true, para is sparse, else para is dense
optional bool is_sparse = 14[default = false]; optional bool is_sparse = 14[default = false];
// if para is sparse, format should be "csc" or "csr" // if para is sparse, format should be "csc" or "csr", empty means is not sparse
optional string format = 15[default = "csr"]; optional string format = 15 [default = ""];
// sparse remote update or not // sparse remote update or not
optional bool sparse_remote_update = 16 [default = false]; optional bool sparse_remote_update = 16 [default = false];
// gradient clipping threshold, no clipping by default // gradient clipping threshold, no clipping by default
......
set(OUTPUT_DIR set(OUTPUT_DIR
"${CMAKE_CURRENT_BINARY_DIR}/build") "${CMAKE_CURRENT_BINARY_DIR}/build")
# Collect the Python sources of the paddle package into PY_FILES.
# NOTE(review): each file(GLOB) call passes "." as an extra globbing
# expression ahead of the real pattern; it looks unintended -- confirm what it
# matches before relying on the resulting list.
file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py)
file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py)
file(GLOB UTILS_PY_FILES . ./paddle/utils/*.py)
# paddle/__init__.py is listed explicitly; the remaining entries come from the
# three globs above.
set(PY_FILES paddle/__init__.py
${TRAINER_PY_FILES}
${HELPERS_PY_FILES}
${UTILS_PY_FILES})
set(PADDLE_INTERNAL_PACKAGE "") set(PADDLE_INTERNAL_PACKAGE "")
if (PADDLE_WITH_INTERNAL) if (PADDLE_WITH_INTERNAL)
...@@ -13,7 +21,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ...@@ -13,7 +21,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
COMMAND ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp
DEPENDS gen_proto_py) DEPENDS gen_proto_py ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS add_custom_target(paddle_python ALL DEPENDS
${OUTPUT_DIR}/.timestamp) ${OUTPUT_DIR}/.timestamp)
......
...@@ -14,6 +14,13 @@ ...@@ -14,6 +14,13 @@
import cPickle import cPickle
import logging import logging
import collections
import functools
import itertools
logging.basicConfig(
format="[%(levelname)s %(asctime)s %(filename)s:%(lineno)s]"
" %(message)s")
class SequenceType(object): class SequenceType(object):
...@@ -68,30 +75,39 @@ sparse_binary_vector = sparse_non_value_slot ...@@ -68,30 +75,39 @@ sparse_binary_vector = sparse_non_value_slot
sparse_vector = sparse_value_slot sparse_vector = sparse_value_slot
integer_value = index_slot integer_value = index_slot
def dense_vector_sequence(dim): def dense_vector_sequence(dim):
return dense_vector(dim, seq_type=SequenceType.SEQUENCE) return dense_vector(dim, seq_type=SequenceType.SEQUENCE)
def dense_vector_sub_sequence(dim): def dense_vector_sub_sequence(dim):
return dense_vector(dim, seq_type=SequenceType.SUB_SEQUENCE) return dense_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
def sparse_binary_vector_sequence(dim): def sparse_binary_vector_sequence(dim):
return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE) return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE)
def sparse_binary_vector_sub_sequence(dim): def sparse_binary_vector_sub_sequence(dim):
return sparse_binary_vector(dim, seq_type=SequenceType.SUB_SEQUENCE) return sparse_binary_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
def sparse_vector_sequence(dim): def sparse_vector_sequence(dim):
return sparse_vector(dim, seq_type=SequenceType.SEQUENCE) return sparse_vector(dim, seq_type=SequenceType.SEQUENCE)
def sparse_vector_sub_sequence(dim): def sparse_vector_sub_sequence(dim):
return sparse_vector(dim, seq_type=SequenceType.SUB_SEQUENCE) return sparse_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
def integer_value_sequence(dim): def integer_value_sequence(dim):
return integer_value(dim, seq_type=SequenceType.SEQUENCE) return integer_value(dim, seq_type=SequenceType.SEQUENCE)
def integer_value_sub_sequence(dim): def integer_value_sub_sequence(dim):
return integer_value(dim, seq_type=SequenceType.SUB_SEQUENCE) return integer_value(dim, seq_type=SequenceType.SUB_SEQUENCE)
def integer_sequence(dim): def integer_sequence(dim):
return index_slot(dim, seq_type=SequenceType.SEQUENCE) return index_slot(dim, seq_type=SequenceType.SEQUENCE)
...@@ -102,13 +118,97 @@ class SingleSlotWrapper(object): ...@@ -102,13 +118,97 @@ class SingleSlotWrapper(object):
def __call__(self, obj, filename): def __call__(self, obj, filename):
for item in self.generator(obj, filename): for item in self.generator(obj, filename):
yield [item] if isinstance(item, dict):
yield item
else:
yield [item]
def provider(input_types=None, should_shuffle=True, pool_size=-1, class InputOrderWrapper(object):
def __init__(self, generator, input_order):
self.generator = generator
self.input_order = input_order
def __call__(self, obj, filename):
for item in self.generator(obj, filename):
if isinstance(item, dict):
yield [item.get(input_name, None) for input_name in
self.input_order]
else:
yield item
class CheckWrapper(object):
    """
    Generator wrapper that validates every sample against the declared
    input types before passing it on.

    A sample must have exactly one entry per input type, no entry may be
    None, and every entry must satisfy loop_callback for its input type
    (applied through input_type.seq_type levels of nesting). A sample that
    fails is logged; it is then dropped when check_fail_continue is true,
    otherwise the AssertionError is re-raised.
    """

    def __init__(self, generator, input_types, check_fail_continue, logger):
        """
        :param generator: callable invoked as generator(obj, filename).
        :param input_types: one InputType per entry of a sample.
        :param check_fail_continue: drop invalid samples instead of raising.
        :param logger: logger used to report invalid samples.
        """
        self.generator = generator
        self.input_types = input_types
        self.check_fail_continue = check_fail_continue
        self.logger = logger

    def __call__(self, obj, filename):
        """Yield only the samples of the wrapped generator that validate."""
        for items in self.generator(obj, filename):
            # Object named in the warning if validation fails. It starts as
            # the whole sample, so a failure of the sample-level asserts
            # never reports an unbound or stale per-field value.
            failing = items
            try:
                assert len(items) == len(self.input_types)
                assert all(x is not None for x in items)
                for item, input_type in itertools.izip(items,
                                                       self.input_types):
                    failing = item
                    callback = functools.partial(CheckWrapper.loop_callback,
                                                 input_type)
                    # One loop_check layer per nesting level of the input.
                    for _ in xrange(input_type.seq_type):
                        callback = functools.partial(CheckWrapper.loop_check,
                                                     callback)
                    callback(item)

                yield items
            except AssertionError as e:
                self.logger.warning(
                    "Item (%s) is not fit the input type with error %s"
                    % (repr(failing), repr(e)))

                if self.check_fail_continue:
                    continue
                else:
                    raise

    @staticmethod
    def loop_callback(input_type, each):
        """
        Assert that a single (non-nested) value matches input_type.

        :raises AssertionError: when the value does not match.
        :raises RuntimeError: when input_type.type is not supported.
        """
        assert isinstance(input_type, InputType)
        if input_type.type == DataType.Dense:
            assert isinstance(each, collections.Sequence)
            for d in each:
                assert isinstance(d, float)
            # A dense vector must have exactly `dim` entries.
            assert len(each) == input_type.dim
        elif input_type.type == DataType.Index:
            assert isinstance(each, int)
            assert each < input_type.dim
        elif input_type.type == DataType.SparseNonValue \
                or input_type.type == DataType.SparseValue:
            assert isinstance(each, collections.Sequence)
            sparse_id = set()
            for k in each:
                if input_type.type == DataType.SparseValue:
                    # Sparse-value entries are (index, value) pairs.
                    k, v = k
                    assert isinstance(v, float)
                assert isinstance(k, int)
                assert k < input_type.dim
                sparse_id.add(k)
            # Every index may appear only once.
            assert len(sparse_id) == len(each)
        else:
            raise RuntimeError("Not support input type")

    @staticmethod
    def loop_check(callback, item):
        """Apply callback to every element of item (one nesting level)."""
        for each in item:
            callback(each)
def provider(input_types=None, should_shuffle=None, pool_size=-1,
min_pool_size=-1,
can_over_batch_size=True, can_over_batch_size=True,
calc_batch_size=None, calc_batch_size=None,
cache=CacheType.NO_CACHE, cache=CacheType.NO_CACHE,
check=False, check_fail_continue=False,
use_dynamic_order=True,
init_hook=None, **kwargs): init_hook=None, **kwargs):
""" """
Provider decorator. Use it to make a function into PyDataProvider2 object. Provider decorator. Use it to make a function into PyDataProvider2 object.
...@@ -130,30 +230,63 @@ def provider(input_types=None, should_shuffle=True, pool_size=-1, ...@@ -130,30 +230,63 @@ def provider(input_types=None, should_shuffle=True, pool_size=-1,
:param input_types: Specify the input types, can also be set in init_hook. :param input_types: Specify the input types, can also be set in init_hook.
It is a list of InputType object. For example, input_types= \ It is a list of InputType object. For example, input_types= \
[dense_vector(9), integer_value(2)]. [dense_vector(9), integer_value(2)].
:param should_shuffle: True if data should shuffle. :type input_types: list|tuple
:param should_shuffle: True if data should shuffle. Pass None means shuffle
when is training and not to shuffle when is testing.
:type should_shuffle: bool :type should_shuffle: bool
:param pool_size: Max number of sample in data pool. :param pool_size: Max number of sample in data pool.
:type pool_size: int :type pool_size: int
:param min_pool_size: Minimal number of samples kept in the data pool.
                      PaddlePaddle picks samples from the pool at random,
                      so min_pool_size affects how well the data is
                      randomized.
:type min_pool_size: int
:param can_over_batch_size: True if paddle can return a mini-batch larger :param can_over_batch_size: True if paddle can return a mini-batch larger
than batch size in settings. It is useful when than batch size in settings. It is useful when
custom calculate one sample's batch_size. custom calculate one sample's batch_size.
It is very danger to set it to false and use It is very danger to set it to false and use
calc_batch_size together. Default is false. calc_batch_size together. Default is false.
:type can_over_batch_size: bool
:param calc_batch_size: a method to calculate each sample's batch size. :param calc_batch_size: a method to calculate each sample's batch size.
Default each sample's batch size is 1. But to you Default each sample's batch size is 1. But to you
can customize each sample's batch size. can customize each sample's batch size.
:type calc_batch_size: callable
:param cache: Cache strategy of Data Provider. Default is CacheType.NO_CACHE :param cache: Cache strategy of Data Provider. Default is CacheType.NO_CACHE
:type cache: int
:param init_hook: Initialize hook. Useful when data provider need load some :param init_hook: Initialize hook. Useful when data provider need load some
external data like dictionary. The parameter is external data like dictionary. The parameter is
(settings, file_list, \*\*kwargs). (settings, file_list, \*\*kwargs).
- settings\: Is the global settings. User can set - settings. It is the global settings object. User can set
settings.input_types here. settings.input_types here.
- file_list\: All file names for passed to data provider. - file_list. All file names for passed to data provider.
- kwargs: Other keyword arguments passed from - is_train. Is this data provider used for training or not.
- kwargs. Other keyword arguments passed from
trainer_config's args parameter. trainer_config's args parameter.
:type init_hook: callable
:param check: Check that the yielded data matches input_types. Enabling
              this slows down the data provider, but it is very useful
              for debugging. Disabled by default.
:type check: bool
:param check_fail_continue: Continue train or not when check failed. Just
drop the wrong format data when it is True. Has
no effect when check set to False.
:type check_fail_continue: bool
:param use_dynamic_order: Allow the provider to yield a dictionary object
                          whose keys are input data layer names and whose
                          values are the feature values. Tuples are still
                          allowed when use_dynamic_order is True.
:type use_dynamic_order: bool
""" """
def __wrapper__(generator): def __wrapper__(generator):
...@@ -168,12 +301,38 @@ def provider(input_types=None, should_shuffle=True, pool_size=-1, ...@@ -168,12 +301,38 @@ def provider(input_types=None, should_shuffle=True, pool_size=-1,
self.slots = kwargs['slots'] self.slots = kwargs['slots']
self.slots = input_types self.slots = input_types
self.should_shuffle = should_shuffle self.should_shuffle = should_shuffle
true_table = [1, 't', 'true', 'on']
false_table = [0, 'f', 'false', 'off']
if not isinstance(self.should_shuffle, bool) and \
self.should_shuffle is not None:
if isinstance(self.should_shuffle, basestring):
self.should_shuffle = self.should_shuffle.lower()
if self.should_shuffle in true_table:
self.should_shuffle = True
elif self.should_shuffle in false_table:
self.should_shuffle = False
else:
self.logger.warning(
"Could not recognize should_shuffle (%s), "
"just use default value of should_shuffle."
" Please set should_shuffle to bool value or "
"something in %s" % (
repr(self.should_shuffle),
repr(true_table + false_table)))
self.should_shuffle = None
self.pool_size = pool_size self.pool_size = pool_size
self.can_over_batch_size = can_over_batch_size self.can_over_batch_size = can_over_batch_size
self.calc_batch_size = calc_batch_size self.calc_batch_size = calc_batch_size
self.file_list = file_list self.file_list = file_list
self.generator = generator self.generator = generator
self.cache = cache self.cache = cache
self.min_pool_size = min_pool_size
self.input_order = kwargs['input_order']
self.check = check
if init_hook is not None: if init_hook is not None:
init_hook(self, file_list=file_list, **kwargs) init_hook(self, file_list=file_list, **kwargs)
if self.input_types is not None: if self.input_types is not None:
...@@ -184,6 +343,15 @@ def provider(input_types=None, should_shuffle=True, pool_size=-1, ...@@ -184,6 +343,15 @@ def provider(input_types=None, should_shuffle=True, pool_size=-1,
if len(self.slots) == 1: if len(self.slots) == 1:
self.generator = SingleSlotWrapper(self.generator) self.generator = SingleSlotWrapper(self.generator)
if use_dynamic_order:
self.generator = InputOrderWrapper(self.generator,
self.input_order)
if self.check:
self.generator = CheckWrapper(self.generator,
self.slots,
check_fail_continue,
self.logger)
return DataProvider return DataProvider
return __wrapper__ return __wrapper__
...@@ -196,3 +364,4 @@ def deserialize_args(args): ...@@ -196,3 +364,4 @@ def deserialize_args(args):
:return: :return:
""" """
return cPickle.loads(args) return cPickle.loads(args)
...@@ -114,15 +114,15 @@ g_layer_type_map = {} ...@@ -114,15 +114,15 @@ g_layer_type_map = {}
# Initialize global variables. We use this function so that we can # Initialize global variables. We use this function so that we can
# call parse_config() multiple times # call parse_config() multiple times
def init_config_environment( def init_config_environment(
g_default_momentum = 0., g_default_momentum = None,
g_default_decay_rate = 0., g_default_decay_rate = None,
g_default_initial_mean = 0., g_default_initial_mean = 0.,
g_default_initial_std = 0.01, g_default_initial_std = 0.01,
g_default_num_batches_regularization = 1, g_default_num_batches_regularization = None,
g_default_initial_strategy = 0, g_default_initial_strategy = 0,
g_default_initial_smart = False, g_default_initial_smart = False,
g_default_gradient_clipping_threshold = 0., g_default_gradient_clipping_threshold = None,
g_default_device = -1, g_default_device = None,
g_default_update_hooks = None, g_default_update_hooks = None,
g_default_compact_func = None, g_default_compact_func = None,
...@@ -1099,12 +1099,12 @@ def Evaluator( ...@@ -1099,12 +1099,12 @@ def Evaluator(
inputs, inputs,
chunk_scheme = None, chunk_scheme = None,
num_chunk_types = None, num_chunk_types = None,
classification_threshold = 0.5, classification_threshold = None,
positive_label = -1, positive_label = None,
dict_file = "", dict_file = None,
result_file = "", result_file = None,
num_results = 1, num_results = None,
delimited = True, delimited = None,
): ):
evaluator = g_config.model_config.evaluators.add() evaluator = g_config.model_config.evaluators.add()
evaluator.type = type evaluator.type = type
...@@ -1120,12 +1120,19 @@ def Evaluator( ...@@ -1120,12 +1120,19 @@ def Evaluator(
evaluator.num_chunk_types = num_chunk_types evaluator.num_chunk_types = num_chunk_types
g_current_submodel.evaluator_names.append(evaluator.name) g_current_submodel.evaluator_names.append(evaluator.name)
evaluator.classification_threshold = classification_threshold if classification_threshold is not None:
evaluator.positive_label = positive_label evaluator.classification_threshold = classification_threshold
evaluator.dict_file = dict_file if positive_label is not None:
evaluator.result_file = result_file evaluator.positive_label = positive_label
evaluator.num_results = num_results if dict_file is not None:
evaluator.delimited = delimited evaluator.dict_file = dict_file
if result_file is not None:
evaluator.result_file = result_file
if num_results is not None:
evaluator.num_results = num_results
if delimited is not None:
evaluator.delimited = delimited
class LayerBase(object): class LayerBase(object):
def __init__( def __init__(
...@@ -1137,7 +1144,7 @@ class LayerBase(object): ...@@ -1137,7 +1144,7 @@ class LayerBase(object):
device=None, device=None,
active_type="", active_type="",
drop_rate=0., drop_rate=0.,
coeff=1.): coeff=None):
config_assert('@' not in name, config_assert('@' not in name,
"layer name: %s contain special character @" % name) "layer name: %s contain special character @" % name)
global g_current_submodel global g_current_submodel
...@@ -1155,10 +1162,12 @@ class LayerBase(object): ...@@ -1155,10 +1162,12 @@ class LayerBase(object):
self.inputs = [self.inputs] self.inputs = [self.inputs]
self.config = g_config.model_config.layers.add() self.config = g_config.model_config.layers.add()
assert isinstance(self.config, LayerConfig)
self.config.name = name self.config.name = name
self.config.type = type self.config.type = type
self.config.active_type = active_type self.config.active_type = active_type
self.config.coeff = coeff if coeff is not None:
self.config.coeff = float(coeff)
if size != 0: if size != 0:
self.config.size = size self.config.size = size
if drop_rate != 0: if drop_rate != 0:
...@@ -1166,7 +1175,7 @@ class LayerBase(object): ...@@ -1166,7 +1175,7 @@ class LayerBase(object):
if device is not None: if device is not None:
self.config.device = device self.config.device = device
else: elif g_default_device is not None:
self.config.device = g_default_device self.config.device = g_default_device
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
...@@ -1236,10 +1245,12 @@ class LayerBase(object): ...@@ -1236,10 +1245,12 @@ class LayerBase(object):
if bias.parameter_name is None: if bias.parameter_name is None:
bias.parameter_name = gen_bias_parameter_name(self.config.name) bias.parameter_name = gen_bias_parameter_name(self.config.name)
if bias.parameter_name not in g_parameter_map: if bias.parameter_name not in g_parameter_map:
assert isinstance(self.config, LayerConfig)
Parameter( Parameter(
bias.parameter_name, bias.parameter_name,
size, size,
self.config.device, self.config.device if self.config.HasField('device') else None,
dims, dims,
bias.learning_rate, bias.learning_rate,
bias.momentum, bias.momentum,
...@@ -1265,7 +1276,7 @@ class LayerBase(object): ...@@ -1265,7 +1276,7 @@ class LayerBase(object):
input_index, input_index,
size, size,
dims=None, dims=None,
sparse = False, sparse = None,
format = "csr"): format = "csr"):
if dims is None: if dims is None:
# TODO(yuyang18): print warning and callstack here! # TODO(yuyang18): print warning and callstack here!
...@@ -1293,7 +1304,7 @@ class LayerBase(object): ...@@ -1293,7 +1304,7 @@ class LayerBase(object):
Parameter( Parameter(
input_config.parameter_name, input_config.parameter_name,
size, size,
self.config.device, self.config.device if self.config.HasField("device") else None,
dims, dims,
input_config.learning_rate, input_config.learning_rate,
input_config.momentum, input_config.momentum,
...@@ -1353,6 +1364,8 @@ class FCLayer(LayerBase): ...@@ -1353,6 +1364,8 @@ class FCLayer(LayerBase):
if sparse: if sparse:
psize = self.inputs[input_index].nnz psize = self.inputs[input_index].nnz
else:
sparse = None
self.create_input_parameter(input_index, psize, dims, sparse, format) self.create_input_parameter(input_index, psize, dims, sparse, format)
self.create_bias_parameter(bias, self.config.size) self.create_bias_parameter(bias, self.config.size)
...@@ -2430,7 +2443,6 @@ class MixedLayer(LayerBase): ...@@ -2430,7 +2443,6 @@ class MixedLayer(LayerBase):
config_assert(inputs, 'inputs cannot be empty') config_assert(inputs, 'inputs cannot be empty')
super(MixedLayer, self).__init__( super(MixedLayer, self).__init__(
name, 'mixed', size, inputs=inputs, **xargs) name, 'mixed', size, inputs=inputs, **xargs)
operator_input_index = [] operator_input_index = []
for operator in self.operators: for operator in self.operators:
operator_conf = operator.operator_conf operator_conf = operator.operator_conf
...@@ -2445,21 +2457,31 @@ class MixedLayer(LayerBase): ...@@ -2445,21 +2457,31 @@ class MixedLayer(LayerBase):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
operator_conf.input_sizes.append(input_layer.size) operator_conf.input_sizes.append(input_layer.size)
operator_input_index.append(input_index) operator_input_index.append(input_index)
if self.config.size == 0: if self.config.size == 0:
size = operator.calc_output_size(operator_conf.input_sizes) size = operator.calc_output_size(operator_conf.input_sizes)
if size != 0: if size != 0:
self.set_layer_size(size) self.set_layer_size(size)
else:
size = operator.calc_output_size(operator_conf.input_sizes)
if size != 0:
config_assert(size == self.config.size,
"different inputs have different size: %s vs. %s" %
(size, self.config.size))
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
input = self.inputs[input_index] input = self.inputs[input_index]
if input_index not in operator_input_index: if input_index not in operator_input_index:
config_assert(isinstance(input, Projection), "input should be projection or operation") config_assert(isinstance(input, Projection), "input should be projection or operation")
if self.config.size == 0 and isinstance(input, Projection): if self.config.size == 0 and isinstance(input, Projection):
size = input.calc_output_size(input_layer) size = input.calc_output_size(input_layer)
if size != 0: if size != 0:
self.set_layer_size(size) self.set_layer_size(size)
elif isinstance(input, Projection):
sz = input.calc_output_size(input_layer)
if sz != 0:
config_assert(sz == self.config.size,
"different inputs have different size: %s vs. %s" %
(sz, self.config.size))
config_assert(size != 0, "size is not set") config_assert(size != 0, "size is not set")
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
...@@ -2827,27 +2849,44 @@ def Parameter( ...@@ -2827,27 +2849,44 @@ def Parameter(
para = g_config.model_config.parameters.add() para = g_config.model_config.parameters.add()
para.name = name para.name = name
para.size = size para.size = size
para.device = device if device is not None:
para.dims.extend(dims); para.device = int(device)
para.learning_rate = default(learning_rate, 1.) para.dims.extend(dims)
para.momentum = default(momentum, g_default_momentum)
if learning_rate is not None:
para.learning_rate = float(learning_rate)
momentum = default(momentum, g_default_momentum)
if momentum is not None:
para.momentum = float(momentum)
config_assert(not momentum or not decay_rate_l1, config_assert(not momentum or not decay_rate_l1,
"momentum and decay_rate_l1 cannot both be non-zero") "momentum and decay_rate_l1 cannot both be non-zero")
para.decay_rate = default(decay_rate, g_default_decay_rate)
decay_rate = default(decay_rate, g_default_decay_rate)
if decay_rate is not None:
para.decay_rate = decay_rate
if decay_rate_l1 is not None: if decay_rate_l1 is not None:
para.decay_rate_l1 = decay_rate_l1 para.decay_rate_l1 = decay_rate_l1
para.initial_std = default(initial_std, g_default_initial_std) para.initial_std = default(initial_std, g_default_initial_std)
para.initial_mean = default(initial_mean, g_default_initial_mean) para.initial_mean = default(initial_mean, g_default_initial_mean)
para.num_batches_regularization = default(
num_batches_regularization = default(
num_batches_regularization, g_default_num_batches_regularization) num_batches_regularization, g_default_num_batches_regularization)
if num_batches_regularization is not None:
para.num_batches_regularization = int(num_batches_regularization)
if sparse_remote_update is not None: if sparse_remote_update is not None:
para.sparse_remote_update = sparse_remote_update para.sparse_remote_update = sparse_remote_update
if sparse_remote_update: if sparse_remote_update:
g_config.opt_config.use_sparse_remote_updater = True g_config.opt_config.use_sparse_remote_updater = True
if sparse_update is not None: if sparse_update is not None:
para.sparse_update = sparse_update para.sparse_update = sparse_update
para.gradient_clipping_threshold = default( gradient_clipping_threshold = default(
gradient_clipping_threshold, g_default_gradient_clipping_threshold); gradient_clipping_threshold, g_default_gradient_clipping_threshold)
if gradient_clipping_threshold is not None:
para.gradient_clipping_threshold = gradient_clipping_threshold
para.initial_strategy = default(initial_strategy, g_default_initial_strategy) para.initial_strategy = default(initial_strategy, g_default_initial_strategy)
para.initial_smart = default(initial_smart, g_default_initial_smart) para.initial_smart = default(initial_smart, g_default_initial_smart)
if para.initial_smart: if para.initial_smart:
...@@ -2860,15 +2899,19 @@ def Parameter( ...@@ -2860,15 +2899,19 @@ def Parameter(
para.initial_std = 1. / math.sqrt(para.size) para.initial_std = 1. / math.sqrt(para.size)
if g_default_compact_func is not None: if g_default_compact_func is not None:
sparse, format, need_compact = g_default_compact_func(para.name) sparse, format, need_compact = g_default_compact_func(para.name)
para.is_sparse = default(sparse, False)
para.format = default(format, "") if sparse is not None:
para.need_compact = default(need_compact, False) para.is_sparse = sparse
if format is not None:
para.format = format
if need_compact is not None:
para.need_compact = need_compact
if is_static is not None: if is_static is not None:
para.is_static = is_static para.is_static = is_static
config_assert(not para.sparse_remote_update or not para.is_static, config_assert(not para.sparse_remote_update or not para.is_static,
"sparse_remote_update and is_static cannot both be true") "sparse_remote_update and is_static cannot both be true")
if is_shared is not None:
para.is_shared = default(is_shared, False) para.is_shared = is_shared
update_hooks = default(update_hooks, g_default_update_hooks) update_hooks = default(update_hooks, g_default_update_hooks)
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
__all__ = ["TanhActivation", "SigmoidActivation", __all__ = ["TanhActivation", "SigmoidActivation",
"SoftmaxActivation", "IdentityActivation", "LinearActivation", "SoftmaxActivation", "IdentityActivation", "LinearActivation",
'SequenceSoftmaxActivation', 'SequenceSoftmaxActivation', 'ExpActivation',
"ReluActivation", "BReluActivation", "SoftReluActivation", "STanhActivation", "ReluActivation", "BReluActivation", "SoftReluActivation", "STanhActivation",
"AbsActivation", "SquareActivation", "BaseActivation"] "AbsActivation", "SquareActivation", "BaseActivation"]
...@@ -185,3 +185,12 @@ class SquareActivation(BaseActivation): ...@@ -185,3 +185,12 @@ class SquareActivation(BaseActivation):
""" """
def __init__(self): BaseActivation.__init__(self, 'square', False) def __init__(self): BaseActivation.__init__(self, 'square', False)
class ExpActivation(BaseActivation):
    """
    Exponential activation.

    .. math::

       f(z) = e^z.
    """

    def __init__(self):
        # Same call shape as the sibling activation classes in this module:
        # the activation type string followed by a False flag.
        # NOTE(review): the meaning of the second argument is defined by
        # BaseActivation, which is not visible here -- confirm before changing.
        BaseActivation.__init__(self, 'exponential', False)
...@@ -65,12 +65,12 @@ def evaluator_base( ...@@ -65,12 +65,12 @@ def evaluator_base(
name=None, name=None,
chunk_scheme=None, chunk_scheme=None,
num_chunk_types=None, num_chunk_types=None,
classification_threshold=0.5, classification_threshold=None,
positive_label=-1, positive_label=None,
dict_file="", dict_file=None,
result_file="", result_file=None,
num_results=1, num_results=None,
delimited=True): delimited=None):
""" """
Evaluator will evaluate the network status while training/testing. Evaluator will evaluate the network status while training/testing.
...@@ -105,9 +105,10 @@ def evaluator_base( ...@@ -105,9 +105,10 @@ def evaluator_base(
:type weight: LayerOutput. :type weight: LayerOutput.
""" """
# inputs type assertions. # inputs type assertions.
assert isinstance(classification_threshold, float) assert classification_threshold is None or isinstance(
assert isinstance(positive_label, int) classification_threshold, float)
assert isinstance(num_results, int) assert positive_label is None or isinstance(positive_label, int)
assert num_results is None or isinstance(num_results, int)
if not isinstance(input, list): if not isinstance(input, list):
input = [input] input = [input]
...@@ -136,7 +137,7 @@ def classification_error_evaluator( ...@@ -136,7 +137,7 @@ def classification_error_evaluator(
label, label,
name=None, name=None,
weight=None, weight=None,
threshold=0.5): threshold=None):
""" """
Classification Error Evaluator. It will print error rate for classification. Classification Error Evaluator. It will print error rate for classification.
...@@ -253,7 +254,7 @@ def pnpair_evaluator( ...@@ -253,7 +254,7 @@ def pnpair_evaluator(
def precision_recall_evaluator( def precision_recall_evaluator(
input, input,
label, label,
positive_label=-1, positive_label=None,
weight=None, weight=None,
name=None, name=None,
): ):
...@@ -494,7 +495,7 @@ def gradient_printer_evaluator( ...@@ -494,7 +495,7 @@ def gradient_printer_evaluator(
@wrap_name_default() @wrap_name_default()
def maxid_printer_evaluator( def maxid_printer_evaluator(
input, input,
num_results=1, num_results=None,
name=None, name=None,
): ):
""" """
...@@ -518,13 +519,14 @@ def maxid_printer_evaluator( ...@@ -518,13 +519,14 @@ def maxid_printer_evaluator(
""" """
evaluator_base(name=name, evaluator_base(name=name,
type="max_id_printer", type="max_id_printer",
input=input) input=input,
num_results=num_results)
@evaluator(EvaluatorAttribute.FOR_PRINT) @evaluator(EvaluatorAttribute.FOR_PRINT)
@wrap_name_default() @wrap_name_default()
def maxframe_printer_evaluator( def maxframe_printer_evaluator(
input, input,
num_results=1, num_results=None,
name=None, name=None,
): ):
""" """
...@@ -556,9 +558,9 @@ def maxframe_printer_evaluator( ...@@ -556,9 +558,9 @@ def maxframe_printer_evaluator(
@wrap_name_default() @wrap_name_default()
def seqtext_printer_evaluator( def seqtext_printer_evaluator(
input, input,
dict_file="", result_file,
result_file="", dict_file=None,
delimited=True, delimited=None,
name=None, name=None,
): ):
""" """
...@@ -616,6 +618,7 @@ def seqtext_printer_evaluator( ...@@ -616,6 +618,7 @@ def seqtext_printer_evaluator(
:param name: Evaluator name. :param name: Evaluator name.
:type name: None|basestring :type name: None|basestring
""" """
assert isinstance(result_file, basestring)
evaluator_base(name=name, evaluator_base(name=name,
type="seq_text_printer", type="seq_text_printer",
input=input, input=input,
......
...@@ -28,7 +28,7 @@ except ImportError: ...@@ -28,7 +28,7 @@ except ImportError:
import copy import copy
__all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel", __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
"identity_projection", "dotmul_projection", "identity_projection", "dotmul_projection", "dotmul_operator",
"table_projection", "mixed_layer", "data_layer", "table_projection", "mixed_layer", "data_layer",
"embedding_layer", "fc_layer", "grumemory", "embedding_layer", "fc_layer", "grumemory",
"pooling_layer", "lstmemory", "last_seq", "first_seq", "pooling_layer", "lstmemory", "last_seq", "first_seq",
...@@ -389,7 +389,7 @@ def identity_projection(input, offset=None): ...@@ -389,7 +389,7 @@ def identity_projection(input, offset=None):
@wrap_param_attr_default() @wrap_param_attr_default()
def dotmul_projection(input, param_attr=None, scale=1): def dotmul_projection(input, param_attr=None, scale=1):
""" """
1. DotMulProjection if input is a layer. DotMulProjection with a layer as input.
It performs element-wise multiplication with weight. It performs element-wise multiplication with weight.
.. math:: .. math::
...@@ -403,48 +403,45 @@ def dotmul_projection(input, param_attr=None, scale=1): ...@@ -403,48 +403,45 @@ def dotmul_projection(input, param_attr=None, scale=1):
proj = dotmul_projection(input=layer) proj = dotmul_projection(input=layer)
2. DotMulOperator if input is a list or tuple.
It takes two inputs, performs element-wise multiplication:
.. math::
out.row[i] += scale * (in1.row[i] .* in2.row[i])
where :math:`.*` means element-wise multiplication, and
scale is a config scalar, its default value is one.
The example usage is:
.. code-block:: python
op = dotmul_projection(input=[layer1, layer2],
scale=2.0)
:param input: Input layer. :param input: Input layer.
:type input: LayerOutput|list|tuple :type input: LayerOutput
:param param_attr: Parameter config, None if use default. :param param_attr: Parameter config, None if use default.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param scale: config scalar, default value is one. :param scale: config scalar, default value is one.
:type scale: float :type scale: float
:return: A DotMulProjection or DotMulOperator Object. :return: A DotMulProjection Object.
:rtype: DotMulProjection or DotMulOperator :rtype: DotMulProjection
""" """
if isinstance(input, LayerOutput): proj = DotMulProjection(input_layer_name=input.name,
proj = DotMulProjection(input_layer_name=input.name,
size=input.size, size=input.size,
**param_attr.attr) **param_attr.attr)
proj.origin = input proj.origin = input
proj.origin.projection = "dot_mul" return proj
return proj
else:
assert isinstance(input, list) or isinstance(input, tuple)
assert len(input) == 2
assert param_attr is None
op = DotMulOperator(input_layer_name=[x.name for x in input],
scale=scale)
op.origin = input
op.origin.operator = "dot_mul"
return op
def dotmul_operator(x, y, scale=1):
    """
    DotMulOperator takes two inputs and performs element-wise multiplication:

    .. math::

       out.row[i] += scale * (x.row[i] .* y.row[i])

    where :math:`.*` means element-wise multiplication, and
    scale is a config scalar, its default value is one.

    The example usage is:

    .. code-block:: python

       op = dotmul_operator(x, y,
                            scale=1)

    :param x: The first input layer.
    :type x: LayerOutput
    :param y: The second input layer.
    :type y: LayerOutput
    :param scale: config scalar, default value is one.
    :type scale: float
    :return: A DotMulOperator Object.
    :rtype: DotMulOperator
    """
    # Both operands must be layer outputs; their names are what gets passed
    # to the operator config below.
    assert isinstance(x, LayerOutput), "x should be a LayerOutput"
    assert isinstance(y, LayerOutput), "y should be a LayerOutput"
    op = DotMulOperator(input_layer_names=[x.name, y.name],
                        scale=scale)
    # Unlike a single-input projection, the operator records both of its
    # input layers, so ``origin`` is a list.
    op.origin = [x, y]
    return op
@wrap_bias_attr_default(['padding_attr']) @wrap_bias_attr_default(['padding_attr'])
def context_projection(input, context_len, context_start=None, def context_projection(input, context_len, context_start=None,
...@@ -539,7 +536,10 @@ class MixedLayerType(LayerOutput): ...@@ -539,7 +536,10 @@ class MixedLayerType(LayerOutput):
if not self.finalized: if not self.finalized:
assert isinstance(other, Projection) or isinstance(other, Operator) assert isinstance(other, Projection) or isinstance(other, Operator)
self.inputs.append(other) self.inputs.append(other)
self.parents.append(other.origin) if isinstance(other, Projection):
self.parents.append(other.origin)
else:
self.parents.extend(other.origin)
return self return self
else: else:
raise MixedLayerType.AddToSealedMixedLayerException() raise MixedLayerType.AddToSealedMixedLayerException()
...@@ -565,7 +565,7 @@ class MixedLayerType(LayerOutput): ...@@ -565,7 +565,7 @@ class MixedLayerType(LayerOutput):
@wrap_act_default(act=LinearActivation()) @wrap_act_default(act=LinearActivation())
@wrap_bias_attr_default(has_bias=False) @wrap_bias_attr_default(has_bias=False)
@layer_support(ERROR_CLIPPING, DROPOUT) @layer_support(ERROR_CLIPPING, DROPOUT)
def mixed_layer(size, input=None, name=None, act=None, bias_attr=False, def mixed_layer(size=0, input=None, name=None, act=None, bias_attr=False,
layer_attr=None): layer_attr=None):
""" """
Mixed Layer. A mixed layer will add all inputs together, then activate. Mixed Layer. A mixed layer will add all inputs together, then activate.
......
...@@ -79,7 +79,7 @@ class MomentumOptimizer(BaseSGDOptimizer): ...@@ -79,7 +79,7 @@ class MomentumOptimizer(BaseSGDOptimizer):
'learning_method': 'momentum' 'learning_method': 'momentum'
} }
def __init__(self, momentum=1e-3): def __init__(self, momentum=None):
self.momentum = momentum self.momentum = momentum
......
...@@ -38,8 +38,11 @@ print_layer(input=[out]) ...@@ -38,8 +38,11 @@ print_layer(input=[out])
outputs(classification_cost(out, data_layer(name="label", size=num_classes))) outputs(classification_cost(out, data_layer(name="label", size=num_classes)))
dotmul = mixed_layer(input=[dotmul_operator(x=x1, y=y1),
dotmul_projection(input=y1)])
# for ctc # for ctc
tmp = fc_layer(input=x1, tmp = fc_layer(input=[x1, dotmul],
size=num_classes + 1, size=num_classes + 1,
act=SoftmaxActivation()) act=SoftmaxActivation())
ctc = ctc_layer(input=tmp, ctc = ctc_layer(input=tmp,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册