# Paddle: commit 37806792

Repository: BaiXuePrincess/Paddle (a fork of PaddlePaddle/Paddle, in sync with the upstream source).

- Author: qiaolongfei
- Committed: March 6, 2017
- Parents: 06915d0a, ca62c104

Commit message:

    fix conflict

This merge touches 38 files, with 793 additions and 227 deletions (+793, -227).
## Changed files (+793, -227)

| File | Added | Removed |
| --- | ---: | ---: |
| demo/image_classification/api_v2_train.py | +7 | -6 |
| demo/introduction/api_train_v2.py | +11 | -11 |
| demo/mnist/.gitignore | +3 | -0 |
| demo/mnist/api_train_v2.py | +19 | -6 |
| demo/semantic_role_labeling/api_train_v2.py | +3 | -3 |
| demo/sentiment/train_v2.py | +9 | -14 |
| demo/seqToseq/api_train_v2.py | +3 | -2 |
| doc/api/index_cn.rst | +25 | -1 |
| doc/api/index_en.rst | +17 | -1 |
| doc/api/v2/data.rst | +93 | -0 |
| doc/api/v2/model_configs.rst | +11 | -0 |
| doc/api/v2/run_logic.rst | +26 | -0 |
| doc/design/reader/README.md | +37 | -37 |
| doc/howto/usage/k8s/k8s_distributed_cn.md | +56 | -22 |
| python/paddle/trainer/PyDataProvider2.py | +89 | -4 |
| python/paddle/v2/data_feeder.py | +40 | -31 |
| python/paddle/v2/data_type.py | +10 | -6 |
| python/paddle/v2/dataset/__init__.py | +3 | -0 |
| python/paddle/v2/dataset/cifar.py | +2 | -0 |
| python/paddle/v2/dataset/conll05.py | +4 | -2 |
| python/paddle/v2/dataset/imdb.py | +2 | -0 |
| python/paddle/v2/dataset/imikolov.py | +2 | -0 |
| python/paddle/v2/dataset/mnist.py | +21 | -0 |
| python/paddle/v2/dataset/movielens.py | +5 | -0 |
| python/paddle/v2/dataset/sentiment.py | +6 | -5 |
| python/paddle/v2/dataset/uci_housing.py | +5 | -0 |
| python/paddle/v2/event.py | +4 | -0 |
| python/paddle/v2/inference.py | +4 | -12 |
| python/paddle/v2/minibatch.py | +16 | -10 |
| python/paddle/v2/optimizer.py | +8 | -3 |
| python/paddle/v2/parameters.py | +76 | -1 |
| python/paddle/v2/reader/__init__.py | +57 | -8 |
| python/paddle/v2/reader/creator.py | +5 | -1 |
| python/paddle/v2/reader/decorator.py | +25 | -9 |
| python/paddle/v2/tests/run_tests.sh | +1 | -1 |
| python/paddle/v2/tests/test_parameters.py | +60 | -0 |
| python/paddle/v2/tests/test_topology.py | +3 | -2 |
| python/paddle/v2/trainer.py | +25 | -29 |
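Most of the churn in the demos comes from two v2 API renames that this merge carries through: `paddle.reader.batched` becomes `paddle.batch`, and the trainer's `reader_dict` keyword becomes `feeding`. The sketch below mimics the new call shape with stand-in stubs (the `Trainer` and reader here are hypothetical placeholders, not Paddle's classes), so the `feeding` semantics can be run without Paddle:

```python
# Stand-in stub; Paddle's real trainer lives in paddle.v2.trainer.
class Trainer(object):
    def train(self, reader, feeding=None, **kwargs):  # keyword was reader_dict
        for sample in reader():
            # feeding maps each named network input to its position in a sample
            print({name: sample[pos] for name, pos in feeding.items()})

def uci_style_reader():  # yields (x, y) tuples, like uci_housing.train()
    yield ([0.2, 0.4], 1.0)
    yield ([0.8, 0.1], 0.0)

Trainer().train(reader=uci_style_reader, feeding={'x': 0, 'y': 1})
```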
## Diffs

### demo/image_classification/api_v2_train.py (+7, -6)

```diff
@@ -13,8 +13,9 @@
 # limitations under the License

 import sys
 import paddle.v2 as paddle
+from api_v2_vgg import vgg_bn_drop
 from api_v2_resnet import resnet_cifar10
@@ -23,7 +24,7 @@ def main():
     classdim = 10

     # PaddlePaddle init
-    paddle.init(use_gpu=True, trainer_count=1)
+    paddle.init(use_gpu=False, trainer_count=1)

     image = paddle.layer.data(
         name="image", type=paddle.data_type.dense_vector(datadim))
@@ -68,8 +69,8 @@ def main():
                 result = trainer.test(
                     reader=paddle.batch(
                         paddle.dataset.cifar.test10(), batch_size=128),
-                    reader_dict={'image': 0,
-                                 'label': 1})
+                    feeding={'image': 0,
+                             'label': 1})
                 print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

     # Create trainer
@@ -83,8 +84,8 @@ def main():
             batch_size=128),
         num_passes=5,
         event_handler=event_handler,
-        reader_dict={'image': 0,
-                     'label': 1})
+        feeding={'image': 0,
+                 'label': 1})

 if __name__ == '__main__':
```
### demo/introduction/api_train_v2.py (+11, -11)

```diff
@@ -30,26 +30,26 @@ def main():
     def event_handler(event):
         if isinstance(event, paddle.event.EndIteration):
             if event.batch_id % 100 == 0:
-                print "Pass %d, Batch %d, Cost %f, %s" % (
-                    event.pass_id, event.batch_id, event.cost, event.metrics)
+                print "Pass %d, Batch %d, Cost %f" % (
+                    event.pass_id, event.batch_id, event.cost)

         if isinstance(event, paddle.event.EndPass):
-            result = trainer.test(
-                reader=paddle.reader.batched(
-                    uci_housing.test(), batch_size=2),
-                reader_dict={'x': 0,
-                             'y': 1})
-            if event.pass_id % 10 == 0:
-                print "Test %d, %.2f" % (event.pass_id, result.cost)
+            if (event.pass_id + 1) % 10 == 0:
+                result = trainer.test(
+                    reader=paddle.batch(
+                        uci_housing.test(), batch_size=2),
+                    feeding={'x': 0,
+                             'y': 1})
+                print "Test %d, %s" % (event.pass_id, result.metrics)

     # training
     trainer.train(
-        reader=paddle.reader.batched(
+        reader=paddle.batch(
             paddle.reader.shuffle(
                 uci_housing.train(), buf_size=500),
             batch_size=2),
-        reader_dict={'x': 0,
-                     'y': 1},
+        feeding={'x': 0,
+                 'y': 1},
         event_handler=event_handler,
         num_passes=30)
```
### demo/mnist/.gitignore (+3, -0)

```diff
@@ -5,3 +5,6 @@ plot.png
 train.log
 *pyc
 .ipynb_checkpoints
+params.pkl
+params.tar
+params.tar.gz
```
### demo/mnist/api_train_v2.py (+19, -6)

```diff
 import paddle.v2 as paddle
+import gzip


 def softmax_regression(img):
@@ -71,7 +72,11 @@ def main():
     cost = paddle.layer.classification_cost(input=predict, label=label)

-    parameters = paddle.parameters.create(cost)
+    try:
+        with gzip.open('params.tar.gz', 'r') as f:
+            parameters = paddle.parameters.Parameters.from_tar(f)
+    except IOError:
+        parameters = paddle.parameters.create(cost)

     optimizer = paddle.optimizer.Momentum(
         learning_rate=0.1 / 128.0,
@@ -86,11 +91,19 @@ def main():
     def event_handler(event):
         if isinstance(event, paddle.event.EndIteration):
-            if event.batch_id % 100 == 0:
-                print "Pass %d, Batch %d, Cost %f, %s" % (
-                    event.pass_id, event.batch_id, event.cost, event.metrics)
-        if isinstance(event, paddle.event.EndPass):
-            result = trainer.test(reader=paddle.reader.batched(
+            if event.batch_id % 1000 == 0:
+                result = trainer.test(reader=paddle.batch(
+                    paddle.dataset.mnist.test(), batch_size=256))
+
+                print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
+                    event.pass_id, event.batch_id, event.cost, event.metrics,
+                    result.metrics)
+
+                with gzip.open('params.tar.gz', 'w') as f:
+                    parameters.to_tar(f)
+
+        elif isinstance(event, paddle.event.EndPass):
+            result = trainer.test(reader=paddle.batch(
                 paddle.dataset.mnist.test(), batch_size=128))
             print "Test with Pass %d, Cost %f, %s\n" % (
                 event.pass_id, result.cost, result.metrics)
```
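The mnist demo now persists parameters whenever it logs, and resumes from the archive when one exists. A minimal Paddle-free sketch of that resume-or-initialize pattern (the `load` and `create` callables are hypothetical stand-ins for `Parameters.from_tar` and `paddle.parameters.create`):

```python
import gzip

def load_or_create(path, load, create):
    """Resume from a gzip checkpoint if one exists, else start fresh."""
    try:
        with gzip.open(path, 'rb') as f:  # a missing file raises IOError
            return load(f)
    except IOError:
        return create()

params = load_or_create('params.tar.gz',
                        load=lambda f: f.read(),
                        create=lambda: b'fresh parameters')
print(params)
```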
### demo/semantic_role_labeling/api_train_v2.py (+3, -3)

```diff
@@ -163,11 +163,11 @@ def main():
         update_equation=optimizer)

     parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))

-    trn_reader = paddle.reader.batched(
+    trn_reader = paddle.batch(
         paddle.reader.shuffle(
             conll05.test(), buf_size=8192), batch_size=10)

-    reader_dict = {
+    feeding = {
         'word_data': 0,
         'ctx_n2_data': 1,
         'ctx_n1_data': 2,
@@ -183,7 +183,7 @@ def main():
         reader=trn_reader,
         event_handler=event_handler,
         num_passes=10000,
-        reader_dict=reader_dict)
+        feeding=feeding)

 if __name__ == '__main__':
```
### demo/sentiment/train_v2.py (+9, -14)

```diff
@@ -18,11 +18,7 @@ from paddle.trainer_config_helpers.poolings import MaxPooling
 import paddle.v2 as paddle


-def convolution_net(input_dim,
-                    class_dim=2,
-                    emb_dim=128,
-                    hid_dim=128,
-                    is_predict=False):
+def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128):
     data = paddle.layer.data("word",
                              paddle.data_type.integer_value_sequence(input_dim))
     emb = paddle.layer.embedding(input=data, size=emb_dim)
@@ -42,8 +38,7 @@ def stacked_lstm_net(input_dim,
                      class_dim=2,
                      emb_dim=128,
                      hid_dim=512,
-                     stacked_num=3,
-                     is_predict=False):
+                     stacked_num=3):
     """
     A Wrapper for sentiment classification task.
     This network uses bi-directional recurrent network,
@@ -110,7 +105,7 @@ def stacked_lstm_net(input_dim,

 if __name__ == '__main__':
     # init
-    paddle.init(use_gpu=True, trainer_count=4)
+    paddle.init(use_gpu=False, trainer_count=4)

     # network config
     print 'load dictionary...'
@@ -143,11 +138,11 @@ if __name__ == '__main__':
             sys.stdout.flush()
         if isinstance(event, paddle.event.EndPass):
             result = trainer.test(
-                reader=paddle.reader.batched(
+                reader=paddle.batch(
                     lambda: paddle.dataset.imdb.test(word_dict),
                     batch_size=128),
-                reader_dict={'word': 0,
-                             'label': 1})
+                feeding={'word': 0,
+                         'label': 1})
             print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

     # create trainer
@@ -156,11 +151,11 @@ if __name__ == '__main__':
         update_equation=adam_optimizer)

     trainer.train(
-        reader=paddle.reader.batched(
+        reader=paddle.batch(
             paddle.reader.shuffle(
                 lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
             batch_size=100),
         event_handler=event_handler,
-        reader_dict={'word': 0,
-                     'label': 1},
+        feeding={'word': 0,
+                 'label': 1},
         num_passes=10)
```
### demo/seqToseq/api_train_v2.py (+3, -2)

```diff
@@ -110,11 +110,12 @@ def main():
         update_equation=optimizer)

     # define data reader
-    reader_dict = {
+    feeding = {
         'source_language_word': 0,
         'target_language_word': 1,
         'target_language_next_word': 2
     }

     wmt14_reader = paddle.batch(
         paddle.reader.shuffle(
             paddle.dataset.wmt14.train(dict_size=dict_size), buf_size=8192),
@@ -132,7 +133,7 @@ def main():
         reader=wmt14_reader,
         event_handler=event_handler,
         num_passes=10000,
-        reader_dict=reader_dict)
+        feeding=feeding)

 if __name__ == '__main__':
```
### doc/api/index_cn.rst (+25, -1)

```diff
@@ -1,2 +1,26 @@
 API
-===
\ No newline at end of file
+===
+
+模型配置 API
+------------
+
+..  toctree::
+    :maxdepth: 1
+
+    v2/model_configs.rst
+
+数据 API
+--------
+
+..  toctree::
+    :maxdepth: 1
+
+    v2/data.rst
+
+训练 API
+--------
+
+..  toctree::
+    :maxdepth: 1
+
+    v2/run_logic.rst
\ No newline at end of file
```
### doc/api/index_en.rst (+17, -1)

```diff
@@ -7,4 +7,20 @@ Model Config API
 ..  toctree::
     :maxdepth: 1

-    v2/model_configs.rst
\ No newline at end of file
+    v2/model_configs.rst
+
+Data API
+--------
+
+..  toctree::
+    :maxdepth: 1
+
+    v2/data.rst
+
+Train API
+---------
+
+..  toctree::
+    :maxdepth: 1
+
+    v2/run_logic.rst
\ No newline at end of file
```
### doc/api/v2/data.rst (new file, +93)

```rst
================
Data Related API
================

#########
DataTypes
#########

..  automodule:: paddle.v2.data_type
    :members:

##########
DataFeeder
##########

..  automodule:: paddle.v2.data_feeder
    :members:

######
Reader
######

..  automodule:: paddle.v2.reader
    :members:

..  automodule:: paddle.v2.reader.creator
    :members:

#########
minibatch
#########

..  automodule:: paddle.v2.minibatch
    :members:

#######
Dataset
#######

..  automodule:: paddle.v2.dataset
    :members:

mnist
+++++

..  automodule:: paddle.v2.dataset.mnist
    :members:

cifar
+++++

..  automodule:: paddle.v2.dataset.cifar
    :members:

conll05
+++++++

..  automodule:: paddle.v2.dataset.conll05
    :members:

imdb
++++

..  automodule:: paddle.v2.dataset.imdb
    :members:

imikolov
++++++++

..  automodule:: paddle.v2.dataset.imikolov
    :members:

movielens
+++++++++

..  automodule:: paddle.v2.dataset.movielens
    :members:

sentiment
+++++++++

..  automodule:: paddle.v2.dataset.sentiment
    :members:

uci_housing
+++++++++++

..  automodule:: paddle.v2.dataset.uci_housing
    :members:
```
### doc/api/v2/model_configs.rst (+11, -0)

```diff
@@ -1,3 +1,7 @@
+#########################
+Configuration Related API
+#########################
+
 ======
 Layers
 ======
@@ -33,3 +37,10 @@ Networks
 ..  automodule:: paddle.v2.networks
     :members:
+
+==========
+Optimizers
+==========
+
+..  automodule:: paddle.v2.optimizer
+    :members:
```
### doc/api/v2/run_logic.rst (new file, +26)

```rst
###########
Trainer API
###########

==========
Parameters
==========

..  automodule:: paddle.v2.parameters
    :members:

=======
Trainer
=======

..  automodule:: paddle.v2.trainer
    :members:

=====
Event
=====

..  automodule:: paddle.v2.event
    :members:
```
### doc/design/reader/README.md (+37, -37)

````diff
@@ -23,19 +23,19 @@ An example implementation for single item data reader creator:

 ```python
 def reader_creator_random_image(width, height):
     def reader():
         while True:
             yield numpy.random.uniform(-1, 1, size=width*height)
     return reader
 ```

 An example implementation for multiple item data reader creator:

 ```python
-def reader_creator_random_imageand_label(widht, height, label):
+def reader_creator_random_image_and_label(width, height, label):
     def reader():
         while True:
             yield numpy.random.uniform(-1, 1, size=width*height), label
     return reader
 ```
@@ -74,11 +74,11 @@ mnist_train_batch_reader = paddle.batch(mnist_train, 128)

 Also easy to create custom batch reader:

 ```python
 def custom_batch_reader():
     while True:
         batch = []
         for i in xrange(128):
             batch.append((numpy.random.uniform(-1, 1, 28*28),))  # note that it's a tuple being appended.
         yield batch

 mnist_random_image_batch_reader = custom_batch_reader
 ```
@@ -123,16 +123,16 @@ We can do:

 ```python
 def reader_creator_random_image(width, height):
     def reader():
         while True:
             yield numpy.random.uniform(-1, 1, size=width*height)
     return reader

 def reader_creator_bool(t):
     def reader:
         while True:
             yield t
     return reader

 true_reader = reader_creator_bool(True)
 false_reader = reader_creator_bool(False)
 ```
@@ -172,18 +172,18 @@ We decided to use dictionary (`{"image":0, "label":1}`) instead of list (`["imag

 ```python
 def image_reader_creator(image_path, label_path, n):
     def reader():
         f = open(image_path)
         l = open(label_path)
         images = numpy.fromfile(
             f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
         images = images / 255.0 * 2.0 - 1.0
         labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
         for i in xrange(n):
             yield images[i, :], labels[i]  # a single entry of data is created each time
         f.close()
         l.close()
     return reader

 # images_reader_creator creates a reader
 reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024)
 ```
@@ -196,7 +196,7 @@ An example implementation of paddle.train could be:

 ```python
 def train(batch_reader, mapping, batch_size, total_pass):
     for pass_idx in range(total_pass):
         for mini_batch in batch_reader():  # this loop will never end in online learning.
             do_forward_backward(mini_batch, mapping)
 ```
````
### doc/howto/usage/k8s/k8s_distributed_cn.md (+56, -22)

````diff
@@ -43,22 +43,55 @@ docker push [YOUR_REPO]/paddle:mypaddle

 注意上述命令中`[YOUR_REPO]`表示读者所使用的Docker镜像仓库地址,读者需要替换成自己使用的仓库地址。下文使用`[YOUR_REPO]/paddle:mypaddle`这个地址来表示此步骤所构建出的镜像。

-### 上传训练文件
+### 准备训练数据

-本文使用PaddlePaddle官方的[recommendation demo](http://www.paddlepaddle.org/doc/demo/index.html#recommendation)作为这次训练的内容,我们将训练文件与数据放在一个job name命名的目录中,上传到volume所在的共享存储(使用不同分布式存储会有不同的挂载方式,需要要先挂载这个目录,然后拷贝数据)。完成后volume中的文件内容大致如下:
+这里我们通过在Kubernetes集群上启动一个Job来下载并切割数据,也可以通过修改[k8s_train](./src/k8s_train/README.md)的内容来定制image.

-```bash
-[root@paddle-kubernetes-node0 mfs]# tree -d
+在启动Job之前,需要根据不同的分布式存储来绑定一个[persistentVolumeClaim](https://kubernetes.io/docs/user-guide/persistent-volumes/),生成的数据将会存储在这个volume下.
+
+```yaml
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: paddle-data
+spec:
+  template:
+    metadata:
+      name: pi
+    spec:
+      hostNetwork: true
+      containers:
+      - name: paddle-data
+        image: paddledev/paddle-tutorial:k8s_data
+        imagePullPolicy: Always
+        volumeMounts:
+        - mountPath: "/mnt"
+          name: nfs
+        env:
+        - name: OUT_DIR
+          value: /home/work/mfs/paddle-cluster-job
+        - name: SPLIT_COUNT
+          value: "3"
+      volumes:
+        - name: nfs
+          persistentVolumeClaim:
+            claimName: mfs
+      restartPolicy: Never
+```
+
+完成后volume中的文件内容大致如下:
+
+```base
+[root@paddle-kubernetes-node0 nfsdir]$ tree -d
 .
-└── paddle-cluster-job
-    ├── data
-    │   ├── 0
-    │   │
-    │   ├── 1
-    │   │
-    │   └── 2
-    ├── output
-    └── recommendation
+`-- paddle-cluster-job
+    |-- 0
+    |   `-- data
+    |-- 1
+    |   `-- data
+    |-- 2
+    |   `-- data
+    |-- output
+    |-- quick_start
 ```

 目录中paddle-cluster-job是本次训练对应的job name,本次训练要求有3个PaddlePaddle节点,在paddle-cluster-job/data目录中存放切分好的数据,文件夹0,1,2分别代表3个节点的trainer_id。recommendation文件夹内存放训练文件,output文件夹存放训练结果与日志。
@@ -118,15 +151,16 @@ spec:

 `env`字段表示容器的环境变量,我们将`paddle`运行的一些参数通过这种方式传递到容器内。

-`JOB_PATH`表示共享存储挂载的路径,`JOB_NAME`表示job名字,`TRAIN_CONFIG_DIR`表示本次训练文件所在目录,这三个变量组合就可以找到本次训练需要的文件路径。
-
-`CONF_PADDLE_NIC`表示`paddle pserver`进程需要的`--nics`参数,即网卡名
-
-`CONF_PADDLE_PORT`表示`paddle pserver`的`--port`参数,`CONF_PADDLE_PORTS_NUM`则表示稠密更新的端口数量,也就是`--ports_num`参数。
-
-`CONF_PADDLE_PORTS_NUM_SPARSE`表示稀疏更新的端口数量,也就是`--ports_num_for_sparse`参数。
-
-`CONF_PADDLE_GRADIENT_NUM`表示训练节点数量,即`--num_gradient_servers`参数
+环境变量 | 说明
+--- | ---
+JOB_PATH | 共享存储挂在的路径
+JOB_NAME | Job的名字
+TRAIN_CONFIG_DIR | 本次训练文件所在目录,与JOB_PATH,JOB_NAME组合可以找到本次训练需要的文件路径
+CONF_PADDLE_NIC | `paddle pserver`进程需要的`--nics`参数,即网卡名
+CONF_PADDLE_PORT | `paddle paserver`的`--port`参数
+CONF_PADDLE_PORTS_NUM | 稠密更新的端口数量,即`--ports_num`参数
+CONF_PADDLE_PORTS_NUM_SPARSE | 稀疏更新的端口数量,即`--ports_num_for_sparse`参数
+CONF_PADDLE_GRADIENT_NUM | 训练节点数量,即`--num_gradient_servers参数`

 这些参数的具体描述,读者可以查看[这里](http://www.paddlepaddle.org/doc/ui/cmd_argument/detail_introduction.html#parameter-server-and-distributed-communication)。
````
### python/paddle/trainer/PyDataProvider2.py (+89, -4)

```diff
@@ -45,6 +45,23 @@ class CacheType(object):

 class InputType(object):
+    """
+    InputType is the base class for paddle input types.
+
+    ..  note::
+
+        this is a base class, and should never be used by user.
+
+    :param dim: dimension of input. If the input is an integer, it means the
+                value range. Otherwise, it means the size of layer.
+    :type dim: int
+    :param seq_type: sequence type of input. 0 means it is not a sequence. 1
+                     means it is a variable length sequence. 2 means it is a
+                     nested sequence.
+    :type seq_type: int
+    :param type: data type of input.
+    :type type: int
+    """
     __slots__ = ['dim', 'seq_type', 'type']

     def __init__(self, dim, seq_type, tp):
@@ -54,20 +71,61 @@ class InputType(object):

 def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
+    """
+    Dense Vector. It means the input feature is dense float vector. For example,
+    if the input is an image with 28*28 pixels, the input of Paddle neural
+    network should be a dense vector with dimension 784.
+
+    :param dim: dimension of this vector.
+    :type dim: int
+    :param seq_type: sequence type of input.
+    :type seq_type: int
+    :return: An input type object.
+    :rtype: InputType
+    """
     return InputType(dim, seq_type, DataType.Dense)


 def sparse_non_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
+    """
+    Sparse binary vector. It means the input feature is a sparse vector and the
+    every element in this vector is either zero or one.
+
+    :param dim: dimension of this vector.
+    :type dim: int
+    :param seq_type: sequence type of this input.
+    :type seq_type: int
+    :return: An input type object.
+    :rtype: InputType
+    """
     return InputType(dim, seq_type, DataType.SparseNonValue)


 def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
+    """
+    Sparse vector. It means the input feature is a sparse vector. Most of the
+    elements in this vector are zero, others could be any float value.
+
+    :param dim: dimension of this vector.
+    :type dim: int
+    :param seq_type: sequence type of this input.
+    :type seq_type: int
+    :return: An input type object.
+    :rtype: InputType
+    """
     return InputType(dim, seq_type, DataType.SparseValue)


 def index_slot(value_range, seq_type=SequenceType.NO_SEQUENCE):
-    """Data type of integer.
+    """
+    Data type of integer.
+
+    :param seq_type: sequence type of this input.
+    :type seq_type: int
     :param value_range: range of this integer.
     :type value_range: int
+    :return: An input type object
+    :rtype: InputType
     """
     return InputType(value_range, seq_type, DataType.Index)
@@ -76,10 +134,17 @@ dense_vector = dense_slot
 sparse_binary_vector = sparse_non_value_slot
 sparse_vector = sparse_value_slot
 integer_value = index_slot
+integer_value.__doc__ = index_slot.__doc__


 def dense_vector_sequence(dim):
+    """
+    Data type of a sequence of dense vector.
+
+    :param dim: dimension of dense vector.
+    :type dim: int
+    :return: An input type object
+    :rtype: InputType
+    """
     return dense_vector(dim, seq_type=SequenceType.SEQUENCE)
@@ -88,6 +153,15 @@ def dense_vector_sub_sequence(dim):

 def sparse_binary_vector_sequence(dim):
+    """
+    Data type of a sequence of sparse vector, which every element is either zero
+    or one.
+
+    :param dim: dimension of sparse vector.
+    :type dim: int
+    :return: An input type object
+    :rtype: InputType
+    """
     return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE)
@@ -96,6 +170,15 @@ def sparse_binary_vector_sub_sequence(dim):

 def sparse_vector_sequence(dim):
+    """
+    Data type of a sequence of sparse vector, which most elements are zero,
+    others could be any float value.
+
+    :param dim: dimension of sparse vector.
+    :type dim: int
+    :return: An input type object
+    :rtype: InputType
+    """
     return sparse_vector(dim, seq_type=SequenceType.SEQUENCE)
@@ -104,8 +187,11 @@ def sparse_vector_sub_sequence(dim):

 def integer_value_sequence(value_range):
-    """Data type of a sequence of integer.
+    """
+    Data type of a sequence of integer.
+
     :param value_range: range of each element.
     :type value_range: int
     """
     return integer_value(value_range, seq_type=SequenceType.SEQUENCE)
@@ -115,7 +201,6 @@ def integer_value_sub_sequence(dim):

 integer_sequence = integer_value_sequence
+integer_sequence.__doc__ = integer_value_sequence.__doc__


 class SingleSlotWrapper(object):
```
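The new docstrings all describe the same `(dim, seq_type, type)` triple, and each `*_sequence` helper is just the base type with `seq_type` flipped to `SequenceType.SEQUENCE`. A compressed, Paddle-free illustration of that layering (names mirror the module, the `'dense'` tag is a simplification of `DataType.Dense`):

```python
class SequenceType(object):
    NO_SEQUENCE = 0   # a single value
    SEQUENCE = 1      # a variable-length sequence
    SUB_SEQUENCE = 2  # a nested sequence

class InputType(object):
    __slots__ = ['dim', 'seq_type', 'type']

    def __init__(self, dim, seq_type, tp):
        self.dim, self.seq_type, self.type = dim, seq_type, tp

def dense_vector(dim, seq_type=SequenceType.NO_SEQUENCE):
    return InputType(dim, seq_type, 'dense')

def dense_vector_sequence(dim):
    # the sequence helper only changes seq_type
    return dense_vector(dim, seq_type=SequenceType.SEQUENCE)

img = dense_vector(784)            # e.g. a flattened 28*28 image
seq = dense_vector_sequence(784)
print(img.seq_type, seq.seq_type)  # 0 1
```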
### python/paddle/v2/data_feeder.py (+40, -31)

```diff
@@ -12,13 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from py_paddle import swig_paddle
 from py_paddle import DataProviderConverter
-import data_type
+
+import paddle.trainer.PyDataProvider2 as pydp2

 __all__ = ['DataFeeder']


+def default_feeding_map(data_types):
+    reader_dict = dict()
+    for i, tp in enumerate(data_types):
+        reader_dict[tp[0]] = i
+    return reader_dict
+
+
 class DataFeeder(DataProviderConverter):
     """
     DataFeeder converts the data returned by paddle.reader into a data structure
@@ -29,7 +36,10 @@ class DataFeeder(DataProviderConverter):
     to feed it to C++ interface.

     The example usage:
+
+    ..  code-block:: python
+
         data_types = [('image', paddle.data_type.dense_vector(784)),
                       ('label', paddle.data_type.integer_value(10))]
         reader_dict = {'image':0, 'label':1}
@@ -43,26 +53,35 @@ class DataFeeder(DataProviderConverter):
         #  [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ]  # second sample
         # ]
         arg = feeder(minibatch_data)
+
+    ..  note::
+
+        This module is for internal use only. Users should use the `reader`
+        interface.
+
+    :param data_types: A list to specify data name and type. Each item is
+                       a tuple of (data_name, data_type).
+    :type data_types: list
+    :param reader_dict: A dictionary to specify the position of each data
+                        in the input data.
+    :type feeding: dict
     """

-    def __init__(self, data_types, reader_dict):
-        """
-        :param data_types: A list to specify data name and type. Each item is
-                           a tuple of (data_name, data_type). For example:
-                           [('image', paddle.data_type.dense_vector(784)),
-                            ('label', paddle.data_type.integer_value(10))]
-
-        :type data_types: A list of tuple
-        :param reader_dict: A dictionary to specify the position of each data
-                            in the input data.
-        :type reader_dict: dict()
-        """
+    def __init__(self, data_types, feeding=None):
         self.input_names = []
         input_types = []
-        self.reader_dict = reader_dict
+        if feeding is None:
+            feeding = default_feeding_map(data_types)
+
+        self.feeding = feeding
         for each in data_types:
             self.input_names.append(each[0])
-            assert isinstance(each[1], data_type.InputType)
+            if not isinstance(each[1], pydp2.InputType):
+                raise TypeError("second item in each data_type should be an "
+                                "InputType")
             input_types.append(each[1])
         DataProviderConverter.__init__(self, input_types)
@@ -70,22 +89,12 @@ class DataFeeder(DataProviderConverter):
         """
         :param dat: A list of mini-batch data. Each sample is a list or tuple
                     one feature or multiple features.
-                    for example:
-                    [
-                    ([0.2, 0.2], ), # first sample
-                    ([0.8, 0.3], ), # second sample
-                    ]
-                    or,
-                    [
-                    [[0.2, 0.2], ], # first sample
-                    [[0.8, 0.3], ], # second sample
-                    ]
-
-        :type dat: List
+        :type dat: list
         :param argument: An Arguments object contains this mini-batch data with
                          one or multiple features. The Arguments definition is
                          in the API.
-        :type argument: swig_paddle.Arguments
+        :type argument: py_paddle.swig_paddle.Arguments
         """
@@ -93,7 +102,7 @@ class DataFeeder(DataProviderConverter):
         def reorder_data(data):
             retv = []
             for each in data:
                 reorder = []
                 for name in self.input_names:
-                    reorder.append(each[self.reader_dict[name]])
+                    reorder.append(each[self.feeding[name]])
                 retv.append(reorder)
             return retv
```
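The new `default_feeding_map` helper means callers can omit `feeding` whenever samples are already ordered like `data_types`. A standalone rendering of that default map plus the `reorder_data` lookup it drives (plain strings stand in for Paddle input types):

```python
def default_feeding_map(data_types):
    # same logic as the helper in the diff: input name -> position in a sample
    feeding = dict()
    for i, (name, _) in enumerate(data_types):
        feeding[name] = i
    return feeding

data_types = [('image', 'dense_vector(784)'), ('label', 'integer_value(10)')]
feeding = default_feeding_map(data_types)  # {'image': 0, 'label': 1}

sample = ('pixel-data', 7)                 # (image, label)
reordered = [sample[feeding[name]] for name, _ in data_types]
print(feeding, reordered)                  # values fed in declared order
```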
### python/paddle/v2/data_type.py (+10, -6)

```diff
@@ -12,11 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from paddle.trainer.PyDataProvider2 import \
-    InputType, DataType, dense_vector, sparse_binary_vector, \
-    sparse_vector, integer_value, integer_value_sequence
+import paddle.trainer.PyDataProvider2 as pydp2

-__all__ = [
-    'InputType', 'DataType', 'dense_vector', 'sparse_binary_vector',
-    'sparse_vector', 'integer_value', 'integer_value_sequence'
-]
+import_list = [
+    nm for nm in dir(pydp2)
+    if '_' in nm and nm[0] != '_' and ('value' in nm or 'vector' in nm)
+]
+import_list.extend(['InputType'])
+
+for nm in import_list:
+    globals()[nm] = getattr(pydp2, nm)
+
+__all__ = import_list
```
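The rewritten `data_type.py` no longer lists names by hand: it scans `PyDataProvider2` and re-exports everything that matches an input-type naming convention. The same trick, demonstrated against the standard `math` module so it runs anywhere (the `'log'` filter is only for the demo):

```python
import math

import_list = [
    nm for nm in dir(math)
    if not nm.startswith('_') and 'log' in nm  # filter by naming convention
]

for nm in import_list:
    globals()[nm] = getattr(math, nm)          # rebind into this namespace

__all__ = import_list
print(sorted(import_list))  # e.g. ['log', 'log10', 'log1p', 'log2']
print(log10(100.0))         # the re-exported name is directly usable
```

The upside is that new types added to `PyDataProvider2` appear in `paddle.v2.data_type` automatically; the cost is that the export list is implicit and depends on the naming convention holding.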
### python/paddle/v2/dataset/__init__.py (+3, -0)

```diff
@@ -11,6 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Dataset package.
+"""

 import mnist
 import imikolov
```
### python/paddle/v2/dataset/cifar.py (+2, -0)

```diff
@@ -13,6 +13,8 @@
 # limitations under the License.
 """
 CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
+
+TODO(yuyang18): Complete the comments.
 """

 import cPickle
```
### python/paddle/v2/dataset/conll05.py (+4, -2)

```diff
@@ -16,15 +16,17 @@ import tarfile
 import gzip
 import itertools
 from common import download
-__all__ = ['test, get_dict', 'get_embedding']
+
 """
 Conll 2005 dataset.  Paddle semantic role labeling Book and demo use this
 dataset as an example. Because Conll 2005 is not free in public, the default
 downloaded URL is test set of Conll 2005 (which is public). Users can change
 URL and MD5 to their Conll dataset.
+
+TODO(yuyang18): Complete comments.
 """
+__all__ = ['test, get_dict', 'get_embedding']

 DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
 DATA_MD5 = '387719152ae52d60422c016e92a742fc'
 WORDDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt'
```
### python/paddle/v2/dataset/imdb.py (+2, -0)

```diff
@@ -13,6 +13,8 @@
 # limitations under the License.
 """
 IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
+
+TODO(yuyang18): Complete comments.
 """

 import paddle.v2.dataset.common
```
### python/paddle/v2/dataset/imikolov.py (+2, -0)

```diff
@@ -13,6 +13,8 @@
 # limitations under the License.
 """
 imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
+
+Complete comments.
 """
 import paddle.v2.dataset.common
 import tarfile
```
### python/paddle/v2/dataset/mnist.py (+21, -0)

```diff
@@ -13,6 +13,9 @@
 # limitations under the License.
 """
 MNIST dataset.
+
+This module will download dataset from http://yann.lecun.com/exdb/mnist/ and
+parse train set and test set into paddle reader creators.
 """
 import paddle.v2.dataset.common
 import subprocess
@@ -72,6 +75,15 @@ def reader_creator(image_filename, label_filename, buffer_size):

 def train():
+    """
+    MNIST train set creator.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 9].
+
+    :return: Train reader creator
+    :rtype: callable
+    """
     return reader_creator(
         paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist',
                                           TRAIN_IMAGE_MD5),
@@ -80,6 +92,15 @@ def train():

 def test():
+    """
+    MNIST test set cretor.
+
+    It returns a reader creator, each sample in the reader is image pixels in
+    [0, 1] and label in [0, 9].
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
     return reader_creator(
         paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist',
                                           TEST_IMAGE_MD5),
```
### python/paddle/v2/dataset/movielens.py (+5, -0)

```diff
@@ -11,6 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Movielens 1-M dataset.
+
+TODO(yuyang18): Complete comments.
+"""

 import zipfile
 from common import download
```
### python/paddle/v2/dataset/sentiment.py (+6, -5)

```diff
@@ -15,18 +15,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 The script fetch and preprocess movie_reviews data set
 that provided by NLTK
+
+TODO(yuyang18): Complete dataset.
 """
-import common
 import collections
-import nltk
-import numpy as np
 from itertools import chain
+
+import nltk
+import numpy as np
 from nltk.corpus import movie_reviews

+import common
+
 __all__ = ['train', 'test', 'get_word_dict']
 NUM_TRAINING_INSTANCES = 1600
 NUM_TOTAL_INSTANCES = 2000
```
### python/paddle/v2/dataset/uci_housing.py (+5, -0)

```diff
@@ -11,6 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+UCI Housing dataset.
+
+TODO(yuyang18): Complete comments.
+"""

 import numpy as np
 import os
```
### python/paddle/v2/event.py (+4, -0)

```diff
@@ -34,6 +34,10 @@ class WithMetric(object):

 class TestResult(WithMetric):
+    """
+    Result that trainer.test return.
+    """
+
     def __init__(self, evaluator, cost):
         super(TestResult, self).__init__(evaluator)
         self.cost = cost
```
### python/paddle/v2/inference.py (+4, -12)

```diff
@@ -21,10 +21,8 @@ class Inference(object):
         self.__gradient_machine__ = gm
         self.__data_types__ = topo.data_type()

-    def iter_infer(self, reader, reader_dict=None):
-        if reader_dict is None:
-            reader_dict = self.default_reader_dict()
-        feeder = DataFeeder(self.__data_types__, reader_dict)
+    def iter_infer(self, reader, feeding=None):
+        feeder = DataFeeder(self.__data_types__, feeding)
         self.__gradient_machine__.start()
         for data_batch in reader():
             yield self.__gradient_machine__.forwardTest(feeder(data_batch))
@@ -47,13 +45,7 @@ class Inference(object):
         else:
             return retv

-    def default_reader_dict(self):
-        reader_dict = dict()
-        for i, tp in enumerate(self.__data_types__):
-            reader_dict[tp[0]] = i
-        return reader_dict
-

-def infer(output, parameters, reader, reader_dict=None, field='value'):
+def infer(output, parameters, reader, feeding=None, field='value'):
     inferer = Inference(output=output, parameters=parameters)
-    return inferer.infer(field=field, reader=reader, reader_dict=reader_dict)
+    return inferer.infer(field=field, reader=reader, feeding=feeding)
```
### python/paddle/v2/minibatch.py (+16, -10)

```diff
@@ -12,24 +12,30 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+__all__ = ['batch']
+

 def batch(reader, batch_size):
     """
-    Create a batch reader.
+    Create a batched reader.
+
     :param reader: the data reader to read from.
-    :param batch_size: batch_size
-    :return: the batch reader.
+    :type reader: callable
+    :param batch_size: size of each mini-batch
+    :type batch_size: int
+    :return: the batched reader.
+    :rtype: callable
     """

     def batch_reader():
         r = reader()
-        batch = []
+        b = []
         for instance in r:
-            batch.append(instance)
-            if len(batch) == batch_size:
-                yield batch
-                batch = []
-        if batch:
-            yield batch
+            b.append(instance)
+            if len(b) == batch_size:
+                yield b
+                b = []
+        if b:
+            yield b

     return batch_reader
```
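Since `batch` is now the public entry point (via `paddle.batch`), a Paddle-free copy of the logic above shows its semantics, including the trailing short batch:

```python
def batch(reader, batch_size):
    """Group single samples from `reader` into lists of `batch_size`."""
    def batch_reader():
        b = []
        for instance in reader():
            b.append(instance)
            if len(b) == batch_size:
                yield b
                b = []
        if b:  # the final batch may be smaller than batch_size
            yield b
    return batch_reader

numbers = lambda: iter(range(7))
print(list(batch(numbers, 3)()))  # [[0, 1, 2], [3, 4, 5], [6]]
```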
### python/paddle/v2/optimizer.py (+8, -3)

```diff
 import py_paddle.swig_paddle as swig_api
-import paddle.trainer_config_helpers.optimizers as v1_optimizers
+
 import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils
-import paddle.v2
+import paddle.trainer_config_helpers.optimizers as v1_optimizers
+"""
+Optimizers(update equation) for SGD method.
+
+TODO(yuyang18): Complete comments.
+"""

 __all__ = [
     'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta',
@@ -44,7 +49,7 @@ class Optimizer(object):

 class Momentum(Optimizer):
     def __init__(self, momentum=None, sparse=False, **kwargs):
         learning_method = v1_optimizers.MomentumOptimizer(
-            momentum=None, sparse=False)
+            momentum=momentum, sparse=sparse)
         super(Momentum, self).__init__(
             learning_method=learning_method, **kwargs)
```
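The `Momentum` hunk is a real bug fix: the constructor previously hardcoded `momentum=None, sparse=False` instead of forwarding its arguments, so user settings were silently dropped. A minimal, Paddle-free illustration of the bug pattern and the fix (stub `MomentumOptimizer`, not Paddle's):

```python
class MomentumOptimizer(object):
    def __init__(self, momentum=None, sparse=False):
        self.momentum, self.sparse = momentum, sparse

def make_optimizer_buggy(momentum=None, sparse=False):
    return MomentumOptimizer(momentum=None, sparse=False)  # arguments dropped

def make_optimizer_fixed(momentum=None, sparse=False):
    return MomentumOptimizer(momentum=momentum, sparse=sparse)

print(make_optimizer_buggy(momentum=0.9).momentum)  # None: setting lost
print(make_optimizer_fixed(momentum=0.9).momentum)  # 0.9
```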
### python/paddle/v2/parameters.py (+76, -1)

```diff
 import numpy as np
 import py_paddle.swig_paddle as api
 from paddle.proto.ParameterConfig_pb2 import ParameterConfig
+import struct
+import tarfile
+import cStringIO

 from topology import Topology

 __all__ = ['Parameters', 'create']
@@ -10,6 +12,7 @@ __all__ = ['Parameters', 'create']
 def create(layers):
     """
     Create parameter pool by topology.
+
     :param layers:
     :return:
     """
@@ -67,6 +70,7 @@ class Parameters(object):
     def keys(self):
         """
         keys are the names of each parameter.
+
         :return: list of parameter name
         :rtype: list
         """
@@ -75,6 +79,7 @@ class Parameters(object):
     def names(self):
         """
         names of each parameter.
+
         :return: list of parameter name
         :rtype: list
         """
@@ -83,6 +88,7 @@ class Parameters(object):
     def has_key(self, key):
         """
         has_key return true if there are such parameter name == key
+
         :param key: Parameter name
         :type key: basestring
         :return: True if contains such key
@@ -118,6 +124,12 @@ class Parameters(object):
         if len(self.__gradient_machines__) == 0:
             # create new parameter in python numpy.
+            if len(self.__tmp_params__) != 0:
+                ret_list = [
+                    mat for name, mat in self.__tmp_params__ if name == key
+                ]
+                if len(ret_list) == 1:
+                    return ret_list[0]
             return np.ndarray(shape=shape, dtype=np.float32)
         else:
             for each_gradient_machine in self.__gradient_machines__:
@@ -136,6 +148,7 @@ class Parameters(object):
     def get_shape(self, key):
         """
         get shape of the parameter.
+
         :param key: parameter name
         :type key: basestring
         :return: parameter's shape
@@ -190,6 +203,7 @@ class Parameters(object):
     def set(self, parameter_name, value):
         """
         Set parameter by parameter name & matrix.
+
         :param parameter_name: parameter name
         :type parameter_name: basestring
         :param value: parameter matrix
@@ -222,6 +236,67 @@ class Parameters(object):
         self.__gradient_machines__.append(gradient_machine)

+    def serialize(self, name, f):
+        """
+
+        :param name:
+        :param f:
+        :type f: file
+        :return:
+        """
+        param = self.get(name)
+        size = reduce(lambda a, b: a * b, param.shape)
+        f.write(struct.pack("IIQ", 0, 4, size))
+        param = param.astype(np.float32)
+        f.write(param.tobytes())
+
+    def deserialize(self, name, f):
+        """
+
+        :param name:
+        :param f:
+        :type f: file
+        :return:
+        """
+        f.read(16)  # header
+        arr = np.frombuffer(f.read(), dtype=np.float32)
+        self.set(name, arr.reshape(self.get_shape(name)))
+
+    def to_tar(self, f):
+        tar = tarfile.TarFile(fileobj=f, mode='w')
+        for nm in self.names():
+            buf = cStringIO.StringIO()
+            self.serialize(nm, buf)
+
+            tarinfo = tarfile.TarInfo(name=nm)
+            buf.seek(0)
+            tarinfo.size = len(buf.getvalue())
+            tar.addfile(tarinfo, buf)
+
+            conf = self.__param_conf__[nm]
+            confStr = conf.SerializeToString()
+            tarinfo = tarfile.TarInfo(name="%s.protobuf" % nm)
+            tarinfo.size = len(confStr)
+            buf = cStringIO.StringIO(confStr)
+            buf.seek(0)
+            tar.addfile(tarinfo, fileobj=buf)
+
+    @staticmethod
+    def from_tar(f):
+        params = Parameters()
+        tar = tarfile.TarFile(fileobj=f, mode='r')
+        for finfo in tar:
+            assert isinstance(finfo, tarfile.TarInfo)
+            if finfo.name.endswith('.protobuf'):
+                f = tar.extractfile(finfo)
+                conf = ParameterConfig()
+                conf.ParseFromString(f.read())
+                params.__append_config__(conf)
+
+        for param_name in params.names():
+            f = tar.extractfile(param_name)
+            params.deserialize(param_name, f)
+        return params
+

 def __get_parameter_in_gradient_machine__(gradient_machine, name):
     """
```
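A Paddle-free re-creation of the on-disk format implied by `serialize`/`deserialize` above: a 16-byte header written with `struct.pack("IIQ", ...)` (the diff passes a version of 0, a value size of 4, and the element count), followed by raw float32 bytes. The round trip can be checked with just numpy:

```python
import struct
import tarfile  # used by to_tar/from_tar; not needed for a single array
from io import BytesIO

import numpy as np

def serialize(param, f):
    # header: version (I), bytes per value (I), number of values (Q) = 16 bytes
    f.write(struct.pack("IIQ", 0, 4, param.size))
    f.write(param.astype(np.float32).tobytes())

def deserialize(f, shape):
    f.read(16)  # skip the fixed-size header
    return np.frombuffer(f.read(), dtype=np.float32).reshape(shape)

buf = BytesIO()
w = np.arange(6, dtype=np.float32).reshape(2, 3)
serialize(w, buf)
buf.seek(0)
print(np.array_equal(w, deserialize(buf, (2, 3))))  # True
```

`to_tar` then simply stores one such blob per parameter plus a `<name>.protobuf` sibling holding the serialized `ParameterConfig`, which is what lets `from_tar` rebuild the configs before loading values.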
### python/paddle/v2/reader/__init__.py (+57, -8)

```diff
@@ -11,15 +11,64 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-# It would be too lengthy to require our users to prefix decorators with `decorator`.
-# For example, we want the following line
-#
-# r = paddle.reader.decorator.bufferd(paddle.reader.creator.text("hello.txt"))
-#
-# to be a shorter version:
-#
-# r = paddle.reader.buffered(paddle.reader.creator.text("hello.txt"))
+"""
+At training and testing time, PaddlePaddle programs need to read data. To ease
+the users' work to write data reading code, we define that
+
+- A *reader* is a function that reads data (from file, network, random number
+  generator, etc) and yields data items.
+- A *reader creator* is a function that returns a reader function.
+- A *reader decorator* is a function, which accepts one or more readers, and
+  returns a reader.
+- A *batch reader* is a function that reads data (from *reader*, file, network,
+  random number generator, etc) and yields a batch of data items.
+
+#####################
+Data Reader Interface
+#####################
+
+Indeed, *data reader* doesn't have to be a function that reads and yields data
+items. It can be any function with no parameter that creates a iterable
+(anything can be used in :code:`for x in iterable`)\:
+
+..  code-block:: python
+
+    iterable = data_reader()
+
+Element produced from the iterable should be a **single** entry of data,
+**not** a mini batch. That entry of data could be a single item, or a tuple of
+items. Item should be of `supported type <http://www.paddlepaddle.org/doc/ui/data_provider
+/pydataprovider2.html?highlight=dense_vector#input-types>`_ (e.g., numpy 1d
+array of float32, int, list of int)
+
+An example implementation for single item data reader creator:
+
+..  code-block:: python
+
+    def reader_creator_random_image(width, height):
+        def reader():
+            while True:
+                yield numpy.random.uniform(-1, 1, size=width*height)
+        return reader
+
+An example implementation for multiple item data reader creator:
+
+..  code-block:: python
+
+    def reader_creator_random_image_and_label(width, height, label):
+        def reader():
+            while True:
+                yield numpy.random.uniform(-1, 1, size=width*height), label
+        return reader
+
+
+TODO(yuyang18): Should we add whole design doc here?
+"""
+
 import decorator
 from decorator import *
+
 import creator

 __all__ = decorator.__all__ + ['creator']
```
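The new module docstring fixes four terms: reader, reader creator, reader decorator, and batch reader. A runnable toy that exercises the first three (random numbers stand in for real data):

```python
import random

def reader_creator_random_number(seed, n=5):
    """A *reader creator*: returns a zero-argument reader."""
    def reader():  # a *reader*: yields single data items
        rng = random.Random(seed)
        for _ in range(n):
            yield rng.uniform(-1, 1)
    return reader

def rounded(reader, digits=3):
    """A *reader decorator*: wraps a reader and returns a new reader."""
    def new_reader():
        for item in reader():
            yield round(item, digits)
    return new_reader

for x in rounded(reader_creator_random_number(42))():
    print(x)
```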
python/paddle/v2/reader/creator.py
浏览文件 @
37806792
...
@@ -11,6 +11,10 @@
...
@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""
Creator package contains some simple reader creator, which could be used in user
program.
"""
__all__
=
[
'np_array'
,
'text_file'
]
__all__
=
[
'np_array'
,
'text_file'
]
...
@@ -38,7 +42,7 @@ def np_array(x):

def text_file(path):
    """
    Creates a data reader that outputs text line by line from given text file.
    Trailing new line ('\\\\n') of each line will be removed.

    :path: path of the text file.
    :returns: data reader of text file
    """
...
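For context, a minimal creator consistent with that docstring might look like the following sketch (not claimed to be the exact code in this commit):

def text_file(path):
    # Reader creator: returns a reader that yields one stripped line
    # per data item from the given text file.
    def reader():
        f = open(path, "r")
        for line in f:
            yield line.rstrip('\n')
        f.close()
    return reader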
python/paddle/v2/reader/decorator.py
...
@@ -28,9 +28,11 @@ def map_readers(func, *readers):
    Creates a data reader that outputs return value of function using
    outputs of each data reader as arguments.

    :param func: function to use. The type of func should be (Sample) => Sample
    :type func: callable
    :param readers: readers whose outputs will be used as arguments of func.
    :return: the created data reader.
    :rtype: callable
    """

    def reader():
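A small usage sketch for map_readers; the two toy readers below are invented for illustration:

def ints():
    for i in [1, 2, 3]:
        yield i

def tens():
    for i in [10, 20, 30]:
        yield i

# func is applied itertools.imap-style: one item from each reader per step.
summed = map_readers(lambda a, b: a + b, ints, tens)
print list(summed())  # [11, 22, 33]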
...
@@ -45,16 +47,19 @@ def map_readers(func, *readers):

def shuffle(reader, buf_size):
    """
    Creates a data reader whose data output is shuffled.

    Output from the iterator that is created by the original reader will be
    buffered into a shuffle buffer, and then shuffled. The size of the shuffle
    buffer is determined by argument buf_size.

    :param reader: the original reader whose output will be shuffled.
    :type reader: callable
    :param buf_size: shuffle buffer size.
    :type buf_size: int

    :return: the new reader whose output is shuffled.
    :rtype: callable
    """

    def data_reader():
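The buffer-and-flush behaviour described in that docstring fits in a few lines; a sketch under the same contract (not necessarily the exact code in this commit):

import random

def shuffle(reader, buf_size):
    def data_reader():
        buf = []
        for e in reader():
            buf.append(e)
            if len(buf) >= buf_size:
                # Shuffle within the buffer, then flush it.
                random.shuffle(buf)
                for b in buf:
                    yield b
                buf = []
        # Flush whatever is left at end of input.
        if len(buf) > 0:
            random.shuffle(buf)
            for b in buf:
                yield b
    return data_reader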
...
@@ -88,7 +93,8 @@ def chain(*readers):
    [0, 0, 0, 1, 1, 1, 2, 2, 2]

    :param readers: input readers.
    :return: the new data reader.
    :rtype: callable
    """

    def reader():
...
@@ -115,12 +121,13 @@ def compose(*readers, **kwargs):
    The composed reader will output:
    (1, 2, 3, 4, 5)

    :param readers: readers that will be composed together.
    :param check_alignment: if True, will check if input readers are aligned
        correctly. If False, will not check alignment and trailing outputs
        will be discarded. Defaults to True.
    :type check_alignment: bool

    :return: the new data reader.

    :raises ComposeNotAligned: outputs of readers are not aligned.
        Will not raise when check_alignment is set to False.
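A usage sketch for compose, mirroring the (1, 2, 3, 4, 5) example in its docstring; the reader names are invented for illustration:

def r1():
    yield 1

def r23():
    yield (2, 3)

def r45():
    yield (4, 5)

# Per the docstring, single items and tuples from each reader are
# flattened into one combined tuple per step.
composed = compose(r1, r23, r45)
print list(composed())  # [(1, 2, 3, 4, 5)]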
...
@@ -161,7 +168,9 @@ def buffered(reader, size):
    as the buffer is not empty.

    :param reader: the data reader to read from.
    :type reader: callable
    :param size: max buffer size.
    :type size: int

    :returns: the buffered data reader.
    """
...
@@ -196,6 +205,13 @@ def buffered(reader, size):

def firstn(reader, n):
    """
    Limit the max number of samples that reader could return.

    :param reader: the data reader to read from.
    :type reader: callable
    :param n: the max number of samples to return.
    :type n: int
    :return: the decorated reader.
    :rtype: callable
    """

    # TODO(yuyang18): Check if just drop the reader, could clean the opened
...
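A minimal sketch of a firstn that satisfies this docstring:

def firstn(reader, n):
    def firstn_reader():
        # Stop after yielding the first n items of the wrapped reader.
        for i, item in enumerate(reader()):
            if i == n:
                break
            yield item
    return firstn_reader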
python/paddle/v2/tests/run_tests.sh
...
@@ -22,7 +22,7 @@ cd $SCRIPTPATH
$1 -m pip install ../../../../paddle/dist/*.whl
test_list="test_data_feeder.py test_parameters.py"
export PYTHONPATH=$PWD/../../../../python/
...
python/paddle/v2/tests/test_parameters.py
0 → 100644
import unittest
import sys

try:
    import py_paddle

    del py_paddle
except ImportError:
    print >> sys.stderr, "It seems swig of Paddle is not installed, this " \
                         "unittest will not be run."
    sys.exit(0)

import paddle.v2.parameters as parameters
from paddle.proto.ParameterConfig_pb2 import ParameterConfig
import random
import cStringIO
import numpy


def __rand_param_config__(name):
    conf = ParameterConfig()
    conf.name = name
    size = 1
    for i in xrange(2):
        dim = random.randint(1, 1000)
        conf.dims.append(dim)
        size *= dim
    conf.size = size
    assert conf.IsInitialized()
    return conf


class TestParameters(unittest.TestCase):
    def test_serialization(self):
        params = parameters.Parameters()
        params.__append_config__(__rand_param_config__("param_0"))
        params.__append_config__(__rand_param_config__("param_1"))

        for name in params.names():
            param = params.get(name)
            param[:] = numpy.random.uniform(
                -1.0, 1.0, size=params.get_shape(name))
            params.set(name, param)

        tmp_file = cStringIO.StringIO()
        params.to_tar(tmp_file)
        tmp_file.seek(0)
        params_dup = parameters.Parameters.from_tar(tmp_file)

        self.assertEqual(params_dup.names(), params.names())

        for name in params.names():
            self.assertEqual(params.get_shape(name),
                             params_dup.get_shape(name))
            p0 = params.get(name)
            p1 = params_dup.get(name)
            self.assertTrue(numpy.isclose(p0, p1).all())


if __name__ == '__main__':
    unittest.main()
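Outside the unit test, the same round-trip works against a real file; a hedged sketch reusing only the to_tar/from_tar API exercised above (the file name is illustrative, and `params` is assumed to be a populated Parameters instance):

import paddle.v2.parameters as parameters

# Serialize parameters to a tar file on disk.
with open('params.tar', 'wb') as f:
    params.to_tar(f)

# Restore them and confirm the round-trip preserved parameter names.
with open('params.tar', 'rb') as f:
    restored = parameters.Parameters.from_tar(f)
assert restored.names() == params.names()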
python/paddle/v2/tests/test_topology.py
...
@@ -16,6 +16,7 @@ import paddle.v2.layer as layer
import paddle.v2.topology as topology
import paddle.v2.data_type as data_type
import paddle.trainer_config_helpers as conf_helps
import paddle.trainer.PyDataProvider2 as pydp2


class TestTopology(unittest.TestCase):
...
@@ -35,13 +36,13 @@ class TestTopology(unittest.TestCase):
        pixel_data_type = filter(lambda type: type[0] == "pixel", data_types)
        self.assertEqual(len(pixel_data_type), 1)
        pixel_data_type = pixel_data_type[0]
        self.assertEqual(pixel_data_type[1].type, pydp2.DataType.Dense)
        self.assertEqual(pixel_data_type[1].dim, 784)

        label_data_type = filter(lambda type: type[0] == "label", data_types)
        self.assertEqual(len(label_data_type), 1)
        label_data_type = label_data_type[0]
        self.assertEqual(label_data_type[1].type, pydp2.DataType.Index)
        self.assertEqual(label_data_type[1].dim, 10)

    def test_get_layer(self):
...
python/paddle/v2/trainer.py
...
@@ -9,6 +9,10 @@ from . import optimizer as v2_optimizer
from . import parameters as v2_parameters

__all__ = ['SGD']
"""
Trainer package
TODO(yuyang18): Complete comments.
"""


def default_event_handler(event):
...
@@ -22,14 +26,20 @@ def default_event_handler(event):
    pass


class SGD(object):
    def __init__(self, cost, parameters, update_equation):
        """
        Simple SGD Trainer.
        TODO(yuyang18): Complete comments

        :param update_equation: The optimizer object.
        :type update_equation: paddle.v2.optimizer.Optimizer
        :param cost: Target cost that neural network should be optimized.
        :type cost: paddle.v2.config_base.Layer
        :param parameters: The parameters dictionary.
        :type parameters: paddle.v2.parameters.Parameters
        """
        if not isinstance(parameters, v2_parameters.Parameters):
            raise TypeError('parameters should be parameters')
...
@@ -47,29 +57,26 @@ class SGD():
            self.__topology_in_proto__, api.CREATE_MODE_NORMAL,
            self.__optimizer__.enable_types())
        assert isinstance(gm, api.GradientMachine)
        self.__gradient_machine__ = gm
        self.__gradient_machine__.randParameters()
        parameters.append_gradient_machine(gm)

    def train(self, reader, num_passes=1, event_handler=None, feeding=None):
        """
        Training method. Will train num_passes of input data.

        :param reader:
        :param num_passes: The total train passes.
        :param event_handler: Event handler. A method will be invoked when event
                              occurred.
        :type event_handler: (BaseEvent) => None
        :param feeding: Feeding is a map of neural network input name and array
                        index that reader returns.
        :type feeding: dict
        :return:
        """
        if event_handler is None:
            event_handler = default_event_handler
        __check_train_args__(**locals())

        updater = self.__optimizer__.create_local_updater()
...
@@ -81,9 +88,7 @@ class SGD():
        pass_evaluator = self.__gradient_machine__.makeEvaluator()
        assert isinstance(pass_evaluator, api.Evaluator)
        out_args = api.Arguments.createArguments(0)
        feeder = DataFeeder(self.__data_types__, feeding)

        for pass_id in xrange(num_passes):
            event_handler(v2_event.BeginPass(pass_id))
            pass_evaluator.start()
...
@@ -117,17 +122,8 @@ class SGD():
        event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator))
        self.__gradient_machine__.finish()

    def test(self, reader, feeding=None):
        feeder = DataFeeder(self.__data_types__, feeding)
        evaluator = self.__gradient_machine__.makeEvaluator()
        out_args = api.Arguments.createArguments(0)
        evaluator.start()
...
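The new feeding argument that replaces reader_dict maps each network input name to the position of the corresponding element in the tuples the reader yields. A hedged usage sketch; the layer names, readers, and surrounding objects below are illustrative, not taken from this commit:

import paddle.v2 as paddle

# Each sample yielded by train_reader is assumed to be a tuple
# (image_vector, label_int); 'pixel' and 'label' are assumed to be the
# data layer names in the topology.
feeding = {'pixel': 0, 'label': 1}

# cost, params, optimizer, train_reader, test_reader defined elsewhere.
trainer = paddle.trainer.SGD(cost=cost,
                             parameters=params,
                             update_equation=optimizer)
trainer.train(reader=train_reader,
              num_passes=2,
              feeding=feeding)
trainer.test(reader=test_reader, feeding=feeding)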