Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
62ff19e3
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
62ff19e3
编写于
3月 02, 2017
作者:
D
dangqingqing
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into fit_a_line
上级
80c9f661
061e743c
变更
17
隐藏空白更改
内联
并排
Showing
17 changed file
with
775 addition
and
19 deletion
+775
-19
python/paddle/v2/__init__.py
python/paddle/v2/__init__.py
+1
-0
python/paddle/v2/config_base.py
python/paddle/v2/config_base.py
+28
-5
python/paddle/v2/dataset/__init__.py
python/paddle/v2/dataset/__init__.py
+14
-0
python/paddle/v2/dataset/cifar.py
python/paddle/v2/dataset/cifar.py
+14
-0
python/paddle/v2/dataset/common.py
python/paddle/v2/dataset/common.py
+14
-0
python/paddle/v2/dataset/conll05.py
python/paddle/v2/dataset/conll05.py
+205
-0
python/paddle/v2/dataset/imdb.py
python/paddle/v2/dataset/imdb.py
+1
-3
python/paddle/v2/dataset/imikolov.py
python/paddle/v2/dataset/imikolov.py
+13
-0
python/paddle/v2/dataset/mnist.py
python/paddle/v2/dataset/mnist.py
+13
-0
python/paddle/v2/dataset/movielens.py
python/paddle/v2/dataset/movielens.py
+14
-0
python/paddle/v2/dataset/tests/cifar_test.py
python/paddle/v2/dataset/tests/cifar_test.py
+14
-0
python/paddle/v2/dataset/tests/common_test.py
python/paddle/v2/dataset/tests/common_test.py
+14
-0
python/paddle/v2/dataset/tests/imdb_test.py
python/paddle/v2/dataset/tests/imdb_test.py
+14
-0
python/paddle/v2/dataset/tests/mnist_test.py
python/paddle/v2/dataset/tests/mnist_test.py
+14
-0
python/paddle/v2/layer.py
python/paddle/v2/layer.py
+240
-8
python/paddle/v2/tests/CMakeLists.txt
python/paddle/v2/tests/CMakeLists.txt
+7
-3
python/paddle/v2/tests/test_rnn_layer.py
python/paddle/v2/tests/test_rnn_layer.py
+155
-0
未找到文件。
python/paddle/v2/__init__.py
浏览文件 @
62ff19e3
...
...
@@ -20,6 +20,7 @@ import event
import
data_type
import
topology
import
data_feeder
import
networks
from
.
import
dataset
from
.
import
reader
import
attr
...
...
python/paddle/v2/config_base.py
浏览文件 @
62ff19e3
...
...
@@ -22,6 +22,7 @@ class Layer(object):
def __init__(self, name=None, parent_layers=None):
    """
    Initialise a layer node.

    :param name: name of the layer, may be None.
    :param parent_layers: dict holding the parents of this layer. It has
        to be a dict; the ``None`` default does not pass the assertion.
    """
    assert isinstance(parent_layers, dict)
    self.__parent_layers__ = parent_layers
    # Empty until to_proto stores the context it was called with.
    self.__contex__ = dict()
    self.name = name
def
to_proto
(
self
,
context
):
...
...
@@ -39,16 +40,38 @@ class Layer(object):
self
.
__parent_layers__
[
layer_name
])
kwargs
[
layer_name
]
=
v1_layer
if
self
.
name
is
None
:
if
self
.
context_name
()
is
None
:
return
self
.
to_proto_impl
(
**
kwargs
)
elif
self
.
name
not
in
context
:
context
[
self
.
name
]
=
self
.
to_proto_impl
(
**
kwargs
)
return
context
[
self
.
name
]
elif
self
.
context_name
()
not
in
context
:
context
[
self
.
context_name
()]
=
self
.
to_proto_impl
(
**
kwargs
)
self
.
__contex__
=
context
if
self
.
use_context_name
():
return
context
[
self
.
context_name
()]
else
:
return
context
[
self
.
name
]
def to_proto_impl(self, **kwargs):
    """
    Hook that produces the result stored for this layer by ``to_proto``.

    The base class provides no implementation; subclasses override it.

    :raise NotImplementedError: always.
    """
    raise NotImplementedError()
def context_name(self):
    """
    Return the key under which the `to_proto_impl` result of this layer
    is stored in the context.

    Layers that share one context name have `to_proto_impl` invoked only
    once. By default the key is the layer name.
    """
    return self.name
def use_context_name(self):
    """
    Tell `to_proto` which context key to return the result from.

    :return: False, meaning the entry stored under ``self.name`` is
        returned rather than the one stored under ``context_name()``.
    """
    return False
def calculate_size(self):
    """
    Lazily look up the size of this layer.

    It reads the context remembered by `to_proto`, so it should only be
    called once `to_proto_impl` of this layer has been invoked.

    :return: the ``size`` attribute of the entry stored under
        ``context_name()``.
    """
    stored = self.__contex__[self.context_name()]
    return stored.size
def
__convert_to_v2__
(
method_name
,
parent_names
,
is_default_name
=
True
):
if
is_default_name
:
...
...
python/paddle/v2/dataset/__init__.py
浏览文件 @
62ff19e3
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
mnist
import
imikolov
import
imdb
...
...
python/paddle/v2/dataset/cifar.py
浏览文件 @
62ff19e3
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
"""
import
cPickle
import
itertools
import
numpy
...
...
python/paddle/v2/dataset/common.py
浏览文件 @
62ff19e3
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
requests
import
hashlib
import
os
...
...
python/paddle/v2/dataset/conll05.py
0 → 100644
浏览文件 @
62ff19e3
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
paddle.v2.dataset.common
import
tarfile
import
gzip
import
itertools
# Each exported name must be its own string: the former single entry
# 'test, get_dict' named an attribute that does not exist, which makes
# `from ... import *` fail.
__all__ = ['test', 'get_dict', 'get_embedding']
"""
CoNLL 2005 dataset. The Paddle semantic role labeling book and demo use this
dataset as an example. Because the CoNLL 2005 data is not freely available,
the default download URL points to the CoNLL 2005 test set (which is public).
Users can change the URL and MD5 to point to their own CoNLL dataset.
"""

# Corpus archive and its expected MD5 checksum.
DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
DATA_MD5 = '387719152ae52d60422c016e92a742fc'
# Word, verb (predicate) and target (label) dictionaries.
WORDDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt'
WORDDICT_MD5 = 'ea7fb7d4c75cc6254716f0177a506baa'
VERBDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt'
VERBDICT_MD5 = '0d2977293bbb6cbefab5b0f97db1e77c'
TRGDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt'
TRGDICT_MD5 = 'd8c7f03ceb5fc2e5a0fa7503a4353751'
# Embedding file.
EMB_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb'
EMB_MD5 = 'bf436eb0faa1f6f9103017f8be57cdb7'

# Index used for words that are missing from the word dictionary.
UNK_IDX = 0
def load_dict(filename):
    """
    Load a dictionary file that holds one entry per line.

    :param filename: path of the text file to read.
    :return: dict mapping each stripped line to its zero-based line
        number; when a line is repeated the last occurrence wins.
    :rtype: dict
    """
    with open(filename, 'r') as dict_file:
        return {
            entry.strip(): line_no
            for line_no, entry in enumerate(dict_file)
        }
def corpus_reader(data_path, words_name, props_name):
    """
    Create a reader over one corpus stored in a tar archive.

    The returned reader is a generator function. It walks the two
    gzip-compressed archive members in lockstep, one line per word, and
    treats an empty props line as the end of a sentence. For every label
    column of a sentence it yields a tuple
    ``(sentence, predicate, labels)``:

    * ``sentence`` is the list of words of the sentence (raw strings, not
      IDs),
    * ``predicate`` is the matching entry of the first props column that
      is not ``'-'``,
    * ``labels`` is the bracketed label column converted to ``B-``/``I-``
      /``O`` tags.

    :param data_path: path of the tar archive.
    :param words_name: archive member with the gzip-compressed words.
    :param props_name: archive member with the gzip-compressed props.
    :return: a reader; calling it returns the iterator of data.
    :rtype: callable
    :raise RuntimeError: while iterating, when a label has an unexpected
        form.
    """

    def reader():
        tf = tarfile.open(data_path)
        wf = None
        pf = None
        try:
            wf = tf.extractfile(words_name)
            pf = tf.extractfile(props_name)
            with gzip.GzipFile(fileobj=wf) as words_file, gzip.GzipFile(
                    fileobj=pf) as props_file:
                sentences = []
                labels = []
                one_seg = []
                for word, label in itertools.izip(words_file, props_file):
                    word = word.strip()
                    label = label.strip().split()

                    if len(label) != 0:
                        sentences.append(word)
                        one_seg.append(label)
                        continue

                    # end of sentence: turn the per-word rows into
                    # per-column label lists. Nothing to do when no rows
                    # were collected (e.g. consecutive blank lines).
                    if one_seg:
                        for i in xrange(len(one_seg[0])):
                            labels.append([x[i] for x in one_seg])

                    if len(labels) >= 1:
                        # The first column carries the predicates.
                        verb_list = [x for x in labels[0] if x != '-']

                        for i, lbl in enumerate(labels[1:]):
                            cur_tag = 'O'
                            is_in_bracket = False
                            lbl_seq = []
                            for l in lbl:
                                if l == '*':
                                    if is_in_bracket:
                                        lbl_seq.append('I-' + cur_tag)
                                    else:
                                        lbl_seq.append('O')
                                elif l == '*)':
                                    lbl_seq.append('I-' + cur_tag)
                                    is_in_bracket = False
                                elif '(' in l and ')' in l:
                                    # Span opened and closed on one word.
                                    cur_tag = l[1:l.find('*')]
                                    lbl_seq.append('B-' + cur_tag)
                                    is_in_bracket = False
                                elif '(' in l:
                                    # Span opened, closed on a later word.
                                    cur_tag = l[1:l.find('*')]
                                    lbl_seq.append('B-' + cur_tag)
                                    is_in_bracket = True
                                else:
                                    raise RuntimeError('Unexpected label: %s' %
                                                       l)

                            yield sentences, verb_list[i], lbl_seq

                    sentences = []
                    labels = []
                    one_seg = []
        finally:
            # Also runs when the consumer abandons the generator early or
            # an error is raised, so the handles are never leaked.
            for handle in (pf, wf, tf):
                if handle is not None:
                    handle.close()

    return reader
def reader_creator(corpus_reader,
                   word_dict=None,
                   predicate_dict=None,
                   label_dict=None):
    """
    Convert the samples of a corpus reader into lists of indices.

    For every ``(sentence, predicate, labels)`` sample the position of
    the ``'B-V'`` label is located and the five words around it (offsets
    -2 to +2) are used as context; positions before the sentence start
    are replaced by ``'bos'`` and positions past its end by ``'eos'``.

    :param corpus_reader: callable returning an iterator of
        ``(sentence, predicate, labels)`` tuples.
    :param word_dict: dict used for words; misses map to ``UNK_IDX``.
    :param predicate_dict: dict used for the predicate.
    :param label_dict: dict used for the labels.
    :return: a generator (the inner reader is already invoked) yielding
        ``(word_idx, pred_idx, ctx_n2_idx, ctx_n1_idx, ctx_0_idx,
        ctx_p1_idx, ctx_p2_idx, mark, label_idx)``.
    """

    def reader():
        for sentence, predicate, labels in corpus_reader():
            sen_len = len(sentence)
            verb_index = labels.index('B-V')
            label_count = len(labels)
            mark = [0] * label_count

            # Collect the context word of every offset and flag the
            # positions that lie inside the sentence.
            context = {}
            for offset in (-1, -2, 0, 1, 2):
                position = verb_index + offset
                if 0 <= position < label_count:
                    mark[position] = 1
                    context[offset] = sentence[position]
                elif offset < 0:
                    context[offset] = 'bos'
                else:
                    context[offset] = 'eos'

            def repeated(word):
                # One index per word of the sentence, all identical.
                return [word_dict.get(word, UNK_IDX)] * sen_len

            word_idx = [word_dict.get(w, UNK_IDX) for w in sentence]
            pred_idx = [predicate_dict.get(predicate)] * sen_len
            ctx_n2_idx = repeated(context[-2])
            ctx_n1_idx = repeated(context[-1])
            ctx_0_idx = repeated(context[0])
            ctx_p1_idx = repeated(context[1])
            ctx_p2_idx = repeated(context[2])
            label_idx = [label_dict.get(w) for w in labels]

            yield (word_idx, pred_idx, ctx_n2_idx, ctx_n1_idx, ctx_0_idx,
                   ctx_p1_idx, ctx_p2_idx, mark, label_idx)

    return reader()
def get_dict():
    """
    Fetch and load the word, verb and label dictionaries.

    Each dictionary is obtained through the dataset ``common.download``
    helper and its result is handed to `load_dict` as a file name.

    :return: tuple ``(word_dict, verb_dict, label_dict)``.
    :rtype: tuple
    """
    # `import paddle.v2.dataset.common` binds only the name `paddle`, so
    # the helper has to be reached through its full dotted path; a bare
    # `common` is undefined in this module.
    download = paddle.v2.dataset.common.download
    word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5))
    verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5))
    label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5))
    return word_dict, verb_dict, label_dict
def get_embedding():
    """
    Fetch the embedding file.

    :return: whatever the dataset ``common.download`` helper returns for
        ``EMB_URL``.
    """
    # `import paddle.v2.dataset.common` binds only the name `paddle`, so
    # the helper is addressed by its full dotted path.
    return paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5)
def test():
    """
    Create the reader of the test set.

    The dictionaries come from `get_dict`; the corpus is read from the
    ``test.wsj`` words and props members of the downloaded archive.

    :return: the value returned by `reader_creator` for that corpus.
    """
    word_dict, verb_dict, label_dict = get_dict()
    # `import paddle.v2.dataset.common` binds only the name `paddle`, so
    # the helper is addressed by its full dotted path.
    reader = corpus_reader(
        paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5),
        words_name='conll05st-release/test.wsj/words/test.wsj.words.gz',
        props_name='conll05st-release/test.wsj/props/test.wsj.props.gz')
    return reader_creator(reader, word_dict, verb_dict, label_dict)
# When run as a script: print what get_embedding() returns, then print
# every sample produced by test().
if __name__ == '__main__':
    print get_embedding()
    for f in test():
        print f
python/paddle/v2/dataset/imdb.py
浏览文件 @
62ff19e3
# /usr/bin/env python
# -*- coding:utf-8 -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
...
...
@@ -17,6 +14,7 @@
"""
IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
"""
import
paddle.v2.dataset.common
import
tarfile
import
Queue
...
...
python/paddle/v2/dataset/imikolov.py
浏览文件 @
62ff19e3
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
"""
...
...
python/paddle/v2/dataset/mnist.py
浏览文件 @
62ff19e3
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
MNIST dataset.
"""
...
...
python/paddle/v2/dataset/movielens.py
浏览文件 @
62ff19e3
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
zipfile
from
common
import
download
import
re
...
...
python/paddle/v2/dataset/tests/cifar_test.py
浏览文件 @
62ff19e3
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
paddle.v2.dataset.cifar
import
unittest
...
...
python/paddle/v2/dataset/tests/common_test.py
浏览文件 @
62ff19e3
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
paddle.v2.dataset.common
import
unittest
import
tempfile
...
...
python/paddle/v2/dataset/tests/imdb_test.py
浏览文件 @
62ff19e3
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
paddle.v2.dataset.imdb
import
unittest
import
re
...
...
python/paddle/v2/dataset/tests/mnist_test.py
浏览文件 @
62ff19e3
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
paddle.v2.dataset.mnist
import
unittest
...
...
python/paddle/v2/layer.py
浏览文件 @
62ff19e3
...
...
@@ -65,19 +65,24 @@ to be in a Python function but could be anywhere.
Also, the creation of a protobuf message is hidden in the invocation of
paddle.v2.parameters.create, no longer exposed to users.
"""
import
collections
import
inspect
from
config_base
import
Layer
,
__convert_to_v2__
import
paddle.trainer_config_helpers
as
conf_helps
from
paddle.trainer_config_helpers.config_parser_utils
import
\
parse_network_config
as
__parse__
from
paddle.trainer_config_helpers.default_decorators
import
wrap_name_default
from
paddle.trainer_config_helpers.default_decorators
import
wrap_act_default
from
paddle.trainer_config_helpers.default_decorators
import
\
wrap_bias_attr_default
from
paddle.trainer_config_helpers.default_decorators
import
wrap_name_default
from
paddle.trainer_config_helpers.layers
import
layer_support
from
paddle.trainer.config_parser
import
\
RecurrentLayerGroupWithoutOutLinksBegin
,
RecurrentLayerGroupSetOutLink
,
\
RecurrentLayerGroupEnd
,
model_type
import
data_type
import
activation
import
data_type
__all__
=
[
'parse_network'
,
'data'
]
...
...
@@ -130,6 +135,137 @@ class DataLayerV2(Layer):
return
getattr
(
conf_helps
,
self
.
__method_name__
)(
name
=
self
.
name
,
**
args
)
class WithExtraParent(Layer):
    """
    Layer that keeps a list of extra parents next to its parent layers.

    Extra parents are converted by `to_proto` before the layer itself,
    but their results are not passed on to `to_proto_impl`.
    """

    def extra_parent(self):
        """Return the list of extra parents."""
        return self.__extra_parent__

    def __init__(self, name=None, parent_layers=None):
        self.__extra_parent__ = []
        super(WithExtraParent, self).__init__(
            name=name, parent_layers=parent_layers)

    def append_extra_parent(self, parent):
        """Register one more extra parent."""
        self.__extra_parent__.append(parent)

    def to_proto(self, context):
        """
        Convert this layer, its extra parents and its parent layers.

        :param context: dict that caches `to_proto_impl` results; it is
            also handed to `to_proto_impl`.
        :return: the `to_proto_impl` result when ``context_name()`` is
            None, otherwise the context entry selected by
            ``use_context_name()``.
        """
        for extra in self.__extra_parent__:
            extra.to_proto(context=context)

        kwargs = dict()
        for layer_name in self.__parent_layers__:
            parents = self.__parent_layers__[layer_name]
            if isinstance(parents, collections.Sequence):
                converted = [p.to_proto(context=context) for p in parents]
            else:
                converted = parents.to_proto(context=context)
            kwargs[layer_name] = converted

        if self.context_name() is None:
            return self.to_proto_impl(context=context, **kwargs)
        if self.context_name() not in context:
            context[self.context_name()] = self.to_proto_impl(
                context=context, **kwargs)

        if not self.use_context_name():
            return context[self.name]
        return context[self.context_name()]
class MemoryV2(WithExtraParent):
    """
    v2 wrapper around ``conf_helps.memory``.

    When a ``boot_layer`` is given, the constructor searches the local
    variables of the calling frames for `RecurrentLayerInput` objects and
    links itself and the boot layer to them as extra parents.
    """

    def __init__(self, name, **kwargs):
        """
        :param name: name of the memory.
        :param kwargs: arguments kept for ``conf_helps.memory``; ``size``
            may be a callable and ``boot_layer`` is treated specially.
        """
        self.name = name
        super(MemoryV2, self).__init__(name=name, parent_layers=dict())
        self.__kwargs__ = kwargs
        self.__boot_layer_name__ = None

        if 'boot_layer' in kwargs:
            begin_of_current_rnn = []
            # TODO(yuyang18): Fix inspect, it could be wrong when user invoke a
            # function inside step.
            # Walk outwards from this frame and stop at the first frame
            # whose locals hold a RecurrentLayerInput, either directly or
            # inside a sequence. The stack is captured once and reused
            # instead of being rebuilt for every frame.
            st = inspect.stack()
            for frame_record in st:
                locs = frame_record[0].f_locals
                for val in list(locs.values()):
                    if isinstance(val, RecurrentLayerInput):
                        begin_of_current_rnn.append(val)
                    elif isinstance(val, collections.Sequence):
                        for v in val:
                            if isinstance(v, RecurrentLayerInput):
                                begin_of_current_rnn.append(v)

                if begin_of_current_rnn:
                    break
            # NOTE(review): the list is never None, so this cannot fail;
            # presumably a non-empty check was intended - confirm.
            assert begin_of_current_rnn is not None
            for extra in begin_of_current_rnn:
                self.append_extra_parent(extra)
                assert isinstance(extra, WithExtraParent)
                extra.append_extra_parent(kwargs['boot_layer'])
                self.__boot_layer_name__ = kwargs['boot_layer'].name

    def to_proto_impl(self, context, **kwargs):
        """
        Create the v1 memory.

        :param context: dict of already converted layers; the boot layer
            is looked up in it by name.
        :param kwargs: converted parents; entries of the constructor
            arguments take precedence over them.
        :return: result of ``conf_helps.memory``.
        """
        args = dict(kwargs)
        args.update(self.__kwargs__)

        if self.__boot_layer_name__ is not None:
            args['boot_layer'] = context[self.__boot_layer_name__]

        size = args.get('size', None)
        if size is not None:
            # A callable size is resolved only now, at conversion time.
            args['size'] = size() if callable(size) else size
        return conf_helps.memory(name=self.name, **args)

    def context_name(self):
        return self.name + "#memory"

    def use_context_name(self):
        """
        memory layer will have the same name with some layer
        :return:
        """
        return True
class LayerOutputV2(Layer):
    """
    Holder for the result of a LayerOutput of the v1 api.

    No parents are stored, because layer_output has been parsed already.
    """

    def __init__(self, layer_output):
        """
        :param layer_output: the ``conf_helps.LayerOutput`` to wrap; its
            name becomes the name of this layer.
        """
        assert isinstance(layer_output, conf_helps.LayerOutput)
        self.layer_output = layer_output
        no_parents = dict()
        super(LayerOutputV2, self).__init__(
            name=layer_output.name, parent_layers=no_parents)

    def to_proto_impl(self):
        """Return the wrapped layer output unchanged."""
        return self.layer_output
class StaticInputV2(object):
    """
    Marks an input of `recurrent_group` as a static input.

    It only records the wrapped layer together with the ``is_seq`` and
    ``size`` settings.
    """

    def __init__(self, input, is_seq=False, size=None):
        """
        :param input: the wrapped layer, has to be a ``LayerV2``.
        :param is_seq: stored as given.
        :param size: stored as given.
        """
        assert isinstance(input, LayerV2)
        self.input = input
        self.name = input.name
        self.size = size
        self.is_seq = is_seq
        # TODO(qiaolongfei): add size
        # assert input.size is not None or size is not None
class
MixedLayerV2
(
Layer
):
"""
This class is use to support `with` grammar. If not, the following code
...
...
@@ -161,7 +297,6 @@ class MixedLayerV2(Layer):
other_kwargs
[
'act'
]
=
act
other_kwargs
[
'bias_attr'
]
=
bias_attr
other_kwargs
[
'layer_attr'
]
=
layer_attr
parent_layers
=
{
"input"
:
self
.
__inputs__
}
super
(
MixedLayerV2
,
self
).
__init__
(
name
,
parent_layers
)
self
.
__other_kwargs__
=
other_kwargs
...
...
@@ -171,7 +306,7 @@ class MixedLayerV2(Layer):
self
.
__inputs__
.
append
(
other
)
return
self
else
:
raise
MixedLayer
Type
V2
.
AddToSealedMixedLayerExceptionV2
()
raise
MixedLayerV2
.
AddToSealedMixedLayerExceptionV2
()
def
__enter__
(
self
):
assert
len
(
self
.
__inputs__
)
==
0
...
...
@@ -186,6 +321,13 @@ class MixedLayerV2(Layer):
args
[
each
]
=
kwargs
[
each
]
for
each
in
self
.
__other_kwargs__
:
args
[
each
]
=
self
.
__other_kwargs__
[
each
]
size
=
args
.
get
(
'size'
,
None
)
if
size
is
not
None
:
if
callable
(
size
):
real_size
=
size
()
else
:
real_size
=
size
args
[
'size'
]
=
real_size
return
getattr
(
conf_helps
,
self
.
__method_name__
)(
**
args
)
...
...
@@ -202,14 +344,51 @@ def mixed(size=0,
return
MixedLayerV2
(
size
,
input
,
name
,
act
,
bias_attr
,
layer_attr
)
class RecurrentLayerInput(WithExtraParent):
    """
    Marks the begin of a recurrent group for one of its inputs.

    All inputs of one group share the context name
    ``<recurrent_name>.begin``.
    """

    def __init__(self, recurrent_name, index, parent_layers):
        """
        :param recurrent_name: name of the recurrent group.
        :param index: position of this input inside the single entry of
            ``parent_layers``; that parent's name is used as layer name.
        :param parent_layers: dict with exactly one entry, the sequence
            of group inputs.
        """
        assert len(parent_layers) == 1
        self.__parents__ = parent_layers.values()[0]
        own_name = self.__parents__[index].name
        super(RecurrentLayerInput, self).__init__(
            name=own_name, parent_layers=parent_layers)
        self.__recurrent_name__ = recurrent_name

    def context_name(self):
        return self.__recurrent_name__ + ".begin"

    def to_proto_impl(self, context, **kwargs):
        """Open the recurrent layer group and return this object."""
        model_type('recurrent_nn')
        in_link_names = [parent.name for parent in self.__parents__]
        RecurrentLayerGroupWithoutOutLinksBegin(
            name=self.__recurrent_name__, in_links=in_link_names)
        return self
class RecurrentLayerOutput(Layer):
    """
    Marks the end of a recurrent group for one of its outputs.

    All outputs of one group share the context name
    ``<recurrent_name>.end``.
    """

    def __init__(self, recurrent_name, index, parent_layers):
        """
        :param recurrent_name: name of the recurrent group.
        :param index: position of this output inside the single entry of
            ``parent_layers``; that parent's name is used as layer name.
        :param parent_layers: dict with exactly one entry, the sequence
            of group outputs.
        """
        assert len(parent_layers) == 1
        self.__parents__ = parent_layers.values()[0]
        own_name = self.__parents__[index].name
        super(RecurrentLayerOutput, self).__init__(
            name=own_name, parent_layers=parent_layers)
        self.__recurrent_name__ = recurrent_name

    def context_name(self):
        return self.__recurrent_name__ + ".end"

    def to_proto_impl(self, **kwargs):
        """Set every parent as out link, then close the layer group."""
        for out_layer in self.__parents__:
            RecurrentLayerGroupSetOutLink(out_layer.name)
        RecurrentLayerGroupEnd(name=self.__recurrent_name__)
# Short names under which the classes defined in this module and two
# helpers of conf_helps.layers are made available.
LayerV2 = Layer
data = DataLayerV2
AggregateLevel = conf_helps.layers.AggregateLevel
ExpandLevel = conf_helps.layers.ExpandLevel
memory = MemoryV2
def
__layer_name_mapping__
(
inname
):
if
inname
in
[
'data_layer'
,
'memory'
,
'mixed_layer'
]:
if
inname
in
[
'data_layer'
,
'memory'
,
'mixed_layer'
,
'recurrent_group'
]:
# Do Not handle these layers
return
elif
inname
==
'maxid_layer'
:
...
...
@@ -231,8 +410,10 @@ def __layer_name_mapping__(inname):
def
__layer_name_mapping_parent_names__
(
inname
):
all_args
=
getattr
(
conf_helps
,
inname
).
argspec
.
args
return
filter
(
lambda
x
:
x
in
[
'input1'
,
'input2'
,
'label'
,
'input'
,
'a'
,
'b'
,
'expand_as'
,
'weights'
,
'vectors'
,
'weight'
,
'score'
,
'left'
,
'right'
],
lambda
x
:
x
in
[
'input1'
,
'input2'
,
'label'
,
'input'
,
'a'
,
'b'
,
'expand_as'
,
'weights'
,
'vectors'
,
'weight'
,
'score'
,
'left'
,
'right'
,
'output_mem'
],
all_args
)
...
...
@@ -267,3 +448,54 @@ operator_list = [
for
op
in
operator_list
:
globals
()[
op
[
0
]]
=
__convert_to_v2__
(
op
[
0
],
parent_names
=
op
[
1
],
is_default_name
=
False
)
@wrap_name_default()
def recurrent_group(step, input, name=None):
    """
    Build a recurrent group from v2 layers.

    :param step: callable that receives the inputs of the group and
        returns one layer or a sequence of layers.
    :param input: a single input or a sequence of inputs; entries of
        type StaticInputV2 are handled as static inputs.
    :param name: name of the recurrent group.
        NOTE(review): presumably filled in by wrap_name_default when it
        is None - confirm.
    :return: a single RecurrentLayerOutput when step produced one output,
        otherwise a list with one RecurrentLayerOutput per output.
    """
    if not isinstance(input, collections.Sequence):
        input = [input]

    non_static_inputs = filter(lambda x: not isinstance(x, StaticInputV2),
                               input)
    # One begin marker per non-static input. NOTE: MemoryV2 finds these
    # objects by scanning the local variables of its calling frames, so
    # they have to stay reachable from locals while step runs.
    actual_input = [
        RecurrentLayerInput(
            recurrent_name=name,
            index=i,
            parent_layers={'recurrent_inputs': non_static_inputs})
        for i in xrange(len(non_static_inputs))
    ]

    def __real_step__(*args):
        rnn_input = list(args)
        static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input)
        for static_input in static_inputs:
            # A static input is fed through a memory booted from the
            # wrapped layer; a mixed layer with the same name and an
            # identity projection reads that memory.
            mem_name = "__%s_memory__" % static_input.input.name
            # size is passed as a bound method, not called here.
            mem = memory(
                name=mem_name,
                is_seq=static_input.is_seq,
                size=static_input.input.calculate_size,
                boot_layer=static_input.input)
            with mixed(
                    name=mem_name,
                    size=static_input.input.calculate_size,
                    act=activation.Identity()) as mix:
                mix += identity_projection(input=mem)
            # Put it back at the position it had in the original input.
            rnn_input.insert(input.index(static_input), mix)
        return step(*rnn_input)

    actual_output = __real_step__(*actual_input)

    if not isinstance(actual_output, collections.Sequence):
        actual_output = [actual_output]

    # One end marker per output of step.
    retv = [
        RecurrentLayerOutput(
            recurrent_name=name,
            index=i,
            parent_layers={'recurrent_outputs': actual_output})
        for i in xrange(len(actual_output))
    ]
    if len(retv) == 1:
        return retv[0]
    else:
        return retv
python/paddle/v2/tests/CMakeLists.txt
浏览文件 @
62ff19e3
add_test
(
NAME test_v2_api
COMMAND bash
${
PROJ_ROOT
}
/python/paddle/v2/tests/run_tests.sh
${
PYTHON_EXECUTABLE
}
)
add_test
(
NAME test_v2_layer
COMMAND
${
PROJ_ROOT
}
/paddle/.set_python_path.sh -d
${
PROJ_ROOT
}
/python/
${
PYTHON_EXECUTABLE
}
${
PROJ_ROOT
}
/python/paddle/v2/tests/test_layer.py
WORKING_DIRECTORY
${
PROJ_ROOT
}
/python/paddle
)
add_test
(
NAME test_v2_api
COMMAND bash
${
PROJ_ROOT
}
/python/paddle/v2/tests/run_tests.sh
${
PYTHON_EXECUTABLE
}
)
add_test
(
NAME test_v2_rnn_layer
COMMAND
${
PROJ_ROOT
}
/paddle/.set_python_path.sh -d
${
PROJ_ROOT
}
/python/
${
PYTHON_EXECUTABLE
}
${
PROJ_ROOT
}
/python/paddle/v2/tests/test_rnn_layer.py
)
add_test
(
NAME t
opology_test
add_test
(
NAME t
est_topology
COMMAND
${
PROJ_ROOT
}
/paddle/.set_python_path.sh -d
${
PROJ_ROOT
}
/python/
${
PYTHON_EXECUTABLE
}
${
PROJ_ROOT
}
/python/paddle/v2/tests/test_topology.py
WORKING_DIRECTORY
${
PROJ_ROOT
}
/python/paddle
)
python/paddle/v2/tests/test_rnn_layer.py
0 → 100644
浏览文件 @
62ff19e3
# Copyright PaddlePaddle contributors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
difflib
import
unittest
import
paddle.trainer_config_helpers
as
conf_helps
import
paddle.v2.activation
as
activation
import
paddle.v2.data_type
as
data_type
import
paddle.v2.layer
as
layer
from
paddle.trainer_config_helpers.config_parser_utils
import
\
parse_network_config
as
parse_network
class RNNTest(unittest.TestCase):
    """
    Builds the same recurrent networks with the v1 config helpers and
    with the v2 layer api and prints a unified diff of the two parsed
    configurations.

    NOTE(review): the diff is only printed, nothing is asserted, so
    these tests cannot fail on a mismatch.
    """

    def test_simple_rnn(self):
        """Single-input recurrent group with a plain memory."""
        dict_dim = 10
        word_dim = 8
        hidden_dim = 8

        def parse_old_rnn():
            # v1 api: the network is described inside `test` and parsed
            # by parse_network.
            def step(y):
                mem = conf_helps.memory(name="rnn_state", size=hidden_dim)
                out = conf_helps.fc_layer(
                    input=[y, mem],
                    size=hidden_dim,
                    act=activation.Tanh(),
                    bias_attr=True,
                    name="rnn_state")
                return out

            def test():
                data = conf_helps.data_layer(name="word", size=dict_dim)
                embd = conf_helps.embedding_layer(input=data, size=word_dim)
                conf_helps.recurrent_group(name="rnn", step=step, input=embd)

            return str(parse_network(test))

        def parse_new_rnn():
            # v2 api: the layers are created directly and the last one
            # is handed to layer.parse_network.
            def new_step(y):
                mem = layer.memory(name="rnn_state", size=hidden_dim)
                out = layer.fc(input=[y, mem],
                               size=hidden_dim,
                               act=activation.Tanh(),
                               bias_attr=True,
                               name="rnn_state")
                return out

            data = layer.data(
                name="word", type=data_type.integer_value(dict_dim))
            embd = layer.embedding(input=data, size=word_dim)
            rnn_layer = layer.recurrent_group(
                name="rnn", step=new_step, input=embd)
            return str(layer.parse_network(rnn_layer))

        diff = difflib.unified_diff(parse_old_rnn().splitlines(1),
                                    parse_new_rnn().splitlines(1))
        print ''.join(diff)

    def test_sequence_rnn_multi_input(self):
        """Two-input recurrent group whose memory has a boot layer."""
        dict_dim = 10
        word_dim = 8
        hidden_dim = 8
        label_dim = 3

        def parse_old_rnn():
            # v1 api version of the network.
            def test():
                data = conf_helps.data_layer(name="word", size=dict_dim)
                label = conf_helps.data_layer(name="label", size=label_dim)
                emb = conf_helps.embedding_layer(input=data, size=word_dim)
                boot_layer = conf_helps.data_layer(name="boot", size=10)
                boot_layer = conf_helps.fc_layer(
                    name='boot_fc', input=boot_layer, size=10)

                def step(y, wid):
                    z = conf_helps.embedding_layer(input=wid, size=word_dim)
                    mem = conf_helps.memory(
                        name="rnn_state",
                        size=hidden_dim,
                        boot_layer=boot_layer)
                    out = conf_helps.fc_layer(
                        input=[y, z, mem],
                        size=hidden_dim,
                        act=conf_helps.TanhActivation(),
                        bias_attr=True,
                        name="rnn_state")
                    return out

                out = conf_helps.recurrent_group(
                    name="rnn", step=step, input=[emb, data])

                rep = conf_helps.last_seq(input=out)
                prob = conf_helps.fc_layer(
                    size=label_dim,
                    input=rep,
                    act=conf_helps.SoftmaxActivation(),
                    bias_attr=True)

                conf_helps.outputs(
                    conf_helps.classification_cost(
                        input=prob, label=label))

            return str(parse_network(test))

        def parse_new_rnn():
            # v2 api version of the same network.
            data = layer.data(
                name="word", type=data_type.dense_vector(dict_dim))
            label = layer.data(
                name="label", type=data_type.dense_vector(label_dim))
            emb = layer.embedding(input=data, size=word_dim)
            boot_layer = layer.data(
                name="boot", type=data_type.dense_vector(10))
            boot_layer = layer.fc(name='boot_fc', input=boot_layer, size=10)

            def step(y, wid):
                z = layer.embedding(input=wid, size=word_dim)
                mem = layer.memory(
                    name="rnn_state", size=hidden_dim, boot_layer=boot_layer)
                out = layer.fc(input=[y, z, mem],
                               size=hidden_dim,
                               act=activation.Tanh(),
                               bias_attr=True,
                               name="rnn_state")
                return out

            out = layer.recurrent_group(
                name="rnn", step=step, input=[emb, data])

            rep = layer.last_seq(input=out)
            prob = layer.fc(size=label_dim,
                            input=rep,
                            act=activation.Softmax(),
                            bias_attr=True)

            cost = layer.classification_cost(input=prob, label=label)

            return str(layer.parse_network(cost))

        diff = difflib.unified_diff(parse_old_rnn().splitlines(1),
                                    parse_new_rnn().splitlines(1))
        print ''.join(diff)
# Run the test cases of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录