Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
9a9de924
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9a9de924
编写于
9月 09, 2016
作者:
X
xuwei06
提交者:
Yu Yang
9月 14, 2016
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Correctly handle memory in RecurrentGradientMachine for hirarchical RNN
Change-Id: I8e0a8ea6fc2760652d9c76440a539c90860062d3
上级
699d5f26
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
207 addition
and
9 deletion
+207
-9
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+9
-2
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+4
-0
paddle/gserver/tests/Sequence/dummy.list
paddle/gserver/tests/Sequence/dummy.list
+1
-0
paddle/gserver/tests/rnn_data_provider.py
paddle/gserver/tests/rnn_data_provider.py
+35
-0
paddle/gserver/tests/sequenceGen.py
paddle/gserver/tests/sequenceGen.py
+0
-3
paddle/gserver/tests/sequence_nest_rnn.conf
paddle/gserver/tests/sequence_nest_rnn.conf
+75
-0
paddle/gserver/tests/sequence_rnn.conf
paddle/gserver/tests/sequence_rnn.conf
+57
-0
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
+17
-4
paddle/parameter/Argument.h
paddle/parameter/Argument.h
+9
-0
未找到文件。
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
浏览文件 @
9a9de924
...
...
@@ -519,7 +519,6 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
dynamic_cast
<
GatherAgentLayer
*>
(
outFrameLine
.
agentLayer
.
get
());
gatherAgent
->
addRealLayer
(
outFrameLine
.
frames
[
i
]);
}
// connect memory links
// Adopt info_[0].idIndex because seq which has_subseq=True
// doesn't support Memory with !hasSubseq bootlayer;
...
...
@@ -529,7 +528,7 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
NeuralNetwork
::
connect
(
memoryFrameLine
.
agents
[
i
],
i
==
0
?
memoryFrameLine
.
bootLayer
:
memoryFrameLine
.
frames
[
i
-
1
],
idSize
/*height of agent*/
);
numSeqs_
[
i
]
/*height of agent*/
);
}
}
...
...
@@ -622,6 +621,8 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
// numSequences: # samples(sequences) in a batch
size_t
numSequences
=
input
.
getNumSequences
();
std
::
vector
<
int
>
allIds
;
numSeqs_
.
clear
();
Info
*
inlink_info
=
&
info_
[
inlinks_id
];
inlink_info
->
idIndex
.
clear
();
inlink_info
->
idIndex
.
push_back
(
0
);
// first idIndex = 0
...
...
@@ -634,10 +635,12 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
// maxSequenceLength_: max number of sentences(subseq) in allsamples
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
sequenceStartPositions
.
push_back
(
0
);
// first element = 0
int
numSeqs
=
0
;
for
(
size_t
j
=
0
;
j
<
numSubSequences
;
++
j
)
{
// for each sentence
// seqLengthAndStart_[inlinks_id][j]:
// a 4-tuple including <subseqlen, subseqstart, seqid, subseqid>
if
(
std
::
get
<
3
>
(
seqLengthAndStart_
[
inlinks_id
][
j
])
==
i
)
{
++
numSeqs
;
// subseqstart: the cpuSubSequenceStartPositions of this subseq
int
subSeqStart
=
std
::
get
<
1
>
(
seqLengthAndStart_
[
inlinks_id
][
j
]);
int
subSeqLength
=
std
::
get
<
0
>
(
seqLengthAndStart_
[
inlinks_id
][
j
]);
...
...
@@ -650,6 +653,7 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
}
inlink_info
->
idIndex
.
push_back
(
allIds
.
size
());
inlink_info
->
seqStartPosIndex
.
push_back
(
sequenceStartPositions
.
size
());
numSeqs_
.
push_back
(
numSeqs
);
}
// inFrameLine create sequenceStartPositions one time
CHECK_EQ
(
sequenceStartPositions
.
size
(),
...
...
@@ -659,16 +663,19 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
createSeqPos
(
sequenceStartPositions
,
&
inlink_info
->
sequenceStartPositions
);
}
else
{
// for scatterAgentLayer
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
int
numSeqs
=
0
;
for
(
size_t
j
=
0
;
j
<
numSequences
;
++
j
)
{
int
seqLength
=
std
::
get
<
0
>
(
seqLengthAndStart_
[
inlinks_id
][
j
]);
if
(
i
>=
seqLength
)
{
break
;
}
++
numSeqs
;
int
seqStart
=
std
::
get
<
1
>
(
seqLengthAndStart_
[
inlinks_id
][
j
]);
allIds
.
push_back
(
reversed_
?
(
seqStart
+
seqLength
-
1
-
i
)
:
(
seqStart
+
i
));
}
inlink_info
->
idIndex
.
push_back
(
allIds
.
size
());
numSeqs_
.
push_back
(
numSeqs
);
}
}
...
...
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
浏览文件 @
9a9de924
...
...
@@ -333,6 +333,10 @@ protected:
};
std
::
vector
<
Info
>
info_
;
// numSeqs_[i] is the number sequences which is longer than i (for sequence
// data) or has more than i subsequences (for subsequence data)
std
::
vector
<
int
>
numSeqs_
;
// each inlinks has a "std::vector<std::tuple<int, int, int, int>>" denotes
// its sequence info:
// if hasSubSeq, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
...
...
paddle/gserver/tests/Sequence/dummy.list
0 → 100644
浏览文件 @
9a9de924
dummy_file_no_use
paddle/gserver/tests/rnn_data_provider.py
0 → 100644
浏览文件 @
9a9de924
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle.trainer.PyDataProvider2
import
*
data
=
[
[[[
1
,
3
,
2
],
[
4
,
5
,
2
]],
0
],
[[[
0
,
2
],
[
2
,
5
],
[
0
,
1
,
2
]],
1
],
]
@
provider
(
input_types
=
[
integer_value_sub_sequence
(
10
),
integer_value
(
2
)])
def
process_subseq
(
settings
,
file_name
):
for
d
in
data
:
yield
d
@
provider
(
input_types
=
[
integer_value_sequence
(
10
),
integer_value
(
2
)])
def
process_seq
(
settings
,
file_name
):
for
d
in
data
:
seq
=
[]
for
subseq
in
d
[
0
]:
seq
+=
subseq
yield
seq
,
d
[
1
]
paddle/gserver/tests/sequenceGen.py
浏览文件 @
9a9de924
#!/usr/bin/env python
#coding=utf-8
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
...
...
paddle/gserver/tests/sequence_nest_rnn.conf
0 → 100644
浏览文件 @
9a9de924
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle
.
trainer_config_helpers
import
*
######################## data source ################################
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/dummy.list'
,
test_list
=
None
,
module
=
'rnn_data_provider'
,
obj
=
'process_subseq'
)
settings
(
batch_size
=
2
,
learning_rate
=
0
.
01
)
######################## network configure ################################
dict_dim
=
10
word_dim
=
8
hidden_dim
=
8
label_dim
=
3
data
=
data_layer
(
name
=
"word"
,
size
=
dict_dim
)
emb
=
embedding_layer
(
input
=
data
,
size
=
word_dim
)
# This hierachical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn.conf
def
outer_step
(
x
):
outer_mem
=
memory
(
name
=
"outer_rnn_state"
,
size
=
hidden_dim
)
def
inner_step
(
y
):
inner_mem
=
memory
(
name
=
"inner_rnn_state"
,
size
=
hidden_dim
,
boot_layer
=
outer_mem
)
return
fc_layer
(
input
=[
y
,
inner_mem
],
size
=
hidden_dim
,
act
=
TanhActivation
(),
bias_attr
=
True
,
name
=
"inner_rnn_state"
)
inner_rnn_output
=
recurrent_group
(
step
=
inner_step
,
input
=
x
)
last
=
last_seq
(
input
=
inner_rnn_output
,
name
=
"outer_rnn_state"
)
# "return last" should also work. But currently RecurrentGradientMachine
# does not handle it correctly. Current implementation requires that
# all the out links are from sequences. However, it does not report error
# when the out links are not sequences.
return
inner_rnn_output
out
=
recurrent_group
(
step
=
outer_step
,
input
=
SubsequenceInput
(
emb
))
value_printer_evaluator
(
input
=
out
)
rep
=
last_seq
(
input
=
out
)
prob
=
fc_layer
(
size
=
label_dim
,
input
=
rep
,
act
=
SoftmaxActivation
(),
bias_attr
=
True
)
outputs
(
classification_cost
(
input
=
prob
,
label
=
data_layer
(
name
=
"label"
,
size
=
label_dim
)))
paddle/gserver/tests/sequence_rnn.conf
0 → 100644
浏览文件 @
9a9de924
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle
.
trainer_config_helpers
import
*
######################## data source ################################
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/dummy.list'
,
test_list
=
None
,
module
=
'rnn_data_provider'
,
obj
=
'process_seq'
)
settings
(
batch_size
=
2
,
learning_rate
=
0
.
01
)
######################## network configure ################################
dict_dim
=
10
word_dim
=
8
hidden_dim
=
8
label_dim
=
3
data
=
data_layer
(
name
=
"word"
,
size
=
dict_dim
)
emb
=
embedding_layer
(
input
=
data
,
size
=
word_dim
)
def
step
(
y
):
mem
=
memory
(
name
=
"rnn_state"
,
size
=
hidden_dim
)
return
fc_layer
(
input
=[
y
,
mem
],
size
=
hidden_dim
,
act
=
TanhActivation
(),
bias_attr
=
True
,
name
=
"rnn_state"
)
out
=
recurrent_group
(
step
=
step
,
input
=
emb
)
value_printer_evaluator
(
input
=
out
)
rep
=
last_seq
(
input
=
out
)
prob
=
fc_layer
(
size
=
label_dim
,
input
=
rep
,
act
=
SoftmaxActivation
(),
bias_attr
=
True
)
outputs
(
classification_cost
(
input
=
prob
,
label
=
data_layer
(
name
=
"label"
,
size
=
label_dim
)))
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
浏览文件 @
9a9de924
...
...
@@ -21,6 +21,8 @@ limitations under the License. */
#include <paddle/trainer/TrainerInternal.h>
#include <paddle/gserver/gradientmachines/GradientMachine.h>
P_DECLARE_int32
(
seed
);
using
namespace
paddle
;
// NOLINT
using
namespace
std
;
// NOLINT
class
TrainerForTest
:
public
paddle
::
Trainer
{
...
...
@@ -68,7 +70,9 @@ void CalCost(const string& conf, const string& dir, real* cost,
CpuVector
vecMomentum
(
dim
);
// vecW needs to be assigned, otherwise the variable is an uncertain value.
vecW
.
zeroMem
();
*
ThreadLocalRand
::
getSeed
()
=
FLAGS_seed
;
vecW
.
randnorm
(
0
,
0.1
);
trainer
.
startTrain
();
for
(
int
i
=
0
;
i
<
num_passes
;
++
i
)
{
...
...
@@ -88,15 +92,13 @@ void CalCost(const string& conf, const string& dir, real* cost,
rmDir
(
dir
.
c_str
());
}
TEST
(
RecurrentGradientMachine
,
HasSubSequence
)
{
void
test
(
const
string
&
conf1
,
const
string
&
conf2
)
{
int
num_passes
=
5
;
real
*
cost1
=
new
real
[
num_passes
];
const
string
conf1
=
"gserver/tests/sequence_layer_group.conf"
;
const
string
dir1
=
"gserver/tests/t1"
;
CalCost
(
conf1
,
dir1
,
cost1
,
num_passes
);
real
*
cost2
=
new
real
[
num_passes
];
const
string
conf2
=
"gserver/tests/sequence_nest_layer_group.conf"
;
const
string
dir2
=
"gserver/tests/t2"
;
CalCost
(
conf2
,
dir2
,
cost2
,
num_passes
);
...
...
@@ -109,6 +111,17 @@ TEST(RecurrentGradientMachine, HasSubSequence) {
delete
[]
cost2
;
}
TEST
(
RecurrentGradientMachine
,
HasSubSequence
)
{
test
(
"gserver/tests/sequence_layer_group.conf"
,
"gserver/tests/sequence_nest_layer_group.conf"
);
}
TEST
(
RecurrentGradientMachine
,
rnn
)
{
test
(
"gserver/tests/sequence_rnn.conf"
,
"gserver/tests/sequence_nest_rnn.conf"
);
}
int
main
(
int
argc
,
char
**
argv
)
{
if
(
paddle
::
version
::
isWithPyDataProvider
())
{
if
(
!
paddle
::
version
::
isWithGpu
())
{
...
...
paddle/parameter/Argument.h
浏览文件 @
9a9de924
...
...
@@ -255,6 +255,15 @@ struct Argument {
/*
Get Sequence Length, startPositions and max Length according to input
1. For sequence data:
Each tuple is (seq_length, seq_start, seq_id, seq_id)
The tuples are sorted according to seq_length or subseq_length
*maxSequenceLength is the maximal sequence length
2. For subsequence data:
Each tuple is (subseq_length, subseq_start, seq_id, subseq_id)
The tuples are not sorted. They are in the original order.
*maxSequenceLenth is the maximal number of subsequences in each sequence.
*/
void
getSeqLengthAndStart
(
std
::
vector
<
std
::
tuple
<
int
,
int
,
int
,
int
>>*
seqLengthAndStart
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录