Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
09f34c4b
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
09f34c4b
编写于
6月 19, 2017
作者:
E
emailweixu
提交者:
GitHub
6月 19, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2479 from emailweixu/mixed_input_rnn
RecurrentGroup with mixed input sequence types
上级
17fe8322
ef61288f
变更
20
隐藏空白更改
内联
并排
Showing
20 changed file
with
662 addition
and
484 deletion
+662
-484
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+320
-177
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+44
-11
paddle/gserver/layers/AgentLayer.cpp
paddle/gserver/layers/AgentLayer.cpp
+42
-53
paddle/gserver/layers/AgentLayer.h
paddle/gserver/layers/AgentLayer.h
+21
-63
paddle/gserver/layers/SequencePoolLayer.cpp
paddle/gserver/layers/SequencePoolLayer.cpp
+3
-0
paddle/gserver/tests/rnn_data_provider.py
paddle/gserver/tests/rnn_data_provider.py
+19
-0
paddle/gserver/tests/sequence_nest_rnn_multi_input.conf
paddle/gserver/tests/sequence_nest_rnn_multi_input.conf
+2
-2
paddle/gserver/tests/sequence_rnn_matched_inputs.py
paddle/gserver/tests/sequence_rnn_matched_inputs.py
+85
-0
paddle/gserver/tests/sequence_rnn_mixed_inputs.py
paddle/gserver/tests/sequence_rnn_mixed_inputs.py
+79
-0
paddle/gserver/tests/sequence_rnn_multi_input.conf
paddle/gserver/tests/sequence_rnn_multi_input.conf
+1
-1
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
+9
-0
paddle/math/Vector.cpp
paddle/math/Vector.cpp
+4
-3
paddle/parameter/Argument.h
paddle/parameter/Argument.h
+1
-0
paddle/trainer/tests/test_recurrent_machine_generation.cpp
paddle/trainer/tests/test_recurrent_machine_generation.cpp
+2
-0
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+7
-58
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+20
-70
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
...config_helpers/tests/configs/protostr/shared_gru.protostr
+0
-8
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
...onfig_helpers/tests/configs/protostr/shared_lstm.protostr
+0
-10
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
...ig_helpers/tests/configs/protostr/test_rnn_group.protostr
+2
-27
python/paddle/v2/layer.py
python/paddle/v2/layer.py
+1
-1
未找到文件。
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
浏览文件 @
09f34c4b
...
...
@@ -214,7 +214,6 @@ void RecurrentGradientMachine::init(
inFrameLines_
[
i
].
linkName
=
subModelConfig
->
in_links
(
i
).
link_name
();
inFrameLines_
[
i
].
inLayer
=
rootNetwork_
->
getLayer
(
subModelConfig
->
in_links
(
i
).
layer_name
());
inFrameLines_
[
i
].
hasSubseq
=
subModelConfig
->
in_links
(
i
).
has_subseq
();
}
outFrameLines_
.
resize
(
subModelConfig
->
out_links_size
());
...
...
@@ -241,11 +240,8 @@ void RecurrentGradientMachine::init(
rootNetwork_
->
getLayer
(
memoryConfig
.
boot_layer_name
());
LayerConfig
scatterConfig
=
*
agentConfig
;
memoryFrameLines_
[
i
].
is_sequence
=
memoryConfig
.
is_sequence
();
memoryFrameLines_
[
i
].
rootAgent
.
reset
(
memoryConfig
.
is_sequence
()
?
new
SequenceScatterAgentLayer
(
scatterConfig
)
:
new
ScatterAgentLayer
(
scatterConfig
));
new
ScatterAgentLayer
(
scatterConfig
));
memoryFrameLines_
[
i
].
rootAgent
->
init
(
LayerMap
(),
parameterMap_
);
memoryFrameLines_
[
i
].
bootLayer
=
memoryFrameLines_
[
i
].
rootAgent
;
...
...
@@ -267,9 +263,7 @@ void RecurrentGradientMachine::init(
if
(
subModelConfig
->
has_generator
())
{
memoryFrameLines_
[
i
].
scatterAgents
.
resize
(
2
);
for
(
auto
&
agent
:
memoryFrameLines_
[
i
].
scatterAgents
)
{
agent
.
reset
(
memoryConfig
.
is_sequence
()
?
new
SequenceScatterAgentLayer
(
*
agentConfig
)
:
new
ScatterAgentLayer
(
*
agentConfig
));
agent
.
reset
(
new
ScatterAgentLayer
(
*
agentConfig
));
agent
->
init
(
LayerMap
(),
parameterMap_
);
}
}
...
...
@@ -297,8 +291,6 @@ void RecurrentGradientMachine::init(
if
(
subModelConfig
->
evaluator_names_size
()
>
0
)
{
evaluator_
.
reset
(
frames_
[
0
]
->
makeEvaluator
());
}
targetInfoInlinkId_
=
subModelConfig
->
target_inlinkid
();
}
void
RecurrentGradientMachine
::
resizeOrCreateFrames
(
int
numFrames
)
{
...
...
@@ -376,108 +368,102 @@ void RecurrentGradientMachine::prefetch(const std::vector<Argument>& inArgs) {
LOG
(
FATAL
)
<<
"should not use this function"
;
}
void
RecurrentGradientMachine
::
forward
(
const
std
::
vector
<
Argument
>&
inArgs
,
std
::
vector
<
Argument
>*
outArgs
,
PassType
passType
)
{
if
(
inFrameLines_
.
empty
()
&&
passType
==
PASS_TEST
)
{
generateSequence
();
return
;
}
// else forward..
const
Argument
&
input
=
inFrameLines_
[
0
].
inLayer
->
getOutput
();
CHECK
(
input
.
sequenceStartPositions
);
int
batchSize
=
input
.
getBatchSize
();
size_t
numSequences
=
input
.
getNumSequences
();
const
int
*
starts
=
input
.
sequenceStartPositions
->
getData
(
false
);
bool
hasSubseq
=
input
.
hasSubseq
();
// In case of !hasSubseq or targetInfoInlinkId_ == -1, all inlinks share the
// same inframe info
bool
shareInlinkInfo
=
!
hasSubseq
||
targetInfoInlinkId_
==
-
1
;
// Defaultly, share info with the first inlink
if
(
shareInlinkInfo
)
{
targetInfoInlinkId_
=
0
;
}
// check hasSubseq in both config and input are the same
CHECK_EQ
(
hasSubseq
,
inFrameLines_
[
0
].
hasSubseq
);
CHECK_EQ
(
starts
[
numSequences
],
batchSize
);
CHECK
(
input
.
sequenceStartPositions
);
// check other inputs has same sequence length and start
for
(
size_t
i
=
1
;
i
<
inFrameLines_
.
size
();
++
i
)
{
const
Argument
&
input1
=
inFrameLines_
[
i
].
inLayer
->
getOutput
();
CHECK_EQ
((
size_t
)
input1
.
getNumSequences
(),
numSequences
);
// check all inputs should have same hasSubseq flag
CHECK_EQ
(
input
.
hasSubseq
(),
inFrameLines_
[
0
].
hasSubseq
);
// if shareInlinkInfo, checks:
// 1. all inlinks have same number of total tokens
// 2. all inlinks have same number of tokens for each sentence of each
// sample. If hasSubseq, one sample has multiple sentence, else, one
// sample is one sentence
if
(
shareInlinkInfo
)
{
CHECK_EQ
(
input1
.
getBatchSize
(),
batchSize
);
CHECK
(
std
::
equal
(
starts
,
starts
+
numSequences
+
1
,
input1
.
sequenceStartPositions
->
getData
(
false
)));
void
RecurrentGradientMachine
::
checkInputConsistency
(
int
inlinkId
,
const
std
::
vector
<
Argument
::
SeqInfo
>&
seqInfo
)
{
if
(
commonSeqInfo_
.
empty
())
{
commonSeqInfo_
.
resize
(
seqInfo
.
size
());
for
(
size_t
i
=
0
;
i
<
seqInfo
.
size
();
++
i
)
{
commonSeqInfo_
[
i
].
topLevelLength
=
seqInfo
[
i
].
topLevelLength
;
commonSeqInfo_
[
i
].
seqId
=
seqInfo
[
i
].
seqId
;
}
}
else
{
CHECK_EQ
(
commonSeqInfo_
.
size
(),
seqInfo
.
size
())
<<
" RecurrentGroup "
<<
subModelName_
<<
" input "
<<
inlinkId
<<
" has mismatched number of sequences"
;
for
(
size_t
i
=
0
;
i
<
seqInfo
.
size
();
++
i
)
{
CHECK_EQ
(
commonSeqInfo_
[
i
].
topLevelLength
,
seqInfo
[
i
].
topLevelLength
)
<<
" RecurrentGroup "
<<
subModelName_
<<
" input "
<<
inlinkId
<<
" has mismatched sequence length"
;
CHECK_EQ
(
commonSeqInfo_
[
i
].
seqId
,
seqInfo
[
i
].
seqId
)
<<
" RecurrentGroup "
<<
subModelName_
<<
" input "
<<
inlinkId
<<
" has mismatched sequence length"
;
}
}
}
if
(
hasSubseq
)
{
CHECK
(
input
.
subSequenceStartPositions
);
size_t
numSubSequences
=
input
.
getNumSubSequences
();
const
int
*
subStarts
=
input
.
subSequenceStartPositions
->
getData
(
false
);
CHECK_EQ
(
subStarts
[
numSubSequences
],
batchSize
);
// if hasSubseq, check other inputs has same sub-sequence and sub-start
for
(
size_t
i
=
1
;
i
<
inFrameLines_
.
size
();
++
i
)
{
const
Argument
&
input1
=
inFrameLines_
[
i
].
inLayer
->
getOutput
();
CHECK_EQ
((
size_t
)
input1
.
getNumSubSequences
(),
numSubSequences
);
if
(
shareInlinkInfo
)
{
CHECK
(
std
::
equal
(
subStarts
,
subStarts
+
numSubSequences
+
1
,
input1
.
subSequenceStartPositions
->
getData
(
false
)));
}
void
RecurrentGradientMachine
::
calcNumSequencesAtEachStep
()
{
int
numSequences
=
commonSeqInfo_
.
size
();
numSeqs_
.
resize
(
maxSequenceLength_
);
for
(
int
i
=
0
;
i
<
numSequences
;
++
i
)
{
for
(
int
j
=
0
;
j
<
commonSeqInfo_
[
i
].
topLevelLength
;
++
j
)
{
numSeqs_
[
j
]
=
i
+
1
;
}
}
}
void
RecurrentGradientMachine
::
reorganizeInput
(
PassType
passType
)
{
info_
.
clear
();
info_
.
resize
(
inFrameLines_
.
size
());
commonSeqInfo_
.
clear
();
seqInfos_
.
clear
();
seqInfos_
.
resize
(
inFrameLines_
.
size
());
for
(
size_t
i
=
0
;
i
<
inFrameLines_
.
size
();
i
++
)
{
const
Argument
&
input
=
inFrameLines_
[
i
].
inLayer
->
getOutput
();
if
(
!
input
.
hasSeq
())
{
continue
;
}
input
.
getSeqInfo
(
&
seqInfos_
[
i
]);
checkInputConsistency
(
i
,
seqInfos_
[
i
]);
}
CHECK
(
!
commonSeqInfo_
.
empty
())
<<
"At least one input needs to be sequence or subsequence"
;
maxSequenceLength_
=
commonSeqInfo_
[
0
].
topLevelLength
;
calcNumSequencesAtEachStep
();
for
(
size_t
i
=
0
;
i
<
inFrameLines_
.
size
();
++
i
)
{
const
Argument
&
input
=
inFrameLines_
[
i
].
inLayer
->
getOutput
();
if
(
!
input
.
hasSeq
())
{
seqInfos_
[
i
]
=
commonSeqInfo_
;
}
createInFrameInfo
(
i
,
input
,
passType
);
}
{
AsyncGpuBlock
asyncGpuBlock
;
// if shareInlinkInfo, only calculate info of the first inlink
// else, calculate info for each inlink
if
(
shareInlinkInfo
)
{
input
.
getSeqInfo
(
&
seqInfos_
[
0
]);
maxSequenceLength_
=
seqInfos_
[
0
][
0
].
topLevelLength
;
createInFrameInfo
(
0
,
input
,
passType
);
}
else
{
for
(
size_t
i
=
0
;
i
<
inFrameLines_
.
size
();
i
++
)
{
const
Argument
&
input1
=
inFrameLines_
[
i
].
inLayer
->
getOutput
();
input1
.
getSeqInfo
(
&
seqInfos_
[
i
]);
maxSequenceLength_
=
seqInfos_
[
i
][
0
].
topLevelLength
;
createInFrameInfo
(
i
,
input1
,
passType
);
}
}
// inFrameLine select rows in real layer one time
for
(
size_t
i
=
0
;
i
<
inFrameLines_
.
size
();
i
++
)
{
int
curInlinkId
=
shareInlinkInfo
?
0
:
i
;
selectRowsOneTime
(
inFrameLines_
[
i
].
inLayer
,
info_
[
curInlinkId
].
allIds
,
info_
[
i
].
allIds
,
&
(
inFrameLines_
[
i
].
outArg
),
passType
);
}
}
resizeOrCreateFrames
(
maxSequenceLength_
);
resizeBootFrame
(
numSequences
);
}
void
RecurrentGradientMachine
::
reorganizeOutput
(
PassType
passType
)
{
calcSequenceStartPositions
();
for
(
size_t
i
=
0
;
i
<
outFrameLines_
.
size
();
++
i
)
{
Info
info
;
auto
&
outFrameLine
=
outFrameLines_
[
i
];
ICpuGpuVectorPtr
sequenceStartPositions
;
ICpuGpuVectorPtr
subSequenceStartPositions
;
createOutFrameInfo
(
outFrameLine
,
info
,
sequenceStartPositions
,
subSequenceStartPositions
);
auto
gatherAgent
=
dynamic_cast
<
GatherAgentLayer
*>
(
outFrameLine
.
agentLayer
.
get
());
CHECK_NOTNULL
(
gatherAgent
);
gatherAgent
->
copyIdAndSequenceInfo
(
sequenceStartPositions
,
subSequenceStartPositions
,
info
.
allIds
,
info
.
idIndex
);
}
}
void
RecurrentGradientMachine
::
connectFrames
(
PassType
passType
)
{
for
(
auto
&
memoryFrameLine
:
memoryFrameLines_
)
{
if
(
memoryFrameLine
.
rootAgent
)
{
auto
scatterAgent
=
...
...
@@ -487,8 +473,9 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
memoryFrameLine
.
outArg
,
memoryFrameLine
.
allIds
,
/* idIndex */
0
,
memoryFrameLine
.
allIds
->
getSize
());
if
(
memoryFrameLine
.
is_sequence
)
{
// memoryConfig is sequence
memoryFrameLine
.
allIds
->
getSize
(),
/* handleBackward */
true
);
if
(
memoryFrameLine
.
sequenceStartPositions
)
{
int
size
=
memoryFrameLine
.
sequenceStartPositions
->
getSize
();
scatterAgent
->
setSequenceStartPositions
(
memoryFrameLine
.
sequenceStartPositions
,
...
...
@@ -501,28 +488,26 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
for
(
auto
&
outFrameLine
:
outFrameLines_
)
{
auto
gatherAgent
=
dynamic_cast
<
GatherAgentLayer
*>
(
outFrameLine
.
agentLayer
.
get
());
CHECK_NOTNULL
(
gatherAgent
);
gatherAgent
->
copyIdAndSequenceInfo
(
input
,
info_
[
targetInfoInlinkId_
].
allIds
,
info_
[
targetInfoInlinkId_
].
idIndex
);
gatherAgent
->
clearRealLayers
();
}
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
int
idSize
=
0
;
// connect in_links
for
(
size_t
j
=
0
;
j
<
inFrameLines_
.
size
();
++
j
)
{
Info
&
info
=
info_
[
shareInlinkInfo
?
0
:
j
];
Info
&
info
=
info_
[
j
];
// idSize denotes the sum number of tokens in each length i
idSize
=
info
.
idIndex
[
i
+
1
]
-
info
.
idIndex
[
i
];
int
idIndex
=
info
.
idIndex
.
empty
()
?
0
:
info
.
idIndex
[
i
];
int
idSize
=
info
.
idIndex
.
empty
()
?
numSeqs_
[
i
]
:
info
.
idIndex
[
i
+
1
]
-
info
.
idIndex
[
i
];
InFrameLine
inFrameLine
=
inFrameLines_
[
j
];
auto
scatterAgent
=
dynamic_cast
<
ScatterAgentLayer
*>
(
inFrameLine
.
agents
[
i
].
get
());
scatterAgent
->
setRealLayerAndOutput
(
inFrameLine
.
inLayer
,
inFrameLine
.
outArg
,
info
.
allIds
,
info
.
idIndex
[
i
],
idSize
);
if
(
hasSubseq
)
{
idIndex
,
idSize
,
i
==
0
);
if
(
info
.
sequenceStartPositions
)
{
// size: the length of subsequence
int
size
=
info
.
seqStartPosIndex
[
i
+
1
]
-
info
.
seqStartPosIndex
[
i
];
scatterAgent
->
setSequenceStartPositions
(
...
...
@@ -536,11 +521,6 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
dynamic_cast
<
GatherAgentLayer
*>
(
outFrameLine
.
agentLayer
.
get
());
gatherAgent
->
addRealLayer
(
outFrameLine
.
frames
[
i
]);
}
// connect memory links
// Adopt info_[0].idIndex because seq which has_subseq=True
// doesn't support Memory with !hasSubseq bootlayer;
// And inlinks that !hasSubSeq must have same inlink length.
idSize
=
info_
[
0
].
idIndex
[
i
+
1
]
-
info_
[
0
].
idIndex
[
i
];
for
(
auto
&
memoryFrameLine
:
memoryFrameLines_
)
{
NeuralNetwork
::
connect
(
memoryFrameLine
.
agents
[
i
],
...
...
@@ -548,6 +528,28 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
numSeqs_
[
i
]
/*height of agent*/
);
}
}
}
void
RecurrentGradientMachine
::
forward
(
const
std
::
vector
<
Argument
>&
inArgs
,
std
::
vector
<
Argument
>*
outArgs
,
PassType
passType
)
{
/* inArgs and outArgs are not used.
The inputs are inFrameLines_[i].inLayer.
The outputs are outFramesLines_[i].agentLayer
*/
if
(
inFrameLines_
.
empty
()
&&
passType
==
PASS_TEST
)
{
generateSequence
();
return
;
}
// else forward..
reorganizeInput
(
passType
);
int
numSequences
=
commonSeqInfo_
.
size
();
resizeOrCreateFrames
(
maxSequenceLength_
);
resizeBootFrame
(
numSequences
);
connectFrames
(
passType
);
REGISTER_TIMER_INFO
(
"RecurrentFwTime"
,
"RecurrentFwTime"
);
// forward
...
...
@@ -558,16 +560,12 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
const
std
::
vector
<
Argument
>
inArgs
;
std
::
vector
<
Argument
>
outArgs
;
frames_
[
i
]
->
forward
(
inArgs
,
&
outArgs
,
passType
);
if
(
hasSubseq
)
{
for
(
auto
&
outFrameLine
:
outFrameLines_
)
{
CHECK
(
outFrameLine
.
frames
[
i
]
->
getOutput
().
sequenceStartPositions
)
<<
"In hierachical RNN, all out links should be from sequences."
;
}
}
}
if
(
evaluator_
&&
passType
==
PASS_TEST
)
{
this
->
eval
(
evaluator_
.
get
());
}
reorganizeOutput
(
passType
);
}
void
RecurrentGradientMachine
::
backward
(
const
UpdateCallback
&
callback
)
{
...
...
@@ -634,76 +632,228 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
this
->
beamSearchStatistics_
=
nullptr
;
}
}
namespace
{
void
lenToStarts
(
std
::
vector
<
int
>&
starts
)
{
int
pos
=
0
;
starts
.
back
()
=
0
;
for
(
auto
&
start
:
starts
)
{
int
tmp
=
start
;
start
=
pos
;
pos
+=
tmp
;
}
starts
.
back
()
=
pos
;
}
}
void
RecurrentGradientMachine
::
calcSequenceStartPositions
()
{
std
::
vector
<
int
>
starts
(
commonSeqInfo_
.
size
()
+
1
);
for
(
auto
&
seqInfo
:
commonSeqInfo_
)
{
starts
[
seqInfo
.
seqId
]
=
seqInfo
.
topLevelLength
;
}
lenToStarts
(
starts
);
ICpuGpuVector
::
resizeOrCreate
(
sequenceStartPositions_
,
starts
.
size
(),
false
);
std
::
copy
(
starts
.
begin
(),
starts
.
end
(),
sequenceStartPositions_
->
getMutableData
(
false
));
}
void
RecurrentGradientMachine
::
checkOutputConsistency
(
OutFrameLine
&
outFrameLine
)
{
bool
hasSeq
=
outFrameLine
.
frames
[
0
]
->
getOutput
().
hasSeq
();
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
LayerPtr
frame
=
outFrameLine
.
frames
[
i
];
CHECK_EQ
(
hasSeq
,
frame
->
getOutput
().
hasSeq
());
int
numSequences
=
frame
->
getOutput
().
getNumSequences
();
CHECK_EQ
(
numSeqs_
[
i
],
numSequences
);
}
}
void
RecurrentGradientMachine
::
createOutFrameInfo
(
OutFrameLine
&
outFrameLine
,
Info
&
info
,
ICpuGpuVectorPtr
&
sequenceStartPositions
,
ICpuGpuVectorPtr
&
subSequenceStartPositions
)
{
checkOutputConsistency
(
outFrameLine
);
if
(
!
outFrameLine
.
frames
[
0
]
->
getOutput
().
hasSeq
())
{
createOutFrameInfo_seq
(
outFrameLine
,
info
,
sequenceStartPositions
,
subSequenceStartPositions
);
}
else
{
createOutFrameInfo_subseq
(
outFrameLine
,
info
,
sequenceStartPositions
,
subSequenceStartPositions
);
}
}
void
RecurrentGradientMachine
::
createOutFrameInfo_seq
(
OutFrameLine
&
outFrameLine
,
Info
&
info
,
ICpuGpuVectorPtr
&
sequenceStartPositions
,
ICpuGpuVectorPtr
&
subSequenceStartPositions
)
{
std
::
vector
<
int
>
allIds
;
info
.
idIndex
.
resize
(
1
,
0
);
// first idIndex = 0
const
int
*
starts
=
sequenceStartPositions_
->
getData
(
false
);
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
LayerPtr
frame
=
outFrameLine
.
frames
[
i
];
size_t
numSequences
=
frame
->
getOutput
().
getNumSequences
();
for
(
size_t
j
=
0
;
j
<
numSequences
;
++
j
)
{
int
seqStart
=
starts
[
commonSeqInfo_
[
j
].
seqId
];
int
seqLength
=
commonSeqInfo_
[
j
].
topLevelLength
;
allIds
.
push_back
(
reversed_
?
(
seqStart
+
seqLength
-
1
-
i
)
:
(
seqStart
+
i
));
}
info
.
idIndex
.
push_back
(
allIds
.
size
());
}
sequenceStartPositions
=
sequenceStartPositions_
;
copyScattedId
(
allIds
,
&
info
.
allIds
,
allIds
.
size
());
CHECK_EQ
(
info
.
idIndex
.
size
(),
static_cast
<
size_t
>
(
maxSequenceLength_
+
1
));
}
void
RecurrentGradientMachine
::
createOutFrameInfo_subseq
(
OutFrameLine
&
outFrameLine
,
Info
&
info
,
ICpuGpuVectorPtr
&
sequenceStartPositions
,
ICpuGpuVectorPtr
&
subSequenceStartPositions
)
{
size_t
numSequences
=
commonSeqInfo_
.
size
();
std
::
vector
<
int
>
allIds
;
info
.
idIndex
.
resize
(
1
,
0
);
// first idIndex = 0
const
int
*
starts
=
sequenceStartPositions_
->
getData
(
false
);
std
::
vector
<
int
>
subStarts
(
starts
[
numSequences
]
+
1
);
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
LayerPtr
frame
=
outFrameLine
.
frames
[
i
];
size_t
numSequences
=
frame
->
getOutput
().
getNumSequences
();
const
int
*
seqStarts
=
frame
->
getOutput
().
sequenceStartPositions
->
getData
(
false
);
for
(
size_t
j
=
0
;
j
<
numSequences
;
++
j
)
{
subStarts
[
starts
[
commonSeqInfo_
[
j
].
seqId
]
+
i
]
=
seqStarts
[
j
+
1
]
-
seqStarts
[
j
];
}
}
lenToStarts
(
subStarts
);
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
LayerPtr
frame
=
outFrameLine
.
frames
[
i
];
size_t
numSequences
=
frame
->
getOutput
().
getNumSequences
();
for
(
size_t
j
=
0
;
j
<
numSequences
;
++
j
)
{
int
pos
=
starts
[
commonSeqInfo_
[
j
].
seqId
]
+
i
;
int
subSeqStart
=
subStarts
[
pos
];
int
subSeqEnd
=
subStarts
[
pos
+
1
];
for
(
int
k
=
subSeqStart
;
k
<
subSeqEnd
;
++
k
)
{
allIds
.
push_back
(
k
);
}
}
info
.
idIndex
.
push_back
(
allIds
.
size
());
}
ICpuGpuVector
::
resizeOrCreate
(
subSequenceStartPositions
,
subStarts
.
size
(),
false
);
int
*
cpuSubSequenceStartPositions
=
subSequenceStartPositions
->
getMutableData
(
false
);
std
::
copy
(
subStarts
.
begin
(),
subStarts
.
end
(),
cpuSubSequenceStartPositions
);
ICpuGpuVector
::
resizeOrCreate
(
sequenceStartPositions
,
numSequences
+
1
,
false
);
int
*
cpuSequenceStartPositions
=
sequenceStartPositions
->
getMutableData
(
false
);
for
(
size_t
i
=
0
;
i
<=
numSequences
;
++
i
)
{
cpuSequenceStartPositions
[
i
]
=
subStarts
[
starts
[
i
]];
}
copyScattedId
(
allIds
,
&
info
.
allIds
,
allIds
.
size
());
CHECK_EQ
(
info
.
idIndex
.
size
(),
static_cast
<
size_t
>
(
maxSequenceLength_
+
1
));
}
/* create scattered id infomation for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions infomation
* for all realLayer of inFrameLines one time.
*/
void
RecurrentGradientMachine
::
createInFrameInfo
(
int
inlinkId
,
const
Argument
&
input
,
PassType
passType
)
{
bool
hasSubseq
=
input
.
hasSubseq
();
// numSequences: # samples(sequences) in a batch
size_t
numSequences
=
input
.
getNumSequences
();
if
(
!
input
.
hasSeq
())
{
createInFrameInfo_nonseq
(
inlinkId
,
input
,
passType
);
}
else
if
(
!
input
.
hasSubseq
())
{
createInFrameInfo_seq
(
inlinkId
,
input
,
passType
);
}
else
{
createInFrameInfo_subseq
(
inlinkId
,
input
,
passType
);
}
}
void
RecurrentGradientMachine
::
createInFrameInfo_nonseq
(
int
inlinkId
,
const
Argument
&
input
,
PassType
passType
)
{
std
::
vector
<
int
>
allIds
;
auto
&
seqInfo
=
seqInfos_
[
inlinkId
];
numSeqs_
.
clear
();
Info
*
inlinkInfo
=
&
info_
[
inlinkId
];
inlinkInfo
->
idIndex
.
clear
();
inlinkInfo
->
idIndex
.
push_back
(
0
);
// first idIndex = 0
for
(
size_t
i
=
0
;
i
<
seqInfo
.
size
();
++
i
)
{
allIds
.
push_back
(
seqInfo
[
i
].
seqId
);
}
// copy and check scatterId
copyScattedId
(
allIds
,
&
inlinkInfo
->
allIds
,
input
.
getBatchSize
());
}
void
RecurrentGradientMachine
::
createInFrameInfo_seq
(
int
inlinkId
,
const
Argument
&
input
,
PassType
passType
)
{
std
::
vector
<
int
>
allIds
;
auto
&
seqInfo
=
seqInfos_
[
inlinkId
];
Info
*
inlinkInfo
=
&
info_
[
inlinkId
];
inlinkInfo
->
idIndex
.
resize
(
1
,
0
);
// first idIndex = 0
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
for
(
int
j
=
0
;
j
<
numSeqs_
[
i
];
++
j
)
{
int
seqLength
=
seqInfo
[
j
].
topLevelLength
;
int
seqStart
=
seqInfo
[
j
].
seqStart
;
allIds
.
push_back
(
reversed_
?
(
seqStart
+
seqLength
-
1
-
i
)
:
(
seqStart
+
i
));
}
inlinkInfo
->
idIndex
.
push_back
(
allIds
.
size
());
}
// copy and check scatterId
copyScattedId
(
allIds
,
&
inlinkInfo
->
allIds
,
input
.
getBatchSize
());
CHECK_EQ
(
inlinkInfo
->
idIndex
.
size
(),
static_cast
<
size_t
>
(
maxSequenceLength_
+
1
));
}
void
RecurrentGradientMachine
::
createInFrameInfo_subseq
(
int
inlinkId
,
const
Argument
&
input
,
PassType
passType
)
{
std
::
vector
<
int
>
allIds
;
auto
&
seqInfo
=
seqInfos_
[
inlinkId
];
Info
*
inlinkInfo
=
&
info_
[
inlinkId
];
inlinkInfo
->
idIndex
.
resize
(
1
,
0
);
// first idIndex = 0
std
::
vector
<
int
>
sequenceStartPositions
;
const
int
*
subSequenceStartPositions
=
nullptr
;
if
(
hasSubseq
)
{
// for sequenceScatterAgentLayer
subSequenceStartPositions
=
input
.
subSequenceStartPositions
->
getData
(
false
);
inlinkInfo
->
seqStartPosIndex
.
clear
();
inlinkInfo
->
seqStartPosIndex
.
push_back
(
0
);
// first seqStartPosIndex = 0
}
// maxSequenceLength_: max topLevelLength in allsamples
subSequenceStartPositions
=
input
.
subSequenceStartPositions
->
getData
(
false
);
inlinkInfo
->
seqStartPosIndex
.
clear
();
inlinkInfo
->
seqStartPosIndex
.
push_back
(
0
);
// first seqStartPosIndex = 0
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
if
(
hasSubseq
)
{
sequenceStartPositions
.
push_back
(
0
);
// first element = 0
}
int
numSeqs
=
0
;
for
(
size_t
j
=
0
;
j
<
numSequences
;
++
j
)
{
int
seqLength
=
seqInfo
[
j
].
topLevelLength
;
if
(
i
>=
seqLength
)
{
break
;
}
++
numSeqs
;
if
(
hasSubseq
)
{
int
subSeqStart
=
subSequenceStartPositions
[
seqInfo
[
j
].
subSeqStart
+
i
];
int
subSeqEnd
=
subSequenceStartPositions
[
seqInfo
[
j
].
subSeqStart
+
i
+
1
];
for
(
int
k
=
subSeqStart
;
k
<
subSeqEnd
;
++
k
)
{
allIds
.
push_back
(
k
);
}
sequenceStartPositions
.
push_back
(
sequenceStartPositions
.
back
()
+
subSeqEnd
-
subSeqStart
);
}
else
{
int
seqStart
=
seqInfo
[
j
].
seqStart
;
allIds
.
push_back
(
reversed_
?
(
seqStart
+
seqLength
-
1
-
i
)
:
(
seqStart
+
i
));
sequenceStartPositions
.
push_back
(
0
);
// first element = 0
for
(
int
j
=
0
;
j
<
numSeqs_
[
i
];
++
j
)
{
int
subSeqStart
=
subSequenceStartPositions
[
seqInfo
[
j
].
subSeqStart
+
i
];
int
subSeqEnd
=
subSequenceStartPositions
[
seqInfo
[
j
].
subSeqStart
+
i
+
1
];
for
(
int
k
=
subSeqStart
;
k
<
subSeqEnd
;
++
k
)
{
allIds
.
push_back
(
k
);
}
sequenceStartPositions
.
push_back
(
sequenceStartPositions
.
back
()
+
subSeqEnd
-
subSeqStart
);
}
inlinkInfo
->
idIndex
.
push_back
(
allIds
.
size
());
numSeqs_
.
push_back
(
numSeqs
);
if
(
hasSubseq
)
{
inlinkInfo
->
seqStartPosIndex
.
push_back
(
sequenceStartPositions
.
size
());
}
}
if
(
hasSubseq
)
{
// inFrameLine create sequenceStartPositions one time
CHECK_EQ
(
sequenceStartPositions
.
size
(),
static_cast
<
size_t
>
(
maxSequenceLength_
+
input
.
getNumSubSequences
()));
CHECK_EQ
(
inlinkInfo
->
seqStartPosIndex
.
size
(),
static_cast
<
size_t
>
(
maxSequenceLength_
+
1
));
createSeqPos
(
sequenceStartPositions
,
&
inlinkInfo
->
sequenceStartPositions
);
inlinkInfo
->
seqStartPosIndex
.
push_back
(
sequenceStartPositions
.
size
());
}
// inFrameLine create sequenceStartPositions one time
CHECK_EQ
(
sequenceStartPositions
.
size
(),
static_cast
<
size_t
>
(
maxSequenceLength_
+
input
.
getNumSubSequences
()));
CHECK_EQ
(
inlinkInfo
->
seqStartPosIndex
.
size
(),
static_cast
<
size_t
>
(
maxSequenceLength_
+
1
));
createSeqPos
(
sequenceStartPositions
,
&
inlinkInfo
->
sequenceStartPositions
);
// copy and check scatterId
copyScattedId
(
allIds
,
&
inlinkInfo
->
allIds
,
input
.
getBatchSize
());
...
...
@@ -717,11 +867,11 @@ void RecurrentGradientMachine::createMemoryFrameInfo(
const
Argument
&
input
=
(
*
memoryFrameLine
).
rootLayer
->
getOutput
();
size_t
numSequences
=
input
.
getNumSequences
();
std
::
vector
<
int
>
allIds
;
bool
seqFlag
=
(
*
memoryFrameLine
).
is_sequence
;
bool
seqFlag
=
input
.
hasSeq
();
CHECK
(
!
input
.
hasSubseq
())
<<
"Subsequence boot layer for memory is not supported"
;
if
(
seqFlag
)
{
// for sequenceScatterAgentLayer
CHECK
(
input
.
sequenceStartPositions
)
<<
"boot layer must be a sequence when is_sequence = true"
;
std
::
vector
<
int
>
sequenceStartPositions
;
sequenceStartPositions
.
push_back
(
0
);
// first element = 0
const
int
*
starts
=
input
.
sequenceStartPositions
->
getData
(
false
);
...
...
@@ -804,8 +954,7 @@ size_t RecurrentGradientMachine::getGenBatchSize() {
for
(
auto
&
memoryFrameLine
:
memoryFrameLines_
)
{
if
(
!
memoryFrameLine
.
rootLayer
)
continue
;
Argument
&
bootArg
=
memoryFrameLine
.
rootLayer
->
getOutput
();
size_t
batchSize
=
memoryFrameLine
.
is_sequence
?
bootArg
.
getNumSequences
()
:
bootArg
.
getBatchSize
();
size_t
batchSize
=
bootArg
.
getNumSequences
();
if
(
numSequences
)
{
CHECK_EQ
(
numSequences
,
batchSize
);
}
else
{
...
...
@@ -845,12 +994,7 @@ void RecurrentGradientMachine::generateSequence() {
if
(
memoryFrameLine
.
rootAgent
)
{
auto
scatterAgent
=
dynamic_cast
<
ScatterAgentLayer
*>
(
memoryFrameLine
.
rootAgent
.
get
());
bool
seqFlag
=
memoryFrameLine
.
is_sequence
;
scatterAgent
->
setRealLayer
(
memoryFrameLine
.
rootLayer
,
ids
,
seqFlag
);
if
(
seqFlag
)
{
CHECK
(
memoryFrameLine
.
rootLayer
->
getOutput
().
sequenceStartPositions
)
<<
"boot layer must be a sequence when is_sequence = true"
;
}
scatterAgent
->
setRealLayer
(
memoryFrameLine
.
rootLayer
,
ids
);
}
NeuralNetwork
::
connect
(
memoryFrameLine
.
agents
[
0
],
memoryFrameLine
.
bootLayer
,
ids
.
size
());
...
...
@@ -858,6 +1002,7 @@ void RecurrentGradientMachine::generateSequence() {
// boot layer forward
AsyncGpuBlock
asyncGpuBlock
;
for
(
auto
&
memoryFrameLine
:
memoryFrameLines_
)
{
memoryFrameLine
.
bootLayer
->
forward
(
PASS_TEST
);
}
...
...
@@ -930,8 +1075,7 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
auto
scatterAgent
=
dynamic_cast
<
ScatterAgentLayer
*>
(
memoryFrameLine
.
scatterAgents
[
machineCur
].
get
());
scatterAgent
->
setRealLayer
(
memoryFrameLine
.
frames
[
machinePrev
],
scatterIds
,
memoryFrameLine
.
is_sequence
);
scatterIds
);
scatterAgent
->
forward
(
PASS_TEST
);
NeuralNetwork
::
connect
(
memoryFrameLine
.
agents
[
machineCur
],
memoryFrameLine
.
scatterAgents
[
machineCur
]);
...
...
@@ -1003,8 +1147,7 @@ void RecurrentGradientMachine::connectPrevFrame(int stepId,
auto
scatterAgent
=
dynamic_cast
<
ScatterAgentLayer
*>
(
memoryFrameLine
.
scatterAgents
[
machineCur
].
get
());
scatterAgent
->
setRealLayer
(
memoryFrameLine
.
frames
[
machinePrev
],
isOutIds
?
topIds_
:
machineIds_
,
memoryFrameLine
.
is_sequence
);
isOutIds
?
topIds_
:
machineIds_
);
scatterAgent
->
forward
(
PASS_TEST
);
NeuralNetwork
::
connect
(
memoryFrameLine
.
agents
[
machineCur
],
memoryFrameLine
.
scatterAgents
[
machineCur
]);
...
...
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
浏览文件 @
09f34c4b
...
...
@@ -284,6 +284,16 @@ public:
}
protected:
std
::
vector
<
Argument
::
SeqInfo
>
commonSeqInfo_
;
ICpuGpuVectorPtr
sequenceStartPositions_
;
void
calcSequenceStartPositions
();
void
checkInputConsistency
(
int
inlinkId
,
const
std
::
vector
<
Argument
::
SeqInfo
>&
seqInfo
);
void
reorganizeInput
(
PassType
passType
);
void
reorganizeOutput
(
PassType
passType
);
void
connectFrames
(
PassType
passType
);
void
calcNumSequencesAtEachStep
();
void
resizeOrCreateFrames
(
int
numFrames
);
void
resizeBootFrame
(
int
numSequences
);
...
...
@@ -295,8 +305,7 @@ protected:
std
::
string
linkName
;
LayerPtr
inLayer
;
std
::
vector
<
LayerPtr
>
agents
;
// Scatter Agents to reform batch input
bool
hasSubseq
;
Argument
outArg
;
// scatter output argument
Argument
outArg
;
// scatter output argument
};
std
::
vector
<
InFrameLine
>
inFrameLines_
;
...
...
@@ -318,7 +327,6 @@ protected:
std
::
vector
<
LayerPtr
>
agents
;
std
::
vector
<
LayerPtr
>
scatterAgents
;
// scatter agent used by beam search
Argument
outArg
;
// scatter output argument
bool
is_sequence
;
// Different memoryFrameLine have different element as follows
IVectorPtr
allIds
;
// scattered id of realLayer
ICpuGpuVectorPtr
...
...
@@ -330,22 +338,27 @@ protected:
// and all outFrameLines(outlinks) share the info with one inFrameLine,
// which is assigned by targetInfoInlinkId_.
struct
Info
{
IVectorPtr
allIds
;
// scattered id of realLayer
std
::
vector
<
int
>
idIndex
;
// index of allIds
// The original positions in the original batch
IVectorPtr
allIds
;
// scattered id of realLayer [batchSize]
// index of allIds for each step [maxSequenceLength_]
// idIndex[i] is the total length of the first i sequences
std
::
vector
<
int
>
idIndex
;
ICpuGpuVectorPtr
sequenceStartPositions
;
// scattered sequenceStartPositions
std
::
vector
<
int
>
seqStartPosIndex
;
// index of sequenceStartPositions
};
std
::
vector
<
Info
>
info_
;
std
::
vector
<
Info
>
info_
;
// for input
// numSeqs_[i] is the number sequences which is longer than i (for sequence
// data) or has more than i subsequences (for subsequence data)
// Equivalently, numSeqs_[i] is the number of sequences at step i;
std
::
vector
<
int
>
numSeqs_
;
std
::
vector
<
std
::
vector
<
Argument
::
SeqInfo
>>
seqInfos_
;
// the id of inlink which share info with outlinks
int
targetInfoInlinkId_
;
void
checkOutputConsistency
(
OutFrameLine
&
outFrameLine
);
/* create scattered id infomation for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions infomation
...
...
@@ -354,6 +367,28 @@ protected:
void
createInFrameInfo
(
int
inlinks_id
,
const
Argument
&
input
,
PassType
passType
);
void
createInFrameInfo_nonseq
(
int
inlinks_id
,
const
Argument
&
input
,
PassType
passType
);
void
createInFrameInfo_seq
(
int
inlinks_id
,
const
Argument
&
input
,
PassType
passType
);
void
createInFrameInfo_subseq
(
int
inlinks_id
,
const
Argument
&
input
,
PassType
passType
);
void
createOutFrameInfo
(
OutFrameLine
&
outFrameLine
,
Info
&
info
,
ICpuGpuVectorPtr
&
sequenceStartPositions
,
ICpuGpuVectorPtr
&
subSequenceStartPositions
);
void
createOutFrameInfo_seq
(
OutFrameLine
&
outFrameLine
,
Info
&
info
,
ICpuGpuVectorPtr
&
sequenceStartPositions
,
ICpuGpuVectorPtr
&
subSequenceStartPositions
);
void
createOutFrameInfo_subseq
(
OutFrameLine
&
outFrameLine
,
Info
&
info
,
ICpuGpuVectorPtr
&
sequenceStartPositions
,
ICpuGpuVectorPtr
&
subSequenceStartPositions
);
void
createMemoryFrameInfo
(
MemoryFrameLine
*
memoryFrameLine
,
PassType
passType
);
...
...
@@ -386,9 +421,7 @@ protected:
NeuralNetwork
*
rootNetwork_
;
bool
reversed_
;
// if hasSubseq: max number of sentences(subseq)in batchsize samples
// else: max number of tokens in batchsize samples(sentences)
int
maxSequenceLength_
;
int
maxSequenceLength_
;
// Max top-level length
bool
useGpu_
;
bool
stopBeamSearch_
;
...
...
paddle/gserver/layers/AgentLayer.cpp
浏览文件 @
09f34c4b
...
...
@@ -36,14 +36,23 @@ void AgentLayer::forward(PassType passType) {
Layer
::
forward
(
passType
);
Argument
&
realOutput
=
realLayer_
->
getOutput
();
int
real
Height
=
realOutput
.
getBatchSize
();
CHECK_LE
(
numSamples_
,
real
Height
);
int
real
NumSequences
=
realOutput
.
getNumSequences
();
CHECK_LE
(
numSamples_
,
real
NumSequences
);
// get Arguments from real layers
if
(
numSamples_
>
0
&&
numSamples_
<
realHeight
)
{
if
(
realOutput
.
ids
)
{
output_
.
ids
=
IVector
::
create
(
realOutput
.
ids
->
getData
(),
numSamples_
,
useGpu_
);
if
(
numSamples_
>
0
&&
numSamples_
<
realNumSequences
)
{
if
(
realOutput
.
hasSeq
())
{
int
numRows
=
realOutput
.
sequenceStartPositions
->
getData
(
false
)[
numSamples_
];
output_
.
subArgFrom
(
realOutput
,
/* offset */
0
,
numRows
,
getSize
(),
useGpu_
,
/* trans */
false
,
/* seqFlag */
true
,
/* seqStart */
0
,
/* seqSize */
numSamples_
+
1
);
}
else
{
output_
.
subArgFrom
(
realOutput
,
/* offset */
0
,
numSamples_
,
getSize
(),
useGpu_
);
...
...
@@ -53,34 +62,6 @@ void AgentLayer::forward(PassType passType) {
}
}
void
SequenceAgentLayer
::
forward
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
Argument
&
realOutput
=
realLayer_
->
getOutput
();
int
realNumSequences
=
realOutput
.
getNumSequences
();
CHECK_LE
(
numSamples_
,
realNumSequences
);
// get Arguments from real layers
if
(
numSamples_
>
0
&&
numSamples_
<
realNumSequences
)
{
int
numRows
=
realOutput
.
sequenceStartPositions
->
getData
(
false
)[
numSamples_
];
CHECK
(
!
realOutput
.
ids
)
<<
"Not supported"
;
output_
.
subArgFrom
(
realOutput
,
/* offset */
0
,
numRows
,
getSize
(),
useGpu_
,
/* trans */
false
,
/* seqFlag */
true
,
/* seqStart */
0
,
/* seqSize */
numSamples_
+
1
);
}
else
{
output_
=
realOutput
;
}
}
REGISTER_LAYER
(
sequence_agent
,
SequenceAgentLayer
);
bool
GatherAgentLayer
::
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
CHECK_EQ
(
config_
.
inputs_size
(),
0
);
...
...
@@ -91,18 +72,26 @@ bool GatherAgentLayer::init(const LayerMap& layerMap,
return
true
;
}
void
GatherAgentLayer
::
copyIdAndSequenceInfo
(
const
Argument
&
input
,
const
IVectorPtr
&
ids
,
const
std
::
vector
<
int
>&
idIndex
)
{
output_
.
sequenceStartPositions
=
input
.
sequenceStartPositions
;
output_
.
subSequenceStartPositions
=
input
.
subSequenceStartPositions
;
realLayers_
.
clear
();
void
GatherAgentLayer
::
copyIdAndSequenceInfo
(
ICpuGpuVectorPtr
sequenceStartPositions
,
ICpuGpuVectorPtr
subSequenceStartPositions
,
const
IVectorPtr
&
ids
,
const
std
::
vector
<
int
>&
idIndex
)
{
output_
.
sequenceStartPositions
=
sequenceStartPositions
;
output_
.
subSequenceStartPositions
=
subSequenceStartPositions
;
allIds_
=
ids
;
idIndex_
=
idIndex
;
}
void
GatherAgentLayer
::
forward
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
forwardIds
(
passType
);
forwardValue
(
passType
);
}
void
GatherAgentLayer
::
forwardValue
(
PassType
passType
)
{
MatrixPtr
valueReal
=
realLayers_
[
0
]
->
getOutputValue
();
if
(
!
valueReal
)
return
;
int
height
=
allIds_
->
getSize
();
int
width
=
this
->
getSize
();
...
...
@@ -147,7 +136,9 @@ void ScatterAgentLayer::forward(PassType passType) {
CHECK_EQ
(
realLayer_
->
getDeviceId
(),
this
->
getDeviceId
());
int
width
=
this
->
getSize
();
if
(
realOutArg_
.
value
||
realOutArg_
.
ids
)
{
if
(
realOutArg_
.
hasSeq
())
{
forwardSequence
(
passType
);
}
else
if
(
realOutArg_
.
value
||
realOutArg_
.
ids
)
{
output_
.
subArgFrom
(
realOutArg_
,
/* offset */
idIndex_
,
idSize_
,
width
,
useGpu_
);
}
else
{
// used in generation
...
...
@@ -174,7 +165,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
if
(
realGrad
)
{
// for agent in inFrameLines and memoryFrameLines,
// only first scatterAgentLayer should do addToRows in backward
if
(
idIndex_
==
0
)
{
if
(
handleBackward_
)
{
outputGrad
->
addToRows
(
*
realGrad
,
*
ids_
);
}
}
...
...
@@ -183,12 +174,14 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
REGISTER_LAYER
(
gather_agent
,
GatherAgentLayer
);
REGISTER_LAYER
(
scatter_agent
,
ScatterAgentLayer
);
void
SequenceGatherAgentLayer
::
forward
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
void
GatherAgentLayer
::
forwardIds
(
PassType
passType
)
{
int
height
=
0
;
int
*
starts
=
output_
.
subSequenceStartPositions
->
getMutableData
(
false
);
IVectorPtr
idReal
=
realLayers_
[
0
]
->
getOutputLabel
();
if
(
idReal
)
{
if
(
!
idReal
)
return
;
if
(
output_
.
subSequenceStartPositions
)
{
int
*
starts
=
output_
.
subSequenceStartPositions
->
getMutableData
(
false
);
// Gather generator.idsVec
// if is beam search generation result. Get first result.
if
(
idReal
->
getData
()[
idReal
->
getSize
()
-
1
]
==
-
1
)
{
...
...
@@ -212,13 +205,11 @@ void SequenceGatherAgentLayer::forward(PassType passType) {
->
copyFrom
(
*
realLayers_
[
i
]
->
getOutputLabel
());
}
}
else
{
// Gather output.value, same as GatherAgentLayer
CHECK
(
output_
.
subSequenceStartPositions
);
GatherAgentLayer
::
forward
(
passType
);
LOG
(
FATAL
)
<<
"Not implemented"
;
}
}
void
S
equenceScatterAgentLayer
::
forward
(
PassType
passType
)
{
void
S
catterAgentLayer
::
forwardSequence
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
CHECK_EQ
(
realLayer_
->
getDeviceId
(),
this
->
getDeviceId
());
...
...
@@ -241,6 +232,7 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
/* seqStart */
seqStartPosIndex_
,
/* seqSize */
numSequences_
);
}
else
{
// Putting the generation logic here is really an ugly hack!
// used in generation
int
height
=
0
;
size_t
numSequences
=
ids_
->
getSize
();
...
...
@@ -284,7 +276,4 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
}
}
REGISTER_LAYER
(
sequence_gather_agent
,
SequenceGatherAgentLayer
);
REGISTER_LAYER
(
sequence_scatter_agent
,
SequenceScatterAgentLayer
);
}
// namespace paddle
paddle/gserver/layers/AgentLayer.h
浏览文件 @
09f34c4b
...
...
@@ -49,18 +49,6 @@ public:
void
backward
(
const
UpdateCallback
&
callback
=
nullptr
)
override
{}
};
/**
* like AgentLayer, but use first *numSamples* sequences
*/
class
SequenceAgentLayer
:
public
AgentLayer
{
public:
explicit
SequenceAgentLayer
(
const
LayerConfig
&
config
)
:
AgentLayer
(
config
)
{}
~
SequenceAgentLayer
()
{}
void
forward
(
PassType
passType
)
override
;
void
backward
(
const
UpdateCallback
&
callback
=
nullptr
)
override
{}
};
/**
* Like AgentLayer, but it can gather many real layers. Each real
* layer give a few rows of a sequence, after gather all real layers,
...
...
@@ -83,7 +71,10 @@ public:
const
ParameterMap
&
parameterMap
)
override
;
// call before addRealLayer
void
copyIdAndSequenceInfo
(
const
Argument
&
input
,
void
clearRealLayers
()
{
realLayers_
.
clear
();
}
void
copyIdAndSequenceInfo
(
ICpuGpuVectorPtr
sequenceStartPositions
,
ICpuGpuVectorPtr
subSequenceStartPositions
,
const
IVectorPtr
&
allIds
,
const
std
::
vector
<
int
>&
idIndex
);
...
...
@@ -92,24 +83,8 @@ public:
void
forward
(
PassType
passType
)
override
;
void
backward
(
const
UpdateCallback
&
callback
)
override
;
};
/**
* Like GatherAgentLayer, but select a few sequence in real layer.
* *ids* in addRealLayer() are the ids of selected sequence.
* It's used to reorder sequence output.
*/
class
SequenceGatherAgentLayer
:
public
GatherAgentLayer
{
public:
explicit
SequenceGatherAgentLayer
(
const
LayerConfig
&
config
)
:
GatherAgentLayer
(
config
)
{}
virtual
~
SequenceGatherAgentLayer
()
{}
void
forward
(
PassType
passType
);
void
backward
(
const
UpdateCallback
&
callback
)
{
// same as GatherAgentLayer
GatherAgentLayer
::
backward
(
callback
);
}
void
forwardValue
(
PassType
passType
);
void
forwardIds
(
PassType
passType
);
};
/**
...
...
@@ -129,6 +104,11 @@ protected:
int
idSize_
;
int
seqStartPosIndex_
;
int
numSequences_
;
// number of sequences in this scatterAgentLayer
bool
handleBackward_
;
// use to store expanded cpuStartPositions or subSequenceStartPositions
// of real layer.
ICpuGpuVectorPtr
inputStartPos_
;
public:
explicit
ScatterAgentLayer
(
const
LayerConfig
&
config
)
:
Layer
(
config
)
{}
...
...
@@ -147,19 +127,15 @@ public:
* false(default) in ScatterAgentLayer, and
* true in SequenceScatterAgentLayer.
*/
void
setRealLayer
(
LayerPtr
layer
,
const
std
::
vector
<
int
>&
ids
,
bool
copyId
=
false
)
{
void
setRealLayer
(
LayerPtr
layer
,
const
std
::
vector
<
int
>&
ids
)
{
realLayer_
=
layer
;
IVector
::
resizeOrCreate
(
ids_
,
ids
.
size
(),
useGpu_
);
ids_
->
copyFrom
(
ids
.
data
(),
ids
.
size
());
if
(
copyId
)
{
if
(
useGpu_
)
{
IVector
::
resizeOrCreate
(
cpuIds_
,
ids
.
size
(),
false
);
cpuIds_
->
copyFrom
(
ids
.
data
(),
ids
.
size
());
}
else
{
cpuIds_
=
ids_
;
}
if
(
useGpu_
)
{
IVector
::
resizeOrCreate
(
cpuIds_
,
ids
.
size
(),
false
);
cpuIds_
->
copyFrom
(
ids
.
data
(),
ids
.
size
());
}
else
{
cpuIds_
=
ids_
;
}
}
...
...
@@ -169,12 +145,14 @@ public:
const
Argument
&
outArg
,
const
IVectorPtr
&
ids
,
int
idIndex
,
int
idSize
)
{
int
idSize
,
bool
handleBackward
)
{
realLayer_
=
layer
;
realOutArg_
=
outArg
;
ids_
=
ids
;
idIndex_
=
idIndex
;
idSize_
=
idSize
;
handleBackward_
=
handleBackward
;
}
void
setSequenceStartPositions
(
const
ICpuGpuVectorPtr
&
sequenceStartPositions
,
...
...
@@ -187,28 +165,8 @@ public:
void
forward
(
PassType
passType
)
override
;
void
backward
(
const
UpdateCallback
&
callback
)
override
;
};
/**
* Like ScatterAgentLayer, but select a few sequence in real layer.
* *ids* in setRealLayer() or setRealLayerAndOutput() are the ids of
* selected sequence. It's used to reorder sequence input.
*/
class
SequenceScatterAgentLayer
:
public
ScatterAgentLayer
{
protected:
// use to store expanded cpuStartPositions or subSequenceStartPositions
// of real layer.
ICpuGpuVectorPtr
inputStartPos_
;
public:
explicit
SequenceScatterAgentLayer
(
const
LayerConfig
&
config
)
:
ScatterAgentLayer
(
config
)
{}
virtual
~
SequenceScatterAgentLayer
()
{}
void
forward
(
PassType
passType
);
void
backward
(
const
UpdateCallback
&
callback
)
{
ScatterAgentLayer
::
backward
(
callback
);
}
void
forwardSequence
(
PassType
passType
);
};
}
// namespace paddle
paddle/gserver/layers/SequencePoolLayer.cpp
浏览文件 @
09f34c4b
...
...
@@ -46,6 +46,9 @@ void SequencePoolLayer::forward(PassType passType) {
Layer
::
forward
(
passType
);
const
Argument
&
input
=
getInput
(
0
);
CHECK
(
input
.
hasSeq
()
||
input
.
hasSubseq
())
<<
"Input should be a sequence or subsequence for layer "
<<
getName
();
newBatchSize_
=
type_
?
input
.
getNumSubSequences
()
:
input
.
getNumSequences
();
size_t
dim
=
getSize
();
// check
...
...
paddle/gserver/tests/rnn_data_provider.py
浏览文件 @
09f34c4b
...
...
@@ -95,3 +95,22 @@ def process_unequalength_seq(settings, file_name):
words1
=
reduce
(
lambda
x
,
y
:
x
+
y
,
d
[
0
])
words2
=
reduce
(
lambda
x
,
y
:
x
+
y
,
d
[
1
])
yield
words1
,
words2
,
d
[
2
]
###########################################################
data3
=
[
[[[
1
,
2
],
[
4
,
5
,
2
]],
[
1
,
2
],
0
],
[[[
0
,
2
],
[
2
,
5
],
[
0
,
1
,
2
]],
[
2
,
3
,
0
],
1
],
]
# Used for sequence_nest_mixed_inputs.conf
@
provider
(
input_types
=
[
integer_value_sub_sequence
(
10
),
integer_value_sequence
(
10
),
integer_value
(
2
)
],
should_shuffle
=
False
)
def
process_mixed
(
settings
,
file_name
):
for
d
in
data3
:
yield
d
paddle/gserver/tests/sequence_nest_rnn_multi_input.conf
浏览文件 @
09f34c4b
...
...
@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/dummy.list'
,
test_list
=
None
,
module
=
'rnn_data_provider'
,
obj
=
'process_subseq
2
'
)
obj
=
'process_subseq'
)
settings
(
batch_size
=
2
,
learning_rate
=
0
.
01
)
...
...
@@ -57,7 +57,7 @@ def outer_step(wid, x):
last
=
last_seq
(
input
=
inner_rnn_output
,
name
=
"outer_rnn_state"
)
# "return last" should also work. But currently RecurrentGradientMachine
# does not handle it, and will report error: In hierachical RNN, all out
# does not handle it, and will report error: In hierachical RNN, all out
# links should be from sequences now.
return
inner_rnn_output
...
...
paddle/gserver/tests/sequence_rnn_matched_inputs.py
0 → 100644
浏览文件 @
09f34c4b
# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle.trainer_config_helpers
import
*
######################## data source ################################
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/dummy.list'
,
test_list
=
None
,
module
=
'rnn_data_provider'
,
obj
=
'process_mixed'
)
settings
(
batch_size
=
2
,
learning_rate
=
0.01
)
######################## network configure ################################
dict_dim
=
10
word_dim
=
2
hidden_dim
=
2
label_dim
=
2
data1
=
data_layer
(
name
=
"word1"
,
size
=
dict_dim
)
data2
=
data_layer
(
name
=
"word2"
,
size
=
dict_dim
)
label
=
data_layer
(
name
=
"label"
,
size
=
label_dim
)
encoding
=
embedding_layer
(
input
=
data2
,
size
=
word_dim
)
subseq
=
embedding_layer
(
input
=
data1
,
size
=
word_dim
)
seq
=
embedding_layer
(
input
=
data2
,
size
=
word_dim
)
nonseq
=
embedding_layer
(
input
=
label
,
size
=
word_dim
)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def
outer_step
(
subseq
,
seq
,
nonseq
,
encoding
):
outer_mem
=
memory
(
name
=
"outer_rnn_state"
,
size
=
hidden_dim
)
def
inner_step
(
subseq
,
seq
,
nonseq
):
inner_mem
=
memory
(
name
=
"inner_rnn_state"
,
size
=
hidden_dim
,
boot_layer
=
outer_mem
)
out
=
fc_layer
(
input
=
[
subseq
,
seq
,
nonseq
,
inner_mem
],
size
=
hidden_dim
,
act
=
TanhActivation
(),
bias_attr
=
True
,
name
=
'inner_rnn_state'
)
return
out
decoder
=
recurrent_group
(
step
=
inner_step
,
name
=
'inner'
,
input
=
[
subseq
,
seq
,
nonseq
])
last
=
last_seq
(
name
=
"outer_rnn_state"
,
input
=
decoder
)
context
=
simple_attention
(
encoded_sequence
=
encoding
,
encoded_proj
=
encoding
,
decoder_state
=
last
)
return
context
out
=
recurrent_group
(
name
=
"outer"
,
step
=
outer_step
,
input
=
[
subseq
,
expand_layer
(
seq
,
expand_as
=
subseq
,
expand_level
=
ExpandLevel
.
FROM_SEQUENCE
),
expand_layer
(
nonseq
,
expand_as
=
subseq
,
expand_level
=
ExpandLevel
.
FROM_NO_SEQUENCE
),
StaticInput
(
encoding
)
])
rep
=
last_seq
(
input
=
out
)
prob
=
fc_layer
(
size
=
label_dim
,
input
=
rep
,
act
=
SoftmaxActivation
(),
bias_attr
=
True
)
outputs
(
classification_cost
(
input
=
prob
,
label
=
label
))
paddle/gserver/tests/sequence_rnn_mixed_inputs.py
0 → 100644
浏览文件 @
09f34c4b
# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle.trainer_config_helpers
import
*
######################## data source ################################
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/dummy.list'
,
test_list
=
None
,
module
=
'rnn_data_provider'
,
obj
=
'process_mixed'
)
settings
(
batch_size
=
2
,
learning_rate
=
0.01
)
######################## network configure ################################
dict_dim
=
10
word_dim
=
2
hidden_dim
=
2
label_dim
=
2
data1
=
data_layer
(
name
=
"word1"
,
size
=
dict_dim
)
data2
=
data_layer
(
name
=
"word2"
,
size
=
dict_dim
)
label
=
data_layer
(
name
=
"label"
,
size
=
label_dim
)
encoding
=
embedding_layer
(
input
=
data2
,
size
=
word_dim
)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def
outer_step
(
subseq
,
seq
,
nonseq
,
encoding
):
outer_mem
=
memory
(
name
=
"outer_rnn_state"
,
size
=
hidden_dim
)
def
inner_step
(
data1
,
data2
,
label
):
inner_mem
=
memory
(
name
=
"inner_rnn_state"
,
size
=
hidden_dim
,
boot_layer
=
outer_mem
)
subseq
=
embedding_layer
(
input
=
data1
,
size
=
word_dim
)
seq
=
embedding_layer
(
input
=
data2
,
size
=
word_dim
)
nonseq
=
embedding_layer
(
input
=
label
,
size
=
word_dim
)
print_layer
(
input
=
[
data1
,
seq
,
label
,
inner_mem
])
out
=
fc_layer
(
input
=
[
subseq
,
seq
,
nonseq
,
inner_mem
],
size
=
hidden_dim
,
act
=
TanhActivation
(),
bias_attr
=
True
,
name
=
'inner_rnn_state'
)
return
out
decoder
=
recurrent_group
(
step
=
inner_step
,
name
=
'inner'
,
input
=
[
subseq
,
StaticInput
(
seq
),
nonseq
])
last
=
last_seq
(
name
=
"outer_rnn_state"
,
input
=
decoder
)
context
=
simple_attention
(
encoded_sequence
=
encoding
,
encoded_proj
=
encoding
,
decoder_state
=
last
)
return
context
out
=
recurrent_group
(
name
=
"outer"
,
step
=
outer_step
,
input
=
[
data1
,
data2
,
StaticInput
(
label
),
StaticInput
(
encoding
)])
rep
=
last_seq
(
input
=
out
)
prob
=
fc_layer
(
size
=
label_dim
,
input
=
rep
,
act
=
SoftmaxActivation
(),
bias_attr
=
True
)
outputs
(
classification_cost
(
input
=
prob
,
label
=
label
))
paddle/gserver/tests/sequence_rnn_multi_input.conf
浏览文件 @
09f34c4b
...
...
@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/dummy.list'
,
test_list
=
None
,
module
=
'rnn_data_provider'
,
obj
=
'process_seq
2
'
)
obj
=
'process_seq'
)
settings
(
batch_size
=
2
,
learning_rate
=
0
.
01
)
...
...
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
浏览文件 @
09f34c4b
...
...
@@ -155,6 +155,15 @@ TEST(RecurrentGradientMachine, rnn_multi_unequalength_input) {
}
}
TEST
(
RecurrentGradientMachine
,
rnn_mixed_input
)
{
for
(
bool
useGpu
:
{
false
,
true
})
{
test
(
"gserver/tests/sequence_rnn_mixed_inputs.py"
,
"gserver/tests/sequence_rnn_matched_inputs.py"
,
1e-6
,
useGpu
);
}
}
int
main
(
int
argc
,
char
**
argv
)
{
testing
::
InitGoogleTest
(
&
argc
,
argv
);
...
...
paddle/math/Vector.cpp
浏览文件 @
09f34c4b
...
...
@@ -908,12 +908,13 @@ const T* CpuGpuVectorT<T>::getData(bool useGpu) const {
// Operation will change data and need to reset sync_ & syncFlag_.
#define MUTABLE_VECTOR_OP(OP, useGpu, args...) \
do { \
setSync(useGpu); \
if (useGpu) { \
copyToGpu(); \
setSync(useGpu); \
return gpuVectorT_->OP(args); \
} else { \
copyToCpu(); \
setSync(useGpu); \
return cpuVectorT_->OP(args); \
} \
} while (0)
...
...
@@ -1030,7 +1031,7 @@ void CpuGpuVectorT<T>::copyToCpu() {
case
DATA_AT_GPU
:
CHECK
(
gpuVectorT_
);
this
->
resizeOrCreate
(
gpuVectorT_
->
getSize
(),
false
);
cpuVectorT_
->
copyFrom
(
*
gpuVectorT_
,
HPPL_STREAM_DEFAULT
);
cpuVectorT_
->
copyFrom
(
*
gpuVectorT_
);
setSync
(
SYNCED
);
break
;
case
DATA_AT_CPU
:
...
...
@@ -1049,7 +1050,7 @@ void CpuGpuVectorT<T>::copyToGpu() {
case
DATA_AT_CPU
:
CHECK
(
cpuVectorT_
);
this
->
resizeOrCreate
(
cpuVectorT_
->
getSize
(),
true
);
gpuVectorT_
->
copyFrom
(
*
cpuVectorT_
,
HPPL_STREAM_DEFAULT
);
gpuVectorT_
->
copyFrom
(
*
cpuVectorT_
);
setSync
(
SYNCED
);
break
;
case
DATA_AT_GPU
:
...
...
paddle/parameter/Argument.h
浏览文件 @
09f34c4b
...
...
@@ -149,6 +149,7 @@ struct Argument {
:
getBatchSize
();
}
bool
hasSeq
()
const
{
return
sequenceStartPositions
!=
nullptr
;
}
bool
hasSubseq
()
const
{
return
subSequenceStartPositions
!=
nullptr
;
}
const
int
*
getCpuStartPositions
()
const
{
...
...
paddle/trainer/tests/test_recurrent_machine_generation.cpp
浏览文件 @
09f34c4b
...
...
@@ -124,6 +124,8 @@ TEST(RecurrentGradientMachine, test_generation) {
bool
beam_search
)
{
FLAGS_config_args
=
beam_search
?
"beam_search=1"
:
"beam_search=0"
;
for
(
auto
useGpu
:
useGpuConfs
)
{
LOG
(
INFO
)
<<
configFile
<<
" useGpu="
<<
useGpu
<<
" beam_search="
<<
beam_search
;
testGeneration
(
configFile
,
useGpu
,
hasSubseq
,
expRetFile
);
}
};
...
...
python/paddle/trainer/config_parser.py
浏览文件 @
09f34c4b
...
...
@@ -328,53 +328,33 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SubModelBegin
(
name
)
g_current_submodel
.
is_recurrent_layer_group
=
True
g_current_submodel
.
reversed
=
seq_reversed
g_current_submodel
.
target_inlinkid
=
-
1
in_links_count
=
0
for
linkid
,
link
in
enumerate
(
in_links
):
if
isinstance
(
link
,
basestring
):
name
=
link
has_subseq
=
False
else
:
name
=
link
.
link_name
has_subseq
=
link
.
has_subseq
# assign target_inlinkid according to target_inlinkname
if
target_inlinkname
==
name
:
g_current_submodel
.
target_inlinkid
=
linkid
if
in_links_count
==
0
:
in_links_has_subseq
=
has_subseq
else
:
config_assert
(
in_links_has_subseq
==
has_subseq
,
"The sequence type of in_links should be the same in RecurrentLayerGroup"
)
in_links_count
+=
1
layer_name
=
MakeLayerNameInParentSubmodel
(
name
)
layer
=
g_layer_map
[
layer_name
]
if
has_subseq
:
SequenceScatterAgentLayer
(
name
=
name
,
size
=
layer
.
size
)
else
:
ScatterAgentLayer
(
name
=
name
,
size
=
layer
.
size
)
ScatterAgentLayer
(
name
=
name
,
size
=
layer
.
size
)
pair
=
g_current_submodel
.
in_links
.
add
()
pair
.
layer_name
=
layer_name
pair
.
link_name
=
MakeLayerNameInSubmodel
(
name
)
pair
.
has_subseq
=
has_subseq
@
config_func
def
RecurrentLayerGroupSetOutLink
(
link
):
if
isinstance
(
link
,
basestring
):
name
=
link
has_subseq
=
False
else
:
name
=
link
.
link_name
has_subseq
=
link
.
has_subseq
layer_name
=
MakeLayerNameInParentSubmodel
(
name
)
pair
=
g_current_submodel
.
out_links
.
add
()
pair
.
layer_name
=
MakeLayerNameInSubmodel
(
name
)
pair
.
link_name
=
layer_name
pair
.
has_subseq
=
has_subseq
def
RecurrentLayerGroupSetGenerator
(
generator
=
None
):
...
...
@@ -389,8 +369,7 @@ def RecurrentLayerGroupBegin(name,
generator
=
None
,
target_inlinkname
=
""
,
seq_reversed
=
False
):
RecurrentLayerGroupWithoutOutLinksBegin
(
name
,
in_links
,
seq_reversed
,
target_inlinkname
)
RecurrentLayerGroupWithoutOutLinksBegin
(
name
,
in_links
,
seq_reversed
)
for
link
in
out_links
:
RecurrentLayerGroupSetOutLink
(
link
)
...
...
@@ -425,8 +404,6 @@ def RecurrentLayerGroupEnd(name):
agent_name
=
GetLayerBaseName
(
pair
.
link_name
)
if
prev_submodel
.
HasField
(
"generator"
):
DataLayer
(
name
=
agent_name
,
size
=
layer
.
size
)
elif
pair
.
has_subseq
:
SequenceGatherAgentLayer
(
name
=
agent_name
,
size
=
layer
.
size
)
else
:
GatherAgentLayer
(
name
=
agent_name
,
size
=
layer
.
size
)
...
...
@@ -2253,13 +2230,6 @@ class AgentLayer(LayerBase):
name
,
'agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'sequence_agent'
)
class
SequenceAgentLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
super
(
SequenceAgentLayer
,
self
).
__init__
(
name
,
'sequence_agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'gather_agent'
)
class
GatherAgentLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
...
...
@@ -2274,20 +2244,6 @@ class ScatterAgentLayer(LayerBase):
name
,
'scatter_agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'sequence_gather_agent'
)
class
SequenceGatherAgentLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
super
(
SequenceGatherAgentLayer
,
self
).
__init__
(
name
,
'sequence_gather_agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'sequence_scatter_agent'
)
class
SequenceScatterAgentLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
super
(
SequenceScatterAgentLayer
,
self
).
__init__
(
name
,
'sequence_scatter_agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'multiplex'
)
class
MultiplexLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
size
,
device
=
None
):
...
...
@@ -2303,12 +2259,12 @@ class MultiplexLayer(LayerBase):
@
config_func
def
Link
(
name
,
has_subseq
=
False
,
):
def
Link
(
name
,
has_subseq
=
False
):
"""
Still keeping has_subseq for backward compatibility
"""
link_config
=
LinkConfig
()
link_config
.
link_name
=
name
link_config
.
has_subseq
=
has_subseq
return
link_config
...
...
@@ -2341,20 +2297,13 @@ def Memory(name,
config_assert
(
name
is
not
None
,
"name needs cannot be None"
)
memory_name
=
name
+
"+delay1"
agent_name
=
memory_name
if
is_sequence
:
config_assert
(
boot_layer
is
not
None
,
"there must be boot_layer in network when is_sequence = True"
)
agent_layer
=
SequenceAgentLayer
(
agent_name
,
size
)
else
:
agent_layer
=
AgentLayer
(
agent_name
,
size
)
agent_layer
=
AgentLayer
(
agent_name
,
size
)
config_assert
(
g_current_submodel
.
is_recurrent_layer_group
,
'Memory should be used in recurrent layer group only'
)
memory
=
g_current_submodel
.
memories
.
add
()
if
name
is
not
None
:
memory
.
layer_name
=
MakeLayerNameInSubmodel
(
name
)
memory
.
link_name
=
MakeLayerNameInSubmodel
(
agent_name
)
memory
.
is_sequence
=
is_sequence
options
=
sum
((
boot_layer
is
not
None
,
bool
(
boot_bias
),
boot_with_const_id
is
not
None
))
config_assert
(
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
09f34c4b
...
...
@@ -311,18 +311,6 @@ class LayerOutput(object):
self
.
outputs
=
outputs
self
.
reverse
=
reverse
def
__repr__
(
self
):
"""
Disable __repr__ for debug reason. Will be implemented when release
"""
assert
False
,
"this method should not be invoked"
def
__str__
(
self
):
"""
Disable __str__ for debug reason. Will be implemented when release
"""
assert
False
,
"this method should not be invoked"
def
set_input
(
self
,
input
):
"""
Set the input for a memory layer. Can only be used for memory layer
...
...
@@ -2944,7 +2932,7 @@ def memory(name,
:param memory_name: the name of the memory.
It is ignored when name is provided.
:type memory_name: basestring
:param is_seq: is sequence for boot_layer
:param is_seq:
DEPRECATED.
is sequence for boot_layer
:type is_seq: bool
:param boot_layer: boot layer of memory.
:type boot_layer: LayerOutput|None
...
...
@@ -2971,7 +2959,6 @@ def memory(name,
memory_name
=
Memory
(
name
,
size
,
is_sequence
=
is_seq
,
boot_layer
=
boot_layer
.
name
if
boot_layer
is
not
None
else
None
,
boot_bias
=
boot_bias
,
boot_bias_active_type
=
boot_bias_active_type
.
name
,
...
...
@@ -3318,19 +3305,21 @@ class StaticInput(object):
"""
StaticInput is only used in recurrent_group which defines a read-only memory
that can be a sequence or non-sequence.
:param size: DEPRECATED
:param is_seq: DEPRECATED
"""
def
__init__
(
self
,
input
,
is_seq
=
False
,
size
=
None
):
assert
isinstance
(
input
,
LayerOutput
)
self
.
input
=
input
self
.
is_seq
=
is_seq
assert
input
.
size
is
not
None
or
size
is
not
None
assert
input
.
size
is
not
None
if
size
is
not
None
:
input
.
size
=
size
assert
input
.
size
=
=
size
class
SubsequenceInput
(
objec
t
):
def
SubsequenceInput
(
inpu
t
):
"""
DEPRECATED.
Input sequence has sub-sequence, used in recurrent_group.
The example usage is:
...
...
@@ -3339,11 +3328,7 @@ class SubsequenceInput(object):
input = SubsequenceInput(layer)
"""
def
__init__
(
self
,
input
):
assert
isinstance
(
input
,
LayerOutput
)
assert
input
.
size
is
not
None
self
.
input
=
input
return
input
@
wrap_name_default
(
"recurrent_group"
)
...
...
@@ -3407,7 +3392,8 @@ def recurrent_group(step,
input sequence in a reverse order.
:type reverse: bool
:param targetInlink: the input layer which share info with layer group's output
:param targetInlink: DEPRECATED.
The input layer which share info with layer group's output
Param input specifies multiple input layers. For
SubsequenceInput inputs, config should assign one input
...
...
@@ -3429,46 +3415,21 @@ def recurrent_group(step,
model_type
(
'recurrent_nn'
)
def
is_single_input
(
x
):
return
isinstance
(
x
,
LayerOutput
)
or
isinstance
(
x
,
StaticInput
)
\
or
isinstance
(
x
,
SubsequenceInput
)
return
isinstance
(
x
,
LayerOutput
)
or
isinstance
(
x
,
StaticInput
)
if
is_single_input
(
input
):
input
=
[
input
]
assert
isinstance
(
input
,
collections
.
Sequence
)
def
is_in_links
(
x
):
return
isinstance
(
x
,
LayerOutput
)
or
isinstance
(
x
,
SubsequenceInput
)
return
isinstance
(
x
,
LayerOutput
)
in_links
=
filter
(
is_in_links
,
input
)
def
targetInlink_in_inlinks
():
for
inlink
in
in_links
:
if
isinstance
(
inlink
,
SubsequenceInput
):
if
targetInlink
==
inlink
.
input
:
return
True
elif
targetInlink
==
inlink
:
return
True
return
False
assert
(
targetInlink
==
None
or
targetInlink_in_inlinks
())
targetInlinkName
=
None
if
targetInlink
==
None
\
else
targetInlink
.
name
if
isinstance
(
targetInlink
,
LayerOutput
)
\
else
targetInlink
.
input
.
name
contains_sub_seq
=
[
False
]
def
map_in_links
(
x
):
if
isinstance
(
x
,
SubsequenceInput
):
contains_sub_seq
[
0
]
=
True
return
Link
(
name
=
x
.
input
.
name
,
has_subseq
=
True
)
else
:
return
x
.
name
RecurrentLayerGroupWithoutOutLinksBegin
(
name
=
name
,
in_links
=
map
(
map_in_links
,
in_links
),
seq_reversed
=
reverse
,
target_inlinkname
=
targetInlinkName
)
in_links
=
map
(
lambda
x
:
x
.
name
,
in_links
),
seq_reversed
=
reverse
)
in_args
=
[]
has_LayerOutput
=
False
for
each_input
in
input
:
...
...
@@ -3476,21 +3437,13 @@ def recurrent_group(step,
if
isinstance
(
each_input
,
LayerOutput
):
in_args
.
append
(
each_input
)
has_LayerOutput
=
True
elif
isinstance
(
each_input
,
SubsequenceInput
):
in_args
.
append
(
each_input
.
input
)
has_LayerOutput
=
True
else
:
else
:
# StaticInput
mem_name
=
"__%s_memory__"
%
each_input
.
input
.
name
mem
=
memory
(
name
=
mem_name
,
is_seq
=
each_input
.
is_seq
,
name
=
None
,
size
=
each_input
.
input
.
size
,
boot_layer
=
each_input
.
input
)
with
mixed_layer
(
name
=
mem_name
,
size
=
each_input
.
input
.
size
,
act
=
IdentityActivation
())
as
mix
:
mix
+=
identity_projection
(
mem
)
mem
.
set_input
(
mem
)
in_args
.
append
(
mem
)
assert
(
is_generating
!=
has_LayerOutput
)
...
...
@@ -3503,10 +3456,7 @@ def recurrent_group(step,
for
ot
in
layer_outs
:
assert
isinstance
(
ot
,
LayerOutput
)
ot
.
reverse
=
reverse
if
contains_sub_seq
[
0
]:
RecurrentLayerGroupSetOutLink
(
Link
(
ot
.
name
,
has_subseq
=
True
))
else
:
RecurrentLayerGroupSetOutLink
(
ot
.
name
)
RecurrentLayerGroupSetOutLink
(
ot
.
name
)
RecurrentLayerGroupEnd
(
name
=
name
)
...
...
@@ -5608,13 +5558,13 @@ def row_conv_layer(input,
to deploy in an online and low-latency setting. The lookahead convolution
incorporates information from future subsequences in a computationally
efficient manner to improve unidirectional recurrent neural networks.
The connection of row convolution is different form the 1D sequence
convolution. Assumed that, the future context-length is k, that is to say,
it can get the output at timestep t by using the the input feature from t-th
timestep to (t+k+1)-th timestep. Assumed that the hidden dim of input
activations are d, the activations r_t for the new layer at time-step t are:
.. math::
r_{t,r} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}}
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
浏览文件 @
09f34c4b
...
...
@@ -256,19 +256,15 @@ sub_models {
memories {
layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
link_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__simple_gru_0___transform"
link_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
link_name: "__simple_gru_0__"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__simple_gru_1___recurrent_group"
...
...
@@ -280,18 +276,14 @@ sub_models {
memories {
layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
link_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__simple_gru_1___transform"
link_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
link_name: "__simple_gru_1__"
has_subseq: false
}
target_inlinkid: -1
}
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
浏览文件 @
09f34c4b
...
...
@@ -341,24 +341,19 @@ sub_models {
memories {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
memories {
layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__mixed_0__"
link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__lstm_group_1___recurrent_group"
...
...
@@ -373,23 +368,18 @@ sub_models {
memories {
layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group"
is_sequence: false
}
memories {
layer_name: "__lstm_group_1___state@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__mixed_1__"
link_name: "__mixed_1__@__lstm_group_1___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1__"
has_subseq: false
}
target_inlinkid: -1
}
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
浏览文件 @
09f34c4b
...
...
@@ -155,7 +155,7 @@ layers {
}
layers {
name: "sub_seq_input@__recurrent_group_2__"
type: "s
equence_s
catter_agent"
type: "scatter_agent"
size: 100
active_type: ""
}
...
...
@@ -182,7 +182,7 @@ layers {
}
layers {
name: "rnn_subseq_forward"
type: "
sequence_
gather_agent"
type: "gather_agent"
size: 200
active_type: ""
}
...
...
@@ -618,19 +618,15 @@ sub_models {
memories {
layer_name: "rnn_forward@__recurrent_group_0__"
link_name: "rnn_forward+delay1@__recurrent_group_0__"
is_sequence: false
}
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_0__"
has_subseq: false
}
out_links {
layer_name: "rnn_forward@__recurrent_group_0__"
link_name: "rnn_forward"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__recurrent_group_1__"
...
...
@@ -642,19 +638,15 @@ sub_models {
memories {
layer_name: "rnn_back@__recurrent_group_1__"
link_name: "rnn_back+delay1@__recurrent_group_1__"
is_sequence: false
}
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_1__"
has_subseq: false
}
out_links {
layer_name: "rnn_back@__recurrent_group_1__"
link_name: "rnn_back"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__recurrent_group_2__"
...
...
@@ -666,19 +658,15 @@ sub_models {
memories {
layer_name: "rnn_subseq_forward@__recurrent_group_2__"
link_name: "rnn_subseq_forward+delay1@__recurrent_group_2__"
is_sequence: false
}
in_links {
layer_name: "sub_seq_input"
link_name: "sub_seq_input@__recurrent_group_2__"
has_subseq: true
}
out_links {
layer_name: "rnn_subseq_forward@__recurrent_group_2__"
link_name: "rnn_subseq_forward"
has_subseq: true
}
target_inlinkid: -1
}
sub_models {
name: "__lstm_group_0___recurrent_group"
...
...
@@ -693,24 +681,19 @@ sub_models {
memories {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
memories {
layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__mixed_0__"
link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__gru_group_0___recurrent_group"
...
...
@@ -722,19 +705,15 @@ sub_models {
memories {
layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
link_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__mixed_1__"
link_name: "__mixed_1__@__gru_group_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
link_name: "__gru_group_0__"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__recurrent_group_3__"
...
...
@@ -746,18 +725,14 @@ sub_models {
memories {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__memory_6__@__recurrent_group_3__"
is_sequence: false
}
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_3__"
has_subseq: false
}
out_links {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__fc_layer_0__"
has_subseq: false
}
target_inlinkid: -1
}
python/paddle/v2/layer.py
浏览文件 @
09f34c4b
...
...
@@ -260,7 +260,7 @@ def parse_network(output_layers, extra_layers=None):
else
:
extra_layers
=
[]
layer_names
=
__get_used_layers__
(
output_layers
+
extra_layers
)
layer_names
=
__get_used_layers__
(
list
(
output_layers
)
+
list
(
extra_layers
)
)
submodel_names
=
__get_used_submodels__
(
layer_names
)
submodel_names
.
add
(
'root'
)
evaluator_names
=
__get_used_evaluators__
(
layer_names
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录