PaddlePaddle / Paddle · Commit 17994e38
Commit 17994e38, authored on Jun 14, 2017 by xuwei06
RecurrentGroup with mixed input sequence types

SubsequenceInput no longer needs to be used; the framework detects the sequence type of each input automatically.
Parent commit: 14c0e71d
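With this change a recurrent_group can take inputs of different sequence nesting levels in one call, without wrapping any of them in SubsequenceInput. A minimal sketch of the new usage, distilled from the sequence_rnn_mixed_inputs.py test config added in this commit (dimensions and layer names are illustrative, not part of the commit):

    # Sketch only: a sub-sequence input, a plain sequence input and a
    # per-sample (non-sequence) input feed the same recurrent_group.
    data1 = data_layer(name="word1", size=dict_dim)   # sub-sequence
    data2 = data_layer(name="word2", size=dict_dim)   # sequence
    label = data_layer(name="label", size=label_dim)  # non-sequence

    def step(subseq, seq, nonseq):
        # Each input is scattered according to its own sequence type;
        # no SubsequenceInput wrapper is required any more.
        return fc_layer(input=[subseq, seq, nonseq], size=hidden_dim,
                        act=TanhActivation())

    out = recurrent_group(name="group", step=step,
                          input=[data1, data2, label])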
Showing 19 changed files with 652 additions and 429 deletions (+652, -429).
  paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp   +320  -177
  paddle/gserver/gradientmachines/RecurrentGradientMachine.h      +44   -11
  paddle/gserver/layers/AgentLayer.cpp                             +42   -53
  paddle/gserver/layers/AgentLayer.h                               +21   -63
  paddle/gserver/layers/SequencePoolLayer.cpp                       +3    -0
  paddle/gserver/tests/rnn_data_provider.py                        +19    -0
  paddle/gserver/tests/sequence_nest_rnn_multi_input.conf           +2    -2
  paddle/gserver/tests/sequence_rnn_matched_inputs.py              +85    -0  (new file)
  paddle/gserver/tests/sequence_rnn_mixed_inputs.py                +78    -0  (new file)
  paddle/gserver/tests/sequence_rnn_multi_input.conf                +1    -1
  paddle/gserver/tests/test_RecurrentGradientMachine.cpp            +9    -0
  paddle/math/Vector.cpp                                            +4    -3
  paddle/parameter/Argument.h                                       +1    -0
  paddle/trainer/tests/test_recurrent_machine_generation.cpp        +2    -0
  python/paddle/trainer/config_parser.py                            +6   -51
  python/paddle/trainer_config_helpers/layers.py                   +13   -46
  .../tests/configs/protostr/shared_gru.protostr                    +0    -4
  .../tests/configs/protostr/shared_lstm.protostr                   +0    -4
  .../tests/configs/protostr/test_rnn_group.protostr                +2   -14
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp

@@ -214,7 +214,6 @@ void RecurrentGradientMachine::init(
     inFrameLines_[i].linkName = subModelConfig->in_links(i).link_name();
     inFrameLines_[i].inLayer =
         rootNetwork_->getLayer(subModelConfig->in_links(i).layer_name());
-    inFrameLines_[i].hasSubseq = subModelConfig->in_links(i).has_subseq();
   }
   outFrameLines_.resize(subModelConfig->out_links_size());

@@ -241,11 +240,8 @@ void RecurrentGradientMachine::init(
         rootNetwork_->getLayer(memoryConfig.boot_layer_name());
     LayerConfig scatterConfig = *agentConfig;
-    memoryFrameLines_[i].is_sequence = memoryConfig.is_sequence();
-    memoryFrameLines_[i].rootAgent.reset(
-        memoryConfig.is_sequence() ? new SequenceScatterAgentLayer(scatterConfig)
-                                   : new ScatterAgentLayer(scatterConfig));
+    memoryFrameLines_[i].rootAgent.reset(new ScatterAgentLayer(scatterConfig));
     memoryFrameLines_[i].rootAgent->init(LayerMap(), parameterMap_);
     memoryFrameLines_[i].bootLayer = memoryFrameLines_[i].rootAgent;

@@ -267,9 +263,7 @@ void RecurrentGradientMachine::init(
   if (subModelConfig->has_generator()) {
     memoryFrameLines_[i].scatterAgents.resize(2);
     for (auto& agent : memoryFrameLines_[i].scatterAgents) {
-      agent.reset(memoryConfig.is_sequence()
-                      ? new SequenceScatterAgentLayer(*agentConfig)
-                      : new ScatterAgentLayer(*agentConfig));
+      agent.reset(new ScatterAgentLayer(*agentConfig));
       agent->init(LayerMap(), parameterMap_);
     }
   }

@@ -297,8 +291,6 @@ void RecurrentGradientMachine::init(
   if (subModelConfig->evaluator_names_size() > 0) {
     evaluator_.reset(frames_[0]->makeEvaluator());
   }
-  targetInfoInlinkId_ = subModelConfig->target_inlinkid();
 }

 void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) {

@@ -376,108 +368,102 @@ void RecurrentGradientMachine::prefetch(const std::vector<Argument>& inArgs) {
   LOG(FATAL) << "should not use this function";
 }

-void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
-                                       std::vector<Argument>* outArgs,
-                                       PassType passType) {
-  if (inFrameLines_.empty() && passType == PASS_TEST) {
-    generateSequence();
-    return;
-  }  // else forward..
-  const Argument& input = inFrameLines_[0].inLayer->getOutput();
-  CHECK(input.sequenceStartPositions);
-  int batchSize = input.getBatchSize();
-  size_t numSequences = input.getNumSequences();
-  const int* starts = input.sequenceStartPositions->getData(false);
-  bool hasSubseq = input.hasSubseq();
-  // In case of !hasSubseq or targetInfoInlinkId_ == -1, all inlinks share the
-  // same inframe info
-  bool shareInlinkInfo = !hasSubseq || targetInfoInlinkId_ == -1;
-  // Defaultly, share info with the first inlink
-  if (shareInlinkInfo) {
-    targetInfoInlinkId_ = 0;
-  }
-  // check hasSubseq in both config and input are the same
-  CHECK_EQ(hasSubseq, inFrameLines_[0].hasSubseq);
-  CHECK_EQ(starts[numSequences], batchSize);
-  // check other inputs has same sequence length and start
-  for (size_t i = 1; i < inFrameLines_.size(); ++i) {
-    const Argument& input1 = inFrameLines_[i].inLayer->getOutput();
-    CHECK_EQ((size_t)input1.getNumSequences(), numSequences);
-    // check all inputs should have same hasSubseq flag
-    CHECK_EQ(input1.hasSubseq(), inFrameLines_[0].hasSubseq);
-    // if shareInlinkInfo, checks:
-    // 1. all inlinks have same number of total tokens
-    // 2. all inlinks have same number of tokens for each sentence of each
-    //    sample. If hasSubseq, one sample has multiple sentence, else, one
-    //    sample is one sentence
-    if (shareInlinkInfo) {
-      CHECK_EQ(input1.getBatchSize(), batchSize);
-      CHECK(std::equal(starts, starts + numSequences + 1,
-                       input1.sequenceStartPositions->getData(false)));
-    }
-  }
-  if (hasSubseq) {
-    CHECK(input.subSequenceStartPositions);
-    size_t numSubSequences = input.getNumSubSequences();
-    const int* subStarts = input.subSequenceStartPositions->getData(false);
-    CHECK_EQ(subStarts[numSubSequences], batchSize);
-    // if hasSubseq, check other inputs has same sub-sequence and sub-start
-    for (size_t i = 1; i < inFrameLines_.size(); ++i) {
-      const Argument& input1 = inFrameLines_[i].inLayer->getOutput();
-      CHECK_EQ((size_t)input1.getNumSubSequences(), numSubSequences);
-      if (shareInlinkInfo) {
-        CHECK(std::equal(subStarts, subStarts + numSubSequences + 1,
-                         input1.subSequenceStartPositions->getData(false)));
-      }
-    }
-  }
-  {
-    AsyncGpuBlock asyncGpuBlock;
-    // if shareInlinkInfo, only calculate info of the first inlink
-    // else, calculate info for each inlink
-    if (shareInlinkInfo) {
-      input.getSeqInfo(&seqInfos_[0]);
-      maxSequenceLength_ = seqInfos_[0][0].topLevelLength;
-      createInFrameInfo(0, input, passType);
-    } else {
-      for (size_t i = 0; i < inFrameLines_.size(); i++) {
-        const Argument& input1 = inFrameLines_[i].inLayer->getOutput();
-        input1.getSeqInfo(&seqInfos_[i]);
-        maxSequenceLength_ = seqInfos_[i][0].topLevelLength;
-        createInFrameInfo(i, input1, passType);
-      }
-    }
-    // inFrameLine select rows in real layer one time
-    for (size_t i = 0; i < inFrameLines_.size(); i++) {
-      int curInlinkId = shareInlinkInfo ? 0 : i;
-      selectRowsOneTime(inFrameLines_[i].inLayer, info_[curInlinkId].allIds,
-                        &(inFrameLines_[i].outArg), passType);
-    }
-  }
-  resizeOrCreateFrames(maxSequenceLength_);
-  resizeBootFrame(numSequences);
+void RecurrentGradientMachine::checkInputConsistency(
+    int inlinkId, const std::vector<Argument::SeqInfo>& seqInfo) {
+  if (commonSeqInfo_.empty()) {
+    commonSeqInfo_.resize(seqInfo.size());
+    for (size_t i = 0; i < seqInfo.size(); ++i) {
+      commonSeqInfo_[i].topLevelLength = seqInfo[i].topLevelLength;
+      commonSeqInfo_[i].seqId = seqInfo[i].seqId;
+    }
+  } else {
+    CHECK_EQ(commonSeqInfo_.size(), seqInfo.size())
+        << " RecurrentGroup " << subModelName_ << " input " << inlinkId
+        << " has mismatched number of sequences";
+    for (size_t i = 0; i < seqInfo.size(); ++i) {
+      CHECK_EQ(commonSeqInfo_[i].topLevelLength, seqInfo[i].topLevelLength)
+          << " RecurrentGroup " << subModelName_ << " input " << inlinkId
+          << " has mismatched sequence length";
+      CHECK_EQ(commonSeqInfo_[i].seqId, seqInfo[i].seqId)
+          << " RecurrentGroup " << subModelName_ << " input " << inlinkId
+          << " has mismatched sequence length";
+    }
+  }
+}
+
+void RecurrentGradientMachine::calcNumSequencesAtEachStep() {
+  int numSequences = commonSeqInfo_.size();
+  numSeqs_.resize(maxSequenceLength_);
+  for (int i = 0; i < numSequences; ++i) {
+    for (int j = 0; j < commonSeqInfo_[i].topLevelLength; ++j) {
+      numSeqs_[j] = i + 1;
+    }
+  }
+}
+
+void RecurrentGradientMachine::reorganizeInput(PassType passType) {
+  info_.clear();
+  info_.resize(inFrameLines_.size());
+  commonSeqInfo_.clear();
+  seqInfos_.clear();
+  seqInfos_.resize(inFrameLines_.size());
+
+  for (size_t i = 0; i < inFrameLines_.size(); i++) {
+    const Argument& input = inFrameLines_[i].inLayer->getOutput();
+    if (!input.hasSeq()) {
+      continue;
+    }
+    input.getSeqInfo(&seqInfos_[i]);
+    checkInputConsistency(i, seqInfos_[i]);
+  }
+  CHECK(!commonSeqInfo_.empty())
+      << "At least one input needs to be sequence or subsequence";
+  maxSequenceLength_ = commonSeqInfo_[0].topLevelLength;
+
+  calcNumSequencesAtEachStep();
+
+  for (size_t i = 0; i < inFrameLines_.size(); ++i) {
+    const Argument& input = inFrameLines_[i].inLayer->getOutput();
+    if (!input.hasSeq()) {
+      seqInfos_[i] = commonSeqInfo_;
+    }
+    createInFrameInfo(i, input, passType);
+  }
+
+  {
+    AsyncGpuBlock asyncGpuBlock;
+    // inFrameLine select rows in real layer one time
+    for (size_t i = 0; i < inFrameLines_.size(); i++) {
+      selectRowsOneTime(inFrameLines_[i].inLayer, info_[i].allIds,
+                        &(inFrameLines_[i].outArg), passType);
+    }
+  }
+}
+
+void RecurrentGradientMachine::reorganizeOutput(PassType passType) {
+  calcSequenceStartPositions();
+  for (size_t i = 0; i < outFrameLines_.size(); ++i) {
+    Info info;
+    auto& outFrameLine = outFrameLines_[i];
+    ICpuGpuVectorPtr sequenceStartPositions;
+    ICpuGpuVectorPtr subSequenceStartPositions;
+    createOutFrameInfo(
+        outFrameLine, info, sequenceStartPositions, subSequenceStartPositions);
+    auto gatherAgent =
+        dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
+    CHECK_NOTNULL(gatherAgent);
+    gatherAgent->copyIdAndSequenceInfo(sequenceStartPositions,
+                                       subSequenceStartPositions,
+                                       info.allIds, info.idIndex);
+  }
+}
+
+void RecurrentGradientMachine::connectFrames(PassType passType) {
+  for (auto& memoryFrameLine : memoryFrameLines_) {
+    if (memoryFrameLine.rootAgent) {
+      auto scatterAgent =
...

@@ -487,8 +473,9 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
                                           memoryFrameLine.outArg,
                                           memoryFrameLine.allIds,
                                           /* idIndex */ 0,
-                                          memoryFrameLine.allIds->getSize());
-      if (memoryFrameLine.is_sequence) {  // memoryConfig is sequence
+                                          memoryFrameLine.allIds->getSize(),
+                                          /* handleBackward */ true);
+      if (memoryFrameLine.sequenceStartPositions) {
         int size = memoryFrameLine.sequenceStartPositions->getSize();
         scatterAgent->setSequenceStartPositions(
             memoryFrameLine.sequenceStartPositions,

@@ -501,28 +488,26 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
   for (auto& outFrameLine : outFrameLines_) {
     auto gatherAgent =
         dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
     CHECK_NOTNULL(gatherAgent);
-    gatherAgent->copyIdAndSequenceInfo(input,
-                                       info_[targetInfoInlinkId_].allIds,
-                                       info_[targetInfoInlinkId_].idIndex);
+    gatherAgent->clearRealLayers();
   }
   for (int i = 0; i < maxSequenceLength_; ++i) {
-    int idSize = 0;
     // connect in_links
     for (size_t j = 0; j < inFrameLines_.size(); ++j) {
-      Info& info = info_[shareInlinkInfo ? 0 : j];
-      // idSize denotes the sum number of tokens in each length i
-      idSize = info.idIndex[i + 1] - info.idIndex[i];
+      Info& info = info_[j];
+      int idIndex = info.idIndex.empty() ? 0 : info.idIndex[i];
+      int idSize = info.idIndex.empty()
+                       ? numSeqs_[i]
+                       : info.idIndex[i + 1] - info.idIndex[i];
       InFrameLine inFrameLine = inFrameLines_[j];
       auto scatterAgent =
           dynamic_cast<ScatterAgentLayer*>(inFrameLine.agents[i].get());
       scatterAgent->setRealLayerAndOutput(inFrameLine.inLayer,
                                           inFrameLine.outArg,
                                           info.allIds,
-                                          info.idIndex[i],
-                                          idSize);
-      if (hasSubseq) {
+                                          idIndex,
+                                          idSize,
+                                          i == 0);
+      if (info.sequenceStartPositions) {
        // size: the length of subsequence
        int size = info.seqStartPosIndex[i + 1] - info.seqStartPosIndex[i];
        scatterAgent->setSequenceStartPositions(

@@ -536,11 +521,6 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
           dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
       gatherAgent->addRealLayer(outFrameLine.frames[i]);
     }
-    // connect memory links
-    // Adopt info_[0].idIndex because seq which has_subseq=True
-    // doesn't support Memory with !hasSubseq bootlayer;
-    // And inlinks that !hasSubSeq must have same inlink length.
-    idSize = info_[0].idIndex[i + 1] - info_[0].idIndex[i];
     for (auto& memoryFrameLine : memoryFrameLines_) {
       NeuralNetwork::connect(memoryFrameLine.agents[i],

@@ -548,6 +528,28 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
                              numSeqs_[i] /*height of agent*/);
     }
   }
 }

+void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
+                                       std::vector<Argument>* outArgs,
+                                       PassType passType) {
+  /* inArgs and outArgs are not used.
+     The inputs are inFrameLines_[i].inLayer.
+     The outputs are outFramesLines_[i].agentLayer
+   */
+  if (inFrameLines_.empty() && passType == PASS_TEST) {
+    generateSequence();
+    return;
+  }  // else forward..
+
+  reorganizeInput(passType);
+  int numSequences = commonSeqInfo_.size();
+
+  resizeOrCreateFrames(maxSequenceLength_);
+  resizeBootFrame(numSequences);
+
+  connectFrames(passType);
+
   REGISTER_TIMER_INFO("RecurrentFwTime", "RecurrentFwTime");
   // forward

@@ -558,16 +560,12 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
     const std::vector<Argument> inArgs;
     std::vector<Argument> outArgs;
     frames_[i]->forward(inArgs, &outArgs, passType);
-    if (hasSubseq) {
-      for (auto& outFrameLine : outFrameLines_) {
-        CHECK(outFrameLine.frames[i]->getOutput().sequenceStartPositions)
-            << "In hierachical RNN, all out links should be from sequences.";
-      }
-    }
   }
   if (evaluator_ && passType == PASS_TEST) {
     this->eval(evaluator_.get());
   }
+
+  reorganizeOutput(passType);
 }

 void RecurrentGradientMachine::backward(const UpdateCallback& callback) {

@@ -634,68 +632,221 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
     this->beamSearchStatistics_ = nullptr;
   }
 }

+namespace {
+void lenToStarts(std::vector<int>& starts) {
+  int pos = 0;
+  starts.back() = 0;
+  for (auto& start : starts) {
+    int tmp = start;
+    start = pos;
+    pos += tmp;
+  }
+  starts.back() = pos;
+}
+}
+
+void RecurrentGradientMachine::calcSequenceStartPositions() {
+  std::vector<int> starts(commonSeqInfo_.size() + 1);
+  for (auto& seqInfo : commonSeqInfo_) {
+    starts[seqInfo.seqId] = seqInfo.topLevelLength;
+  }
+  lenToStarts(starts);
+  ICpuGpuVector::resizeOrCreate(sequenceStartPositions_, starts.size(), false);
+  std::copy(starts.begin(), starts.end(),
+            sequenceStartPositions_->getMutableData(false));
+}
+
+void RecurrentGradientMachine::checkOutputConsistency(
+    OutFrameLine& outFrameLine) {
+  bool hasSeq = outFrameLine.frames[0]->getOutput().hasSeq();
+  for (int i = 0; i < maxSequenceLength_; ++i) {
+    LayerPtr frame = outFrameLine.frames[i];
+    CHECK_EQ(hasSeq, frame->getOutput().hasSeq());
+    int numSequences = frame->getOutput().getNumSequences();
+    CHECK_EQ(numSeqs_[i], numSequences);
+  }
+}
+
+void RecurrentGradientMachine::createOutFrameInfo(
+    OutFrameLine& outFrameLine, Info& info,
+    ICpuGpuVectorPtr& sequenceStartPositions,
+    ICpuGpuVectorPtr& subSequenceStartPositions) {
+  checkOutputConsistency(outFrameLine);
+  if (!outFrameLine.frames[0]->getOutput().hasSeq()) {
+    createOutFrameInfo_seq(
+        outFrameLine, info, sequenceStartPositions, subSequenceStartPositions);
+  } else {
+    createOutFrameInfo_subseq(
+        outFrameLine, info, sequenceStartPositions, subSequenceStartPositions);
+  }
+}
+
+void RecurrentGradientMachine::createOutFrameInfo_seq(
+    OutFrameLine& outFrameLine, Info& info,
+    ICpuGpuVectorPtr& sequenceStartPositions,
+    ICpuGpuVectorPtr& subSequenceStartPositions) {
+  std::vector<int> allIds;
+  info.idIndex.resize(1, 0);  // first idIndex = 0
+  const int* starts = sequenceStartPositions_->getData(false);
+  for (int i = 0; i < maxSequenceLength_; ++i) {
+    LayerPtr frame = outFrameLine.frames[i];
+    size_t numSequences = frame->getOutput().getNumSequences();
+    for (size_t j = 0; j < numSequences; ++j) {
+      int seqStart = starts[commonSeqInfo_[j].seqId];
+      int seqLength = commonSeqInfo_[j].topLevelLength;
+      allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i)
+                                 : (seqStart + i));
+    }
+    info.idIndex.push_back(allIds.size());
+  }
+  sequenceStartPositions = sequenceStartPositions_;
+  copyScattedId(allIds, &info.allIds, allIds.size());
+  CHECK_EQ(info.idIndex.size(), static_cast<size_t>(maxSequenceLength_ + 1));
+}
+
+void RecurrentGradientMachine::createOutFrameInfo_subseq(
+    OutFrameLine& outFrameLine, Info& info,
+    ICpuGpuVectorPtr& sequenceStartPositions,
+    ICpuGpuVectorPtr& subSequenceStartPositions) {
+  size_t numSequences = commonSeqInfo_.size();
+  std::vector<int> allIds;
+  info.idIndex.resize(1, 0);  // first idIndex = 0
+  const int* starts = sequenceStartPositions_->getData(false);
+  std::vector<int> subStarts(starts[numSequences] + 1);
+  for (int i = 0; i < maxSequenceLength_; ++i) {
+    LayerPtr frame = outFrameLine.frames[i];
+    size_t numSequences = frame->getOutput().getNumSequences();
+    const int* seqStarts =
+        frame->getOutput().sequenceStartPositions->getData(false);
+    for (size_t j = 0; j < numSequences; ++j) {
+      subStarts[starts[commonSeqInfo_[j].seqId] + i] =
+          seqStarts[j + 1] - seqStarts[j];
+    }
+  }
+  lenToStarts(subStarts);
+  for (int i = 0; i < maxSequenceLength_; ++i) {
+    LayerPtr frame = outFrameLine.frames[i];
+    size_t numSequences = frame->getOutput().getNumSequences();
+    for (size_t j = 0; j < numSequences; ++j) {
+      int pos = starts[commonSeqInfo_[j].seqId] + i;
+      int subSeqStart = subStarts[pos];
+      int subSeqEnd = subStarts[pos + 1];
+      for (int k = subSeqStart; k < subSeqEnd; ++k) {
+        allIds.push_back(k);
+      }
+    }
+    info.idIndex.push_back(allIds.size());
+  }
+  ICpuGpuVector::resizeOrCreate(
+      subSequenceStartPositions, subStarts.size(), false);
+  int* cpuSubSequenceStartPositions =
+      subSequenceStartPositions->getMutableData(false);
+  std::copy(subStarts.begin(), subStarts.end(), cpuSubSequenceStartPositions);
+  ICpuGpuVector::resizeOrCreate(
+      sequenceStartPositions, numSequences + 1, false);
+  int* cpuSequenceStartPositions =
+      sequenceStartPositions->getMutableData(false);
+  for (size_t i = 0; i <= numSequences; ++i) {
+    cpuSequenceStartPositions[i] = subStarts[starts[i]];
+  }
+  copyScattedId(allIds, &info.allIds, allIds.size());
+  CHECK_EQ(info.idIndex.size(), static_cast<size_t>(maxSequenceLength_ + 1));
+}
+
 /* create scattered id infomation for all realLayer of inFrameLines one time.
  * If hasSubseq, will also create scattered sequenceStartPositions infomation
  * for all realLayer of inFrameLines one time.
  */
 void RecurrentGradientMachine::createInFrameInfo(int inlinkId,
                                                  const Argument& input,
                                                  PassType passType) {
-  bool hasSubseq = input.hasSubseq();
-  // numSequences: # samples(sequences) in a batch
-  size_t numSequences = input.getNumSequences();
-  numSeqs_.clear();
-  Info* inlinkInfo = &info_[inlinkId];
-  inlinkInfo->idIndex.clear();
-  inlinkInfo->idIndex.push_back(0);  // first idIndex = 0
+  if (!input.hasSeq()) {
+    createInFrameInfo_nonseq(inlinkId, input, passType);
+  } else if (!input.hasSubseq()) {
+    createInFrameInfo_seq(inlinkId, input, passType);
+  } else {
+    createInFrameInfo_subseq(inlinkId, input, passType);
+  }
+}
+
+void RecurrentGradientMachine::createInFrameInfo_nonseq(int inlinkId,
+                                                        const Argument& input,
+                                                        PassType passType) {
+  std::vector<int> allIds;
+  auto& seqInfo = seqInfos_[inlinkId];
+  Info* inlinkInfo = &info_[inlinkId];
+  inlinkInfo->idIndex.clear();
+  for (size_t i = 0; i < seqInfo.size(); ++i) {
+    allIds.push_back(seqInfo[i].seqId);
+  }
+  // copy and check scatterId
+  copyScattedId(allIds, &inlinkInfo->allIds, input.getBatchSize());
+}
+
+void RecurrentGradientMachine::createInFrameInfo_seq(int inlinkId,
+                                                     const Argument& input,
+                                                     PassType passType) {
+  std::vector<int> allIds;
+  auto& seqInfo = seqInfos_[inlinkId];
+  Info* inlinkInfo = &info_[inlinkId];
+  inlinkInfo->idIndex.resize(1, 0);  // first idIndex = 0
+
+  for (int i = 0; i < maxSequenceLength_; ++i) {
+    for (int j = 0; j < numSeqs_[i]; ++j) {
+      int seqLength = seqInfo[j].topLevelLength;
+      int seqStart = seqInfo[j].seqStart;
+      allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i)
+                                 : (seqStart + i));
+    }
+    inlinkInfo->idIndex.push_back(allIds.size());
+  }
+
+  // copy and check scatterId
+  copyScattedId(allIds, &inlinkInfo->allIds, input.getBatchSize());
+  CHECK_EQ(inlinkInfo->idIndex.size(),
+           static_cast<size_t>(maxSequenceLength_ + 1));
+}
+
+void RecurrentGradientMachine::createInFrameInfo_subseq(int inlinkId,
+                                                        const Argument& input,
+                                                        PassType passType) {
+  std::vector<int> allIds;
+  auto& seqInfo = seqInfos_[inlinkId];
+  Info* inlinkInfo = &info_[inlinkId];
+  inlinkInfo->idIndex.resize(1, 0);  // first idIndex = 0
   std::vector<int> sequenceStartPositions;
   const int* subSequenceStartPositions = nullptr;
-  if (hasSubseq) {  // for sequenceScatterAgentLayer
-    subSequenceStartPositions = input.subSequenceStartPositions->getData(false);
-    inlinkInfo->seqStartPosIndex.clear();
-    inlinkInfo->seqStartPosIndex.push_back(0);  // first seqStartPosIndex = 0
-  }
+  // for sequenceScatterAgentLayer
+  subSequenceStartPositions = input.subSequenceStartPositions->getData(false);
+  inlinkInfo->seqStartPosIndex.clear();
+  inlinkInfo->seqStartPosIndex.push_back(0);  // first seqStartPosIndex = 0
   // maxSequenceLength_: max topLevelLength in allsamples
   for (int i = 0; i < maxSequenceLength_; ++i) {
-    if (hasSubseq) {
-      sequenceStartPositions.push_back(0);  // first element = 0
-    }
-    int numSeqs = 0;
-    for (size_t j = 0; j < numSequences; ++j) {
-      int seqLength = seqInfo[j].topLevelLength;
-      if (i >= seqLength) {
-        break;
-      }
-      ++numSeqs;
-      if (hasSubseq) {
-        int subSeqStart = subSequenceStartPositions[seqInfo[j].subSeqStart + i];
-        int subSeqEnd =
-            subSequenceStartPositions[seqInfo[j].subSeqStart + i + 1];
-        for (int k = subSeqStart; k < subSeqEnd; ++k) {
-          allIds.push_back(k);
-        }
-        sequenceStartPositions.push_back(sequenceStartPositions.back() +
-                                         subSeqEnd - subSeqStart);
-      } else {
-        int seqStart = seqInfo[j].seqStart;
-        allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i)
-                                   : (seqStart + i));
-      }
-    }
-    inlinkInfo->idIndex.push_back(allIds.size());
-    numSeqs_.push_back(numSeqs);
-    if (hasSubseq) {
-      inlinkInfo->seqStartPosIndex.push_back(sequenceStartPositions.size());
-    }
-  }
-  if (hasSubseq) {
-    // inFrameLine create sequenceStartPositions one time
-    CHECK_EQ(sequenceStartPositions.size(),
+    sequenceStartPositions.push_back(0);  // first element = 0
+    for (int j = 0; j < numSeqs_[i]; ++j) {
+      int subSeqStart = subSequenceStartPositions[seqInfo[j].subSeqStart + i];
+      int subSeqEnd = subSequenceStartPositions[seqInfo[j].subSeqStart + i + 1];
+      for (int k = subSeqStart; k < subSeqEnd; ++k) {
+        allIds.push_back(k);
+      }
+      sequenceStartPositions.push_back(sequenceStartPositions.back() +
+                                       subSeqEnd - subSeqStart);
+    }
+    inlinkInfo->idIndex.push_back(allIds.size());
+    inlinkInfo->seqStartPosIndex.push_back(sequenceStartPositions.size());
+  }
+  // inFrameLine create sequenceStartPositions one time
+  CHECK_EQ(sequenceStartPositions.size(),
...

@@ -703,7 +854,6 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinkId,
   CHECK_EQ(inlinkInfo->seqStartPosIndex.size(),
            static_cast<size_t>(maxSequenceLength_ + 1));
   createSeqPos(sequenceStartPositions, &inlinkInfo->sequenceStartPositions);
-  }

   // copy and check scatterId
   copyScattedId(allIds, &inlinkInfo->allIds, input.getBatchSize());

@@ -717,11 +867,11 @@ void RecurrentGradientMachine::createMemoryFrameInfo(
   const Argument& input = (*memoryFrameLine).rootLayer->getOutput();
   size_t numSequences = input.getNumSequences();
   std::vector<int> allIds;
-  bool seqFlag = (*memoryFrameLine).is_sequence;
+  bool seqFlag = input.hasSeq();
+  CHECK(!input.hasSubseq())
+      << "Subsequence boot layer for memory is not supported";

   if (seqFlag) {  // for sequenceScatterAgentLayer
-    CHECK(input.sequenceStartPositions)
-        << "boot layer must be a sequence when is_sequence = true";
     std::vector<int> sequenceStartPositions;
     sequenceStartPositions.push_back(0);  // first element = 0
     const int* starts = input.sequenceStartPositions->getData(false);

@@ -804,8 +954,7 @@ size_t RecurrentGradientMachine::getGenBatchSize() {
   for (auto& memoryFrameLine : memoryFrameLines_) {
     if (!memoryFrameLine.rootLayer) continue;
     Argument& bootArg = memoryFrameLine.rootLayer->getOutput();
-    size_t batchSize = memoryFrameLine.is_sequence ? bootArg.getNumSequences()
-                                                   : bootArg.getBatchSize();
+    size_t batchSize = bootArg.getNumSequences();
     if (numSequences) {
       CHECK_EQ(numSequences, batchSize);
     } else {

@@ -845,12 +994,7 @@ void RecurrentGradientMachine::generateSequence() {
     if (memoryFrameLine.rootAgent) {
       auto scatterAgent =
           dynamic_cast<ScatterAgentLayer*>(memoryFrameLine.rootAgent.get());
-      bool seqFlag = memoryFrameLine.is_sequence;
-      scatterAgent->setRealLayer(memoryFrameLine.rootLayer, ids, seqFlag);
-      if (seqFlag) {
-        CHECK(memoryFrameLine.rootLayer->getOutput().sequenceStartPositions)
-            << "boot layer must be a sequence when is_sequence = true";
-      }
+      scatterAgent->setRealLayer(memoryFrameLine.rootLayer, ids);
     }
     NeuralNetwork::connect(
         memoryFrameLine.agents[0], memoryFrameLine.bootLayer, ids.size());

@@ -858,6 +1002,7 @@ void RecurrentGradientMachine::generateSequence() {
   // boot layer forward
   AsyncGpuBlock asyncGpuBlock;
+
   for (auto& memoryFrameLine : memoryFrameLines_) {
     memoryFrameLine.bootLayer->forward(PASS_TEST);
   }

@@ -930,8 +1075,7 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
       auto scatterAgent = dynamic_cast<ScatterAgentLayer*>(
           memoryFrameLine.scatterAgents[machineCur].get());
-      scatterAgent->setRealLayer(memoryFrameLine.frames[machinePrev],
-                                 scatterIds,
-                                 memoryFrameLine.is_sequence);
+      scatterAgent->setRealLayer(memoryFrameLine.frames[machinePrev],
+                                 scatterIds);
       scatterAgent->forward(PASS_TEST);
       NeuralNetwork::connect(memoryFrameLine.agents[machineCur],
                              memoryFrameLine.scatterAgents[machineCur]);

@@ -1003,8 +1147,7 @@ void RecurrentGradientMachine::connectPrevFrame(int stepId,
     auto scatterAgent = dynamic_cast<ScatterAgentLayer*>(
         memoryFrameLine.scatterAgents[machineCur].get());
-    scatterAgent->setRealLayer(memoryFrameLine.frames[machinePrev],
-                               isOutIds ? topIds_ : machineIds_,
-                               memoryFrameLine.is_sequence);
+    scatterAgent->setRealLayer(memoryFrameLine.frames[machinePrev],
+                               isOutIds ? topIds_ : machineIds_);
     scatterAgent->forward(PASS_TEST);
     NeuralNetwork::connect(memoryFrameLine.agents[machineCur],
                            memoryFrameLine.scatterAgents[machineCur]);
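The new calcSequenceStartPositions() above rebuilds the top-level start offsets of the reorganized output batch from per-sequence lengths, using the small lenToStarts() helper: an in-place exclusive prefix sum over the length array. A Python sketch of the same conversion (illustrative, not part of the commit):

    def len_to_starts(lengths):
        # [3, 2, 4] -> [0, 3, 5, 9]; the final entry is the total length,
        # matching what lenToStarts() leaves in starts.back().
        starts, pos = [], 0
        for n in lengths:
            starts.append(pos)
            pos += n
        starts.append(pos)
        return starts

    assert len_to_starts([3, 2, 4]) == [0, 3, 5, 9]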
paddle/gserver/gradientmachines/RecurrentGradientMachine.h

@@ -284,6 +284,16 @@ public:
   }

 protected:
+  std::vector<Argument::SeqInfo> commonSeqInfo_;
+  ICpuGpuVectorPtr sequenceStartPositions_;
+  void calcSequenceStartPositions();
+  void checkInputConsistency(int inlinkId,
+                             const std::vector<Argument::SeqInfo>& seqInfo);
+  void reorganizeInput(PassType passType);
+  void reorganizeOutput(PassType passType);
+  void connectFrames(PassType passType);
+  void calcNumSequencesAtEachStep();
+
   void resizeOrCreateFrames(int numFrames);
   void resizeBootFrame(int numSequences);

@@ -295,7 +305,6 @@ protected:
     std::string linkName;
     LayerPtr inLayer;
     std::vector<LayerPtr> agents;  // Scatter Agents to reform batch input
-    bool hasSubseq;
     Argument outArg;  // scatter output argument
   };
   std::vector<InFrameLine> inFrameLines_;

@@ -318,7 +327,6 @@ protected:
     std::vector<LayerPtr> agents;
     std::vector<LayerPtr> scatterAgents;  // scatter agent used by beam search
     Argument outArg;  // scatter output argument
-    bool is_sequence;
     // Different memoryFrameLine have different element as follows
     IVectorPtr allIds;  // scattered id of realLayer
     ICpuGpuVectorPtr

@@ -330,22 +338,27 @@ protected:
   // and all outFrameLines(outlinks) share the info with one inFrameLine,
   // which is assigned by targetInfoInlinkId_.
   struct Info {
-    IVectorPtr allIds;         // scattered id of realLayer
-    std::vector<int> idIndex;  // index of allIds
+    // The original positions in the original batch
+    IVectorPtr allIds;  // scattered id of realLayer [batchSize]
+
+    // index of allIds for each step [maxSequenceLength_]
+    // idIndex[i] is the total length of the first i sequences
+    std::vector<int> idIndex;
+
     ICpuGpuVectorPtr
         sequenceStartPositions;         // scattered sequenceStartPositions
     std::vector<int> seqStartPosIndex;  // index of sequenceStartPositions
   };
-  std::vector<Info> info_;
+  std::vector<Info> info_;  // for input
+
+  // numSeqs_[i] is the number sequences which is longer than i (for sequence
+  // data) or has more than i subsequences (for subsequence data)
+  // Equivalently, numSeqs_[i] is the number of sequences at step i;
+  std::vector<int> numSeqs_;

   std::vector<std::vector<Argument::SeqInfo>> seqInfos_;

-  // the id of inlink which share info with outlinks
-  int targetInfoInlinkId_;
+  void checkOutputConsistency(OutFrameLine& outFrameLine);

   /* create scattered id infomation for all realLayer of inFrameLines one time.
    * If hasSubseq, will also create scattered sequenceStartPositions infomation

@@ -354,6 +367,28 @@ protected:
   void createInFrameInfo(int inlinks_id,
                          const Argument& input,
                          PassType passType);
+  void createInFrameInfo_nonseq(int inlinks_id,
+                                const Argument& input,
+                                PassType passType);
+  void createInFrameInfo_seq(int inlinks_id,
+                             const Argument& input,
+                             PassType passType);
+  void createInFrameInfo_subseq(int inlinks_id,
+                                const Argument& input,
+                                PassType passType);
+
+  void createOutFrameInfo(OutFrameLine& outFrameLine,
+                          Info& info,
+                          ICpuGpuVectorPtr& sequenceStartPositions,
+                          ICpuGpuVectorPtr& subSequenceStartPositions);
+  void createOutFrameInfo_seq(OutFrameLine& outFrameLine,
+                              Info& info,
+                              ICpuGpuVectorPtr& sequenceStartPositions,
+                              ICpuGpuVectorPtr& subSequenceStartPositions);
+  void createOutFrameInfo_subseq(OutFrameLine& outFrameLine,
+                                 Info& info,
+                                 ICpuGpuVectorPtr& sequenceStartPositions,
+                                 ICpuGpuVectorPtr& subSequenceStartPositions);

   void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
                              PassType passType);

@@ -386,9 +421,7 @@ protected:
   NeuralNetwork* rootNetwork_;
   bool reversed_;
-  // if hasSubseq: max number of sentences(subseq)in batchsize samples
-  // else: max number of tokens in batchsize samples(sentences)
-  int maxSequenceLength_;
+
+  int maxSequenceLength_;  // Max top-level length
   bool useGpu_;
   bool stopBeamSearch_;
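The header comments describe the bookkeeping that replaces targetInfoInlinkId_: numSeqs_[i] is the number of top-level sequences still active at step i, and Info::idIndex[i] is how many scattered ids were emitted before step i. A small sketch of numSeqs_, assuming commonSeqInfo_ is sorted by decreasing topLevelLength as Argument::getSeqInfo produces it (illustrative only):

    def calc_num_seqs(top_level_lengths):
        # With lengths sorted in decreasing order, the count of sequences
        # active at step i is the number of lengths greater than i.
        max_len = max(top_level_lengths)
        return [sum(1 for n in top_level_lengths if n > i)
                for i in range(max_len)]

    assert calc_num_seqs([3, 2]) == [2, 2, 1]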
paddle/gserver/layers/AgentLayer.cpp

@@ -35,36 +35,15 @@ bool AgentLayer::init(const LayerMap& layerMap,
 void AgentLayer::forward(PassType passType) {
   Layer::forward(passType);

   Argument& realOutput = realLayer_->getOutput();
-  int realHeight = realOutput.getBatchSize();
-  CHECK_LE(numSamples_, realHeight);
-
-  // get Arguments from real layers
-  if (numSamples_ > 0 && numSamples_ < realHeight) {
-    if (realOutput.ids) {
-      output_.ids =
-          IVector::create(realOutput.ids->getData(), numSamples_, useGpu_);
-    } else {
-      output_.subArgFrom(
-          realOutput, /* offset */ 0, numSamples_, getSize(), useGpu_);
-    }
-  } else {
-    output_ = realOutput;
-  }
-}
-
-void SequenceAgentLayer::forward(PassType passType) {
-  Layer::forward(passType);
-
-  Argument& realOutput = realLayer_->getOutput();
   int realNumSequences = realOutput.getNumSequences();
   CHECK_LE(numSamples_, realNumSequences);

   // get Arguments from real layers
   if (numSamples_ > 0 && numSamples_ < realNumSequences) {
+    if (realOutput.hasSeq()) {
       int numRows =
           realOutput.sequenceStartPositions->getData(false)[numSamples_];
       CHECK(!realOutput.ids) << "Not supported";
       output_.subArgFrom(realOutput, /* offset */ 0, numRows,

@@ -74,13 +53,15 @@ void SequenceAgentLayer::forward(PassType passType) {
                          /* seqFlag */ true,
                          /* seqStart */ 0,
                          /* seqSize */ numSamples_ + 1);
+    } else {
+      output_.subArgFrom(
+          realOutput, /* offset */ 0, numSamples_, getSize(), useGpu_);
+    }
   } else {
     output_ = realOutput;
   }
 }

-REGISTER_LAYER(sequence_agent, SequenceAgentLayer);
-
 bool GatherAgentLayer::init(const LayerMap& layerMap,
                             const ParameterMap& parameterMap) {
   CHECK_EQ(config_.inputs_size(), 0);

@@ -91,18 +72,26 @@ bool GatherAgentLayer::init(const LayerMap& layerMap,
   return true;
 }

-void GatherAgentLayer::copyIdAndSequenceInfo(const Argument& input,
-                                             const IVectorPtr& ids,
-                                             const std::vector<int>& idIndex) {
-  output_.sequenceStartPositions = input.sequenceStartPositions;
-  output_.subSequenceStartPositions = input.subSequenceStartPositions;
-  realLayers_.clear();
+void GatherAgentLayer::copyIdAndSequenceInfo(
+    ICpuGpuVectorPtr sequenceStartPositions,
+    ICpuGpuVectorPtr subSequenceStartPositions,
+    const IVectorPtr& ids,
+    const std::vector<int>& idIndex) {
+  output_.sequenceStartPositions = sequenceStartPositions;
+  output_.subSequenceStartPositions = subSequenceStartPositions;
   allIds_ = ids;
   idIndex_ = idIndex;
 }

 void GatherAgentLayer::forward(PassType passType) {
   Layer::forward(passType);
+  forwardIds(passType);
+  forwardValue(passType);
+}
+
+void GatherAgentLayer::forwardValue(PassType passType) {
+  MatrixPtr valueReal = realLayers_[0]->getOutputValue();
+  if (!valueReal) return;

   int height = allIds_->getSize();
   int width = this->getSize();

@@ -147,7 +136,9 @@ void ScatterAgentLayer::forward(PassType passType) {
   CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());

   int width = this->getSize();
-  if (realOutArg_.value || realOutArg_.ids) {
+  if (realOutArg_.hasSeq()) {
+    forwardSequence(passType);
+  } else if (realOutArg_.value || realOutArg_.ids) {
     output_.subArgFrom(
         realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_);
   } else {  // used in generation

@@ -174,7 +165,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
   if (realGrad) {
     // for agent in inFrameLines and memoryFrameLines,
     // only first scatterAgentLayer should do addToRows in backward
-    if (idIndex_ == 0) {
+    if (handleBackward_) {
       outputGrad->addToRows(*realGrad, *ids_);
     }
   }

@@ -183,12 +174,14 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
 REGISTER_LAYER(gather_agent, GatherAgentLayer);
 REGISTER_LAYER(scatter_agent, ScatterAgentLayer);

-void SequenceGatherAgentLayer::forward(PassType passType) {
-  Layer::forward(passType);
+void GatherAgentLayer::forwardIds(PassType passType) {
   int height = 0;
-  int* starts = output_.subSequenceStartPositions->getMutableData(false);
   IVectorPtr idReal = realLayers_[0]->getOutputLabel();
-  if (idReal) {
+
+  if (!idReal) return;
+
+  if (output_.subSequenceStartPositions) {
+    int* starts = output_.subSequenceStartPositions->getMutableData(false);
     // Gather generator.idsVec
     // if is beam search generation result. Get first result.
     if (idReal->getData()[idReal->getSize() - 1] == -1) {

@@ -212,13 +205,11 @@ void SequenceGatherAgentLayer::forward(PassType passType) {
           ->copyFrom(*realLayers_[i]->getOutputLabel());
     }
   } else {
-    // Gather output.value, same as GatherAgentLayer
-    CHECK(output_.subSequenceStartPositions);
-    GatherAgentLayer::forward(passType);
+    LOG(FATAL) << "Not implemented";
   }
 }

-void SequenceScatterAgentLayer::forward(PassType passType) {
+void ScatterAgentLayer::forwardSequence(PassType passType) {
   Layer::forward(passType);
   CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());

@@ -241,6 +232,7 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
                        /* seqStart */ seqStartPosIndex_,
                        /* seqSize */ numSequences_);
   } else {
+    // Putting the generation logic here is really an ugly hack!
     // used in generation
     int height = 0;
     size_t numSequences = ids_->getSize();

@@ -284,7 +276,4 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
   }
 }

-REGISTER_LAYER(sequence_gather_agent, SequenceGatherAgentLayer);
-REGISTER_LAYER(sequence_scatter_agent, SequenceScatterAgentLayer);
-
 }  // namespace paddle
paddle/gserver/layers/AgentLayer.h

@@ -49,18 +49,6 @@ public:
   void backward(const UpdateCallback& callback = nullptr) override {}
 };

-/**
- * like AgentLayer, but use first *numSamples* sequences
- */
-class SequenceAgentLayer : public AgentLayer {
-public:
-  explicit SequenceAgentLayer(const LayerConfig& config) : AgentLayer(config) {}
-  ~SequenceAgentLayer() {}
-
-  void forward(PassType passType) override;
-  void backward(const UpdateCallback& callback = nullptr) override {}
-};
-
 /**
  * Like AgentLayer, but it can gather many real layers. Each real
  * layer give a few rows of a sequence, after gather all real layers,

@@ -83,7 +71,10 @@ public:
             const ParameterMap& parameterMap) override;

   // call before addRealLayer
-  void copyIdAndSequenceInfo(const Argument& input,
+  void clearRealLayers() { realLayers_.clear(); }
+
+  void copyIdAndSequenceInfo(ICpuGpuVectorPtr sequenceStartPositions,
+                             ICpuGpuVectorPtr subSequenceStartPositions,
                              const IVectorPtr& allIds,
                              const std::vector<int>& idIndex);

@@ -92,24 +83,8 @@ public:
   void forward(PassType passType) override;
   void backward(const UpdateCallback& callback) override;
-};
-
-/**
- * Like GatherAgentLayer, but select a few sequence in real layer.
- * *ids* in addRealLayer() are the ids of selected sequence.
- * It's used to reorder sequence output.
- */
-class SequenceGatherAgentLayer : public GatherAgentLayer {
-public:
-  explicit SequenceGatherAgentLayer(const LayerConfig& config)
-      : GatherAgentLayer(config) {}
-  virtual ~SequenceGatherAgentLayer() {}
-
-  void forward(PassType passType);
-  void backward(const UpdateCallback& callback) {
-    // same as GatherAgentLayer
-    GatherAgentLayer::backward(callback);
-  }
+
+  void forwardValue(PassType passType);
+  void forwardIds(PassType passType);
 };

 /**

@@ -129,6 +104,11 @@ protected:
   int idSize_;
   int seqStartPosIndex_;
   int numSequences_;  // number of sequences in this scatterAgentLayer
+  bool handleBackward_;
+
+  // use to store expanded cpuStartPositions or subSequenceStartPositions
+  // of real layer.
+  ICpuGpuVectorPtr inputStartPos_;

 public:
   explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {}

@@ -147,13 +127,10 @@ public:
    *            false(default) in ScatterAgentLayer, and
    *            true in SequenceScatterAgentLayer.
    */
-  void setRealLayer(LayerPtr layer,
-                    const std::vector<int>& ids,
-                    bool copyId = false) {
+  void setRealLayer(LayerPtr layer, const std::vector<int>& ids) {
     realLayer_ = layer;
     IVector::resizeOrCreate(ids_, ids.size(), useGpu_);
     ids_->copyFrom(ids.data(), ids.size());
-    if (copyId) {
-      if (useGpu_) {
-        IVector::resizeOrCreate(cpuIds_, ids.size(), false);
-        cpuIds_->copyFrom(ids.data(), ids.size());
+    if (useGpu_) {
+      IVector::resizeOrCreate(cpuIds_, ids.size(), false);
+      cpuIds_->copyFrom(ids.data(), ids.size());

@@ -161,7 +138,6 @@ public:
       cpuIds_ = ids_;
     }
-    }
   }

   // set real layer and output, [idIndex, idIndex + idSize) of *ids*
   // are selected row for realOutArg in realLayer

@@ -169,12 +145,14 @@ public:
   void setRealLayerAndOutput(LayerPtr layer,
                              const Argument& outArg,
                              const IVectorPtr& ids,
                              int idIndex,
-                             int idSize) {
+                             int idSize,
+                             bool handleBackward) {
     realLayer_ = layer;
     realOutArg_ = outArg;
     ids_ = ids;
     idIndex_ = idIndex;
     idSize_ = idSize;
+    handleBackward_ = handleBackward;
   }

   void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions,

@@ -187,28 +165,8 @@ public:
   void forward(PassType passType) override;
   void backward(const UpdateCallback& callback) override;
-};
-
-/**
- * Like ScatterAgentLayer, but select a few sequence in real layer.
- * *ids* in setRealLayer() or setRealLayerAndOutput() are the ids of
- * selected sequence. It's used to reorder sequence input.
- */
-class SequenceScatterAgentLayer : public ScatterAgentLayer {
-protected:
-  // use to store expanded cpuStartPositions or subSequenceStartPositions
-  // of real layer.
-  ICpuGpuVectorPtr inputStartPos_;
-
-public:
-  explicit SequenceScatterAgentLayer(const LayerConfig& config)
-      : ScatterAgentLayer(config) {}
-  virtual ~SequenceScatterAgentLayer() {}
-
-  void forward(PassType passType);
-  void backward(const UpdateCallback& callback) {
-    ScatterAgentLayer::backward(callback);
-  }
+
+  void forwardSequence(PassType passType);
 };

 }  // namespace paddle
paddle/gserver/layers/SequencePoolLayer.cpp

@@ -46,6 +46,9 @@ void SequencePoolLayer::forward(PassType passType) {
   Layer::forward(passType);

   const Argument& input = getInput(0);
+  CHECK(input.hasSeq() || input.hasSubseq())
+      << "Input should be a sequence or subsequence for layer " << getName();
+
   newBatchSize_ = type_ ? input.getNumSubSequences() : input.getNumSequences();
   size_t dim = getSize();
   // check
paddle/gserver/tests/rnn_data_provider.py

@@ -95,3 +95,22 @@ def process_unequalength_seq(settings, file_name):
         words1 = reduce(lambda x, y: x + y, d[0])
         words2 = reduce(lambda x, y: x + y, d[1])
         yield words1, words2, d[2]


+###########################################################
+data3 = [
+    [[[1, 2], [4, 5, 2]], [1, 2], 0],
+    [[[0, 2], [2, 5], [0, 1, 2]], [2, 3, 0], 1],
+]
+
+
+# Used for sequence_nest_mixed_inputs.conf
+@provider(
+    input_types=[
+        integer_value_sub_sequence(10), integer_value_sequence(10),
+        integer_value(2)
+    ],
+    should_shuffle=False)
+def process_mixed(settings, file_name):
+    for d in data3:
+        yield d
paddle/gserver/tests/sequence_nest_rnn_multi_input.conf

@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
 define_py_data_sources2(
     train_list='gserver/tests/Sequence/dummy.list',
     test_list=None,
     module='rnn_data_provider',
-    obj='process_subseq2')
+    obj='process_subseq')

 settings(batch_size=2, learning_rate=0.01)
paddle/gserver/tests/sequence_rnn_matched_inputs.py  (new file, mode 100644)

# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *

######################## data source ################################
define_py_data_sources2(
    train_list='gserver/tests/Sequence/dummy.list',
    test_list=None,
    module='rnn_data_provider',
    obj='process_mixed')

settings(batch_size=2, learning_rate=0.01)

######################## network configure ################################
dict_dim = 10
word_dim = 2
hidden_dim = 2
label_dim = 2

data1 = data_layer(name="word1", size=dict_dim)
data2 = data_layer(name="word2", size=dict_dim)
label = data_layer(name="label", size=label_dim)

encoding = embedding_layer(input=data2, size=word_dim)

subseq = embedding_layer(input=data1, size=word_dim)
seq = embedding_layer(input=data2, size=word_dim)
nonseq = embedding_layer(input=label, size=word_dim)


# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def outer_step(subseq, seq, nonseq, encoding):
    outer_mem = memory(name="outer_rnn_state", size=hidden_dim)

    def inner_step(subseq, seq, nonseq):
        inner_mem = memory(
            name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem)

        out = fc_layer(
            input=[subseq, seq, nonseq, inner_mem],
            size=hidden_dim,
            act=TanhActivation(),
            bias_attr=True,
            name='inner_rnn_state')
        return out

    decoder = recurrent_group(
        step=inner_step, name='inner', input=[subseq, seq, nonseq])
    last = last_seq(name="outer_rnn_state", input=decoder)
    context = simple_attention(
        encoded_sequence=encoding, encoded_proj=encoding, decoder_state=last)
    return context


out = recurrent_group(
    name="outer",
    step=outer_step,
    input=[
        subseq,
        expand_layer(
            seq, expand_as=subseq, expand_level=ExpandLevel.FROM_SEQUENCE),
        expand_layer(
            nonseq,
            expand_as=subseq,
            expand_level=ExpandLevel.FROM_NO_SEQUENCE),
        StaticInput(encoding)
    ])

rep = last_seq(input=out)
prob = fc_layer(
    size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True)

outputs(classification_cost(input=prob, label=label))
paddle/gserver/tests/sequence_rnn_mixed_inputs.py  (new file, mode 100644)

# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *

######################## data source ################################
define_py_data_sources2(
    train_list='gserver/tests/Sequence/dummy.list',
    test_list=None,
    module='rnn_data_provider',
    obj='process_mixed')

settings(batch_size=2, learning_rate=0.01)

######################## network configure ################################
dict_dim = 10
word_dim = 2
hidden_dim = 2
label_dim = 2

data1 = data_layer(name="word1", size=dict_dim)
data2 = data_layer(name="word2", size=dict_dim)
label = data_layer(name="label", size=label_dim)

encoding = embedding_layer(input=data2, size=word_dim)


# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def outer_step(subseq, seq, nonseq, encoding):
    outer_mem = memory(name="outer_rnn_state", size=hidden_dim)

    def inner_step(data1, data2, label):
        inner_mem = memory(
            name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem)

        subseq = embedding_layer(input=data1, size=word_dim)
        seq = embedding_layer(input=data2, size=word_dim)
        nonseq = embedding_layer(input=label, size=word_dim)

        print_layer(input=[data1, seq, label, inner_mem])
        out = fc_layer(
            input=[subseq, seq, nonseq, inner_mem],
            size=hidden_dim,
            act=TanhActivation(),
            bias_attr=True,
            name='inner_rnn_state')
        return out

    decoder = recurrent_group(
        step=inner_step, name='inner', input=[subseq, seq, nonseq])
    last = last_seq(name="outer_rnn_state", input=decoder)
    context = simple_attention(
        encoded_sequence=encoding, encoded_proj=encoding, decoder_state=last)
    return context


out = recurrent_group(
    name="outer",
    step=outer_step,
    input=[data1, data2, label, StaticInput(encoding)])

rep = last_seq(input=out)
prob = fc_layer(
    size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True)

outputs(classification_cost(input=prob, label=label))
paddle/gserver/tests/sequence_rnn_multi_input.conf

@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
 define_py_data_sources2(
     train_list='gserver/tests/Sequence/dummy.list',
     test_list=None,
     module='rnn_data_provider',
-    obj='process_seq2')
+    obj='process_seq')

 settings(batch_size=2, learning_rate=0.01)
paddle/gserver/tests/test_RecurrentGradientMachine.cpp

@@ -155,6 +155,15 @@ TEST(RecurrentGradientMachine, rnn_multi_unequalength_input) {
   }
 }

+TEST(RecurrentGradientMachine, rnn_mixed_input) {
+  for (bool useGpu : {false, true}) {
+    test("gserver/tests/sequence_rnn_mixed_inputs.py",
+         "gserver/tests/sequence_rnn_matched_inputs.py",
+         1e-6,
+         useGpu);
+  }
+}
+
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
paddle/math/Vector.cpp

@@ -908,12 +908,13 @@ const T* CpuGpuVectorT<T>::getData(bool useGpu) const {
 // Operation will change data and need to reset sync_ & syncFlag_.
 #define MUTABLE_VECTOR_OP(OP, useGpu, args...) \
   do {                                         \
-    setSync(useGpu);                           \
     if (useGpu) {                              \
       copyToGpu();                             \
+      setSync(useGpu);                         \
       return gpuVectorT_->OP(args);            \
     } else {                                   \
       copyToCpu();                             \
+      setSync(useGpu);                         \
       return cpuVectorT_->OP(args);            \
     }                                          \
   } while (0)

@@ -1030,7 +1031,7 @@ void CpuGpuVectorT<T>::copyToCpu() {
     case DATA_AT_GPU:
       CHECK(gpuVectorT_);
       this->resizeOrCreate(gpuVectorT_->getSize(), false);
-      cpuVectorT_->copyFrom(*gpuVectorT_, HPPL_STREAM_DEFAULT);
+      cpuVectorT_->copyFrom(*gpuVectorT_);
       setSync(SYNCED);
       break;
     case DATA_AT_CPU:

@@ -1049,7 +1050,7 @@ void CpuGpuVectorT<T>::copyToGpu() {
     case DATA_AT_CPU:
       CHECK(cpuVectorT_);
       this->resizeOrCreate(cpuVectorT_->getSize(), true);
-      gpuVectorT_->copyFrom(*cpuVectorT_, HPPL_STREAM_DEFAULT);
+      gpuVectorT_->copyFrom(*cpuVectorT_);
       setSync(SYNCED);
       break;
     case DATA_AT_GPU:
paddle/parameter/Argument.h

@@ -149,6 +149,7 @@ struct Argument {
                        : getBatchSize();
   }

+  bool hasSeq() const { return sequenceStartPositions != nullptr; }
   bool hasSubseq() const { return subSequenceStartPositions != nullptr; }

   const int* getCpuStartPositions() const {
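The new Argument::hasSeq() makes the three input kinds that the recurrent group now distinguishes a direct function of which start-position arrays an Argument carries. Roughly (a sketch, not PaddlePaddle API):

    def input_kind(has_seq, has_subseq):
        # Mirrors the dispatch in createInFrameInfo(): a sub-sequence input
        # also carries top-level starts; a non-sequence input carries neither.
        if has_subseq:
            return "subsequence"   # createInFrameInfo_subseq
        if has_seq:
            return "sequence"      # createInFrameInfo_seq
        return "non-sequence"      # createInFrameInfo_nonseq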
paddle/trainer/tests/test_recurrent_machine_generation.cpp

@@ -124,6 +124,8 @@ TEST(RecurrentGradientMachine, test_generation) {
                            bool beam_search) {
     FLAGS_config_args = beam_search ? "beam_search=1" : "beam_search=0";
     for (auto useGpu : useGpuConfs) {
+      LOG(INFO) << configFile << " useGpu=" << useGpu
+                << " beam_search=" << beam_search;
       testGeneration(configFile, useGpu, hasSubseq, expRetFile);
     }
   };
python/paddle/trainer/config_parser.py

@@ -333,48 +333,32 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
     for linkid, link in enumerate(in_links):
         if isinstance(link, basestring):
             name = link
-            has_subseq = False
         else:
             name = link.link_name
-            has_subseq = link.has_subseq
-        # assign target_inlinkid according to target_inlinkname
-        if target_inlinkname == name:
-            g_current_submodel.target_inlinkid = linkid
-
-        if in_links_count == 0:
-            in_links_has_subseq = has_subseq
-        else:
-            config_assert(
-                in_links_has_subseq == has_subseq,
-                "The sequence type of in_links should be the same in RecurrentLayerGroup"
-            )
         in_links_count += 1
         layer_name = MakeLayerNameInParentSubmodel(name)
         layer = g_layer_map[layer_name]
-        if has_subseq:
-            SequenceScatterAgentLayer(name=name, size=layer.size)
-        else:
-            ScatterAgentLayer(name=name, size=layer.size)
+        ScatterAgentLayer(name=name, size=layer.size)

         pair = g_current_submodel.in_links.add()
         pair.layer_name = layer_name
         pair.link_name = MakeLayerNameInSubmodel(name)
-        pair.has_subseq = has_subseq


 @config_func
 def RecurrentLayerGroupSetOutLink(link):
     if isinstance(link, basestring):
         name = link
-        has_subseq = False
     else:
         name = link.link_name
-        has_subseq = link.has_subseq
     layer_name = MakeLayerNameInParentSubmodel(name)
     pair = g_current_submodel.out_links.add()
     pair.layer_name = MakeLayerNameInSubmodel(name)
     pair.link_name = layer_name
-    pair.has_subseq = has_subseq


 def RecurrentLayerGroupSetGenerator(generator=None):

@@ -425,8 +409,6 @@ def RecurrentLayerGroupEnd(name):
         agent_name = GetLayerBaseName(pair.link_name)
         if prev_submodel.HasField("generator"):
             DataLayer(name=agent_name, size=layer.size)
-        elif pair.has_subseq:
-            SequenceGatherAgentLayer(name=agent_name, size=layer.size)
         else:
             GatherAgentLayer(name=agent_name, size=layer.size)

@@ -2253,13 +2235,6 @@ class AgentLayer(LayerBase):
             name, 'agent', size, inputs=[], device=device)


-@config_layer('sequence_agent')
-class SequenceAgentLayer(LayerBase):
-    def __init__(self, name, size, device=None):
-        super(SequenceAgentLayer, self).__init__(
-            name, 'sequence_agent', size, inputs=[], device=device)
-
-
 @config_layer('gather_agent')
 class GatherAgentLayer(LayerBase):
     def __init__(self, name, size, device=None):

@@ -2274,20 +2249,6 @@ class ScatterAgentLayer(LayerBase):
             name, 'scatter_agent', size, inputs=[], device=device)


-@config_layer('sequence_gather_agent')
-class SequenceGatherAgentLayer(LayerBase):
-    def __init__(self, name, size, device=None):
-        super(SequenceGatherAgentLayer, self).__init__(
-            name, 'sequence_gather_agent', size, inputs=[], device=device)
-
-
-@config_layer('sequence_scatter_agent')
-class SequenceScatterAgentLayer(LayerBase):
-    def __init__(self, name, size, device=None):
-        super(SequenceScatterAgentLayer, self).__init__(
-            name, 'sequence_scatter_agent', size, inputs=[], device=device)
-
-
 @config_layer('multiplex')
 class MultiplexLayer(LayerBase):
     def __init__(self, name, inputs, size, device=None):

@@ -2303,12 +2264,12 @@ class MultiplexLayer(LayerBase):
 @config_func
-def Link(
-        name,
-        has_subseq=False, ):
+def Link(name, has_subseq=False):
+    """
+    Still keeping has_subseq for backward compatibility
+    """
     link_config = LinkConfig()
     link_config.link_name = name
     link_config.has_subseq = has_subseq
     return link_config

@@ -2341,12 +2302,6 @@ def Memory(name,
     config_assert(name is not None, "name needs cannot be None")
     memory_name = name + "+delay1"
     agent_name = memory_name
-    if is_sequence:
-        config_assert(
-            boot_layer is not None,
-            "there must be boot_layer in network when is_sequence = True")
-        agent_layer = SequenceAgentLayer(agent_name, size)
-    else:
-        agent_layer = AgentLayer(agent_name, size)
+    agent_layer = AgentLayer(agent_name, size)
     config_assert(g_current_submodel.is_recurrent_layer_group,
                   'Memory should be used in recurrent layer group only')
python/paddle/trainer_config_helpers/layers.py

@@ -3329,8 +3329,9 @@ class StaticInput(object):
             input.size = size


-class SubsequenceInput(object):
+def SubsequenceInput(input):
     """
+    DEPRECATED.
     Input sequence has sub-sequence, used in recurrent_group.
     The example usage is:

@@ -3339,11 +3340,7 @@ class SubsequenceInput(object):
        input = SubsequenceInput(layer)
     """
-
-    def __init__(self, input):
-        assert isinstance(input, LayerOutput)
-        assert input.size is not None
-        self.input = input
+    return input


 @wrap_name_default("recurrent_group")

@@ -3407,7 +3404,8 @@ def recurrent_group(step,
                     input sequence in a reverse order.
     :type reverse: bool

-    :param targetInlink: the input layer which share info with layer group's output
+    :param targetInlink: DEPRECATED.
+                         The input layer which share info with layer group's output

                          Param input specifies multiple input layers. For
                          SubsequenceInput inputs, config should assign one input

@@ -3429,46 +3427,21 @@ def recurrent_group(step,
     model_type('recurrent_nn')

     def is_single_input(x):
-        return isinstance(x, LayerOutput) or isinstance(x, StaticInput) \
-               or isinstance(x, SubsequenceInput)
+        return isinstance(x, LayerOutput) or isinstance(x, StaticInput)

     if is_single_input(input):
         input = [input]
     assert isinstance(input, collections.Sequence)

     def is_in_links(x):
-        return isinstance(x, LayerOutput) or isinstance(x, SubsequenceInput)
+        return isinstance(x, LayerOutput)

     in_links = filter(is_in_links, input)

-    def targetInlink_in_inlinks():
-        for inlink in in_links:
-            if isinstance(inlink, SubsequenceInput):
-                if targetInlink == inlink.input:
-                    return True
-            elif targetInlink == inlink:
-                return True
-        return False
-
-    assert (targetInlink == None or targetInlink_in_inlinks())
-    targetInlinkName = None if targetInlink == None \
-                            else targetInlink.name if isinstance(targetInlink,
-                                                                 LayerOutput) \
-                                                   else targetInlink.input.name
-
-    contains_sub_seq = [False]
-
-    def map_in_links(x):
-        if isinstance(x, SubsequenceInput):
-            contains_sub_seq[0] = True
-            return Link(name=x.input.name, has_subseq=True)
-        else:
-            return x.name
-
     RecurrentLayerGroupWithoutOutLinksBegin(
         name=name,
-        in_links=map(map_in_links, in_links),
-        seq_reversed=reverse,
-        target_inlinkname=targetInlinkName)
+        in_links=map(lambda x: x.name, in_links),
+        seq_reversed=reverse)
     in_args = []
     has_LayerOutput = False
     for each_input in input:

@@ -3476,10 +3449,7 @@ def recurrent_group(step,
         if isinstance(each_input, LayerOutput):
             in_args.append(each_input)
             has_LayerOutput = True
-        elif isinstance(each_input, SubsequenceInput):
-            in_args.append(each_input.input)
-            has_LayerOutput = True
-        else:
+        else:  # StaticInput
             mem_name = "__%s_memory__" % each_input.input.name
             mem = memory(
                 name=mem_name,

@@ -3503,9 +3473,6 @@ def recurrent_group(step,
     for ot in layer_outs:
         assert isinstance(ot, LayerOutput)
         ot.reverse = reverse
-        if contains_sub_seq[0]:
-            RecurrentLayerGroupSetOutLink(Link(ot.name, has_subseq=True))
-        else:
-            RecurrentLayerGroupSetOutLink(ot.name)
+        RecurrentLayerGroupSetOutLink(ot.name)

     RecurrentLayerGroupEnd(name=name)
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr

@@ -261,12 +261,10 @@ sub_models {
   in_links {
     layer_name: "__simple_gru_0___transform"
     link_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
-    has_subseq: false
   }
   out_links {
     layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
     link_name: "__simple_gru_0__"
-    has_subseq: false
   }
   target_inlinkid: -1
 }

@@ -285,12 +283,10 @@ sub_models {
   in_links {
     layer_name: "__simple_gru_1___transform"
     link_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
-    has_subseq: false
   }
   out_links {
     layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
     link_name: "__simple_gru_1__"
-    has_subseq: false
   }
   target_inlinkid: -1
 }
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr

@@ -351,12 +351,10 @@ sub_models {
   in_links {
     layer_name: "__mixed_0__"
     link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
-    has_subseq: false
   }
   out_links {
     layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
     link_name: "__lstm_group_0__"
-    has_subseq: false
   }
   target_inlinkid: -1
 }

@@ -383,12 +381,10 @@ sub_models {
   in_links {
     layer_name: "__mixed_1__"
     link_name: "__mixed_1__@__lstm_group_1___recurrent_group"
-    has_subseq: false
   }
   out_links {
     layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
     link_name: "__lstm_group_1__"
-    has_subseq: false
   }
   target_inlinkid: -1
 }
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr

@@ -155,7 +155,7 @@ layers {
 }
 layers {
   name: "sub_seq_input@__recurrent_group_2__"
-  type: "sequence_scatter_agent"
+  type: "scatter_agent"
   size: 100
   active_type: ""
 }

@@ -182,7 +182,7 @@ layers {
 }
 layers {
   name: "rnn_subseq_forward"
-  type: "sequence_gather_agent"
+  type: "gather_agent"
   size: 200
   active_type: ""
 }

@@ -623,12 +623,10 @@ sub_models {
   in_links {
     layer_name: "seq_input"
     link_name: "seq_input@__recurrent_group_0__"
-    has_subseq: false
   }
   out_links {
     layer_name: "rnn_forward@__recurrent_group_0__"
     link_name: "rnn_forward"
-    has_subseq: false
   }
   target_inlinkid: -1
 }

@@ -647,12 +645,10 @@ sub_models {
   in_links {
     layer_name: "seq_input"
     link_name: "seq_input@__recurrent_group_1__"
-    has_subseq: false
   }
   out_links {
     layer_name: "rnn_back@__recurrent_group_1__"
     link_name: "rnn_back"
-    has_subseq: false
   }
   target_inlinkid: -1
 }

@@ -671,12 +667,10 @@ sub_models {
   in_links {
     layer_name: "sub_seq_input"
     link_name: "sub_seq_input@__recurrent_group_2__"
-    has_subseq: true
   }
   out_links {
     layer_name: "rnn_subseq_forward@__recurrent_group_2__"
     link_name: "rnn_subseq_forward"
-    has_subseq: true
   }
   target_inlinkid: -1
 }

@@ -703,12 +697,10 @@ sub_models {
   in_links {
     layer_name: "__mixed_0__"
     link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
-    has_subseq: false
   }
   out_links {
     layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
     link_name: "__lstm_group_0__"
-    has_subseq: false
   }
   target_inlinkid: -1
 }

@@ -727,12 +719,10 @@ sub_models {
   in_links {
     layer_name: "__mixed_1__"
     link_name: "__mixed_1__@__gru_group_0___recurrent_group"
-    has_subseq: false
   }
   out_links {
     layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
     link_name: "__gru_group_0__"
-    has_subseq: false
   }
   target_inlinkid: -1
 }

@@ -751,12 +741,10 @@ sub_models {
   in_links {
     layer_name: "seq_input"
     link_name: "seq_input@__recurrent_group_3__"
-    has_subseq: false
   }
   out_links {
     layer_name: "__fc_layer_0__@__recurrent_group_3__"
     link_name: "__fc_layer_0__"
-    has_subseq: false
   }
   target_inlinkid: -1
 }