Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
09f34c4b
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 2 年 前同步成功
通知
708
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
09f34c4b
编写于
6月 19, 2017
作者:
E
emailweixu
提交者:
GitHub
6月 19, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2479 from emailweixu/mixed_input_rnn
RecurrentGroup with mixed input sequence types
上级
17fe8322
ef61288f
变更
20
隐藏空白更改
内联
并排
Showing
20 changed file
with
662 addition
and
484 deletion
+662
-484
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+320
-177
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+44
-11
paddle/gserver/layers/AgentLayer.cpp
paddle/gserver/layers/AgentLayer.cpp
+42
-53
paddle/gserver/layers/AgentLayer.h
paddle/gserver/layers/AgentLayer.h
+21
-63
paddle/gserver/layers/SequencePoolLayer.cpp
paddle/gserver/layers/SequencePoolLayer.cpp
+3
-0
paddle/gserver/tests/rnn_data_provider.py
paddle/gserver/tests/rnn_data_provider.py
+19
-0
paddle/gserver/tests/sequence_nest_rnn_multi_input.conf
paddle/gserver/tests/sequence_nest_rnn_multi_input.conf
+2
-2
paddle/gserver/tests/sequence_rnn_matched_inputs.py
paddle/gserver/tests/sequence_rnn_matched_inputs.py
+85
-0
paddle/gserver/tests/sequence_rnn_mixed_inputs.py
paddle/gserver/tests/sequence_rnn_mixed_inputs.py
+79
-0
paddle/gserver/tests/sequence_rnn_multi_input.conf
paddle/gserver/tests/sequence_rnn_multi_input.conf
+1
-1
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
+9
-0
paddle/math/Vector.cpp
paddle/math/Vector.cpp
+4
-3
paddle/parameter/Argument.h
paddle/parameter/Argument.h
+1
-0
paddle/trainer/tests/test_recurrent_machine_generation.cpp
paddle/trainer/tests/test_recurrent_machine_generation.cpp
+2
-0
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+7
-58
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+20
-70
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
...config_helpers/tests/configs/protostr/shared_gru.protostr
+0
-8
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
...onfig_helpers/tests/configs/protostr/shared_lstm.protostr
+0
-10
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
...ig_helpers/tests/configs/protostr/test_rnn_group.protostr
+2
-27
python/paddle/v2/layer.py
python/paddle/v2/layer.py
+1
-1
未找到文件。
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
浏览文件 @
09f34c4b
...
@@ -214,7 +214,6 @@ void RecurrentGradientMachine::init(
...
@@ -214,7 +214,6 @@ void RecurrentGradientMachine::init(
inFrameLines_
[
i
].
linkName
=
subModelConfig
->
in_links
(
i
).
link_name
();
inFrameLines_
[
i
].
linkName
=
subModelConfig
->
in_links
(
i
).
link_name
();
inFrameLines_
[
i
].
inLayer
=
inFrameLines_
[
i
].
inLayer
=
rootNetwork_
->
getLayer
(
subModelConfig
->
in_links
(
i
).
layer_name
());
rootNetwork_
->
getLayer
(
subModelConfig
->
in_links
(
i
).
layer_name
());
inFrameLines_
[
i
].
hasSubseq
=
subModelConfig
->
in_links
(
i
).
has_subseq
();
}
}
outFrameLines_
.
resize
(
subModelConfig
->
out_links_size
());
outFrameLines_
.
resize
(
subModelConfig
->
out_links_size
());
...
@@ -241,11 +240,8 @@ void RecurrentGradientMachine::init(
...
@@ -241,11 +240,8 @@ void RecurrentGradientMachine::init(
rootNetwork_
->
getLayer
(
memoryConfig
.
boot_layer_name
());
rootNetwork_
->
getLayer
(
memoryConfig
.
boot_layer_name
());
LayerConfig
scatterConfig
=
*
agentConfig
;
LayerConfig
scatterConfig
=
*
agentConfig
;
memoryFrameLines_
[
i
].
is_sequence
=
memoryConfig
.
is_sequence
();
memoryFrameLines_
[
i
].
rootAgent
.
reset
(
memoryFrameLines_
[
i
].
rootAgent
.
reset
(
memoryConfig
.
is_sequence
()
new
ScatterAgentLayer
(
scatterConfig
));
?
new
SequenceScatterAgentLayer
(
scatterConfig
)
:
new
ScatterAgentLayer
(
scatterConfig
));
memoryFrameLines_
[
i
].
rootAgent
->
init
(
LayerMap
(),
parameterMap_
);
memoryFrameLines_
[
i
].
rootAgent
->
init
(
LayerMap
(),
parameterMap_
);
memoryFrameLines_
[
i
].
bootLayer
=
memoryFrameLines_
[
i
].
rootAgent
;
memoryFrameLines_
[
i
].
bootLayer
=
memoryFrameLines_
[
i
].
rootAgent
;
...
@@ -267,9 +263,7 @@ void RecurrentGradientMachine::init(
...
@@ -267,9 +263,7 @@ void RecurrentGradientMachine::init(
if
(
subModelConfig
->
has_generator
())
{
if
(
subModelConfig
->
has_generator
())
{
memoryFrameLines_
[
i
].
scatterAgents
.
resize
(
2
);
memoryFrameLines_
[
i
].
scatterAgents
.
resize
(
2
);
for
(
auto
&
agent
:
memoryFrameLines_
[
i
].
scatterAgents
)
{
for
(
auto
&
agent
:
memoryFrameLines_
[
i
].
scatterAgents
)
{
agent
.
reset
(
memoryConfig
.
is_sequence
()
agent
.
reset
(
new
ScatterAgentLayer
(
*
agentConfig
));
?
new
SequenceScatterAgentLayer
(
*
agentConfig
)
:
new
ScatterAgentLayer
(
*
agentConfig
));
agent
->
init
(
LayerMap
(),
parameterMap_
);
agent
->
init
(
LayerMap
(),
parameterMap_
);
}
}
}
}
...
@@ -297,8 +291,6 @@ void RecurrentGradientMachine::init(
...
@@ -297,8 +291,6 @@ void RecurrentGradientMachine::init(
if
(
subModelConfig
->
evaluator_names_size
()
>
0
)
{
if
(
subModelConfig
->
evaluator_names_size
()
>
0
)
{
evaluator_
.
reset
(
frames_
[
0
]
->
makeEvaluator
());
evaluator_
.
reset
(
frames_
[
0
]
->
makeEvaluator
());
}
}
targetInfoInlinkId_
=
subModelConfig
->
target_inlinkid
();
}
}
void
RecurrentGradientMachine
::
resizeOrCreateFrames
(
int
numFrames
)
{
void
RecurrentGradientMachine
::
resizeOrCreateFrames
(
int
numFrames
)
{
...
@@ -376,108 +368,102 @@ void RecurrentGradientMachine::prefetch(const std::vector<Argument>& inArgs) {
...
@@ -376,108 +368,102 @@ void RecurrentGradientMachine::prefetch(const std::vector<Argument>& inArgs) {
LOG
(
FATAL
)
<<
"should not use this function"
;
LOG
(
FATAL
)
<<
"should not use this function"
;
}
}
void
RecurrentGradientMachine
::
forward
(
const
std
::
vector
<
Argument
>&
inArgs
,
void
RecurrentGradientMachine
::
checkInputConsistency
(
std
::
vector
<
Argument
>*
outArgs
,
int
inlinkId
,
const
std
::
vector
<
Argument
::
SeqInfo
>&
seqInfo
)
{
PassType
passType
)
{
if
(
commonSeqInfo_
.
empty
())
{
if
(
inFrameLines_
.
empty
()
&&
passType
==
PASS_TEST
)
{
commonSeqInfo_
.
resize
(
seqInfo
.
size
());
generateSequence
();
for
(
size_t
i
=
0
;
i
<
seqInfo
.
size
();
++
i
)
{
return
;
commonSeqInfo_
[
i
].
topLevelLength
=
seqInfo
[
i
].
topLevelLength
;
}
// else forward..
commonSeqInfo_
[
i
].
seqId
=
seqInfo
[
i
].
seqId
;
}
const
Argument
&
input
=
inFrameLines_
[
0
].
inLayer
->
getOutput
();
}
else
{
CHECK
(
input
.
sequenceStartPositions
);
CHECK_EQ
(
commonSeqInfo_
.
size
(),
seqInfo
.
size
())
int
batchSize
=
input
.
getBatchSize
();
<<
" RecurrentGroup "
<<
subModelName_
<<
" input "
<<
inlinkId
size_t
numSequences
=
input
.
getNumSequences
();
<<
" has mismatched number of sequences"
;
const
int
*
starts
=
input
.
sequenceStartPositions
->
getData
(
false
);
for
(
size_t
i
=
0
;
i
<
seqInfo
.
size
();
++
i
)
{
bool
hasSubseq
=
input
.
hasSubseq
();
CHECK_EQ
(
commonSeqInfo_
[
i
].
topLevelLength
,
seqInfo
[
i
].
topLevelLength
)
<<
" RecurrentGroup "
<<
subModelName_
<<
" input "
<<
inlinkId
// In case of !hasSubseq or targetInfoInlinkId_ == -1, all inlinks share the
<<
" has mismatched sequence length"
;
// same inframe info
CHECK_EQ
(
commonSeqInfo_
[
i
].
seqId
,
seqInfo
[
i
].
seqId
)
bool
shareInlinkInfo
=
!
hasSubseq
||
targetInfoInlinkId_
==
-
1
;
<<
" RecurrentGroup "
<<
subModelName_
<<
" input "
<<
inlinkId
<<
" has mismatched sequence length"
;
// Defaultly, share info with the first inlink
if
(
shareInlinkInfo
)
{
targetInfoInlinkId_
=
0
;
}
// check hasSubseq in both config and input are the same
CHECK_EQ
(
hasSubseq
,
inFrameLines_
[
0
].
hasSubseq
);
CHECK_EQ
(
starts
[
numSequences
],
batchSize
);
CHECK
(
input
.
sequenceStartPositions
);
// check other inputs has same sequence length and start
for
(
size_t
i
=
1
;
i
<
inFrameLines_
.
size
();
++
i
)
{
const
Argument
&
input1
=
inFrameLines_
[
i
].
inLayer
->
getOutput
();
CHECK_EQ
((
size_t
)
input1
.
getNumSequences
(),
numSequences
);
// check all inputs should have same hasSubseq flag
CHECK_EQ
(
input
.
hasSubseq
(),
inFrameLines_
[
0
].
hasSubseq
);
// if shareInlinkInfo, checks:
// 1. all inlinks have same number of total tokens
// 2. all inlinks have same number of tokens for each sentence of each
// sample. If hasSubseq, one sample has multiple sentence, else, one
// sample is one sentence
if
(
shareInlinkInfo
)
{
CHECK_EQ
(
input1
.
getBatchSize
(),
batchSize
);
CHECK
(
std
::
equal
(
starts
,
starts
+
numSequences
+
1
,
input1
.
sequenceStartPositions
->
getData
(
false
)));
}
}
}
}
}
if
(
hasSubseq
)
{
void
RecurrentGradientMachine
::
calcNumSequencesAtEachStep
()
{
CHECK
(
input
.
subSequenceStartPositions
);
int
numSequences
=
commonSeqInfo_
.
size
();
size_t
numSubSequences
=
input
.
getNumSubSequences
();
numSeqs_
.
resize
(
maxSequenceLength_
);
const
int
*
subStarts
=
input
.
subSequenceStartPositions
->
getData
(
false
);
for
(
int
i
=
0
;
i
<
numSequences
;
++
i
)
{
CHECK_EQ
(
subStarts
[
numSubSequences
],
batchSize
);
for
(
int
j
=
0
;
j
<
commonSeqInfo_
[
i
].
topLevelLength
;
++
j
)
{
// if hasSubseq, check other inputs has same sub-sequence and sub-start
numSeqs_
[
j
]
=
i
+
1
;
for
(
size_t
i
=
1
;
i
<
inFrameLines_
.
size
();
++
i
)
{
const
Argument
&
input1
=
inFrameLines_
[
i
].
inLayer
->
getOutput
();
CHECK_EQ
((
size_t
)
input1
.
getNumSubSequences
(),
numSubSequences
);
if
(
shareInlinkInfo
)
{
CHECK
(
std
::
equal
(
subStarts
,
subStarts
+
numSubSequences
+
1
,
input1
.
subSequenceStartPositions
->
getData
(
false
)));
}
}
}
}
}
}
void
RecurrentGradientMachine
::
reorganizeInput
(
PassType
passType
)
{
info_
.
clear
();
info_
.
clear
();
info_
.
resize
(
inFrameLines_
.
size
());
info_
.
resize
(
inFrameLines_
.
size
());
commonSeqInfo_
.
clear
();
seqInfos_
.
clear
();
seqInfos_
.
clear
();
seqInfos_
.
resize
(
inFrameLines_
.
size
());
seqInfos_
.
resize
(
inFrameLines_
.
size
());
for
(
size_t
i
=
0
;
i
<
inFrameLines_
.
size
();
i
++
)
{
const
Argument
&
input
=
inFrameLines_
[
i
].
inLayer
->
getOutput
();
if
(
!
input
.
hasSeq
())
{
continue
;
}
input
.
getSeqInfo
(
&
seqInfos_
[
i
]);
checkInputConsistency
(
i
,
seqInfos_
[
i
]);
}
CHECK
(
!
commonSeqInfo_
.
empty
())
<<
"At least one input needs to be sequence or subsequence"
;
maxSequenceLength_
=
commonSeqInfo_
[
0
].
topLevelLength
;
calcNumSequencesAtEachStep
();
for
(
size_t
i
=
0
;
i
<
inFrameLines_
.
size
();
++
i
)
{
const
Argument
&
input
=
inFrameLines_
[
i
].
inLayer
->
getOutput
();
if
(
!
input
.
hasSeq
())
{
seqInfos_
[
i
]
=
commonSeqInfo_
;
}
createInFrameInfo
(
i
,
input
,
passType
);
}
{
{
AsyncGpuBlock
asyncGpuBlock
;
AsyncGpuBlock
asyncGpuBlock
;
// if shareInlinkInfo, only calculate info of the first inlink
// else, calculate info for each inlink
if
(
shareInlinkInfo
)
{
input
.
getSeqInfo
(
&
seqInfos_
[
0
]);
maxSequenceLength_
=
seqInfos_
[
0
][
0
].
topLevelLength
;
createInFrameInfo
(
0
,
input
,
passType
);
}
else
{
for
(
size_t
i
=
0
;
i
<
inFrameLines_
.
size
();
i
++
)
{
const
Argument
&
input1
=
inFrameLines_
[
i
].
inLayer
->
getOutput
();
input1
.
getSeqInfo
(
&
seqInfos_
[
i
]);
maxSequenceLength_
=
seqInfos_
[
i
][
0
].
topLevelLength
;
createInFrameInfo
(
i
,
input1
,
passType
);
}
}
// inFrameLine select rows in real layer one time
// inFrameLine select rows in real layer one time
for
(
size_t
i
=
0
;
i
<
inFrameLines_
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
inFrameLines_
.
size
();
i
++
)
{
int
curInlinkId
=
shareInlinkInfo
?
0
:
i
;
selectRowsOneTime
(
inFrameLines_
[
i
].
inLayer
,
selectRowsOneTime
(
inFrameLines_
[
i
].
inLayer
,
info_
[
curInlinkId
].
allIds
,
info_
[
i
].
allIds
,
&
(
inFrameLines_
[
i
].
outArg
),
&
(
inFrameLines_
[
i
].
outArg
),
passType
);
passType
);
}
}
}
}
resizeOrCreateFrames
(
maxSequenceLength_
);
}
resizeBootFrame
(
numSequences
);
/**
 * Build the id / sequence-position information for every out-link and hand
 * it to that out-link's gather agent.
 *
 * The shared top-level start positions are computed first via
 * calcSequenceStartPositions(); each out-frame line then gets its own Info
 * and start-position vectors from createOutFrameInfo().
 *
 * @param passType not read anywhere in this function body.
 */
void RecurrentGradientMachine::reorganizeOutput(PassType passType) {
  calcSequenceStartPositions();

  for (auto& outFrameLine : outFrameLines_) {
    Info info;
    ICpuGpuVectorPtr sequenceStartPositions;
    ICpuGpuVectorPtr subSequenceStartPositions;
    createOutFrameInfo(outFrameLine,
                       info,
                       sequenceStartPositions,
                       subSequenceStartPositions);

    // The agent layer of an out-link must be a GatherAgentLayer; the check
    // below aborts otherwise.
    auto gatherAgent =
        dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
    CHECK_NOTNULL(gatherAgent);
    gatherAgent->copyIdAndSequenceInfo(sequenceStartPositions,
                                       subSequenceStartPositions,
                                       info.allIds,
                                       info.idIndex);
  }
}
void
RecurrentGradientMachine
::
connectFrames
(
PassType
passType
)
{
for
(
auto
&
memoryFrameLine
:
memoryFrameLines_
)
{
for
(
auto
&
memoryFrameLine
:
memoryFrameLines_
)
{
if
(
memoryFrameLine
.
rootAgent
)
{
if
(
memoryFrameLine
.
rootAgent
)
{
auto
scatterAgent
=
auto
scatterAgent
=
...
@@ -487,8 +473,9 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
...
@@ -487,8 +473,9 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
memoryFrameLine
.
outArg
,
memoryFrameLine
.
outArg
,
memoryFrameLine
.
allIds
,
memoryFrameLine
.
allIds
,
/* idIndex */
0
,
/* idIndex */
0
,
memoryFrameLine
.
allIds
->
getSize
());
memoryFrameLine
.
allIds
->
getSize
(),
if
(
memoryFrameLine
.
is_sequence
)
{
// memoryConfig is sequence
/* handleBackward */
true
);
if
(
memoryFrameLine
.
sequenceStartPositions
)
{
int
size
=
memoryFrameLine
.
sequenceStartPositions
->
getSize
();
int
size
=
memoryFrameLine
.
sequenceStartPositions
->
getSize
();
scatterAgent
->
setSequenceStartPositions
(
scatterAgent
->
setSequenceStartPositions
(
memoryFrameLine
.
sequenceStartPositions
,
memoryFrameLine
.
sequenceStartPositions
,
...
@@ -501,28 +488,26 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
...
@@ -501,28 +488,26 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
for
(
auto
&
outFrameLine
:
outFrameLines_
)
{
for
(
auto
&
outFrameLine
:
outFrameLines_
)
{
auto
gatherAgent
=
auto
gatherAgent
=
dynamic_cast
<
GatherAgentLayer
*>
(
outFrameLine
.
agentLayer
.
get
());
dynamic_cast
<
GatherAgentLayer
*>
(
outFrameLine
.
agentLayer
.
get
());
CHECK_NOTNULL
(
gatherAgent
);
gatherAgent
->
clearRealLayers
();
gatherAgent
->
copyIdAndSequenceInfo
(
input
,
info_
[
targetInfoInlinkId_
].
allIds
,
info_
[
targetInfoInlinkId_
].
idIndex
);
}
}
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
int
idSize
=
0
;
// connect in_links
// connect in_links
for
(
size_t
j
=
0
;
j
<
inFrameLines_
.
size
();
++
j
)
{
for
(
size_t
j
=
0
;
j
<
inFrameLines_
.
size
();
++
j
)
{
Info
&
info
=
info_
[
shareInlinkInfo
?
0
:
j
];
Info
&
info
=
info_
[
j
];
// idSize denotes the sum number of tokens in each length i
// idSize denotes the sum number of tokens in each length i
idSize
=
info
.
idIndex
[
i
+
1
]
-
info
.
idIndex
[
i
];
int
idIndex
=
info
.
idIndex
.
empty
()
?
0
:
info
.
idIndex
[
i
];
int
idSize
=
info
.
idIndex
.
empty
()
?
numSeqs_
[
i
]
:
info
.
idIndex
[
i
+
1
]
-
info
.
idIndex
[
i
];
InFrameLine
inFrameLine
=
inFrameLines_
[
j
];
InFrameLine
inFrameLine
=
inFrameLines_
[
j
];
auto
scatterAgent
=
auto
scatterAgent
=
dynamic_cast
<
ScatterAgentLayer
*>
(
inFrameLine
.
agents
[
i
].
get
());
dynamic_cast
<
ScatterAgentLayer
*>
(
inFrameLine
.
agents
[
i
].
get
());
scatterAgent
->
setRealLayerAndOutput
(
inFrameLine
.
inLayer
,
scatterAgent
->
setRealLayerAndOutput
(
inFrameLine
.
inLayer
,
inFrameLine
.
outArg
,
inFrameLine
.
outArg
,
info
.
allIds
,
info
.
allIds
,
info
.
idIndex
[
i
],
idIndex
,
idSize
);
idSize
,
if
(
hasSubseq
)
{
i
==
0
);
if
(
info
.
sequenceStartPositions
)
{
// size: the length of subsequence
// size: the length of subsequence
int
size
=
info
.
seqStartPosIndex
[
i
+
1
]
-
info
.
seqStartPosIndex
[
i
];
int
size
=
info
.
seqStartPosIndex
[
i
+
1
]
-
info
.
seqStartPosIndex
[
i
];
scatterAgent
->
setSequenceStartPositions
(
scatterAgent
->
setSequenceStartPositions
(
...
@@ -536,11 +521,6 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
...
@@ -536,11 +521,6 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
dynamic_cast
<
GatherAgentLayer
*>
(
outFrameLine
.
agentLayer
.
get
());
dynamic_cast
<
GatherAgentLayer
*>
(
outFrameLine
.
agentLayer
.
get
());
gatherAgent
->
addRealLayer
(
outFrameLine
.
frames
[
i
]);
gatherAgent
->
addRealLayer
(
outFrameLine
.
frames
[
i
]);
}
}
// connect memory links
// Adopt info_[0].idIndex because seq which has_subseq=True
// doesn't support Memory with !hasSubseq bootlayer;
// And inlinks that !hasSubSeq must have same inlink length.
idSize
=
info_
[
0
].
idIndex
[
i
+
1
]
-
info_
[
0
].
idIndex
[
i
];
for
(
auto
&
memoryFrameLine
:
memoryFrameLines_
)
{
for
(
auto
&
memoryFrameLine
:
memoryFrameLines_
)
{
NeuralNetwork
::
connect
(
NeuralNetwork
::
connect
(
memoryFrameLine
.
agents
[
i
],
memoryFrameLine
.
agents
[
i
],
...
@@ -548,6 +528,28 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
...
@@ -548,6 +528,28 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
numSeqs_
[
i
]
/*height of agent*/
);
numSeqs_
[
i
]
/*height of agent*/
);
}
}
}
}
}
void
RecurrentGradientMachine
::
forward
(
const
std
::
vector
<
Argument
>&
inArgs
,
std
::
vector
<
Argument
>*
outArgs
,
PassType
passType
)
{
/* inArgs and outArgs are not used.
The inputs are inFrameLines_[i].inLayer.
The outputs are outFramesLines_[i].agentLayer
*/
if
(
inFrameLines_
.
empty
()
&&
passType
==
PASS_TEST
)
{
generateSequence
();
return
;
}
// else forward..
reorganizeInput
(
passType
);
int
numSequences
=
commonSeqInfo_
.
size
();
resizeOrCreateFrames
(
maxSequenceLength_
);
resizeBootFrame
(
numSequences
);
connectFrames
(
passType
);
REGISTER_TIMER_INFO
(
"RecurrentFwTime"
,
"RecurrentFwTime"
);
REGISTER_TIMER_INFO
(
"RecurrentFwTime"
,
"RecurrentFwTime"
);
// forward
// forward
...
@@ -558,16 +560,12 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
...
@@ -558,16 +560,12 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
const
std
::
vector
<
Argument
>
inArgs
;
const
std
::
vector
<
Argument
>
inArgs
;
std
::
vector
<
Argument
>
outArgs
;
std
::
vector
<
Argument
>
outArgs
;
frames_
[
i
]
->
forward
(
inArgs
,
&
outArgs
,
passType
);
frames_
[
i
]
->
forward
(
inArgs
,
&
outArgs
,
passType
);
if
(
hasSubseq
)
{
for
(
auto
&
outFrameLine
:
outFrameLines_
)
{
CHECK
(
outFrameLine
.
frames
[
i
]
->
getOutput
().
sequenceStartPositions
)
<<
"In hierachical RNN, all out links should be from sequences."
;
}
}
}
}
if
(
evaluator_
&&
passType
==
PASS_TEST
)
{
if
(
evaluator_
&&
passType
==
PASS_TEST
)
{
this
->
eval
(
evaluator_
.
get
());
this
->
eval
(
evaluator_
.
get
());
}
}
reorganizeOutput
(
passType
);
}
}
void
RecurrentGradientMachine
::
backward
(
const
UpdateCallback
&
callback
)
{
void
RecurrentGradientMachine
::
backward
(
const
UpdateCallback
&
callback
)
{
...
@@ -634,76 +632,228 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
...
@@ -634,76 +632,228 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
this
->
beamSearchStatistics_
=
nullptr
;
this
->
beamSearchStatistics_
=
nullptr
;
}
}
}
}
namespace {

/**
 * Convert a vector of lengths into start offsets, in place.
 *
 * On entry, every element except the last holds a length; the value of the
 * last element is ignored. On exit, element i holds the sum of the lengths
 * before i, so the last element holds the total of all lengths.
 *
 * Example: {3, 2, 4, <ignored>} becomes {0, 3, 5, 9}.
 *
 * An empty vector is left untouched (calling back() on it would be
 * undefined behaviour).
 *
 * @param starts lengths on input, start offsets on output.
 */
void lenToStarts(std::vector<int>& starts) {
  if (starts.empty()) {
    return;
  }
  // Zero the trailing placeholder so it contributes nothing to the running
  // sum; the loop below then leaves the grand total in that last slot.
  starts.back() = 0;
  int pos = 0;
  for (auto& start : starts) {
    const int len = start;
    start = pos;
    pos += len;
  }
}

}  // namespace
/**
 * Fill sequenceStartPositions_ with start offsets derived from
 * commonSeqInfo_.
 *
 * Each entry's topLevelLength is stored at index seqId, the lengths are
 * converted to start offsets by lenToStarts(), and the result (one more
 * element than commonSeqInfo_ has entries) is copied into
 * sequenceStartPositions_.
 */
void RecurrentGradientMachine::calcSequenceStartPositions() {
  const size_t numEntries = commonSeqInfo_.size();

  // One extra slot at the end receives the total length.
  std::vector<int> offsets(numEntries + 1);
  for (size_t k = 0; k < numEntries; ++k) {
    const auto& entry = commonSeqInfo_[k];
    offsets[entry.seqId] = entry.topLevelLength;
  }
  lenToStarts(offsets);

  ICpuGpuVector::resizeOrCreate(
      sequenceStartPositions_, offsets.size(), false);
  int* dst = sequenceStartPositions_->getMutableData(false);
  std::copy(offsets.begin(), offsets.end(), dst);
}
/**
 * Verify that the frames of one out-link agree with each other and with
 * numSeqs_.
 *
 * For each step i in [0, maxSequenceLength_):
 *  - the frame's output must report the same hasSeq() value as frame 0;
 *  - the frame's output must report numSeqs_[i] sequences.
 * Any mismatch fails a CHECK.
 *
 * @param outFrameLine out-link whose per-step frames are inspected.
 */
void RecurrentGradientMachine::checkOutputConsistency(
    OutFrameLine& outFrameLine) {
  // Frame 0 is the reference that all other frames are compared against.
  const bool hasSeq = outFrameLine.frames[0]->getOutput().hasSeq();

  int i = 0;
  while (i < maxSequenceLength_) {
    const LayerPtr& frame = outFrameLine.frames[i];
    CHECK_EQ(hasSeq, frame->getOutput().hasSeq());

    const int numSequences = frame->getOutput().getNumSequences();
    CHECK_EQ(numSeqs_[i], numSequences);
    ++i;
  }
}
/**
 * Produce the gather information for one out-link.
 *
 * Runs checkOutputConsistency() first, then dispatches on frame 0's output:
 * when that output has no sequence info, createOutFrameInfo_seq() is used;
 * otherwise createOutFrameInfo_subseq() is used.
 *
 * @param outFrameLine              out-link being processed.
 * @param info                      receives allIds / idIndex.
 * @param sequenceStartPositions    receives the sequence start positions.
 * @param subSequenceStartPositions passed through to the selected helper.
 */
void RecurrentGradientMachine::createOutFrameInfo(
    OutFrameLine& outFrameLine,
    Info& info,
    ICpuGpuVectorPtr& sequenceStartPositions,
    ICpuGpuVectorPtr& subSequenceStartPositions) {
  checkOutputConsistency(outFrameLine);

  const bool frameOutputHasSeq =
      outFrameLine.frames[0]->getOutput().hasSeq();
  if (frameOutputHasSeq) {
    createOutFrameInfo_subseq(outFrameLine,
                              info,
                              sequenceStartPositions,
                              subSequenceStartPositions);
    return;
  }
  createOutFrameInfo_seq(outFrameLine,
                         info,
                         sequenceStartPositions,
                         subSequenceStartPositions);
}
/**
 * Build gather ids for an out-link handled by the "seq" branch of
 * createOutFrameInfo().
 *
 * For every step i and every sequence j reported by frame i's output, one
 * id is appended: the start offset of sequence commonSeqInfo_[j].seqId plus
 * i, or the mirrored position (start + length - 1 - i) when reversed_ is
 * set. After each step the running id count is appended to info.idIndex.
 *
 * @param outFrameLine              out-link whose frames are read.
 * @param info                      receives allIds and idIndex.
 * @param sequenceStartPositions    set to sequenceStartPositions_.
 * @param subSequenceStartPositions not modified by this function.
 */
void RecurrentGradientMachine::createOutFrameInfo_seq(
    OutFrameLine& outFrameLine,
    Info& info,
    ICpuGpuVectorPtr& sequenceStartPositions,
    ICpuGpuVectorPtr& subSequenceStartPositions) {
  std::vector<int> gatheredIds;
  info.idIndex.resize(1, 0);  // first idIndex = 0

  const int* topStarts = sequenceStartPositions_->getData(false);

  for (int step = 0; step < maxSequenceLength_; ++step) {
    const LayerPtr& stepFrame = outFrameLine.frames[step];
    const size_t activeSeqs = stepFrame->getOutput().getNumSequences();

    for (size_t s = 0; s < activeSeqs; ++s) {
      const int begin = topStarts[commonSeqInfo_[s].seqId];
      const int length = commonSeqInfo_[s].topLevelLength;
      if (reversed_) {
        gatheredIds.push_back(begin + length - 1 - step);
      } else {
        gatheredIds.push_back(begin + step);
      }
    }
    info.idIndex.push_back(gatheredIds.size());
  }

  sequenceStartPositions = sequenceStartPositions_;
  copyScattedId(gatheredIds, &info.allIds, gatheredIds.size());
  CHECK_EQ(info.idIndex.size(), static_cast<size_t>(maxSequenceLength_ + 1));
}
/**
 * Build gather ids and (sub)sequence start positions for an out-link handled
 * by the "subseq" branch of createOutFrameInfo().
 *
 * Pass 1 records, for every (sequence, step) pair, the length of the
 * sequence that frame `step` produced for it; the slot used is the start
 * offset of the top-level sequence plus the step. lenToStarts() then turns
 * those lengths into sub-sequence start offsets.
 * Pass 2 walks the frames in the same order and appends every row id of each
 * sub-sequence to the id list, recording the running count per step in
 * info.idIndex.
 *
 * @param outFrameLine              out-link whose frames are read.
 * @param info                      receives allIds and idIndex.
 * @param sequenceStartPositions    resized/created and filled with the
 *                                  sub-start value at each top-level start.
 * @param subSequenceStartPositions resized/created and filled with all
 *                                  sub-sequence start offsets.
 */
void RecurrentGradientMachine::createOutFrameInfo_subseq(
    OutFrameLine& outFrameLine,
    Info& info,
    ICpuGpuVectorPtr& sequenceStartPositions,
    ICpuGpuVectorPtr& subSequenceStartPositions) {
  const size_t totalSeqs = commonSeqInfo_.size();
  std::vector<int> gatheredIds;
  info.idIndex.resize(1, 0);  // first idIndex = 0

  const int* topStarts = sequenceStartPositions_->getData(false);
  // One slot per (sequence, step) pair plus the trailing total slot.
  std::vector<int> subStarts(topStarts[totalSeqs] + 1);

  // Pass 1: collect the length of every sub-sequence.
  for (int step = 0; step < maxSequenceLength_; ++step) {
    const LayerPtr& stepFrame = outFrameLine.frames[step];
    const size_t activeSeqs = stepFrame->getOutput().getNumSequences();
    const int* frameStarts =
        stepFrame->getOutput().sequenceStartPositions->getData(false);
    for (size_t s = 0; s < activeSeqs; ++s) {
      const int slot = topStarts[commonSeqInfo_[s].seqId] + step;
      subStarts[slot] = frameStarts[s + 1] - frameStarts[s];
    }
  }
  lenToStarts(subStarts);

  // Pass 2: emit the row ids of each sub-sequence in frame order.
  for (int step = 0; step < maxSequenceLength_; ++step) {
    const LayerPtr& stepFrame = outFrameLine.frames[step];
    const size_t activeSeqs = stepFrame->getOutput().getNumSequences();
    for (size_t s = 0; s < activeSeqs; ++s) {
      const int slot = topStarts[commonSeqInfo_[s].seqId] + step;
      const int rowEnd = subStarts[slot + 1];
      for (int row = subStarts[slot]; row < rowEnd; ++row) {
        gatheredIds.push_back(row);
      }
    }
    info.idIndex.push_back(gatheredIds.size());
  }

  ICpuGpuVector::resizeOrCreate(
      subSequenceStartPositions, subStarts.size(), false);
  int* subDst = subSequenceStartPositions->getMutableData(false);
  std::copy(subStarts.begin(), subStarts.end(), subDst);

  ICpuGpuVector::resizeOrCreate(sequenceStartPositions, totalSeqs + 1, false);
  int* seqDst = sequenceStartPositions->getMutableData(false);
  for (size_t s = 0; s <= totalSeqs; ++s) {
    seqDst[s] = subStarts[topStarts[s]];
  }

  copyScattedId(gatheredIds, &info.allIds, gatheredIds.size());
  CHECK_EQ(info.idIndex.size(), static_cast<size_t>(maxSequenceLength_ + 1));
}
/* create scattered id information for all realLayer of inFrameLines one time.
/* create scattered id information for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions information
* If hasSubseq, will also create scattered sequenceStartPositions information
* for all realLayer of inFrameLines one time.
* for all realLayer of inFrameLines one time.
*/
*/
void
RecurrentGradientMachine
::
createInFrameInfo
(
int
inlinkId
,
void
RecurrentGradientMachine
::
createInFrameInfo
(
int
inlinkId
,
const
Argument
&
input
,
const
Argument
&
input
,
PassType
passType
)
{
PassType
passType
)
{
bool
hasSubseq
=
input
.
hasSubseq
();
if
(
!
input
.
hasSeq
())
{
// numSequences: # samples(sequences) in a batch
createInFrameInfo_nonseq
(
inlinkId
,
input
,
passType
);
size_t
numSequences
=
input
.
getNumSequences
();
}
else
if
(
!
input
.
hasSubseq
())
{
createInFrameInfo_seq
(
inlinkId
,
input
,
passType
);
}
else
{
createInFrameInfo_subseq
(
inlinkId
,
input
,
passType
);
}
}
void
RecurrentGradientMachine
::
createInFrameInfo_nonseq
(
int
inlinkId
,
const
Argument
&
input
,
PassType
passType
)
{
std
::
vector
<
int
>
allIds
;
std
::
vector
<
int
>
allIds
;
auto
&
seqInfo
=
seqInfos_
[
inlinkId
];
auto
&
seqInfo
=
seqInfos_
[
inlinkId
];
numSeqs_
.
clear
();
Info
*
inlinkInfo
=
&
info_
[
inlinkId
];
Info
*
inlinkInfo
=
&
info_
[
inlinkId
];
inlinkInfo
->
idIndex
.
clear
();
inlinkInfo
->
idIndex
.
clear
();
inlinkInfo
->
idIndex
.
push_back
(
0
);
// first idIndex = 0
for
(
size_t
i
=
0
;
i
<
seqInfo
.
size
();
++
i
)
{
allIds
.
push_back
(
seqInfo
[
i
].
seqId
);
}
// copy and check scatterId
copyScattedId
(
allIds
,
&
inlinkInfo
->
allIds
,
input
.
getBatchSize
());
}
void
RecurrentGradientMachine
::
createInFrameInfo_seq
(
int
inlinkId
,
const
Argument
&
input
,
PassType
passType
)
{
std
::
vector
<
int
>
allIds
;
auto
&
seqInfo
=
seqInfos_
[
inlinkId
];
Info
*
inlinkInfo
=
&
info_
[
inlinkId
];
inlinkInfo
->
idIndex
.
resize
(
1
,
0
);
// first idIndex = 0
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
for
(
int
j
=
0
;
j
<
numSeqs_
[
i
];
++
j
)
{
int
seqLength
=
seqInfo
[
j
].
topLevelLength
;
int
seqStart
=
seqInfo
[
j
].
seqStart
;
allIds
.
push_back
(
reversed_
?
(
seqStart
+
seqLength
-
1
-
i
)
:
(
seqStart
+
i
));
}
inlinkInfo
->
idIndex
.
push_back
(
allIds
.
size
());
}
// copy and check scatterId
copyScattedId
(
allIds
,
&
inlinkInfo
->
allIds
,
input
.
getBatchSize
());
CHECK_EQ
(
inlinkInfo
->
idIndex
.
size
(),
static_cast
<
size_t
>
(
maxSequenceLength_
+
1
));
}
void
RecurrentGradientMachine
::
createInFrameInfo_subseq
(
int
inlinkId
,
const
Argument
&
input
,
PassType
passType
)
{
std
::
vector
<
int
>
allIds
;
auto
&
seqInfo
=
seqInfos_
[
inlinkId
];
Info
*
inlinkInfo
=
&
info_
[
inlinkId
];
inlinkInfo
->
idIndex
.
resize
(
1
,
0
);
// first idIndex = 0
std
::
vector
<
int
>
sequenceStartPositions
;
std
::
vector
<
int
>
sequenceStartPositions
;
const
int
*
subSequenceStartPositions
=
nullptr
;
const
int
*
subSequenceStartPositions
=
nullptr
;
if
(
hasSubseq
)
{
// for sequenceScatterAgentLayer
subSequenceStartPositions
=
input
.
subSequenceStartPositions
->
getData
(
false
);
subSequenceStartPositions
=
input
.
subSequenceStartPositions
->
getData
(
false
);
inlinkInfo
->
seqStartPosIndex
.
clear
();
inlinkInfo
->
seqStartPosIndex
.
clear
();
inlinkInfo
->
seqStartPosIndex
.
push_back
(
0
);
// first seqStartPosIndex = 0
inlinkInfo
->
seqStartPosIndex
.
push_back
(
0
);
// first seqStartPosIndex = 0
}
// maxSequenceLength_: max topLevelLength in allsamples
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
if
(
hasSubseq
)
{
sequenceStartPositions
.
push_back
(
0
);
// first element = 0
sequenceStartPositions
.
push_back
(
0
);
// first element = 0
for
(
int
j
=
0
;
j
<
numSeqs_
[
i
];
++
j
)
{
}
int
subSeqStart
=
subSequenceStartPositions
[
seqInfo
[
j
].
subSeqStart
+
i
];
int
numSeqs
=
0
;
int
subSeqEnd
=
subSequenceStartPositions
[
seqInfo
[
j
].
subSeqStart
+
i
+
1
];
for
(
size_t
j
=
0
;
j
<
numSequences
;
++
j
)
{
for
(
int
k
=
subSeqStart
;
k
<
subSeqEnd
;
++
k
)
{
int
seqLength
=
seqInfo
[
j
].
topLevelLength
;
allIds
.
push_back
(
k
);
if
(
i
>=
seqLength
)
{
break
;
}
++
numSeqs
;
if
(
hasSubseq
)
{
int
subSeqStart
=
subSequenceStartPositions
[
seqInfo
[
j
].
subSeqStart
+
i
];
int
subSeqEnd
=
subSequenceStartPositions
[
seqInfo
[
j
].
subSeqStart
+
i
+
1
];
for
(
int
k
=
subSeqStart
;
k
<
subSeqEnd
;
++
k
)
{
allIds
.
push_back
(
k
);
}
sequenceStartPositions
.
push_back
(
sequenceStartPositions
.
back
()
+
subSeqEnd
-
subSeqStart
);
}
else
{
int
seqStart
=
seqInfo
[
j
].
seqStart
;
allIds
.
push_back
(
reversed_
?
(
seqStart
+
seqLength
-
1
-
i
)
:
(
seqStart
+
i
));
}
}
sequenceStartPositions
.
push_back
(
sequenceStartPositions
.
back
()
+
subSeqEnd
-
subSeqStart
);
}
}
inlinkInfo
->
idIndex
.
push_back
(
allIds
.
size
());
inlinkInfo
->
idIndex
.
push_back
(
allIds
.
size
());
numSeqs_
.
push_back
(
numSeqs
);
inlinkInfo
->
seqStartPosIndex
.
push_back
(
sequenceStartPositions
.
size
());
if
(
hasSubseq
)
{
inlinkInfo
->
seqStartPosIndex
.
push_back
(
sequenceStartPositions
.
size
());
}
}
if
(
hasSubseq
)
{
// inFrameLine create sequenceStartPositions one time
CHECK_EQ
(
sequenceStartPositions
.
size
(),
static_cast
<
size_t
>
(
maxSequenceLength_
+
input
.
getNumSubSequences
()));
CHECK_EQ
(
inlinkInfo
->
seqStartPosIndex
.
size
(),
static_cast
<
size_t
>
(
maxSequenceLength_
+
1
));
createSeqPos
(
sequenceStartPositions
,
&
inlinkInfo
->
sequenceStartPositions
);
}
}
// inFrameLine create sequenceStartPositions one time
CHECK_EQ
(
sequenceStartPositions
.
size
(),
static_cast
<
size_t
>
(
maxSequenceLength_
+
input
.
getNumSubSequences
()));
CHECK_EQ
(
inlinkInfo
->
seqStartPosIndex
.
size
(),
static_cast
<
size_t
>
(
maxSequenceLength_
+
1
));
createSeqPos
(
sequenceStartPositions
,
&
inlinkInfo
->
sequenceStartPositions
);
// copy and check scatterId
// copy and check scatterId
copyScattedId
(
allIds
,
&
inlinkInfo
->
allIds
,
input
.
getBatchSize
());
copyScattedId
(
allIds
,
&
inlinkInfo
->
allIds
,
input
.
getBatchSize
());
...
@@ -717,11 +867,11 @@ void RecurrentGradientMachine::createMemoryFrameInfo(
...
@@ -717,11 +867,11 @@ void RecurrentGradientMachine::createMemoryFrameInfo(
const
Argument
&
input
=
(
*
memoryFrameLine
).
rootLayer
->
getOutput
();
const
Argument
&
input
=
(
*
memoryFrameLine
).
rootLayer
->
getOutput
();
size_t
numSequences
=
input
.
getNumSequences
();
size_t
numSequences
=
input
.
getNumSequences
();
std
::
vector
<
int
>
allIds
;
std
::
vector
<
int
>
allIds
;
bool
seqFlag
=
(
*
memoryFrameLine
).
is_sequence
;
bool
seqFlag
=
input
.
hasSeq
();
CHECK
(
!
input
.
hasSubseq
())
<<
"Subsequence boot layer for memory is not supported"
;
if
(
seqFlag
)
{
// for sequenceScatterAgentLayer
if
(
seqFlag
)
{
// for sequenceScatterAgentLayer
CHECK
(
input
.
sequenceStartPositions
)
<<
"boot layer must be a sequence when is_sequence = true"
;
std
::
vector
<
int
>
sequenceStartPositions
;
std
::
vector
<
int
>
sequenceStartPositions
;
sequenceStartPositions
.
push_back
(
0
);
// first element = 0
sequenceStartPositions
.
push_back
(
0
);
// first element = 0
const
int
*
starts
=
input
.
sequenceStartPositions
->
getData
(
false
);
const
int
*
starts
=
input
.
sequenceStartPositions
->
getData
(
false
);
...
@@ -804,8 +954,7 @@ size_t RecurrentGradientMachine::getGenBatchSize() {
...
@@ -804,8 +954,7 @@ size_t RecurrentGradientMachine::getGenBatchSize() {
for
(
auto
&
memoryFrameLine
:
memoryFrameLines_
)
{
for
(
auto
&
memoryFrameLine
:
memoryFrameLines_
)
{
if
(
!
memoryFrameLine
.
rootLayer
)
continue
;
if
(
!
memoryFrameLine
.
rootLayer
)
continue
;
Argument
&
bootArg
=
memoryFrameLine
.
rootLayer
->
getOutput
();
Argument
&
bootArg
=
memoryFrameLine
.
rootLayer
->
getOutput
();
size_t
batchSize
=
memoryFrameLine
.
is_sequence
?
bootArg
.
getNumSequences
()
size_t
batchSize
=
bootArg
.
getNumSequences
();
:
bootArg
.
getBatchSize
();
if
(
numSequences
)
{
if
(
numSequences
)
{
CHECK_EQ
(
numSequences
,
batchSize
);
CHECK_EQ
(
numSequences
,
batchSize
);
}
else
{
}
else
{
...
@@ -845,12 +994,7 @@ void RecurrentGradientMachine::generateSequence() {
...
@@ -845,12 +994,7 @@ void RecurrentGradientMachine::generateSequence() {
if
(
memoryFrameLine
.
rootAgent
)
{
if
(
memoryFrameLine
.
rootAgent
)
{
auto
scatterAgent
=
auto
scatterAgent
=
dynamic_cast
<
ScatterAgentLayer
*>
(
memoryFrameLine
.
rootAgent
.
get
());
dynamic_cast
<
ScatterAgentLayer
*>
(
memoryFrameLine
.
rootAgent
.
get
());
bool
seqFlag
=
memoryFrameLine
.
is_sequence
;
scatterAgent
->
setRealLayer
(
memoryFrameLine
.
rootLayer
,
ids
);
scatterAgent
->
setRealLayer
(
memoryFrameLine
.
rootLayer
,
ids
,
seqFlag
);
if
(
seqFlag
)
{
CHECK
(
memoryFrameLine
.
rootLayer
->
getOutput
().
sequenceStartPositions
)
<<
"boot layer must be a sequence when is_sequence = true"
;
}
}
}
NeuralNetwork
::
connect
(
NeuralNetwork
::
connect
(
memoryFrameLine
.
agents
[
0
],
memoryFrameLine
.
bootLayer
,
ids
.
size
());
memoryFrameLine
.
agents
[
0
],
memoryFrameLine
.
bootLayer
,
ids
.
size
());
...
@@ -858,6 +1002,7 @@ void RecurrentGradientMachine::generateSequence() {
...
@@ -858,6 +1002,7 @@ void RecurrentGradientMachine::generateSequence() {
// boot layer forward
// boot layer forward
AsyncGpuBlock
asyncGpuBlock
;
AsyncGpuBlock
asyncGpuBlock
;
for
(
auto
&
memoryFrameLine
:
memoryFrameLines_
)
{
for
(
auto
&
memoryFrameLine
:
memoryFrameLines_
)
{
memoryFrameLine
.
bootLayer
->
forward
(
PASS_TEST
);
memoryFrameLine
.
bootLayer
->
forward
(
PASS_TEST
);
}
}
...
@@ -930,8 +1075,7 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
...
@@ -930,8 +1075,7 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
auto
scatterAgent
=
dynamic_cast
<
ScatterAgentLayer
*>
(
auto
scatterAgent
=
dynamic_cast
<
ScatterAgentLayer
*>
(
memoryFrameLine
.
scatterAgents
[
machineCur
].
get
());
memoryFrameLine
.
scatterAgents
[
machineCur
].
get
());
scatterAgent
->
setRealLayer
(
memoryFrameLine
.
frames
[
machinePrev
],
scatterAgent
->
setRealLayer
(
memoryFrameLine
.
frames
[
machinePrev
],
scatterIds
,
scatterIds
);
memoryFrameLine
.
is_sequence
);
scatterAgent
->
forward
(
PASS_TEST
);
scatterAgent
->
forward
(
PASS_TEST
);
NeuralNetwork
::
connect
(
memoryFrameLine
.
agents
[
machineCur
],
NeuralNetwork
::
connect
(
memoryFrameLine
.
agents
[
machineCur
],
memoryFrameLine
.
scatterAgents
[
machineCur
]);
memoryFrameLine
.
scatterAgents
[
machineCur
]);
...
@@ -1003,8 +1147,7 @@ void RecurrentGradientMachine::connectPrevFrame(int stepId,
...
@@ -1003,8 +1147,7 @@ void RecurrentGradientMachine::connectPrevFrame(int stepId,
auto
scatterAgent
=
dynamic_cast
<
ScatterAgentLayer
*>
(
auto
scatterAgent
=
dynamic_cast
<
ScatterAgentLayer
*>
(
memoryFrameLine
.
scatterAgents
[
machineCur
].
get
());
memoryFrameLine
.
scatterAgents
[
machineCur
].
get
());
scatterAgent
->
setRealLayer
(
memoryFrameLine
.
frames
[
machinePrev
],
scatterAgent
->
setRealLayer
(
memoryFrameLine
.
frames
[
machinePrev
],
isOutIds
?
topIds_
:
machineIds_
,
isOutIds
?
topIds_
:
machineIds_
);
memoryFrameLine
.
is_sequence
);
scatterAgent
->
forward
(
PASS_TEST
);
scatterAgent
->
forward
(
PASS_TEST
);
NeuralNetwork
::
connect
(
memoryFrameLine
.
agents
[
machineCur
],
NeuralNetwork
::
connect
(
memoryFrameLine
.
agents
[
machineCur
],
memoryFrameLine
.
scatterAgents
[
machineCur
]);
memoryFrameLine
.
scatterAgents
[
machineCur
]);
...
...
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
浏览文件 @
09f34c4b
...
@@ -284,6 +284,16 @@ public:
...
@@ -284,6 +284,16 @@ public:
}
}
protected:
protected:
std
::
vector
<
Argument
::
SeqInfo
>
commonSeqInfo_
;
ICpuGpuVectorPtr
sequenceStartPositions_
;
void
calcSequenceStartPositions
();
void
checkInputConsistency
(
int
inlinkId
,
const
std
::
vector
<
Argument
::
SeqInfo
>&
seqInfo
);
void
reorganizeInput
(
PassType
passType
);
void
reorganizeOutput
(
PassType
passType
);
void
connectFrames
(
PassType
passType
);
void
calcNumSequencesAtEachStep
();
void
resizeOrCreateFrames
(
int
numFrames
);
void
resizeOrCreateFrames
(
int
numFrames
);
void
resizeBootFrame
(
int
numSequences
);
void
resizeBootFrame
(
int
numSequences
);
...
@@ -295,8 +305,7 @@ protected:
...
@@ -295,8 +305,7 @@ protected:
std
::
string
linkName
;
std
::
string
linkName
;
LayerPtr
inLayer
;
LayerPtr
inLayer
;
std
::
vector
<
LayerPtr
>
agents
;
// Scatter Agents to reform batch input
std
::
vector
<
LayerPtr
>
agents
;
// Scatter Agents to reform batch input
bool
hasSubseq
;
Argument
outArg
;
// scatter output argument
Argument
outArg
;
// scatter output argument
};
};
std
::
vector
<
InFrameLine
>
inFrameLines_
;
std
::
vector
<
InFrameLine
>
inFrameLines_
;
...
@@ -318,7 +327,6 @@ protected:
...
@@ -318,7 +327,6 @@ protected:
std
::
vector
<
LayerPtr
>
agents
;
std
::
vector
<
LayerPtr
>
agents
;
std
::
vector
<
LayerPtr
>
scatterAgents
;
// scatter agent used by beam search
std
::
vector
<
LayerPtr
>
scatterAgents
;
// scatter agent used by beam search
Argument
outArg
;
// scatter output argument
Argument
outArg
;
// scatter output argument
bool
is_sequence
;
// Different memoryFrameLine have different element as follows
// Different memoryFrameLine have different element as follows
IVectorPtr
allIds
;
// scattered id of realLayer
IVectorPtr
allIds
;
// scattered id of realLayer
ICpuGpuVectorPtr
ICpuGpuVectorPtr
...
@@ -330,22 +338,27 @@ protected:
...
@@ -330,22 +338,27 @@ protected:
// and all outFrameLines(outlinks) share the info with one inFrameLine,
// and all outFrameLines(outlinks) share the info with one inFrameLine,
// which is assigned by targetInfoInlinkId_.
// which is assigned by targetInfoInlinkId_.
struct
Info
{
struct
Info
{
IVectorPtr
allIds
;
// scattered id of realLayer
// The original positions in the original batch
std
::
vector
<
int
>
idIndex
;
// index of allIds
IVectorPtr
allIds
;
// scattered id of realLayer [batchSize]
// index of allIds for each step [maxSequenceLength_]
// idIndex[i] is the total length of the first i sequences
std
::
vector
<
int
>
idIndex
;
ICpuGpuVectorPtr
ICpuGpuVectorPtr
sequenceStartPositions
;
// scattered sequenceStartPositions
sequenceStartPositions
;
// scattered sequenceStartPositions
std
::
vector
<
int
>
seqStartPosIndex
;
// index of sequenceStartPositions
std
::
vector
<
int
>
seqStartPosIndex
;
// index of sequenceStartPositions
};
};
std
::
vector
<
Info
>
info_
;
std
::
vector
<
Info
>
info_
;
// for input
// numSeqs_[i] is the number sequences which is longer than i (for sequence
// numSeqs_[i] is the number sequences which is longer than i (for sequence
// data) or has more than i subsequences (for subsequence data)
// data) or has more than i subsequences (for subsequence data)
// Equivalently, numSeqs_[i] is the number of sequences at step i;
std
::
vector
<
int
>
numSeqs_
;
std
::
vector
<
int
>
numSeqs_
;
std
::
vector
<
std
::
vector
<
Argument
::
SeqInfo
>>
seqInfos_
;
std
::
vector
<
std
::
vector
<
Argument
::
SeqInfo
>>
seqInfos_
;
// the id of inlink which share info with outlinks
void
checkOutputConsistency
(
OutFrameLine
&
outFrameLine
);
int
targetInfoInlinkId_
;
/* create scattered id infomation for all realLayer of inFrameLines one time.
/* create scattered id infomation for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions infomation
* If hasSubseq, will also create scattered sequenceStartPositions infomation
...
@@ -354,6 +367,28 @@ protected:
...
@@ -354,6 +367,28 @@ protected:
void
createInFrameInfo
(
int
inlinks_id
,
void
createInFrameInfo
(
int
inlinks_id
,
const
Argument
&
input
,
const
Argument
&
input
,
PassType
passType
);
PassType
passType
);
void
createInFrameInfo_nonseq
(
int
inlinks_id
,
const
Argument
&
input
,
PassType
passType
);
void
createInFrameInfo_seq
(
int
inlinks_id
,
const
Argument
&
input
,
PassType
passType
);
void
createInFrameInfo_subseq
(
int
inlinks_id
,
const
Argument
&
input
,
PassType
passType
);
void
createOutFrameInfo
(
OutFrameLine
&
outFrameLine
,
Info
&
info
,
ICpuGpuVectorPtr
&
sequenceStartPositions
,
ICpuGpuVectorPtr
&
subSequenceStartPositions
);
void
createOutFrameInfo_seq
(
OutFrameLine
&
outFrameLine
,
Info
&
info
,
ICpuGpuVectorPtr
&
sequenceStartPositions
,
ICpuGpuVectorPtr
&
subSequenceStartPositions
);
void
createOutFrameInfo_subseq
(
OutFrameLine
&
outFrameLine
,
Info
&
info
,
ICpuGpuVectorPtr
&
sequenceStartPositions
,
ICpuGpuVectorPtr
&
subSequenceStartPositions
);
void
createMemoryFrameInfo
(
MemoryFrameLine
*
memoryFrameLine
,
void
createMemoryFrameInfo
(
MemoryFrameLine
*
memoryFrameLine
,
PassType
passType
);
PassType
passType
);
...
@@ -386,9 +421,7 @@ protected:
...
@@ -386,9 +421,7 @@ protected:
NeuralNetwork
*
rootNetwork_
;
NeuralNetwork
*
rootNetwork_
;
bool
reversed_
;
bool
reversed_
;
// if hasSubseq: max number of sentences(subseq)in batchsize samples
int
maxSequenceLength_
;
// Max top-level length
// else: max number of tokens in batchsize samples(sentences)
int
maxSequenceLength_
;
bool
useGpu_
;
bool
useGpu_
;
bool
stopBeamSearch_
;
bool
stopBeamSearch_
;
...
...
paddle/gserver/layers/AgentLayer.cpp
浏览文件 @
09f34c4b
...
@@ -36,14 +36,23 @@ void AgentLayer::forward(PassType passType) {
...
@@ -36,14 +36,23 @@ void AgentLayer::forward(PassType passType) {
Layer
::
forward
(
passType
);
Layer
::
forward
(
passType
);
Argument
&
realOutput
=
realLayer_
->
getOutput
();
Argument
&
realOutput
=
realLayer_
->
getOutput
();
int
real
Height
=
realOutput
.
getBatchSize
();
int
real
NumSequences
=
realOutput
.
getNumSequences
();
CHECK_LE
(
numSamples_
,
real
Height
);
CHECK_LE
(
numSamples_
,
real
NumSequences
);
// get Arguments from real layers
// get Arguments from real layers
if
(
numSamples_
>
0
&&
numSamples_
<
realHeight
)
{
if
(
numSamples_
>
0
&&
numSamples_
<
realNumSequences
)
{
if
(
realOutput
.
ids
)
{
if
(
realOutput
.
hasSeq
())
{
output_
.
ids
=
int
numRows
=
IVector
::
create
(
realOutput
.
ids
->
getData
(),
numSamples_
,
useGpu_
);
realOutput
.
sequenceStartPositions
->
getData
(
false
)[
numSamples_
];
output_
.
subArgFrom
(
realOutput
,
/* offset */
0
,
numRows
,
getSize
(),
useGpu_
,
/* trans */
false
,
/* seqFlag */
true
,
/* seqStart */
0
,
/* seqSize */
numSamples_
+
1
);
}
else
{
}
else
{
output_
.
subArgFrom
(
output_
.
subArgFrom
(
realOutput
,
/* offset */
0
,
numSamples_
,
getSize
(),
useGpu_
);
realOutput
,
/* offset */
0
,
numSamples_
,
getSize
(),
useGpu_
);
...
@@ -53,34 +62,6 @@ void AgentLayer::forward(PassType passType) {
...
@@ -53,34 +62,6 @@ void AgentLayer::forward(PassType passType) {
}
}
}
}
void
SequenceAgentLayer
::
forward
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
Argument
&
realOutput
=
realLayer_
->
getOutput
();
int
realNumSequences
=
realOutput
.
getNumSequences
();
CHECK_LE
(
numSamples_
,
realNumSequences
);
// get Arguments from real layers
if
(
numSamples_
>
0
&&
numSamples_
<
realNumSequences
)
{
int
numRows
=
realOutput
.
sequenceStartPositions
->
getData
(
false
)[
numSamples_
];
CHECK
(
!
realOutput
.
ids
)
<<
"Not supported"
;
output_
.
subArgFrom
(
realOutput
,
/* offset */
0
,
numRows
,
getSize
(),
useGpu_
,
/* trans */
false
,
/* seqFlag */
true
,
/* seqStart */
0
,
/* seqSize */
numSamples_
+
1
);
}
else
{
output_
=
realOutput
;
}
}
REGISTER_LAYER
(
sequence_agent
,
SequenceAgentLayer
);
bool
GatherAgentLayer
::
init
(
const
LayerMap
&
layerMap
,
bool
GatherAgentLayer
::
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
const
ParameterMap
&
parameterMap
)
{
CHECK_EQ
(
config_
.
inputs_size
(),
0
);
CHECK_EQ
(
config_
.
inputs_size
(),
0
);
...
@@ -91,18 +72,26 @@ bool GatherAgentLayer::init(const LayerMap& layerMap,
...
@@ -91,18 +72,26 @@ bool GatherAgentLayer::init(const LayerMap& layerMap,
return
true
;
return
true
;
}
}
void
GatherAgentLayer
::
copyIdAndSequenceInfo
(
const
Argument
&
input
,
void
GatherAgentLayer
::
copyIdAndSequenceInfo
(
const
IVectorPtr
&
ids
,
ICpuGpuVectorPtr
sequenceStartPositions
,
const
std
::
vector
<
int
>&
idIndex
)
{
ICpuGpuVectorPtr
subSequenceStartPositions
,
output_
.
sequenceStartPositions
=
input
.
sequenceStartPositions
;
const
IVectorPtr
&
ids
,
output_
.
subSequenceStartPositions
=
input
.
subSequenceStartPositions
;
const
std
::
vector
<
int
>&
idIndex
)
{
realLayers_
.
clear
();
output_
.
sequenceStartPositions
=
sequenceStartPositions
;
output_
.
subSequenceStartPositions
=
subSequenceStartPositions
;
allIds_
=
ids
;
allIds_
=
ids
;
idIndex_
=
idIndex
;
idIndex_
=
idIndex
;
}
}
void
GatherAgentLayer
::
forward
(
PassType
passType
)
{
void
GatherAgentLayer
::
forward
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
Layer
::
forward
(
passType
);
forwardIds
(
passType
);
forwardValue
(
passType
);
}
void
GatherAgentLayer
::
forwardValue
(
PassType
passType
)
{
MatrixPtr
valueReal
=
realLayers_
[
0
]
->
getOutputValue
();
if
(
!
valueReal
)
return
;
int
height
=
allIds_
->
getSize
();
int
height
=
allIds_
->
getSize
();
int
width
=
this
->
getSize
();
int
width
=
this
->
getSize
();
...
@@ -147,7 +136,9 @@ void ScatterAgentLayer::forward(PassType passType) {
...
@@ -147,7 +136,9 @@ void ScatterAgentLayer::forward(PassType passType) {
CHECK_EQ
(
realLayer_
->
getDeviceId
(),
this
->
getDeviceId
());
CHECK_EQ
(
realLayer_
->
getDeviceId
(),
this
->
getDeviceId
());
int
width
=
this
->
getSize
();
int
width
=
this
->
getSize
();
if
(
realOutArg_
.
value
||
realOutArg_
.
ids
)
{
if
(
realOutArg_
.
hasSeq
())
{
forwardSequence
(
passType
);
}
else
if
(
realOutArg_
.
value
||
realOutArg_
.
ids
)
{
output_
.
subArgFrom
(
output_
.
subArgFrom
(
realOutArg_
,
/* offset */
idIndex_
,
idSize_
,
width
,
useGpu_
);
realOutArg_
,
/* offset */
idIndex_
,
idSize_
,
width
,
useGpu_
);
}
else
{
// used in generation
}
else
{
// used in generation
...
@@ -174,7 +165,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
...
@@ -174,7 +165,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
if
(
realGrad
)
{
if
(
realGrad
)
{
// for agent in inFrameLines and memoryFrameLines,
// for agent in inFrameLines and memoryFrameLines,
// only first scatterAgentLayer should do addToRows in backward
// only first scatterAgentLayer should do addToRows in backward
if
(
idIndex_
==
0
)
{
if
(
handleBackward_
)
{
outputGrad
->
addToRows
(
*
realGrad
,
*
ids_
);
outputGrad
->
addToRows
(
*
realGrad
,
*
ids_
);
}
}
}
}
...
@@ -183,12 +174,14 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
...
@@ -183,12 +174,14 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
REGISTER_LAYER
(
gather_agent
,
GatherAgentLayer
);
REGISTER_LAYER
(
gather_agent
,
GatherAgentLayer
);
REGISTER_LAYER
(
scatter_agent
,
ScatterAgentLayer
);
REGISTER_LAYER
(
scatter_agent
,
ScatterAgentLayer
);
void
SequenceGatherAgentLayer
::
forward
(
PassType
passType
)
{
void
GatherAgentLayer
::
forwardIds
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
int
height
=
0
;
int
height
=
0
;
int
*
starts
=
output_
.
subSequenceStartPositions
->
getMutableData
(
false
);
IVectorPtr
idReal
=
realLayers_
[
0
]
->
getOutputLabel
();
IVectorPtr
idReal
=
realLayers_
[
0
]
->
getOutputLabel
();
if
(
idReal
)
{
if
(
!
idReal
)
return
;
if
(
output_
.
subSequenceStartPositions
)
{
int
*
starts
=
output_
.
subSequenceStartPositions
->
getMutableData
(
false
);
// Gather generator.idsVec
// Gather generator.idsVec
// if is beam search generation result. Get first result.
// if is beam search generation result. Get first result.
if
(
idReal
->
getData
()[
idReal
->
getSize
()
-
1
]
==
-
1
)
{
if
(
idReal
->
getData
()[
idReal
->
getSize
()
-
1
]
==
-
1
)
{
...
@@ -212,13 +205,11 @@ void SequenceGatherAgentLayer::forward(PassType passType) {
...
@@ -212,13 +205,11 @@ void SequenceGatherAgentLayer::forward(PassType passType) {
->
copyFrom
(
*
realLayers_
[
i
]
->
getOutputLabel
());
->
copyFrom
(
*
realLayers_
[
i
]
->
getOutputLabel
());
}
}
}
else
{
}
else
{
// Gather output.value, same as GatherAgentLayer
LOG
(
FATAL
)
<<
"Not implemented"
;
CHECK
(
output_
.
subSequenceStartPositions
);
GatherAgentLayer
::
forward
(
passType
);
}
}
}
}
void
S
equenceScatterAgentLayer
::
forward
(
PassType
passType
)
{
void
S
catterAgentLayer
::
forwardSequence
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
Layer
::
forward
(
passType
);
CHECK_EQ
(
realLayer_
->
getDeviceId
(),
this
->
getDeviceId
());
CHECK_EQ
(
realLayer_
->
getDeviceId
(),
this
->
getDeviceId
());
...
@@ -241,6 +232,7 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
...
@@ -241,6 +232,7 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
/* seqStart */
seqStartPosIndex_
,
/* seqStart */
seqStartPosIndex_
,
/* seqSize */
numSequences_
);
/* seqSize */
numSequences_
);
}
else
{
}
else
{
// Putting the generation logic here is really an ugly hack!
// used in generation
// used in generation
int
height
=
0
;
int
height
=
0
;
size_t
numSequences
=
ids_
->
getSize
();
size_t
numSequences
=
ids_
->
getSize
();
...
@@ -284,7 +276,4 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
...
@@ -284,7 +276,4 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
}
}
}
}
REGISTER_LAYER
(
sequence_gather_agent
,
SequenceGatherAgentLayer
);
REGISTER_LAYER
(
sequence_scatter_agent
,
SequenceScatterAgentLayer
);
}
// namespace paddle
}
// namespace paddle
paddle/gserver/layers/AgentLayer.h
浏览文件 @
09f34c4b
...
@@ -49,18 +49,6 @@ public:
...
@@ -49,18 +49,6 @@ public:
void
backward
(
const
UpdateCallback
&
callback
=
nullptr
)
override
{}
void
backward
(
const
UpdateCallback
&
callback
=
nullptr
)
override
{}
};
};
/**
* like AgentLayer, but use first *numSamples* sequences
*/
class
SequenceAgentLayer
:
public
AgentLayer
{
public:
explicit
SequenceAgentLayer
(
const
LayerConfig
&
config
)
:
AgentLayer
(
config
)
{}
~
SequenceAgentLayer
()
{}
void
forward
(
PassType
passType
)
override
;
void
backward
(
const
UpdateCallback
&
callback
=
nullptr
)
override
{}
};
/**
/**
* Like AgentLayer, but it can gather many real layers. Each real
* Like AgentLayer, but it can gather many real layers. Each real
* layer give a few rows of a sequence, after gather all real layers,
* layer give a few rows of a sequence, after gather all real layers,
...
@@ -83,7 +71,10 @@ public:
...
@@ -83,7 +71,10 @@ public:
const
ParameterMap
&
parameterMap
)
override
;
const
ParameterMap
&
parameterMap
)
override
;
// call before addRealLayer
// call before addRealLayer
void
copyIdAndSequenceInfo
(
const
Argument
&
input
,
void
clearRealLayers
()
{
realLayers_
.
clear
();
}
void
copyIdAndSequenceInfo
(
ICpuGpuVectorPtr
sequenceStartPositions
,
ICpuGpuVectorPtr
subSequenceStartPositions
,
const
IVectorPtr
&
allIds
,
const
IVectorPtr
&
allIds
,
const
std
::
vector
<
int
>&
idIndex
);
const
std
::
vector
<
int
>&
idIndex
);
...
@@ -92,24 +83,8 @@ public:
...
@@ -92,24 +83,8 @@ public:
void
forward
(
PassType
passType
)
override
;
void
forward
(
PassType
passType
)
override
;
void
backward
(
const
UpdateCallback
&
callback
)
override
;
void
backward
(
const
UpdateCallback
&
callback
)
override
;
};
void
forwardValue
(
PassType
passType
);
void
forwardIds
(
PassType
passType
);
/**
* Like GatherAgentLayer, but select a few sequence in real layer.
* *ids* in addRealLayer() are the ids of selected sequence.
* It's used to reorder sequence output.
*/
class
SequenceGatherAgentLayer
:
public
GatherAgentLayer
{
public:
explicit
SequenceGatherAgentLayer
(
const
LayerConfig
&
config
)
:
GatherAgentLayer
(
config
)
{}
virtual
~
SequenceGatherAgentLayer
()
{}
void
forward
(
PassType
passType
);
void
backward
(
const
UpdateCallback
&
callback
)
{
// same as GatherAgentLayer
GatherAgentLayer
::
backward
(
callback
);
}
};
};
/**
/**
...
@@ -129,6 +104,11 @@ protected:
...
@@ -129,6 +104,11 @@ protected:
int
idSize_
;
int
idSize_
;
int
seqStartPosIndex_
;
int
seqStartPosIndex_
;
int
numSequences_
;
// number of sequences in this scatterAgentLayer
int
numSequences_
;
// number of sequences in this scatterAgentLayer
bool
handleBackward_
;
// use to store expanded cpuStartPositions or subSequenceStartPositions
// of real layer.
ICpuGpuVectorPtr
inputStartPos_
;
public:
public:
explicit
ScatterAgentLayer
(
const
LayerConfig
&
config
)
:
Layer
(
config
)
{}
explicit
ScatterAgentLayer
(
const
LayerConfig
&
config
)
:
Layer
(
config
)
{}
...
@@ -147,19 +127,15 @@ public:
...
@@ -147,19 +127,15 @@ public:
* false(default) in ScatterAgentLayer, and
* false(default) in ScatterAgentLayer, and
* true in SequenceScatterAgentLayer.
* true in SequenceScatterAgentLayer.
*/
*/
void
setRealLayer
(
LayerPtr
layer
,
void
setRealLayer
(
LayerPtr
layer
,
const
std
::
vector
<
int
>&
ids
)
{
const
std
::
vector
<
int
>&
ids
,
bool
copyId
=
false
)
{
realLayer_
=
layer
;
realLayer_
=
layer
;
IVector
::
resizeOrCreate
(
ids_
,
ids
.
size
(),
useGpu_
);
IVector
::
resizeOrCreate
(
ids_
,
ids
.
size
(),
useGpu_
);
ids_
->
copyFrom
(
ids
.
data
(),
ids
.
size
());
ids_
->
copyFrom
(
ids
.
data
(),
ids
.
size
());
if
(
copyId
)
{
if
(
useGpu_
)
{
if
(
useGpu_
)
{
IVector
::
resizeOrCreate
(
cpuIds_
,
ids
.
size
(),
false
);
IVector
::
resizeOrCreate
(
cpuIds_
,
ids
.
size
(),
false
);
cpuIds_
->
copyFrom
(
ids
.
data
(),
ids
.
size
());
cpuIds_
->
copyFrom
(
ids
.
data
(),
ids
.
size
());
}
else
{
}
else
{
cpuIds_
=
ids_
;
cpuIds_
=
ids_
;
}
}
}
}
}
...
@@ -169,12 +145,14 @@ public:
...
@@ -169,12 +145,14 @@ public:
const
Argument
&
outArg
,
const
Argument
&
outArg
,
const
IVectorPtr
&
ids
,
const
IVectorPtr
&
ids
,
int
idIndex
,
int
idIndex
,
int
idSize
)
{
int
idSize
,
bool
handleBackward
)
{
realLayer_
=
layer
;
realLayer_
=
layer
;
realOutArg_
=
outArg
;
realOutArg_
=
outArg
;
ids_
=
ids
;
ids_
=
ids
;
idIndex_
=
idIndex
;
idIndex_
=
idIndex
;
idSize_
=
idSize
;
idSize_
=
idSize
;
handleBackward_
=
handleBackward
;
}
}
void
setSequenceStartPositions
(
const
ICpuGpuVectorPtr
&
sequenceStartPositions
,
void
setSequenceStartPositions
(
const
ICpuGpuVectorPtr
&
sequenceStartPositions
,
...
@@ -187,28 +165,8 @@ public:
...
@@ -187,28 +165,8 @@ public:
void
forward
(
PassType
passType
)
override
;
void
forward
(
PassType
passType
)
override
;
void
backward
(
const
UpdateCallback
&
callback
)
override
;
void
backward
(
const
UpdateCallback
&
callback
)
override
;
};
/**
void
forwardSequence
(
PassType
passType
);
* Like ScatterAgentLayer, but select a few sequence in real layer.
* *ids* in setRealLayer() or setRealLayerAndOutput() are the ids of
* selected sequence. It's used to reorder sequence input.
*/
class
SequenceScatterAgentLayer
:
public
ScatterAgentLayer
{
protected:
// use to store expanded cpuStartPositions or subSequenceStartPositions
// of real layer.
ICpuGpuVectorPtr
inputStartPos_
;
public:
explicit
SequenceScatterAgentLayer
(
const
LayerConfig
&
config
)
:
ScatterAgentLayer
(
config
)
{}
virtual
~
SequenceScatterAgentLayer
()
{}
void
forward
(
PassType
passType
);
void
backward
(
const
UpdateCallback
&
callback
)
{
ScatterAgentLayer
::
backward
(
callback
);
}
};
};
}
// namespace paddle
}
// namespace paddle
paddle/gserver/layers/SequencePoolLayer.cpp
浏览文件 @
09f34c4b
...
@@ -46,6 +46,9 @@ void SequencePoolLayer::forward(PassType passType) {
...
@@ -46,6 +46,9 @@ void SequencePoolLayer::forward(PassType passType) {
Layer
::
forward
(
passType
);
Layer
::
forward
(
passType
);
const
Argument
&
input
=
getInput
(
0
);
const
Argument
&
input
=
getInput
(
0
);
CHECK
(
input
.
hasSeq
()
||
input
.
hasSubseq
())
<<
"Input should be a sequence or subsequence for layer "
<<
getName
();
newBatchSize_
=
type_
?
input
.
getNumSubSequences
()
:
input
.
getNumSequences
();
newBatchSize_
=
type_
?
input
.
getNumSubSequences
()
:
input
.
getNumSequences
();
size_t
dim
=
getSize
();
size_t
dim
=
getSize
();
// check
// check
...
...
paddle/gserver/tests/rnn_data_provider.py
浏览文件 @
09f34c4b
...
@@ -95,3 +95,22 @@ def process_unequalength_seq(settings, file_name):
...
@@ -95,3 +95,22 @@ def process_unequalength_seq(settings, file_name):
words1
=
reduce
(
lambda
x
,
y
:
x
+
y
,
d
[
0
])
words1
=
reduce
(
lambda
x
,
y
:
x
+
y
,
d
[
0
])
words2
=
reduce
(
lambda
x
,
y
:
x
+
y
,
d
[
1
])
words2
=
reduce
(
lambda
x
,
y
:
x
+
y
,
d
[
1
])
yield
words1
,
words2
,
d
[
2
]
yield
words1
,
words2
,
d
[
2
]
###########################################################
data3
=
[
[[[
1
,
2
],
[
4
,
5
,
2
]],
[
1
,
2
],
0
],
[[[
0
,
2
],
[
2
,
5
],
[
0
,
1
,
2
]],
[
2
,
3
,
0
],
1
],
]
# Used for sequence_nest_mixed_inputs.conf
@
provider
(
input_types
=
[
integer_value_sub_sequence
(
10
),
integer_value_sequence
(
10
),
integer_value
(
2
)
],
should_shuffle
=
False
)
def
process_mixed
(
settings
,
file_name
):
for
d
in
data3
:
yield
d
paddle/gserver/tests/sequence_nest_rnn_multi_input.conf
浏览文件 @
09f34c4b
...
@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
...
@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/dummy.list'
,
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/dummy.list'
,
test_list
=
None
,
test_list
=
None
,
module
=
'rnn_data_provider'
,
module
=
'rnn_data_provider'
,
obj
=
'process_subseq
2
'
)
obj
=
'process_subseq'
)
settings
(
batch_size
=
2
,
learning_rate
=
0
.
01
)
settings
(
batch_size
=
2
,
learning_rate
=
0
.
01
)
...
@@ -57,7 +57,7 @@ def outer_step(wid, x):
...
@@ -57,7 +57,7 @@ def outer_step(wid, x):
last
=
last_seq
(
input
=
inner_rnn_output
,
name
=
"outer_rnn_state"
)
last
=
last_seq
(
input
=
inner_rnn_output
,
name
=
"outer_rnn_state"
)
# "return last" should also work. But currently RecurrentGradientMachine
# "return last" should also work. But currently RecurrentGradientMachine
# does not handle it, and will report error: In hierachical RNN, all out
# does not handle it, and will report error: In hierachical RNN, all out
# links should be from sequences now.
# links should be from sequences now.
return
inner_rnn_output
return
inner_rnn_output
...
...
paddle/gserver/tests/sequence_rnn_matched_inputs.py
0 → 100644
浏览文件 @
09f34c4b
# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle.trainer_config_helpers
import
*
######################## data source ################################
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/dummy.list'
,
test_list
=
None
,
module
=
'rnn_data_provider'
,
obj
=
'process_mixed'
)
settings
(
batch_size
=
2
,
learning_rate
=
0.01
)
######################## network configure ################################
dict_dim
=
10
word_dim
=
2
hidden_dim
=
2
label_dim
=
2
data1
=
data_layer
(
name
=
"word1"
,
size
=
dict_dim
)
data2
=
data_layer
(
name
=
"word2"
,
size
=
dict_dim
)
label
=
data_layer
(
name
=
"label"
,
size
=
label_dim
)
encoding
=
embedding_layer
(
input
=
data2
,
size
=
word_dim
)
subseq
=
embedding_layer
(
input
=
data1
,
size
=
word_dim
)
seq
=
embedding_layer
(
input
=
data2
,
size
=
word_dim
)
nonseq
=
embedding_layer
(
input
=
label
,
size
=
word_dim
)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def
outer_step
(
subseq
,
seq
,
nonseq
,
encoding
):
outer_mem
=
memory
(
name
=
"outer_rnn_state"
,
size
=
hidden_dim
)
def
inner_step
(
subseq
,
seq
,
nonseq
):
inner_mem
=
memory
(
name
=
"inner_rnn_state"
,
size
=
hidden_dim
,
boot_layer
=
outer_mem
)
out
=
fc_layer
(
input
=
[
subseq
,
seq
,
nonseq
,
inner_mem
],
size
=
hidden_dim
,
act
=
TanhActivation
(),
bias_attr
=
True
,
name
=
'inner_rnn_state'
)
return
out
decoder
=
recurrent_group
(
step
=
inner_step
,
name
=
'inner'
,
input
=
[
subseq
,
seq
,
nonseq
])
last
=
last_seq
(
name
=
"outer_rnn_state"
,
input
=
decoder
)
context
=
simple_attention
(
encoded_sequence
=
encoding
,
encoded_proj
=
encoding
,
decoder_state
=
last
)
return
context
out
=
recurrent_group
(
name
=
"outer"
,
step
=
outer_step
,
input
=
[
subseq
,
expand_layer
(
seq
,
expand_as
=
subseq
,
expand_level
=
ExpandLevel
.
FROM_SEQUENCE
),
expand_layer
(
nonseq
,
expand_as
=
subseq
,
expand_level
=
ExpandLevel
.
FROM_NO_SEQUENCE
),
StaticInput
(
encoding
)
])
rep
=
last_seq
(
input
=
out
)
prob
=
fc_layer
(
size
=
label_dim
,
input
=
rep
,
act
=
SoftmaxActivation
(),
bias_attr
=
True
)
outputs
(
classification_cost
(
input
=
prob
,
label
=
label
))
paddle/gserver/tests/sequence_rnn_mixed_inputs.py
0 → 100644
浏览文件 @
09f34c4b
# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle.trainer_config_helpers
import
*
######################## data source ################################
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/dummy.list'
,
test_list
=
None
,
module
=
'rnn_data_provider'
,
obj
=
'process_mixed'
)
settings
(
batch_size
=
2
,
learning_rate
=
0.01
)
######################## network configure ################################
dict_dim
=
10
word_dim
=
2
hidden_dim
=
2
label_dim
=
2
data1
=
data_layer
(
name
=
"word1"
,
size
=
dict_dim
)
data2
=
data_layer
(
name
=
"word2"
,
size
=
dict_dim
)
label
=
data_layer
(
name
=
"label"
,
size
=
label_dim
)
encoding
=
embedding_layer
(
input
=
data2
,
size
=
word_dim
)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def
outer_step
(
subseq
,
seq
,
nonseq
,
encoding
):
outer_mem
=
memory
(
name
=
"outer_rnn_state"
,
size
=
hidden_dim
)
def
inner_step
(
data1
,
data2
,
label
):
inner_mem
=
memory
(
name
=
"inner_rnn_state"
,
size
=
hidden_dim
,
boot_layer
=
outer_mem
)
subseq
=
embedding_layer
(
input
=
data1
,
size
=
word_dim
)
seq
=
embedding_layer
(
input
=
data2
,
size
=
word_dim
)
nonseq
=
embedding_layer
(
input
=
label
,
size
=
word_dim
)
print_layer
(
input
=
[
data1
,
seq
,
label
,
inner_mem
])
out
=
fc_layer
(
input
=
[
subseq
,
seq
,
nonseq
,
inner_mem
],
size
=
hidden_dim
,
act
=
TanhActivation
(),
bias_attr
=
True
,
name
=
'inner_rnn_state'
)
return
out
decoder
=
recurrent_group
(
step
=
inner_step
,
name
=
'inner'
,
input
=
[
subseq
,
StaticInput
(
seq
),
nonseq
])
last
=
last_seq
(
name
=
"outer_rnn_state"
,
input
=
decoder
)
context
=
simple_attention
(
encoded_sequence
=
encoding
,
encoded_proj
=
encoding
,
decoder_state
=
last
)
return
context
out
=
recurrent_group
(
name
=
"outer"
,
step
=
outer_step
,
input
=
[
data1
,
data2
,
StaticInput
(
label
),
StaticInput
(
encoding
)])
rep
=
last_seq
(
input
=
out
)
prob
=
fc_layer
(
size
=
label_dim
,
input
=
rep
,
act
=
SoftmaxActivation
(),
bias_attr
=
True
)
outputs
(
classification_cost
(
input
=
prob
,
label
=
label
))
paddle/gserver/tests/sequence_rnn_multi_input.conf
浏览文件 @
09f34c4b
...
@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
...
@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/dummy.list'
,
define_py_data_sources2
(
train_list
=
'gserver/tests/Sequence/dummy.list'
,
test_list
=
None
,
test_list
=
None
,
module
=
'rnn_data_provider'
,
module
=
'rnn_data_provider'
,
obj
=
'process_seq
2
'
)
obj
=
'process_seq'
)
settings
(
batch_size
=
2
,
learning_rate
=
0
.
01
)
settings
(
batch_size
=
2
,
learning_rate
=
0
.
01
)
...
...
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
浏览文件 @
09f34c4b
...
@@ -155,6 +155,15 @@ TEST(RecurrentGradientMachine, rnn_multi_unequalength_input) {
...
@@ -155,6 +155,15 @@ TEST(RecurrentGradientMachine, rnn_multi_unequalength_input) {
}
}
}
}
TEST
(
RecurrentGradientMachine
,
rnn_mixed_input
)
{
for
(
bool
useGpu
:
{
false
,
true
})
{
test
(
"gserver/tests/sequence_rnn_mixed_inputs.py"
,
"gserver/tests/sequence_rnn_matched_inputs.py"
,
1e-6
,
useGpu
);
}
}
int
main
(
int
argc
,
char
**
argv
)
{
int
main
(
int
argc
,
char
**
argv
)
{
testing
::
InitGoogleTest
(
&
argc
,
argv
);
testing
::
InitGoogleTest
(
&
argc
,
argv
);
...
...
paddle/math/Vector.cpp
浏览文件 @
09f34c4b
...
@@ -908,12 +908,13 @@ const T* CpuGpuVectorT<T>::getData(bool useGpu) const {
...
@@ -908,12 +908,13 @@ const T* CpuGpuVectorT<T>::getData(bool useGpu) const {
// Operation will change data and need to reset sync_ & syncFlag_.
// Operation will change data and need to reset sync_ & syncFlag_.
#define MUTABLE_VECTOR_OP(OP, useGpu, args...) \
#define MUTABLE_VECTOR_OP(OP, useGpu, args...) \
do { \
do { \
setSync(useGpu); \
if (useGpu) { \
if (useGpu) { \
copyToGpu(); \
copyToGpu(); \
setSync(useGpu); \
return gpuVectorT_->OP(args); \
return gpuVectorT_->OP(args); \
} else { \
} else { \
copyToCpu(); \
copyToCpu(); \
setSync(useGpu); \
return cpuVectorT_->OP(args); \
return cpuVectorT_->OP(args); \
} \
} \
} while (0)
} while (0)
...
@@ -1030,7 +1031,7 @@ void CpuGpuVectorT<T>::copyToCpu() {
...
@@ -1030,7 +1031,7 @@ void CpuGpuVectorT<T>::copyToCpu() {
case
DATA_AT_GPU
:
case
DATA_AT_GPU
:
CHECK
(
gpuVectorT_
);
CHECK
(
gpuVectorT_
);
this
->
resizeOrCreate
(
gpuVectorT_
->
getSize
(),
false
);
this
->
resizeOrCreate
(
gpuVectorT_
->
getSize
(),
false
);
cpuVectorT_
->
copyFrom
(
*
gpuVectorT_
,
HPPL_STREAM_DEFAULT
);
cpuVectorT_
->
copyFrom
(
*
gpuVectorT_
);
setSync
(
SYNCED
);
setSync
(
SYNCED
);
break
;
break
;
case
DATA_AT_CPU
:
case
DATA_AT_CPU
:
...
@@ -1049,7 +1050,7 @@ void CpuGpuVectorT<T>::copyToGpu() {
...
@@ -1049,7 +1050,7 @@ void CpuGpuVectorT<T>::copyToGpu() {
case
DATA_AT_CPU
:
case
DATA_AT_CPU
:
CHECK
(
cpuVectorT_
);
CHECK
(
cpuVectorT_
);
this
->
resizeOrCreate
(
cpuVectorT_
->
getSize
(),
true
);
this
->
resizeOrCreate
(
cpuVectorT_
->
getSize
(),
true
);
gpuVectorT_
->
copyFrom
(
*
cpuVectorT_
,
HPPL_STREAM_DEFAULT
);
gpuVectorT_
->
copyFrom
(
*
cpuVectorT_
);
setSync
(
SYNCED
);
setSync
(
SYNCED
);
break
;
break
;
case
DATA_AT_GPU
:
case
DATA_AT_GPU
:
...
...
paddle/parameter/Argument.h
浏览文件 @
09f34c4b
...
@@ -149,6 +149,7 @@ struct Argument {
...
@@ -149,6 +149,7 @@ struct Argument {
:
getBatchSize
();
:
getBatchSize
();
}
}
bool
hasSeq
()
const
{
return
sequenceStartPositions
!=
nullptr
;
}
bool
hasSubseq
()
const
{
return
subSequenceStartPositions
!=
nullptr
;
}
bool
hasSubseq
()
const
{
return
subSequenceStartPositions
!=
nullptr
;
}
const
int
*
getCpuStartPositions
()
const
{
const
int
*
getCpuStartPositions
()
const
{
...
...
paddle/trainer/tests/test_recurrent_machine_generation.cpp
浏览文件 @
09f34c4b
...
@@ -124,6 +124,8 @@ TEST(RecurrentGradientMachine, test_generation) {
...
@@ -124,6 +124,8 @@ TEST(RecurrentGradientMachine, test_generation) {
bool
beam_search
)
{
bool
beam_search
)
{
FLAGS_config_args
=
beam_search
?
"beam_search=1"
:
"beam_search=0"
;
FLAGS_config_args
=
beam_search
?
"beam_search=1"
:
"beam_search=0"
;
for
(
auto
useGpu
:
useGpuConfs
)
{
for
(
auto
useGpu
:
useGpuConfs
)
{
LOG
(
INFO
)
<<
configFile
<<
" useGpu="
<<
useGpu
<<
" beam_search="
<<
beam_search
;
testGeneration
(
configFile
,
useGpu
,
hasSubseq
,
expRetFile
);
testGeneration
(
configFile
,
useGpu
,
hasSubseq
,
expRetFile
);
}
}
};
};
...
...
python/paddle/trainer/config_parser.py
浏览文件 @
09f34c4b
...
@@ -328,53 +328,33 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
...
@@ -328,53 +328,33 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SubModelBegin
(
name
)
SubModelBegin
(
name
)
g_current_submodel
.
is_recurrent_layer_group
=
True
g_current_submodel
.
is_recurrent_layer_group
=
True
g_current_submodel
.
reversed
=
seq_reversed
g_current_submodel
.
reversed
=
seq_reversed
g_current_submodel
.
target_inlinkid
=
-
1
in_links_count
=
0
in_links_count
=
0
for
linkid
,
link
in
enumerate
(
in_links
):
for
linkid
,
link
in
enumerate
(
in_links
):
if
isinstance
(
link
,
basestring
):
if
isinstance
(
link
,
basestring
):
name
=
link
name
=
link
has_subseq
=
False
else
:
else
:
name
=
link
.
link_name
name
=
link
.
link_name
has_subseq
=
link
.
has_subseq
# assign target_inlinkid according to target_inlinkname
if
target_inlinkname
==
name
:
g_current_submodel
.
target_inlinkid
=
linkid
if
in_links_count
==
0
:
in_links_has_subseq
=
has_subseq
else
:
config_assert
(
in_links_has_subseq
==
has_subseq
,
"The sequence type of in_links should be the same in RecurrentLayerGroup"
)
in_links_count
+=
1
in_links_count
+=
1
layer_name
=
MakeLayerNameInParentSubmodel
(
name
)
layer_name
=
MakeLayerNameInParentSubmodel
(
name
)
layer
=
g_layer_map
[
layer_name
]
layer
=
g_layer_map
[
layer_name
]
if
has_subseq
:
ScatterAgentLayer
(
name
=
name
,
size
=
layer
.
size
)
SequenceScatterAgentLayer
(
name
=
name
,
size
=
layer
.
size
)
else
:
ScatterAgentLayer
(
name
=
name
,
size
=
layer
.
size
)
pair
=
g_current_submodel
.
in_links
.
add
()
pair
=
g_current_submodel
.
in_links
.
add
()
pair
.
layer_name
=
layer_name
pair
.
layer_name
=
layer_name
pair
.
link_name
=
MakeLayerNameInSubmodel
(
name
)
pair
.
link_name
=
MakeLayerNameInSubmodel
(
name
)
pair
.
has_subseq
=
has_subseq
@
config_func
@
config_func
def
RecurrentLayerGroupSetOutLink
(
link
):
def
RecurrentLayerGroupSetOutLink
(
link
):
if
isinstance
(
link
,
basestring
):
if
isinstance
(
link
,
basestring
):
name
=
link
name
=
link
has_subseq
=
False
else
:
else
:
name
=
link
.
link_name
name
=
link
.
link_name
has_subseq
=
link
.
has_subseq
layer_name
=
MakeLayerNameInParentSubmodel
(
name
)
layer_name
=
MakeLayerNameInParentSubmodel
(
name
)
pair
=
g_current_submodel
.
out_links
.
add
()
pair
=
g_current_submodel
.
out_links
.
add
()
pair
.
layer_name
=
MakeLayerNameInSubmodel
(
name
)
pair
.
layer_name
=
MakeLayerNameInSubmodel
(
name
)
pair
.
link_name
=
layer_name
pair
.
link_name
=
layer_name
pair
.
has_subseq
=
has_subseq
def
RecurrentLayerGroupSetGenerator
(
generator
=
None
):
def
RecurrentLayerGroupSetGenerator
(
generator
=
None
):
...
@@ -389,8 +369,7 @@ def RecurrentLayerGroupBegin(name,
...
@@ -389,8 +369,7 @@ def RecurrentLayerGroupBegin(name,
generator
=
None
,
generator
=
None
,
target_inlinkname
=
""
,
target_inlinkname
=
""
,
seq_reversed
=
False
):
seq_reversed
=
False
):
RecurrentLayerGroupWithoutOutLinksBegin
(
name
,
in_links
,
seq_reversed
,
RecurrentLayerGroupWithoutOutLinksBegin
(
name
,
in_links
,
seq_reversed
)
target_inlinkname
)
for
link
in
out_links
:
for
link
in
out_links
:
RecurrentLayerGroupSetOutLink
(
link
)
RecurrentLayerGroupSetOutLink
(
link
)
...
@@ -425,8 +404,6 @@ def RecurrentLayerGroupEnd(name):
...
@@ -425,8 +404,6 @@ def RecurrentLayerGroupEnd(name):
agent_name
=
GetLayerBaseName
(
pair
.
link_name
)
agent_name
=
GetLayerBaseName
(
pair
.
link_name
)
if
prev_submodel
.
HasField
(
"generator"
):
if
prev_submodel
.
HasField
(
"generator"
):
DataLayer
(
name
=
agent_name
,
size
=
layer
.
size
)
DataLayer
(
name
=
agent_name
,
size
=
layer
.
size
)
elif
pair
.
has_subseq
:
SequenceGatherAgentLayer
(
name
=
agent_name
,
size
=
layer
.
size
)
else
:
else
:
GatherAgentLayer
(
name
=
agent_name
,
size
=
layer
.
size
)
GatherAgentLayer
(
name
=
agent_name
,
size
=
layer
.
size
)
...
@@ -2253,13 +2230,6 @@ class AgentLayer(LayerBase):
...
@@ -2253,13 +2230,6 @@ class AgentLayer(LayerBase):
name
,
'agent'
,
size
,
inputs
=
[],
device
=
device
)
name
,
'agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'sequence_agent'
)
class
SequenceAgentLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
super
(
SequenceAgentLayer
,
self
).
__init__
(
name
,
'sequence_agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'gather_agent'
)
@
config_layer
(
'gather_agent'
)
class
GatherAgentLayer
(
LayerBase
):
class
GatherAgentLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
...
@@ -2274,20 +2244,6 @@ class ScatterAgentLayer(LayerBase):
...
@@ -2274,20 +2244,6 @@ class ScatterAgentLayer(LayerBase):
name
,
'scatter_agent'
,
size
,
inputs
=
[],
device
=
device
)
name
,
'scatter_agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'sequence_gather_agent'
)
class
SequenceGatherAgentLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
super
(
SequenceGatherAgentLayer
,
self
).
__init__
(
name
,
'sequence_gather_agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'sequence_scatter_agent'
)
class
SequenceScatterAgentLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
device
=
None
):
super
(
SequenceScatterAgentLayer
,
self
).
__init__
(
name
,
'sequence_scatter_agent'
,
size
,
inputs
=
[],
device
=
device
)
@
config_layer
(
'multiplex'
)
@
config_layer
(
'multiplex'
)
class
MultiplexLayer
(
LayerBase
):
class
MultiplexLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
size
,
device
=
None
):
def
__init__
(
self
,
name
,
inputs
,
size
,
device
=
None
):
...
@@ -2303,12 +2259,12 @@ class MultiplexLayer(LayerBase):
...
@@ -2303,12 +2259,12 @@ class MultiplexLayer(LayerBase):
@
config_func
@
config_func
def
Link
(
def
Link
(
name
,
has_subseq
=
False
):
name
,
"""
has_subseq
=
False
,
):
Still keeping has_subseq for backward compatibility
"""
link_config
=
LinkConfig
()
link_config
=
LinkConfig
()
link_config
.
link_name
=
name
link_config
.
link_name
=
name
link_config
.
has_subseq
=
has_subseq
return
link_config
return
link_config
...
@@ -2341,20 +2297,13 @@ def Memory(name,
...
@@ -2341,20 +2297,13 @@ def Memory(name,
config_assert
(
name
is
not
None
,
"name needs cannot be None"
)
config_assert
(
name
is
not
None
,
"name needs cannot be None"
)
memory_name
=
name
+
"+delay1"
memory_name
=
name
+
"+delay1"
agent_name
=
memory_name
agent_name
=
memory_name
if
is_sequence
:
agent_layer
=
AgentLayer
(
agent_name
,
size
)
config_assert
(
boot_layer
is
not
None
,
"there must be boot_layer in network when is_sequence = True"
)
agent_layer
=
SequenceAgentLayer
(
agent_name
,
size
)
else
:
agent_layer
=
AgentLayer
(
agent_name
,
size
)
config_assert
(
g_current_submodel
.
is_recurrent_layer_group
,
config_assert
(
g_current_submodel
.
is_recurrent_layer_group
,
'Memory should be used in recurrent layer group only'
)
'Memory should be used in recurrent layer group only'
)
memory
=
g_current_submodel
.
memories
.
add
()
memory
=
g_current_submodel
.
memories
.
add
()
if
name
is
not
None
:
if
name
is
not
None
:
memory
.
layer_name
=
MakeLayerNameInSubmodel
(
name
)
memory
.
layer_name
=
MakeLayerNameInSubmodel
(
name
)
memory
.
link_name
=
MakeLayerNameInSubmodel
(
agent_name
)
memory
.
link_name
=
MakeLayerNameInSubmodel
(
agent_name
)
memory
.
is_sequence
=
is_sequence
options
=
sum
((
boot_layer
is
not
None
,
bool
(
boot_bias
),
options
=
sum
((
boot_layer
is
not
None
,
bool
(
boot_bias
),
boot_with_const_id
is
not
None
))
boot_with_const_id
is
not
None
))
config_assert
(
config_assert
(
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
09f34c4b
...
@@ -311,18 +311,6 @@ class LayerOutput(object):
...
@@ -311,18 +311,6 @@ class LayerOutput(object):
self
.
outputs
=
outputs
self
.
outputs
=
outputs
self
.
reverse
=
reverse
self
.
reverse
=
reverse
def
__repr__
(
self
):
"""
Disable __repr__ for debug reason. Will be implemented when release
"""
assert
False
,
"this method should not be invoked"
def
__str__
(
self
):
"""
Disable __str__ for debug reason. Will be implemented when release
"""
assert
False
,
"this method should not be invoked"
def
set_input
(
self
,
input
):
def
set_input
(
self
,
input
):
"""
"""
Set the input for a memory layer. Can only be used for memory layer
Set the input for a memory layer. Can only be used for memory layer
...
@@ -2944,7 +2932,7 @@ def memory(name,
...
@@ -2944,7 +2932,7 @@ def memory(name,
:param memory_name: the name of the memory.
:param memory_name: the name of the memory.
It is ignored when name is provided.
It is ignored when name is provided.
:type memory_name: basestring
:type memory_name: basestring
:param is_seq: is sequence for boot_layer
:param is_seq:
DEPRECATED.
is sequence for boot_layer
:type is_seq: bool
:type is_seq: bool
:param boot_layer: boot layer of memory.
:param boot_layer: boot layer of memory.
:type boot_layer: LayerOutput|None
:type boot_layer: LayerOutput|None
...
@@ -2971,7 +2959,6 @@ def memory(name,
...
@@ -2971,7 +2959,6 @@ def memory(name,
memory_name
=
Memory
(
memory_name
=
Memory
(
name
,
name
,
size
,
size
,
is_sequence
=
is_seq
,
boot_layer
=
boot_layer
.
name
if
boot_layer
is
not
None
else
None
,
boot_layer
=
boot_layer
.
name
if
boot_layer
is
not
None
else
None
,
boot_bias
=
boot_bias
,
boot_bias
=
boot_bias
,
boot_bias_active_type
=
boot_bias_active_type
.
name
,
boot_bias_active_type
=
boot_bias_active_type
.
name
,
...
@@ -3318,19 +3305,21 @@ class StaticInput(object):
...
@@ -3318,19 +3305,21 @@ class StaticInput(object):
"""
"""
StaticInput is only used in recurrent_group which defines a read-only memory
StaticInput is only used in recurrent_group which defines a read-only memory
that can be a sequence or non-sequence.
that can be a sequence or non-sequence.
:param size: DEPRECATED
:param is_seq: DEPRECATED
"""
"""
def
__init__
(
self
,
input
,
is_seq
=
False
,
size
=
None
):
def
__init__
(
self
,
input
,
is_seq
=
False
,
size
=
None
):
assert
isinstance
(
input
,
LayerOutput
)
assert
isinstance
(
input
,
LayerOutput
)
self
.
input
=
input
self
.
input
=
input
self
.
is_seq
=
is_seq
assert
input
.
size
is
not
None
assert
input
.
size
is
not
None
or
size
is
not
None
if
size
is
not
None
:
if
size
is
not
None
:
input
.
size
=
size
assert
input
.
size
=
=
size
class
SubsequenceInput
(
objec
t
):
def
SubsequenceInput
(
inpu
t
):
"""
"""
DEPRECATED.
Input sequence has sub-sequence, used in recurrent_group.
Input sequence has sub-sequence, used in recurrent_group.
The example usage is:
The example usage is:
...
@@ -3339,11 +3328,7 @@ class SubsequenceInput(object):
...
@@ -3339,11 +3328,7 @@ class SubsequenceInput(object):
input = SubsequenceInput(layer)
input = SubsequenceInput(layer)
"""
"""
return
input
def
__init__
(
self
,
input
):
assert
isinstance
(
input
,
LayerOutput
)
assert
input
.
size
is
not
None
self
.
input
=
input
@
wrap_name_default
(
"recurrent_group"
)
@
wrap_name_default
(
"recurrent_group"
)
...
@@ -3407,7 +3392,8 @@ def recurrent_group(step,
...
@@ -3407,7 +3392,8 @@ def recurrent_group(step,
input sequence in a reverse order.
input sequence in a reverse order.
:type reverse: bool
:type reverse: bool
:param targetInlink: the input layer which share info with layer group's output
:param targetInlink: DEPRECATED.
The input layer which share info with layer group's output
Param input specifies multiple input layers. For
Param input specifies multiple input layers. For
SubsequenceInput inputs, config should assign one input
SubsequenceInput inputs, config should assign one input
...
@@ -3429,46 +3415,21 @@ def recurrent_group(step,
...
@@ -3429,46 +3415,21 @@ def recurrent_group(step,
model_type
(
'recurrent_nn'
)
model_type
(
'recurrent_nn'
)
def
is_single_input
(
x
):
def
is_single_input
(
x
):
return
isinstance
(
x
,
LayerOutput
)
or
isinstance
(
x
,
StaticInput
)
\
return
isinstance
(
x
,
LayerOutput
)
or
isinstance
(
x
,
StaticInput
)
or
isinstance
(
x
,
SubsequenceInput
)
if
is_single_input
(
input
):
if
is_single_input
(
input
):
input
=
[
input
]
input
=
[
input
]
assert
isinstance
(
input
,
collections
.
Sequence
)
assert
isinstance
(
input
,
collections
.
Sequence
)
def
is_in_links
(
x
):
def
is_in_links
(
x
):
return
isinstance
(
x
,
LayerOutput
)
or
isinstance
(
x
,
SubsequenceInput
)
return
isinstance
(
x
,
LayerOutput
)
in_links
=
filter
(
is_in_links
,
input
)
in_links
=
filter
(
is_in_links
,
input
)
def
targetInlink_in_inlinks
():
for
inlink
in
in_links
:
if
isinstance
(
inlink
,
SubsequenceInput
):
if
targetInlink
==
inlink
.
input
:
return
True
elif
targetInlink
==
inlink
:
return
True
return
False
assert
(
targetInlink
==
None
or
targetInlink_in_inlinks
())
targetInlinkName
=
None
if
targetInlink
==
None
\
else
targetInlink
.
name
if
isinstance
(
targetInlink
,
LayerOutput
)
\
else
targetInlink
.
input
.
name
contains_sub_seq
=
[
False
]
def
map_in_links
(
x
):
if
isinstance
(
x
,
SubsequenceInput
):
contains_sub_seq
[
0
]
=
True
return
Link
(
name
=
x
.
input
.
name
,
has_subseq
=
True
)
else
:
return
x
.
name
RecurrentLayerGroupWithoutOutLinksBegin
(
RecurrentLayerGroupWithoutOutLinksBegin
(
name
=
name
,
name
=
name
,
in_links
=
map
(
map_in_links
,
in_links
),
in_links
=
map
(
lambda
x
:
x
.
name
,
in_links
),
seq_reversed
=
reverse
,
seq_reversed
=
reverse
)
target_inlinkname
=
targetInlinkName
)
in_args
=
[]
in_args
=
[]
has_LayerOutput
=
False
has_LayerOutput
=
False
for
each_input
in
input
:
for
each_input
in
input
:
...
@@ -3476,21 +3437,13 @@ def recurrent_group(step,
...
@@ -3476,21 +3437,13 @@ def recurrent_group(step,
if
isinstance
(
each_input
,
LayerOutput
):
if
isinstance
(
each_input
,
LayerOutput
):
in_args
.
append
(
each_input
)
in_args
.
append
(
each_input
)
has_LayerOutput
=
True
has_LayerOutput
=
True
elif
isinstance
(
each_input
,
SubsequenceInput
):
else
:
# StaticInput
in_args
.
append
(
each_input
.
input
)
has_LayerOutput
=
True
else
:
mem_name
=
"__%s_memory__"
%
each_input
.
input
.
name
mem_name
=
"__%s_memory__"
%
each_input
.
input
.
name
mem
=
memory
(
mem
=
memory
(
name
=
mem_name
,
name
=
None
,
is_seq
=
each_input
.
is_seq
,
size
=
each_input
.
input
.
size
,
size
=
each_input
.
input
.
size
,
boot_layer
=
each_input
.
input
)
boot_layer
=
each_input
.
input
)
with
mixed_layer
(
mem
.
set_input
(
mem
)
name
=
mem_name
,
size
=
each_input
.
input
.
size
,
act
=
IdentityActivation
())
as
mix
:
mix
+=
identity_projection
(
mem
)
in_args
.
append
(
mem
)
in_args
.
append
(
mem
)
assert
(
is_generating
!=
has_LayerOutput
)
assert
(
is_generating
!=
has_LayerOutput
)
...
@@ -3503,10 +3456,7 @@ def recurrent_group(step,
...
@@ -3503,10 +3456,7 @@ def recurrent_group(step,
for
ot
in
layer_outs
:
for
ot
in
layer_outs
:
assert
isinstance
(
ot
,
LayerOutput
)
assert
isinstance
(
ot
,
LayerOutput
)
ot
.
reverse
=
reverse
ot
.
reverse
=
reverse
if
contains_sub_seq
[
0
]:
RecurrentLayerGroupSetOutLink
(
ot
.
name
)
RecurrentLayerGroupSetOutLink
(
Link
(
ot
.
name
,
has_subseq
=
True
))
else
:
RecurrentLayerGroupSetOutLink
(
ot
.
name
)
RecurrentLayerGroupEnd
(
name
=
name
)
RecurrentLayerGroupEnd
(
name
=
name
)
...
@@ -5608,13 +5558,13 @@ def row_conv_layer(input,
...
@@ -5608,13 +5558,13 @@ def row_conv_layer(input,
to deploy in an online and low-latency setting. The lookahead convolution
to deploy in an online and low-latency setting. The lookahead convolution
incorporates information from future subsequences in a computationally
incorporates information from future subsequences in a computationally
efficient manner to improve unidirectional recurrent neural networks.
efficient manner to improve unidirectional recurrent neural networks.
The connection of row convolution is different from the 1D sequence
The connection of row convolution is different from the 1D sequence
convolution. Assumed that, the future context-length is k, that is to say,
convolution. Assumed that, the future context-length is k, that is to say,
it can get the output at timestep t by using the input feature from t-th
it can get the output at timestep t by using the input feature from t-th
timestep to (t+k+1)-th timestep. Assumed that the hidden dim of input
timestep to (t+k+1)-th timestep. Assumed that the hidden dim of input
activations are d, the activations r_t for the new layer at time-step t are:
activations are d, the activations r_t for the new layer at time-step t are:
.. math::
.. math::
r_{t,i} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}}
r_{t,i} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}}
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
浏览文件 @
09f34c4b
...
@@ -256,19 +256,15 @@ sub_models {
...
@@ -256,19 +256,15 @@ sub_models {
memories {
memories {
layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
link_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
link_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
is_sequence: false
}
}
in_links {
in_links {
layer_name: "__simple_gru_0___transform"
layer_name: "__simple_gru_0___transform"
link_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
link_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
has_subseq: false
}
}
out_links {
out_links {
layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
link_name: "__simple_gru_0__"
link_name: "__simple_gru_0__"
has_subseq: false
}
}
target_inlinkid: -1
}
}
sub_models {
sub_models {
name: "__simple_gru_1___recurrent_group"
name: "__simple_gru_1___recurrent_group"
...
@@ -280,18 +276,14 @@ sub_models {
...
@@ -280,18 +276,14 @@ sub_models {
memories {
memories {
layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
link_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
link_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
is_sequence: false
}
}
in_links {
in_links {
layer_name: "__simple_gru_1___transform"
layer_name: "__simple_gru_1___transform"
link_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
link_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
has_subseq: false
}
}
out_links {
out_links {
layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
link_name: "__simple_gru_1__"
link_name: "__simple_gru_1__"
has_subseq: false
}
}
target_inlinkid: -1
}
}
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
浏览文件 @
09f34c4b
...
@@ -341,24 +341,19 @@ sub_models {
...
@@ -341,24 +341,19 @@ sub_models {
memories {
memories {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
}
memories {
memories {
layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
}
in_links {
in_links {
layer_name: "__mixed_0__"
layer_name: "__mixed_0__"
link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
has_subseq: false
}
}
out_links {
out_links {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__"
link_name: "__lstm_group_0__"
has_subseq: false
}
}
target_inlinkid: -1
}
}
sub_models {
sub_models {
name: "__lstm_group_1___recurrent_group"
name: "__lstm_group_1___recurrent_group"
...
@@ -373,23 +368,18 @@ sub_models {
...
@@ -373,23 +368,18 @@ sub_models {
memories {
memories {
layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group"
is_sequence: false
}
}
memories {
memories {
layer_name: "__lstm_group_1___state@__lstm_group_1___recurrent_group"
layer_name: "__lstm_group_1___state@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group"
is_sequence: false
}
}
in_links {
in_links {
layer_name: "__mixed_1__"
layer_name: "__mixed_1__"
link_name: "__mixed_1__@__lstm_group_1___recurrent_group"
link_name: "__mixed_1__@__lstm_group_1___recurrent_group"
has_subseq: false
}
}
out_links {
out_links {
layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1__"
link_name: "__lstm_group_1__"
has_subseq: false
}
}
target_inlinkid: -1
}
}
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
浏览文件 @
09f34c4b
...
@@ -155,7 +155,7 @@ layers {
...
@@ -155,7 +155,7 @@ layers {
}
}
layers {
layers {
name: "sub_seq_input@__recurrent_group_2__"
name: "sub_seq_input@__recurrent_group_2__"
type: "s
equence_s
catter_agent"
type: "scatter_agent"
size: 100
size: 100
active_type: ""
active_type: ""
}
}
...
@@ -182,7 +182,7 @@ layers {
...
@@ -182,7 +182,7 @@ layers {
}
}
layers {
layers {
name: "rnn_subseq_forward"
name: "rnn_subseq_forward"
type: "
sequence_
gather_agent"
type: "gather_agent"
size: 200
size: 200
active_type: ""
active_type: ""
}
}
...
@@ -618,19 +618,15 @@ sub_models {
...
@@ -618,19 +618,15 @@ sub_models {
memories {
memories {
layer_name: "rnn_forward@__recurrent_group_0__"
layer_name: "rnn_forward@__recurrent_group_0__"
link_name: "rnn_forward+delay1@__recurrent_group_0__"
link_name: "rnn_forward+delay1@__recurrent_group_0__"
is_sequence: false
}
}
in_links {
in_links {
layer_name: "seq_input"
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_0__"
link_name: "seq_input@__recurrent_group_0__"
has_subseq: false
}
}
out_links {
out_links {
layer_name: "rnn_forward@__recurrent_group_0__"
layer_name: "rnn_forward@__recurrent_group_0__"
link_name: "rnn_forward"
link_name: "rnn_forward"
has_subseq: false
}
}
target_inlinkid: -1
}
}
sub_models {
sub_models {
name: "__recurrent_group_1__"
name: "__recurrent_group_1__"
...
@@ -642,19 +638,15 @@ sub_models {
...
@@ -642,19 +638,15 @@ sub_models {
memories {
memories {
layer_name: "rnn_back@__recurrent_group_1__"
layer_name: "rnn_back@__recurrent_group_1__"
link_name: "rnn_back+delay1@__recurrent_group_1__"
link_name: "rnn_back+delay1@__recurrent_group_1__"
is_sequence: false
}
}
in_links {
in_links {
layer_name: "seq_input"
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_1__"
link_name: "seq_input@__recurrent_group_1__"
has_subseq: false
}
}
out_links {
out_links {
layer_name: "rnn_back@__recurrent_group_1__"
layer_name: "rnn_back@__recurrent_group_1__"
link_name: "rnn_back"
link_name: "rnn_back"
has_subseq: false
}
}
target_inlinkid: -1
}
}
sub_models {
sub_models {
name: "__recurrent_group_2__"
name: "__recurrent_group_2__"
...
@@ -666,19 +658,15 @@ sub_models {
...
@@ -666,19 +658,15 @@ sub_models {
memories {
memories {
layer_name: "rnn_subseq_forward@__recurrent_group_2__"
layer_name: "rnn_subseq_forward@__recurrent_group_2__"
link_name: "rnn_subseq_forward+delay1@__recurrent_group_2__"
link_name: "rnn_subseq_forward+delay1@__recurrent_group_2__"
is_sequence: false
}
}
in_links {
in_links {
layer_name: "sub_seq_input"
layer_name: "sub_seq_input"
link_name: "sub_seq_input@__recurrent_group_2__"
link_name: "sub_seq_input@__recurrent_group_2__"
has_subseq: true
}
}
out_links {
out_links {
layer_name: "rnn_subseq_forward@__recurrent_group_2__"
layer_name: "rnn_subseq_forward@__recurrent_group_2__"
link_name: "rnn_subseq_forward"
link_name: "rnn_subseq_forward"
has_subseq: true
}
}
target_inlinkid: -1
}
}
sub_models {
sub_models {
name: "__lstm_group_0___recurrent_group"
name: "__lstm_group_0___recurrent_group"
...
@@ -693,24 +681,19 @@ sub_models {
...
@@ -693,24 +681,19 @@ sub_models {
memories {
memories {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
}
memories {
memories {
layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
}
in_links {
in_links {
layer_name: "__mixed_0__"
layer_name: "__mixed_0__"
link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
has_subseq: false
}
}
out_links {
out_links {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__"
link_name: "__lstm_group_0__"
has_subseq: false
}
}
target_inlinkid: -1
}
}
sub_models {
sub_models {
name: "__gru_group_0___recurrent_group"
name: "__gru_group_0___recurrent_group"
...
@@ -722,19 +705,15 @@ sub_models {
...
@@ -722,19 +705,15 @@ sub_models {
memories {
memories {
layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
link_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group"
link_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group"
is_sequence: false
}
}
in_links {
in_links {
layer_name: "__mixed_1__"
layer_name: "__mixed_1__"
link_name: "__mixed_1__@__gru_group_0___recurrent_group"
link_name: "__mixed_1__@__gru_group_0___recurrent_group"
has_subseq: false
}
}
out_links {
out_links {
layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
link_name: "__gru_group_0__"
link_name: "__gru_group_0__"
has_subseq: false
}
}
target_inlinkid: -1
}
}
sub_models {
sub_models {
name: "__recurrent_group_3__"
name: "__recurrent_group_3__"
...
@@ -746,18 +725,14 @@ sub_models {
...
@@ -746,18 +725,14 @@ sub_models {
memories {
memories {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__memory_6__@__recurrent_group_3__"
link_name: "__memory_6__@__recurrent_group_3__"
is_sequence: false
}
}
in_links {
in_links {
layer_name: "seq_input"
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_3__"
link_name: "seq_input@__recurrent_group_3__"
has_subseq: false
}
}
out_links {
out_links {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__fc_layer_0__"
link_name: "__fc_layer_0__"
has_subseq: false
}
}
target_inlinkid: -1
}
}
python/paddle/v2/layer.py
浏览文件 @
09f34c4b
...
@@ -260,7 +260,7 @@ def parse_network(output_layers, extra_layers=None):
...
@@ -260,7 +260,7 @@ def parse_network(output_layers, extra_layers=None):
else
:
else
:
extra_layers
=
[]
extra_layers
=
[]
layer_names
=
__get_used_layers__
(
output_layers
+
extra_layers
)
layer_names
=
__get_used_layers__
(
list
(
output_layers
)
+
list
(
extra_layers
)
)
submodel_names
=
__get_used_submodels__
(
layer_names
)
submodel_names
=
__get_used_submodels__
(
layer_names
)
submodel_names
.
add
(
'root'
)
submodel_names
.
add
(
'root'
)
evaluator_names
=
__get_used_evaluators__
(
layer_names
)
evaluator_names
=
__get_used_evaluators__
(
layer_names
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录