PaddlePaddle / Paddle — commit 09f34c4b

Authored on Jun 19, 2017 by emailweixu; committed via GitHub on Jun 19, 2017.

Merge pull request #2479 from emailweixu/mixed_input_rnn

RecurrentGroup with mixed input sequence types

Parents: 17fe8322, ef61288f
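In config terms, the change lets one recurrent_group take in-links of different sequence types: sub-sequence, sequence, and (via StaticInput) non-sequence data. A minimal sketch, abridged from the sequence_rnn_mixed_inputs.py test config added in this commit; layer names and sizes here are illustrative, not part of the commit:

    from paddle.trainer_config_helpers import *

    hidden = 2
    data1 = data_layer(name="word1", size=10)  # sub-sequence ids
    data2 = data_layer(name="word2", size=10)  # sequence ids
    label = data_layer(name="label", size=2)   # one value per sample


    def outer_step(subseq, seq, nonseq):
        # Each outer step sees one sub-sequence of data1, one element of
        # data2, and (through the StaticInput memory) the per-sample label.
        def inner_step(w1, w2, lbl):
            mem = memory(name="inner_state", size=hidden)
            embs = [embedding_layer(input=x, size=hidden) for x in (w1, w2, lbl)]
            return fc_layer(input=embs + [mem], size=hidden,
                            act=TanhActivation(), name="inner_state")

        # The inner group's in-links again mix sequence types.
        inner = recurrent_group(step=inner_step, name="inner",
                                input=[subseq, StaticInput(seq), nonseq])
        return last_seq(input=inner)


    out = recurrent_group(name="outer", step=outer_step,
                          input=[data1, data2, StaticInput(label)])

Before this change, all in-links of a recurrent layer group had to share one sequence type; the config parser asserted exactly that (see the config_parser.py diff below, where the assertion is removed).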
Showing 20 changed files with 662 additions and 484 deletions (+662 −484):

  paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp            +320  -177
  paddle/gserver/gradientmachines/RecurrentGradientMachine.h               +44   -11
  paddle/gserver/layers/AgentLayer.cpp                                     +42   -53
  paddle/gserver/layers/AgentLayer.h                                       +21   -63
  paddle/gserver/layers/SequencePoolLayer.cpp                               +3    -0
  paddle/gserver/tests/rnn_data_provider.py                                +19    -0
  paddle/gserver/tests/sequence_nest_rnn_multi_input.conf                   +2    -2
  paddle/gserver/tests/sequence_rnn_matched_inputs.py                      +85    -0
  paddle/gserver/tests/sequence_rnn_mixed_inputs.py                        +79    -0
  paddle/gserver/tests/sequence_rnn_multi_input.conf                        +1    -1
  paddle/gserver/tests/test_RecurrentGradientMachine.cpp                    +9    -0
  paddle/math/Vector.cpp                                                    +4    -3
  paddle/parameter/Argument.h                                               +1    -0
  paddle/trainer/tests/test_recurrent_machine_generation.cpp                +2    -0
  python/paddle/trainer/config_parser.py                                    +7   -58
  python/paddle/trainer_config_helpers/layers.py                           +20   -70
  python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr      +0   -8
  python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr     +0  -10
  python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr  +2  -27
  python/paddle/v2/layer.py                                                 +1    -1
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp  (+320 -177)

(This diff is collapsed on the page; its contents are not shown here.)
paddle/gserver/gradientmachines/RecurrentGradientMachine.h  (+44 -11)

@@ -284,6 +284,16 @@ public:
   }
 
 protected:
+  std::vector<Argument::SeqInfo> commonSeqInfo_;
+  ICpuGpuVectorPtr sequenceStartPositions_;
+  void calcSequenceStartPositions();
+  void checkInputConsistency(int inlinkId,
+                             const std::vector<Argument::SeqInfo>& seqInfo);
+  void reorganizeInput(PassType passType);
+  void reorganizeOutput(PassType passType);
+  void connectFrames(PassType passType);
+  void calcNumSequencesAtEachStep();
+
   void resizeOrCreateFrames(int numFrames);
   void resizeBootFrame(int numSequences);

@@ -295,8 +305,7 @@ protected:
     std::string linkName;
     LayerPtr inLayer;
     std::vector<LayerPtr> agents;  // Scatter Agents to reform batch input
-    bool hasSubseq;
     Argument outArg;  // scatter output argument
   };
   std::vector<InFrameLine> inFrameLines_;

@@ -318,7 +327,6 @@ protected:
     std::vector<LayerPtr> agents;
     std::vector<LayerPtr> scatterAgents;  // scatter agent used by beam search
     Argument outArg;  // scatter output argument
-    bool is_sequence;
     // Different memoryFrameLine have different element as follows
     IVectorPtr allIds;  // scattered id of realLayer
     ICpuGpuVectorPtr

@@ -330,22 +338,27 @@ protected:
   // and all outFrameLines(outlinks) share the info with one inFrameLine,
   // which is assigned by targetInfoInlinkId_.
   struct Info {
-    IVectorPtr allIds;         // scattered id of realLayer
-    std::vector<int> idIndex;  // index of allIds
+    // The original positions in the original batch
+    IVectorPtr allIds;  // scattered id of realLayer [batchSize]
+
+    // index of allIds for each step [maxSequenceLength_]
+    // idIndex[i] is the total length of the first i sequences
+    std::vector<int> idIndex;
+
     ICpuGpuVectorPtr sequenceStartPositions;  // scattered sequenceStartPositions
     std::vector<int> seqStartPosIndex;        // index of sequenceStartPositions
   };
-  std::vector<Info> info_;
+  std::vector<Info> info_;  // for input
 
   // numSeqs_[i] is the number sequences which is longer than i (for sequence
   // data) or has more than i subsequences (for subsequence data)
+  // Equivalently, numSeqs_[i] is the number of sequences at step i;
   std::vector<int> numSeqs_;
 
   std::vector<std::vector<Argument::SeqInfo>> seqInfos_;
 
-  // the id of inlink which share info with outlinks
-  int targetInfoInlinkId_;
+  void checkOutputConsistency(OutFrameLine& outFrameLine);
 
   /* create scattered id infomation for all realLayer of inFrameLines one time.
    * If hasSubseq, will also create scattered sequenceStartPositions infomation

@@ -354,6 +367,28 @@ protected:
   void createInFrameInfo(int inlinks_id, const Argument& input,
                          PassType passType);
+  void createInFrameInfo_nonseq(int inlinks_id, const Argument& input,
+                                PassType passType);
+  void createInFrameInfo_seq(int inlinks_id, const Argument& input,
+                             PassType passType);
+  void createInFrameInfo_subseq(int inlinks_id, const Argument& input,
+                                PassType passType);
+
+  void createOutFrameInfo(OutFrameLine& outFrameLine, Info& info,
+                          ICpuGpuVectorPtr& sequenceStartPositions,
+                          ICpuGpuVectorPtr& subSequenceStartPositions);
+  void createOutFrameInfo_seq(OutFrameLine& outFrameLine, Info& info,
+                              ICpuGpuVectorPtr& sequenceStartPositions,
+                              ICpuGpuVectorPtr& subSequenceStartPositions);
+  void createOutFrameInfo_subseq(OutFrameLine& outFrameLine, Info& info,
+                                 ICpuGpuVectorPtr& sequenceStartPositions,
+                                 ICpuGpuVectorPtr& subSequenceStartPositions);
 
   void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
                              PassType passType);

@@ -386,9 +421,7 @@ protected:
   NeuralNetwork* rootNetwork_;
   bool reversed_;
 
-  // if hasSubseq: max number of sentences(subseq)in batchsize samples
-  // else: max number of tokens in batchsize samples(sentences)
-  int maxSequenceLength_;
+  int maxSequenceLength_;  // Max top-level length
 
   bool useGpu_;
   bool stopBeamSearch_;
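Note the shape of the new interface: one shared commonSeqInfo_ plus per-type createInFrameInfo_nonseq/_seq/_subseq and createOutFrameInfo_seq/_subseq builders. Each in-link is now scattered according to its own sequence type, whereas the old code required every in-link of a RecurrentLayerGroup to share one type (the config_parser.py hunk below deletes exactly that assertion).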
paddle/gserver/layers/AgentLayer.cpp  (+42 -53)

@@ -36,14 +36,23 @@ void AgentLayer::forward(PassType passType) {
   Layer::forward(passType);
 
   Argument& realOutput = realLayer_->getOutput();
-  int realHeight = realOutput.getBatchSize();
-  CHECK_LE(numSamples_, realHeight);
+  int realNumSequences = realOutput.getNumSequences();
+  CHECK_LE(numSamples_, realNumSequences);
 
   // get Arguments from real layers
-  if (numSamples_ > 0 && numSamples_ < realHeight) {
-    if (realOutput.ids) {
-      output_.ids =
-          IVector::create(realOutput.ids->getData(), numSamples_, useGpu_);
+  if (numSamples_ > 0 && numSamples_ < realNumSequences) {
+    if (realOutput.hasSeq()) {
+      int numRows =
+          realOutput.sequenceStartPositions->getData(false)[numSamples_];
+      output_.subArgFrom(realOutput,
+                         /* offset */ 0,
+                         numRows,
+                         getSize(),
+                         useGpu_,
+                         /* trans */ false,
+                         /* seqFlag */ true,
+                         /* seqStart */ 0,
+                         /* seqSize */ numSamples_ + 1);
     } else {
       output_.subArgFrom(
           realOutput, /* offset */ 0, numSamples_, getSize(), useGpu_);

@@ -53,34 +62,6 @@ void AgentLayer::forward(PassType passType) {
   }
 }
 
-void SequenceAgentLayer::forward(PassType passType) {
-  Layer::forward(passType);
-
-  Argument& realOutput = realLayer_->getOutput();
-  int realNumSequences = realOutput.getNumSequences();
-  CHECK_LE(numSamples_, realNumSequences);
-
-  // get Arguments from real layers
-  if (numSamples_ > 0 && numSamples_ < realNumSequences) {
-    int numRows =
-        realOutput.sequenceStartPositions->getData(false)[numSamples_];
-    CHECK(!realOutput.ids) << "Not supported";
-    output_.subArgFrom(realOutput,
-                       /* offset */ 0,
-                       numRows,
-                       getSize(),
-                       useGpu_,
-                       /* trans */ false,
-                       /* seqFlag */ true,
-                       /* seqStart */ 0,
-                       /* seqSize */ numSamples_ + 1);
-  } else {
-    output_ = realOutput;
-  }
-}
-
-REGISTER_LAYER(sequence_agent, SequenceAgentLayer);
-
 bool GatherAgentLayer::init(const LayerMap& layerMap,
                             const ParameterMap& parameterMap) {
   CHECK_EQ(config_.inputs_size(), 0);

@@ -91,18 +72,26 @@ bool GatherAgentLayer::init(const LayerMap& layerMap,
   return true;
 }
 
-void GatherAgentLayer::copyIdAndSequenceInfo(const Argument& input,
-                                             const IVectorPtr& ids,
-                                             const std::vector<int>& idIndex) {
-  output_.sequenceStartPositions = input.sequenceStartPositions;
-  output_.subSequenceStartPositions = input.subSequenceStartPositions;
-  realLayers_.clear();
+void GatherAgentLayer::copyIdAndSequenceInfo(
+    ICpuGpuVectorPtr sequenceStartPositions,
+    ICpuGpuVectorPtr subSequenceStartPositions,
+    const IVectorPtr& ids,
+    const std::vector<int>& idIndex) {
+  output_.sequenceStartPositions = sequenceStartPositions;
+  output_.subSequenceStartPositions = subSequenceStartPositions;
   allIds_ = ids;
   idIndex_ = idIndex;
 }
 
 void GatherAgentLayer::forward(PassType passType) {
   Layer::forward(passType);
+  forwardIds(passType);
+  forwardValue(passType);
+}
+
+void GatherAgentLayer::forwardValue(PassType passType) {
   MatrixPtr valueReal = realLayers_[0]->getOutputValue();
+  if (!valueReal) return;
 
   int height = allIds_->getSize();
   int width = this->getSize();

@@ -147,7 +136,9 @@ void ScatterAgentLayer::forward(PassType passType) {
   CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
 
   int width = this->getSize();
-  if (realOutArg_.value || realOutArg_.ids) {
+  if (realOutArg_.hasSeq()) {
+    forwardSequence(passType);
+  } else if (realOutArg_.value || realOutArg_.ids) {
     output_.subArgFrom(
         realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_);
   } else {  // used in generation

@@ -174,7 +165,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
   if (realGrad) {
     // for agent in inFrameLines and memoryFrameLines,
     // only first scatterAgentLayer should do addToRows in backward
-    if (idIndex_ == 0) {
+    if (handleBackward_) {
       outputGrad->addToRows(*realGrad, *ids_);
     }
   }

@@ -183,12 +174,14 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
 REGISTER_LAYER(gather_agent, GatherAgentLayer);
 REGISTER_LAYER(scatter_agent, ScatterAgentLayer);
 
-void SequenceGatherAgentLayer::forward(PassType passType) {
-  Layer::forward(passType);
+void GatherAgentLayer::forwardIds(PassType passType) {
   int height = 0;
-  int* starts = output_.subSequenceStartPositions->getMutableData(false);
   IVectorPtr idReal = realLayers_[0]->getOutputLabel();
-  if (idReal) {
+  if (!idReal) return;
+
+  if (output_.subSequenceStartPositions) {
+    int* starts = output_.subSequenceStartPositions->getMutableData(false);
     // Gather generator.idsVec
     // if is beam search generation result. Get first result.
     if (idReal->getData()[idReal->getSize() - 1] == -1) {

@@ -212,13 +205,11 @@ void SequenceGatherAgentLayer::forward(PassType passType) {
           ->copyFrom(*realLayers_[i]->getOutputLabel());
     }
   } else {
-    // Gather output.value, same as GatherAgentLayer
-    CHECK(output_.subSequenceStartPositions);
-    GatherAgentLayer::forward(passType);
+    LOG(FATAL) << "Not implemented";
   }
 }
 
-void SequenceScatterAgentLayer::forward(PassType passType) {
+void ScatterAgentLayer::forwardSequence(PassType passType) {
   Layer::forward(passType);
   CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());

@@ -241,6 +232,7 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
                        /* seqStart */ seqStartPosIndex_,
                        /* seqSize */ numSequences_);
   } else {
+    // Putting the generation logic here is really an ugly hack!
     // used in generation
     int height = 0;
     size_t numSequences = ids_->getSize();

@@ -284,7 +276,4 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
   }
 }
 
-REGISTER_LAYER(sequence_gather_agent, SequenceGatherAgentLayer);
-REGISTER_LAYER(sequence_scatter_agent, SequenceScatterAgentLayer);
-
 }  // namespace paddle
paddle/gserver/layers/AgentLayer.h  (+21 -63)

@@ -49,18 +49,6 @@ public:
   void backward(const UpdateCallback& callback = nullptr) override {}
 };
 
-/**
- * like AgentLayer, but use first *numSamples* sequences
- */
-class SequenceAgentLayer : public AgentLayer {
-public:
-  explicit SequenceAgentLayer(const LayerConfig& config) : AgentLayer(config) {}
-  ~SequenceAgentLayer() {}
-
-  void forward(PassType passType) override;
-  void backward(const UpdateCallback& callback = nullptr) override {}
-};
-
 /**
  * Like AgentLayer, but it can gather many real layers. Each real
  * layer give a few rows of a sequence, after gather all real layers,

@@ -83,7 +71,10 @@ public:
             const ParameterMap& parameterMap) override;
 
   // call before addRealLayer
-  void copyIdAndSequenceInfo(const Argument& input,
+  void clearRealLayers() { realLayers_.clear(); }
+
+  void copyIdAndSequenceInfo(ICpuGpuVectorPtr sequenceStartPositions,
+                             ICpuGpuVectorPtr subSequenceStartPositions,
                              const IVectorPtr& allIds,
                              const std::vector<int>& idIndex);

@@ -92,24 +83,8 @@ public:
   void forward(PassType passType) override;
   void backward(const UpdateCallback& callback) override;
-};
-
-/**
- * Like GatherAgentLayer, but select a few sequence in real layer.
- * *ids* in addRealLayer() are the ids of selected sequence.
- * It's used to reorder sequence output.
- */
-class SequenceGatherAgentLayer : public GatherAgentLayer {
-public:
-  explicit SequenceGatherAgentLayer(const LayerConfig& config)
-      : GatherAgentLayer(config) {}
-  virtual ~SequenceGatherAgentLayer() {}
-
-  void forward(PassType passType);
-  void backward(const UpdateCallback& callback) {
-    // same as GatherAgentLayer
-    GatherAgentLayer::backward(callback);
-  }
+
+  void forwardValue(PassType passType);
+  void forwardIds(PassType passType);
 };

@@ -129,6 +104,11 @@ protected:
   int idSize_;
   int seqStartPosIndex_;
   int numSequences_;  // number of sequences in this scatterAgentLayer
+  bool handleBackward_;
+
+  // use to store expanded cpuStartPositions or subSequenceStartPositions
+  // of real layer.
+  ICpuGpuVectorPtr inputStartPos_;
 
 public:
   explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {}

@@ -147,19 +127,15 @@ public:
   * false(default) in ScatterAgentLayer, and
   * true in SequenceScatterAgentLayer.
   */
-  void setRealLayer(LayerPtr layer,
-                    const std::vector<int>& ids,
-                    bool copyId = false) {
+  void setRealLayer(LayerPtr layer, const std::vector<int>& ids) {
     realLayer_ = layer;
     IVector::resizeOrCreate(ids_, ids.size(), useGpu_);
     ids_->copyFrom(ids.data(), ids.size());
-    if (copyId) {
-      if (useGpu_) {
-        IVector::resizeOrCreate(cpuIds_, ids.size(), false);
-        cpuIds_->copyFrom(ids.data(), ids.size());
-      } else {
-        cpuIds_ = ids_;
-      }
+    if (useGpu_) {
+      IVector::resizeOrCreate(cpuIds_, ids.size(), false);
+      cpuIds_->copyFrom(ids.data(), ids.size());
+    } else {
+      cpuIds_ = ids_;
     }
   }

@@ -169,12 +145,14 @@ public:
                           const Argument& outArg,
                           const IVectorPtr& ids,
                           int idIndex,
-                          int idSize) {
+                          int idSize,
+                          bool handleBackward) {
     realLayer_ = layer;
     realOutArg_ = outArg;
     ids_ = ids;
     idIndex_ = idIndex;
     idSize_ = idSize;
+    handleBackward_ = handleBackward;
   }
 
   void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions,

@@ -187,28 +165,8 @@ public:
   void forward(PassType passType) override;
   void backward(const UpdateCallback& callback) override;
-};
-
-/**
- * Like ScatterAgentLayer, but select a few sequence in real layer.
- * *ids* in setRealLayer() or setRealLayerAndOutput() are the ids of
- * selected sequence. It's used to reorder sequence input.
- */
-class SequenceScatterAgentLayer : public ScatterAgentLayer {
-protected:
-  // use to store expanded cpuStartPositions or subSequenceStartPositions
-  // of real layer.
-  ICpuGpuVectorPtr inputStartPos_;
-
-public:
-  explicit SequenceScatterAgentLayer(const LayerConfig& config)
-      : ScatterAgentLayer(config) {}
-  virtual ~SequenceScatterAgentLayer() {}
-
-  void forward(PassType passType);
-  void backward(const UpdateCallback& callback) {
-    ScatterAgentLayer::backward(callback);
-  }
+
+  void forwardSequence(PassType passType);
 };
 
 }  // namespace paddle
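Net effect of the two AgentLayer files: the SequenceAgentLayer, SequenceGatherAgentLayer, and SequenceScatterAgentLayer subclasses disappear. Their base classes now branch at runtime on whether the real layer's output carries sequence information (via the new Argument::hasSeq() below), and whether a scatter agent applies gradients in backward is controlled by the explicit handleBackward_ flag rather than the old idIndex_ == 0 convention.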
paddle/gserver/layers/SequencePoolLayer.cpp  (+3 -0)

@@ -46,6 +46,9 @@ void SequencePoolLayer::forward(PassType passType) {
   Layer::forward(passType);
 
   const Argument& input = getInput(0);
+  CHECK(input.hasSeq() || input.hasSubseq())
+      << "Input should be a sequence or subsequence for layer " << getName();
+
   newBatchSize_ = type_ ? input.getNumSubSequences() : input.getNumSequences();
   size_t dim = getSize();
   // check
paddle/gserver/tests/rnn_data_provider.py  (+19 -0)

@@ -95,3 +95,22 @@ def process_unequalength_seq(settings, file_name):
         words1 = reduce(lambda x, y: x + y, d[0])
         words2 = reduce(lambda x, y: x + y, d[1])
         yield words1, words2, d[2]
+
+
+###########################################################
+data3 = [
+    [[[1, 2], [4, 5, 2]], [1, 2], 0],
+    [[[0, 2], [2, 5], [0, 1, 2]], [2, 3, 0], 1],
+]
+
+
+# Used for sequence_nest_mixed_inputs.conf
+@provider(
+    input_types=[
+        integer_value_sub_sequence(10),
+        integer_value_sequence(10),
+        integer_value(2)
+    ],
+    should_shuffle=False)
+def process_mixed(settings, file_name):
+    for d in data3:
+        yield d
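Each data3 sample lines up with the three declared input_types: d[0] is a nested list (one inner list per sub-sequence) for integer_value_sub_sequence(10), d[1] is a flat list of token ids for integer_value_sequence(10), and d[2] is a single class label for integer_value(2). A single sample therefore carries all three sequence types that the reworked RecurrentGroup consumes together.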
paddle/gserver/tests/sequence_nest_rnn_multi_input.conf  (+2 -2)

@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
 define_py_data_sources2(
     train_list='gserver/tests/Sequence/dummy.list',
     test_list=None,
     module='rnn_data_provider',
-    obj='process_subseq2')
+    obj='process_subseq')
 
 settings(batch_size=2, learning_rate=0.01)

@@ -57,7 +57,7 @@ def outer_step(wid, x):
     last = last_seq(input=inner_rnn_output, name="outer_rnn_state")
 
     # "return last" should also work. But currently RecurrentGradientMachine
     # does not handle it, and will report error: In hierachical RNN, all out
     # links should be from sequences now.
     return inner_rnn_output
paddle/gserver/tests/sequence_rnn_matched_inputs.py  (new file, mode 100644; +85 -0)

# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *

######################## data source ################################
define_py_data_sources2(
    train_list='gserver/tests/Sequence/dummy.list',
    test_list=None,
    module='rnn_data_provider',
    obj='process_mixed')

settings(batch_size=2, learning_rate=0.01)

######################## network configure ################################
dict_dim = 10
word_dim = 2
hidden_dim = 2
label_dim = 2

data1 = data_layer(name="word1", size=dict_dim)
data2 = data_layer(name="word2", size=dict_dim)
label = data_layer(name="label", size=label_dim)

encoding = embedding_layer(input=data2, size=word_dim)

subseq = embedding_layer(input=data1, size=word_dim)
seq = embedding_layer(input=data2, size=word_dim)
nonseq = embedding_layer(input=label, size=word_dim)


# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def outer_step(subseq, seq, nonseq, encoding):
    outer_mem = memory(name="outer_rnn_state", size=hidden_dim)

    def inner_step(subseq, seq, nonseq):
        inner_mem = memory(
            name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem)

        out = fc_layer(
            input=[subseq, seq, nonseq, inner_mem],
            size=hidden_dim,
            act=TanhActivation(),
            bias_attr=True,
            name='inner_rnn_state')
        return out

    decoder = recurrent_group(
        step=inner_step, name='inner', input=[subseq, seq, nonseq])
    last = last_seq(name="outer_rnn_state", input=decoder)
    context = simple_attention(
        encoded_sequence=encoding, encoded_proj=encoding, decoder_state=last)
    return context


out = recurrent_group(
    name="outer",
    step=outer_step,
    input=[
        subseq,
        expand_layer(
            seq, expand_as=subseq, expand_level=ExpandLevel.FROM_SEQUENCE),
        expand_layer(
            nonseq,
            expand_as=subseq,
            expand_level=ExpandLevel.FROM_NO_SEQUENCE),
        StaticInput(encoding)
    ])

rep = last_seq(input=out)
prob = fc_layer(
    size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True)

outputs(classification_cost(input=prob, label=label))
paddle/gserver/tests/sequence_rnn_mixed_inputs.py  (new file, mode 100644; +79 -0)

# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *

######################## data source ################################
define_py_data_sources2(
    train_list='gserver/tests/Sequence/dummy.list',
    test_list=None,
    module='rnn_data_provider',
    obj='process_mixed')

settings(batch_size=2, learning_rate=0.01)

######################## network configure ################################
dict_dim = 10
word_dim = 2
hidden_dim = 2
label_dim = 2

data1 = data_layer(name="word1", size=dict_dim)
data2 = data_layer(name="word2", size=dict_dim)
label = data_layer(name="label", size=label_dim)

encoding = embedding_layer(input=data2, size=word_dim)


# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def outer_step(subseq, seq, nonseq, encoding):
    outer_mem = memory(name="outer_rnn_state", size=hidden_dim)

    def inner_step(data1, data2, label):
        inner_mem = memory(
            name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem)

        subseq = embedding_layer(input=data1, size=word_dim)
        seq = embedding_layer(input=data2, size=word_dim)
        nonseq = embedding_layer(input=label, size=word_dim)

        print_layer(input=[data1, seq, label, inner_mem])
        out = fc_layer(
            input=[subseq, seq, nonseq, inner_mem],
            size=hidden_dim,
            act=TanhActivation(),
            bias_attr=True,
            name='inner_rnn_state')
        return out

    decoder = recurrent_group(
        step=inner_step,
        name='inner',
        input=[subseq, StaticInput(seq), nonseq])
    last = last_seq(name="outer_rnn_state", input=decoder)
    context = simple_attention(
        encoded_sequence=encoding, encoded_proj=encoding, decoder_state=last)
    return context


out = recurrent_group(
    name="outer",
    step=outer_step,
    input=[data1, data2, StaticInput(label), StaticInput(encoding)])

rep = last_seq(input=out)
prob = fc_layer(
    size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True)

outputs(classification_cost(input=prob, label=label))
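The two configs above are a matched pair: sequence_rnn_mixed_inputs.py feeds the sub-sequence, sequence, and non-sequence inputs directly into one hierarchy of recurrent_groups, while sequence_rnn_matched_inputs.py builds the same network after first using expand_layer to give every in-link the same nesting level. The new rnn_mixed_input unit test below runs both and requires their results to agree to within 1e-6.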
paddle/gserver/tests/sequence_rnn_multi_input.conf  (+1 -1)

@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
 define_py_data_sources2(
     train_list='gserver/tests/Sequence/dummy.list',
     test_list=None,
     module='rnn_data_provider',
-    obj='process_seq2')
+    obj='process_seq')
 
 settings(batch_size=2, learning_rate=0.01)
paddle/gserver/tests/test_RecurrentGradientMachine.cpp  (+9 -0)

@@ -155,6 +155,15 @@ TEST(RecurrentGradientMachine, rnn_multi_unequalength_input) {
   }
 }
 
+TEST(RecurrentGradientMachine, rnn_mixed_input) {
+  for (bool useGpu : {false, true}) {
+    test("gserver/tests/sequence_rnn_mixed_inputs.py",
+         "gserver/tests/sequence_rnn_matched_inputs.py",
+         1e-6,
+         useGpu);
+  }
+}
+
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
paddle/math/Vector.cpp  (+4 -3)

@@ -908,12 +908,13 @@ const T* CpuGpuVectorT<T>::getData(bool useGpu) const {
 // Operation will change data and need to reset sync_ & syncFlag_.
 #define MUTABLE_VECTOR_OP(OP, useGpu, args...) \
   do { \
-    setSync(useGpu); \
     if (useGpu) { \
       copyToGpu(); \
+      setSync(useGpu); \
       return gpuVectorT_->OP(args); \
     } else { \
       copyToCpu(); \
+      setSync(useGpu); \
       return cpuVectorT_->OP(args); \
     } \
   } while (0)
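Presumably setSync(useGpu) moves below copyToGpu()/copyToCpu() because those helpers dispatch on the current sync state; setting the flag first could make the copy see the data as already current on the target device and skip refreshing it. The two hunks below also drop the HPPL_STREAM_DEFAULT argument, switching to the stream-less copyFrom overload.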
@@ -1030,7 +1031,7 @@ void CpuGpuVectorT<T>::copyToCpu() {
     case DATA_AT_GPU:
       CHECK(gpuVectorT_);
       this->resizeOrCreate(gpuVectorT_->getSize(), false);
-      cpuVectorT_->copyFrom(*gpuVectorT_, HPPL_STREAM_DEFAULT);
+      cpuVectorT_->copyFrom(*gpuVectorT_);
       setSync(SYNCED);
       break;
     case DATA_AT_CPU:

@@ -1049,7 +1050,7 @@ void CpuGpuVectorT<T>::copyToGpu() {
     case DATA_AT_CPU:
       CHECK(cpuVectorT_);
       this->resizeOrCreate(cpuVectorT_->getSize(), true);
-      gpuVectorT_->copyFrom(*cpuVectorT_, HPPL_STREAM_DEFAULT);
+      gpuVectorT_->copyFrom(*cpuVectorT_);
       setSync(SYNCED);
       break;
     case DATA_AT_GPU:
paddle/parameter/Argument.h  (+1 -0)

@@ -149,6 +149,7 @@ struct Argument {
                        : getBatchSize();
   }
 
+  bool hasSeq() const { return sequenceStartPositions != nullptr; }
   bool hasSubseq() const { return subSequenceStartPositions != nullptr; }
 
   const int* getCpuStartPositions() const {
paddle/trainer/tests/test_recurrent_machine_generation.cpp  (+2 -0)

@@ -124,6 +124,8 @@ TEST(RecurrentGradientMachine, test_generation) {
                        bool beam_search) {
     FLAGS_config_args = beam_search ? "beam_search=1" : "beam_search=0";
     for (auto useGpu : useGpuConfs) {
+      LOG(INFO) << configFile << " useGpu=" << useGpu
+                << " beam_search=" << beam_search;
       testGeneration(configFile, useGpu, hasSubseq, expRetFile);
     }
   };
python/paddle/trainer/config_parser.py  (+7 -58)

@@ -328,53 +328,33 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
     SubModelBegin(name)
     g_current_submodel.is_recurrent_layer_group = True
     g_current_submodel.reversed = seq_reversed
-    g_current_submodel.target_inlinkid = -1
-    in_links_count = 0
     for linkid, link in enumerate(in_links):
         if isinstance(link, basestring):
             name = link
-            has_subseq = False
         else:
             name = link.link_name
-            has_subseq = link.has_subseq
-        # assign target_inlinkid according to target_inlinkname
-        if target_inlinkname == name:
-            g_current_submodel.target_inlinkid = linkid
-
-        if in_links_count == 0:
-            in_links_has_subseq = has_subseq
-        else:
-            config_assert(
-                in_links_has_subseq == has_subseq,
-                "The sequence type of in_links should be the same in RecurrentLayerGroup"
-            )
-        in_links_count += 1
         layer_name = MakeLayerNameInParentSubmodel(name)
         layer = g_layer_map[layer_name]
-        if has_subseq:
-            SequenceScatterAgentLayer(name=name, size=layer.size)
-        else:
-            ScatterAgentLayer(name=name, size=layer.size)
+        ScatterAgentLayer(name=name, size=layer.size)
         pair = g_current_submodel.in_links.add()
         pair.layer_name = layer_name
         pair.link_name = MakeLayerNameInSubmodel(name)
-        pair.has_subseq = has_subseq
 
 
 @config_func
 def RecurrentLayerGroupSetOutLink(link):
     if isinstance(link, basestring):
         name = link
-        has_subseq = False
     else:
         name = link.link_name
-        has_subseq = link.has_subseq
     layer_name = MakeLayerNameInParentSubmodel(name)
     pair = g_current_submodel.out_links.add()
     pair.layer_name = MakeLayerNameInSubmodel(name)
     pair.link_name = layer_name
-    pair.has_subseq = has_subseq
 
 
 def RecurrentLayerGroupSetGenerator(generator=None):

@@ -389,8 +369,7 @@ def RecurrentLayerGroupBegin(name,
                              generator=None,
                              target_inlinkname="",
                              seq_reversed=False):
-    RecurrentLayerGroupWithoutOutLinksBegin(name, in_links, seq_reversed,
-                                            target_inlinkname)
+    RecurrentLayerGroupWithoutOutLinksBegin(name, in_links, seq_reversed)
     for link in out_links:
         RecurrentLayerGroupSetOutLink(link)

@@ -425,8 +404,6 @@ def RecurrentLayerGroupEnd(name):
         agent_name = GetLayerBaseName(pair.link_name)
         if prev_submodel.HasField("generator"):
             DataLayer(name=agent_name, size=layer.size)
-        elif pair.has_subseq:
-            SequenceGatherAgentLayer(name=agent_name, size=layer.size)
         else:
             GatherAgentLayer(name=agent_name, size=layer.size)

@@ -2253,13 +2230,6 @@ class AgentLayer(LayerBase):
             name, 'agent', size, inputs=[], device=device)
 
 
-@config_layer('sequence_agent')
-class SequenceAgentLayer(LayerBase):
-    def __init__(self, name, size, device=None):
-        super(SequenceAgentLayer, self).__init__(
-            name, 'sequence_agent', size, inputs=[], device=device)
-
-
 @config_layer('gather_agent')
 class GatherAgentLayer(LayerBase):
     def __init__(self, name, size, device=None):

@@ -2274,20 +2244,6 @@ class ScatterAgentLayer(LayerBase):
             name, 'scatter_agent', size, inputs=[], device=device)
 
 
-@config_layer('sequence_gather_agent')
-class SequenceGatherAgentLayer(LayerBase):
-    def __init__(self, name, size, device=None):
-        super(SequenceGatherAgentLayer, self).__init__(
-            name, 'sequence_gather_agent', size, inputs=[], device=device)
-
-
-@config_layer('sequence_scatter_agent')
-class SequenceScatterAgentLayer(LayerBase):
-    def __init__(self, name, size, device=None):
-        super(SequenceScatterAgentLayer, self).__init__(
-            name, 'sequence_scatter_agent', size, inputs=[], device=device)
-
-
 @config_layer('multiplex')
 class MultiplexLayer(LayerBase):
     def __init__(self, name, inputs, size, device=None):

@@ -2303,12 +2259,12 @@ class MultiplexLayer(LayerBase):
 @config_func
-def Link(
-        name,
-        has_subseq=False, ):
+def Link(name, has_subseq=False):
+    """
+    Still keeping has_subseq for backward compatibility
+    """
     link_config = LinkConfig()
     link_config.link_name = name
     link_config.has_subseq = has_subseq
     return link_config

@@ -2341,20 +2297,13 @@ def Memory(name,
     config_assert(name is not None, "name needs cannot be None")
     memory_name = name + "+delay1"
     agent_name = memory_name
-    if is_sequence:
-        config_assert(
-            boot_layer is not None,
-            "there must be boot_layer in network when is_sequence = True")
-        agent_layer = SequenceAgentLayer(agent_name, size)
-    else:
-        agent_layer = AgentLayer(agent_name, size)
+    agent_layer = AgentLayer(agent_name, size)
     config_assert(g_current_submodel.is_recurrent_layer_group,
                   'Memory should be used in recurrent layer group only')
     memory = g_current_submodel.memories.add()
     if name is not None:
         memory.layer_name = MakeLayerNameInSubmodel(name)
     memory.link_name = MakeLayerNameInSubmodel(agent_name)
-    memory.is_sequence = is_sequence
     options = sum((boot_layer is not None, bool(boot_bias),
                    boot_with_const_id is not None))
     config_assert(
python/paddle/trainer_config_helpers/layers.py  (+20 -70)

@@ -311,18 +311,6 @@ class LayerOutput(object):
         self.outputs = outputs
         self.reverse = reverse
 
-    def __repr__(self):
-        """
-        Disable __repr__ for debug reason. Will be implemented when release
-        """
-        assert False, "this method should not be invoked"
-
-    def __str__(self):
-        """
-        Disable __str__ for debug reason. Will be implemented when release
-        """
-        assert False, "this method should not be invoked"
-
     def set_input(self, input):
         """
         Set the input for a memory layer. Can only be used for memory layer

@@ -2944,7 +2932,7 @@ def memory(name,
     :param memory_name: the name of the memory.
                         It is ignored when name is provided.
     :type memory_name: basestring
-    :param is_seq: is sequence for boot_layer
+    :param is_seq: DEPRECATED. is sequence for boot_layer
     :type is_seq: bool
     :param boot_layer: boot layer of memory.
     :type boot_layer: LayerOutput|None

@@ -2971,7 +2959,6 @@ def memory(name,
     memory_name = Memory(
         name,
         size,
-        is_sequence=is_seq,
         boot_layer=boot_layer.name if boot_layer is not None else None,
         boot_bias=boot_bias,
         boot_bias_active_type=boot_bias_active_type.name,

@@ -3318,19 +3305,21 @@ class StaticInput(object):
     """
     StaticInput is only used in recurrent_group which defines a read-only memory
     that can be a sequence or non-sequence.
+    :param size: DEPRECATED
+    :param is_seq: DEPRECATED
     """
 
     def __init__(self, input, is_seq=False, size=None):
         assert isinstance(input, LayerOutput)
         self.input = input
         self.is_seq = is_seq
-        assert input.size is not None or size is not None
+        assert input.size is not None
         if size is not None:
-            input.size = size
+            assert input.size == size
 
 
-class SubsequenceInput(object):
+def SubsequenceInput(input):
     """
+    DEPRECATED.
     Input sequence has sub-sequence, used in recurrent_group.
     The example usage is:

@@ -3339,11 +3328,7 @@ class SubsequenceInput(object):
        input = SubsequenceInput(layer)
     """
-
-    def __init__(self, input):
-        assert isinstance(input, LayerOutput)
-        assert input.size is not None
-        self.input = input
+    return input
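Since SubsequenceInput is now a plain function that returns its argument, old configs that wrapped a layer keep working. A sketch (the layer name is hypothetical):

    layer = data_layer(name="sub_seq_input", size=100)  # hypothetical layer
    assert SubsequenceInput(layer) is layer             # now a no-op pass-through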
 @wrap_name_default("recurrent_group")

@@ -3407,7 +3392,8 @@ def recurrent_group(step,
                     input sequence in a reverse order.
     :type reverse: bool
 
-    :param targetInlink: the input layer which share info with layer group's output
+    :param targetInlink: DEPRECATED.
+                         The input layer which share info with layer group's output
 
                          Param input specifies multiple input layers. For
                          SubsequenceInput inputs, config should assign one input

@@ -3429,46 +3415,21 @@ def recurrent_group(step,
     model_type('recurrent_nn')
 
     def is_single_input(x):
-        return isinstance(x, LayerOutput) or isinstance(x, StaticInput) \
-                or isinstance(x, SubsequenceInput)
+        return isinstance(x, LayerOutput) or isinstance(x, StaticInput)
 
     if is_single_input(input):
         input = [input]
     assert isinstance(input, collections.Sequence)
 
     def is_in_links(x):
-        return isinstance(x, LayerOutput) or isinstance(x, SubsequenceInput)
+        return isinstance(x, LayerOutput)
 
     in_links = filter(is_in_links, input)
 
-    def targetInlink_in_inlinks():
-        for inlink in in_links:
-            if isinstance(inlink, SubsequenceInput):
-                if targetInlink == inlink.input:
-                    return True
-            elif targetInlink == inlink:
-                return True
-        return False
-
-    assert (targetInlink == None or targetInlink_in_inlinks())
-    targetInlinkName = None if targetInlink == None \
-        else targetInlink.name if isinstance(targetInlink, LayerOutput) \
-        else targetInlink.input.name
-
-    contains_sub_seq = [False]
-
-    def map_in_links(x):
-        if isinstance(x, SubsequenceInput):
-            contains_sub_seq[0] = True
-            return Link(name=x.input.name, has_subseq=True)
-        else:
-            return x.name
-
     RecurrentLayerGroupWithoutOutLinksBegin(
         name=name,
-        in_links=map(map_in_links, in_links),
-        seq_reversed=reverse,
-        target_inlinkname=targetInlinkName)
+        in_links=map(lambda x: x.name, in_links),
+        seq_reversed=reverse)
     in_args = []
     has_LayerOutput = False
     for each_input in input:

@@ -3476,21 +3437,13 @@ def recurrent_group(step,
         if isinstance(each_input, LayerOutput):
             in_args.append(each_input)
             has_LayerOutput = True
-        elif isinstance(each_input, SubsequenceInput):
-            in_args.append(each_input.input)
-            has_LayerOutput = True
-        else:
+        else:
+            # StaticInput
             mem_name = "__%s_memory__" % each_input.input.name
             mem = memory(
-                name=mem_name,
-                is_seq=each_input.is_seq,
+                name=None,
                 size=each_input.input.size,
                 boot_layer=each_input.input)
-            with mixed_layer(
-                    name=mem_name,
-                    size=each_input.input.size,
-                    act=IdentityActivation()) as mix:
-                mix += identity_projection(mem)
+            mem.set_input(mem)
             in_args.append(mem)
 
     assert (is_generating != has_LayerOutput)

@@ -3503,10 +3456,7 @@ def recurrent_group(step,
     for ot in layer_outs:
         assert isinstance(ot, LayerOutput)
         ot.reverse = reverse
-        if contains_sub_seq[0]:
-            RecurrentLayerGroupSetOutLink(Link(ot.name, has_subseq=True))
-        else:
-            RecurrentLayerGroupSetOutLink(ot.name)
+        RecurrentLayerGroupSetOutLink(ot.name)
 
     RecurrentLayerGroupEnd(name=name)

@@ -5608,13 +5558,13 @@ def row_conv_layer(input,
     to deploy in an online and low-latency setting. The lookahead convolution
     incorporates information from future subsequences in a computationally
     efficient manner to improve unidirectional recurrent neural networks.
 
     The connection of row convolution is different form the 1D sequence
     convolution. Assumed that, the future context-length is k, that is to say,
     it can get the output at timestep t by using the the input feature from t-th
     timestep to (t+k+1)-th timestep. Assumed that the hidden dim of input
     activations are d, the activations r_t for the new layer at time-step t are:
 
     .. math::
 
         r_{t,r} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}}
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr  (+0 -8)

@@ -256,19 +256,15 @@ sub_models {
   memories {
     layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
     link_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
-    is_sequence: false
   }
   in_links {
     layer_name: "__simple_gru_0___transform"
     link_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
-    has_subseq: false
   }
   out_links {
     layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
     link_name: "__simple_gru_0__"
-    has_subseq: false
   }
-  target_inlinkid: -1
 }
 sub_models {
   name: "__simple_gru_1___recurrent_group"

@@ -280,18 +276,14 @@ sub_models {
   memories {
     layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
     link_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
-    is_sequence: false
   }
   in_links {
     layer_name: "__simple_gru_1___transform"
     link_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
-    has_subseq: false
   }
   out_links {
     layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
     link_name: "__simple_gru_1__"
-    has_subseq: false
   }
-  target_inlinkid: -1
 }
python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr  (+0 -10)

@@ -341,24 +341,19 @@ sub_models {
   memories {
     layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
     link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
-    is_sequence: false
   }
   memories {
     layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
     link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
-    is_sequence: false
   }
   in_links {
     layer_name: "__mixed_0__"
     link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
-    has_subseq: false
   }
   out_links {
     layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
     link_name: "__lstm_group_0__"
-    has_subseq: false
   }
-  target_inlinkid: -1
 }
 sub_models {
   name: "__lstm_group_1___recurrent_group"

@@ -373,23 +368,18 @@ sub_models {
   memories {
     layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
     link_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group"
-    is_sequence: false
   }
   memories {
     layer_name: "__lstm_group_1___state@__lstm_group_1___recurrent_group"
     link_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group"
-    is_sequence: false
   }
   in_links {
     layer_name: "__mixed_1__"
     link_name: "__mixed_1__@__lstm_group_1___recurrent_group"
-    has_subseq: false
   }
   out_links {
     layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
     link_name: "__lstm_group_1__"
-    has_subseq: false
   }
-  target_inlinkid: -1
 }
python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr  (+2 -27)

@@ -155,7 +155,7 @@ layers {
 }
 layers {
   name: "sub_seq_input@__recurrent_group_2__"
-  type: "sequence_scatter_agent"
+  type: "scatter_agent"
   size: 100
   active_type: ""
 }

@@ -182,7 +182,7 @@ layers {
 }
 layers {
   name: "rnn_subseq_forward"
-  type: "sequence_gather_agent"
+  type: "gather_agent"
   size: 200
   active_type: ""
 }

@@ -618,19 +618,15 @@ sub_models {
   memories {
     layer_name: "rnn_forward@__recurrent_group_0__"
     link_name: "rnn_forward+delay1@__recurrent_group_0__"
-    is_sequence: false
   }
   in_links {
     layer_name: "seq_input"
     link_name: "seq_input@__recurrent_group_0__"
-    has_subseq: false
   }
   out_links {
     layer_name: "rnn_forward@__recurrent_group_0__"
     link_name: "rnn_forward"
-    has_subseq: false
   }
-  target_inlinkid: -1
 }
 sub_models {
   name: "__recurrent_group_1__"

@@ -642,19 +638,15 @@ sub_models {
   memories {
     layer_name: "rnn_back@__recurrent_group_1__"
     link_name: "rnn_back+delay1@__recurrent_group_1__"
-    is_sequence: false
   }
   in_links {
     layer_name: "seq_input"
     link_name: "seq_input@__recurrent_group_1__"
-    has_subseq: false
   }
   out_links {
     layer_name: "rnn_back@__recurrent_group_1__"
     link_name: "rnn_back"
-    has_subseq: false
   }
-  target_inlinkid: -1
 }
 sub_models {
   name: "__recurrent_group_2__"

@@ -666,19 +658,15 @@ sub_models {
   memories {
     layer_name: "rnn_subseq_forward@__recurrent_group_2__"
     link_name: "rnn_subseq_forward+delay1@__recurrent_group_2__"
-    is_sequence: false
   }
   in_links {
     layer_name: "sub_seq_input"
     link_name: "sub_seq_input@__recurrent_group_2__"
-    has_subseq: true
   }
   out_links {
     layer_name: "rnn_subseq_forward@__recurrent_group_2__"
     link_name: "rnn_subseq_forward"
-    has_subseq: true
   }
-  target_inlinkid: -1
 }
 sub_models {
   name: "__lstm_group_0___recurrent_group"

@@ -693,24 +681,19 @@ sub_models {
   memories {
     layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
     link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
-    is_sequence: false
   }
   memories {
     layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
     link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
-    is_sequence: false
   }
   in_links {
     layer_name: "__mixed_0__"
     link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
-    has_subseq: false
   }
   out_links {
     layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
     link_name: "__lstm_group_0__"
-    has_subseq: false
   }
-  target_inlinkid: -1
 }
 sub_models {
   name: "__gru_group_0___recurrent_group"

@@ -722,19 +705,15 @@ sub_models {
   memories {
     layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
     link_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group"
-    is_sequence: false
   }
   in_links {
     layer_name: "__mixed_1__"
     link_name: "__mixed_1__@__gru_group_0___recurrent_group"
-    has_subseq: false
   }
   out_links {
     layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
     link_name: "__gru_group_0__"
-    has_subseq: false
   }
-  target_inlinkid: -1
 }
 sub_models {
   name: "__recurrent_group_3__"

@@ -746,18 +725,14 @@ sub_models {
   memories {
     layer_name: "__fc_layer_0__@__recurrent_group_3__"
     link_name: "__memory_6__@__recurrent_group_3__"
-    is_sequence: false
   }
   in_links {
     layer_name: "seq_input"
     link_name: "seq_input@__recurrent_group_3__"
-    has_subseq: false
   }
   out_links {
     layer_name: "__fc_layer_0__@__recurrent_group_3__"
     link_name: "__fc_layer_0__"
-    has_subseq: false
   }
-  target_inlinkid: -1
 }
python/paddle/v2/layer.py  (+1 -1)

@@ -260,7 +260,7 @@ def parse_network(output_layers, extra_layers=None):
     else:
         extra_layers = []
 
-    layer_names = __get_used_layers__(output_layers + extra_layers)
+    layer_names = __get_used_layers__(list(output_layers) + list(extra_layers))
     submodel_names = __get_used_submodels__(layer_names)
     submodel_names.add('root')
     evaluator_names = __get_used_evaluators__(layer_names)