Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
aeb2d848
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
aeb2d848
编写于
9月 16, 2016
作者:
H
Haonan
提交者:
GitHub
9月 16, 2016
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #76 from emailweixu/fix_RecurrentGradientMachine
Further fix for memory of RecurrentGradientMachine
上级
a9d327bd
9d12ca95
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
233 addition
and
129 deletion
+233
-129
paddle/cuda/src/hl_cuda_matrix.cu
paddle/cuda/src/hl_cuda_matrix.cu
+1
-0
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+59
-56
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+1
-5
paddle/gserver/layers/PrintLayer.cpp
paddle/gserver/layers/PrintLayer.cpp
+58
-0
paddle/gserver/tests/sequence_nest_rnn.conf
paddle/gserver/tests/sequence_nest_rnn.conf
+4
-3
paddle/gserver/tests/sequence_rnn.conf
paddle/gserver/tests/sequence_rnn.conf
+3
-3
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
+8
-5
paddle/parameter/Argument.cpp
paddle/parameter/Argument.cpp
+24
-41
paddle/parameter/Argument.h
paddle/parameter/Argument.h
+22
-14
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+8
-0
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+43
-2
python/paddle/trainer_config_helpers/tests/layers_test_config.py
...paddle/trainer_config_helpers/tests/layers_test_config.py
+2
-0
未找到文件。
paddle/cuda/src/hl_cuda_matrix.cu
浏览文件 @
aeb2d848
...
@@ -19,6 +19,7 @@ limitations under the License. */
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include "hl_matrix_apply.cuh"
#include "hl_matrix_apply.cuh"
#include "hl_sequence.h"
#include "hl_sequence.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Logging.h"
#include "hl_device_functions.cuh"
DEFINE_MATRIX_UNARY_OP
(
Zero
,
a
=
0
);
DEFINE_MATRIX_UNARY_OP
(
Zero
,
a
=
0
);
DEFINE_MATRIX_TERNARY_PARAMETER_OP
(
_add
,
TWO_PARAMETER
,
c
=
p1
*
a
+
p2
*
b
);
DEFINE_MATRIX_TERNARY_PARAMETER_OP
(
_add
,
TWO_PARAMETER
,
c
=
p1
*
a
+
p2
*
b
);
...
...
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
浏览文件 @
aeb2d848
...
@@ -434,23 +434,25 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
...
@@ -434,23 +434,25 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
}
}
}
}
seqLengthAndStart_
.
clear
();
info_
.
clear
();
info_
.
clear
();
info_
.
resize
(
inFrameLines_
.
size
());
info_
.
resize
(
inFrameLines_
.
size
());
seqLengthAndStart_
.
resize
(
inFrameLines_
.
size
());
seqInfos_
.
clear
();
seqInfos_
.
resize
(
inFrameLines_
.
size
());
{
{
AsyncGpuBlock
asyncGpuBlock
;
AsyncGpuBlock
asyncGpuBlock
;
// if shareInlinkInfo, only calculate info of the first inlink
// if shareInlinkInfo, only calculate info of the first inlink
// else, calculate info for each inlink
// else, calculate info for each inlink
if
(
shareInlinkInfo
)
{
if
(
shareInlinkInfo
)
{
input
.
getSeqLengthAndStart
(
&
seqLengthAndStart_
[
0
],
&
maxSequenceLength_
);
input
.
getSeqInfo
(
&
seqInfos_
[
0
]);
maxSequenceLength_
=
seqInfos_
[
0
][
0
].
topLevelLength
;
createInFrameInfo
(
0
,
input
,
passType
);
createInFrameInfo
(
0
,
input
,
passType
);
}
else
{
}
else
{
for
(
size_t
i
=
0
;
i
<
inFrameLines_
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
inFrameLines_
.
size
();
i
++
)
{
const
Argument
&
input1
=
inFrameLines_
[
i
].
inLayer
->
getOutput
();
const
Argument
&
input1
=
inFrameLines_
[
i
].
inLayer
->
getOutput
();
input1
.
getSeq
LengthAndStart
(
&
seqLengthAndStart_
[
i
],
input1
.
getSeq
Info
(
&
seqInfos_
[
i
]);
&
maxSequenceLength_
)
;
maxSequenceLength_
=
seqInfos_
[
i
][
0
].
topLevelLength
;
createInFrameInfo
(
i
,
input1
,
passType
);
createInFrameInfo
(
i
,
input1
,
passType
);
}
}
}
}
...
@@ -614,7 +616,7 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
...
@@ -614,7 +616,7 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
* for all realLayer of inFrameLines one time.
* for all realLayer of inFrameLines one time.
*/
*/
void
RecurrentGradientMachine
::
createInFrameInfo
(
int
inlink
s_i
d
,
void
RecurrentGradientMachine
::
createInFrameInfo
(
int
inlink
I
d
,
const
Argument
&
input
,
const
Argument
&
input
,
PassType
passType
)
{
PassType
passType
)
{
bool
hasSubseq
=
input
.
hasSubseq
();
bool
hasSubseq
=
input
.
hasSubseq
();
...
@@ -622,66 +624,67 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
...
@@ -622,66 +624,67 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
size_t
numSequences
=
input
.
getNumSequences
();
size_t
numSequences
=
input
.
getNumSequences
();
std
::
vector
<
int
>
allIds
;
std
::
vector
<
int
>
allIds
;
auto
&
seqInfo
=
seqInfos_
[
inlinkId
];
numSeqs_
.
clear
();
numSeqs_
.
clear
();
Info
*
inlink_info
=
&
info_
[
inlinks_id
];
Info
*
inlinkInfo
=
&
info_
[
inlinkId
];
inlink_info
->
idIndex
.
clear
();
inlinkInfo
->
idIndex
.
clear
();
inlink_info
->
idIndex
.
push_back
(
0
);
// first idIndex = 0
inlinkInfo
->
idIndex
.
push_back
(
0
);
// first idIndex = 0
std
::
vector
<
int
>
sequenceStartPositions
;
const
int
*
subSequenceStartPositions
=
nullptr
;
if
(
hasSubseq
)
{
// for sequenceScatterAgentLayer
if
(
hasSubseq
)
{
// for sequenceScatterAgentLayer
// numSubSequences : all sentences within all samples(batch)
subSequenceStartPositions
=
size_t
numSubSequences
=
input
.
getNumSubSequences
();
input
.
subSequenceStartPositions
->
getData
(
false
);
std
::
vector
<
int
>
sequenceStartPositions
;
inlinkInfo
->
seqStartPosIndex
.
clear
();
inlink_info
->
seqStartPosIndex
.
clear
();
inlinkInfo
->
seqStartPosIndex
.
push_back
(
0
);
// first seqStartPosIndex = 0
inlink_info
->
seqStartPosIndex
.
push_back
(
0
);
// first seqStartPosIndex = 0
}
// maxSequenceLength_: max number of sentences(subseq) in allsamples
// maxSequenceLength_: max topLevelLength in allsamples
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
if
(
hasSubseq
)
{
sequenceStartPositions
.
push_back
(
0
);
// first element = 0
sequenceStartPositions
.
push_back
(
0
);
// first element = 0
int
numSeqs
=
0
;
for
(
size_t
j
=
0
;
j
<
numSubSequences
;
++
j
)
{
// for each sentence
// seqLengthAndStart_[inlinks_id][j]:
// a 4-tuple including <subseqlen, subseqstart, seqid, subseqid>
if
(
std
::
get
<
3
>
(
seqLengthAndStart_
[
inlinks_id
][
j
])
==
i
)
{
++
numSeqs
;
// subseqstart: the cpuSubSequenceStartPositions of this subseq
int
subSeqStart
=
std
::
get
<
1
>
(
seqLengthAndStart_
[
inlinks_id
][
j
]);
int
subSeqLength
=
std
::
get
<
0
>
(
seqLengthAndStart_
[
inlinks_id
][
j
]);
for
(
int
k
=
subSeqStart
;
k
<
subSeqStart
+
subSeqLength
;
++
k
)
{
allIds
.
push_back
(
k
);
}
sequenceStartPositions
.
push_back
(
sequenceStartPositions
.
back
()
+
subSeqLength
);
}
}
inlink_info
->
idIndex
.
push_back
(
allIds
.
size
());
inlink_info
->
seqStartPosIndex
.
push_back
(
sequenceStartPositions
.
size
());
numSeqs_
.
push_back
(
numSeqs
);
}
}
// inFrameLine create sequenceStartPositions one time
int
numSeqs
=
0
;
CHECK_EQ
(
sequenceStartPositions
.
size
(),
for
(
size_t
j
=
0
;
j
<
numSequences
;
++
j
)
{
maxSequenceLength_
+
numSubSequences
)
;
int
seqLength
=
seqInfo
[
j
].
topLevelLength
;
CHECK_EQ
(
inlink_info
->
seqStartPosIndex
.
size
(),
if
(
i
>=
seqLength
)
{
static_cast
<
size_t
>
(
maxSequenceLength_
+
1
))
;
break
;
createSeqPos
(
sequenceStartPositions
,
&
inlink_info
->
sequenceStartPositions
);
}
}
else
{
// for scatterAgentLayer
++
numSeqs
;
for
(
int
i
=
0
;
i
<
maxSequenceLength_
;
++
i
)
{
if
(
hasSubseq
)
{
int
numSeqs
=
0
;
int
subSeqStart
=
subSequenceStartPositions
[
seqInfo
[
j
].
subSeqStart
+
i
]
;
for
(
size_t
j
=
0
;
j
<
numSequences
;
++
j
)
{
int
subSeqEnd
=
int
seqLength
=
std
::
get
<
0
>
(
seqLengthAndStart_
[
inlinks_id
][
j
])
;
subSequenceStartPositions
[
seqInfo
[
j
].
subSeqStart
+
i
+
1
]
;
if
(
i
>=
seqLength
)
{
for
(
int
k
=
subSeqStart
;
k
<
subSeqEnd
;
++
k
)
{
break
;
allIds
.
push_back
(
k
)
;
}
}
++
numSeqs
;
sequenceStartPositions
.
push_back
(
sequenceStartPositions
.
back
()
+
int
seqStart
=
std
::
get
<
1
>
(
seqLengthAndStart_
[
inlinks_id
][
j
]);
subSeqEnd
-
subSeqStart
);
}
else
{
int
seqStart
=
seqInfo
[
j
].
seqStart
;
allIds
.
push_back
(
reversed_
?
(
seqStart
+
seqLength
-
1
-
i
)
allIds
.
push_back
(
reversed_
?
(
seqStart
+
seqLength
-
1
-
i
)
:
(
seqStart
+
i
));
:
(
seqStart
+
i
));
}
}
inlink_info
->
idIndex
.
push_back
(
allIds
.
size
());
numSeqs_
.
push_back
(
numSeqs
);
}
}
inlinkInfo
->
idIndex
.
push_back
(
allIds
.
size
());
numSeqs_
.
push_back
(
numSeqs
);
if
(
hasSubseq
)
{
inlinkInfo
->
seqStartPosIndex
.
push_back
(
sequenceStartPositions
.
size
());
}
}
if
(
hasSubseq
)
{
// inFrameLine create sequenceStartPositions one time
CHECK_EQ
(
sequenceStartPositions
.
size
(),
maxSequenceLength_
+
input
.
getNumSubSequences
());
CHECK_EQ
(
inlinkInfo
->
seqStartPosIndex
.
size
(),
static_cast
<
size_t
>
(
maxSequenceLength_
+
1
));
createSeqPos
(
sequenceStartPositions
,
&
inlinkInfo
->
sequenceStartPositions
);
}
}
// copy and check scatterId
// copy and check scatterId
copyScattedId
(
allIds
,
&
inlink
_i
nfo
->
allIds
,
input
.
getBatchSize
());
copyScattedId
(
allIds
,
&
inlink
I
nfo
->
allIds
,
input
.
getBatchSize
());
CHECK_EQ
(
inlink
_i
nfo
->
idIndex
.
size
(),
CHECK_EQ
(
inlink
I
nfo
->
idIndex
.
size
(),
static_cast
<
size_t
>
(
maxSequenceLength_
+
1
));
static_cast
<
size_t
>
(
maxSequenceLength_
+
1
));
}
}
...
@@ -701,7 +704,7 @@ void RecurrentGradientMachine::createMemoryFrameInfo(
...
@@ -701,7 +704,7 @@ void RecurrentGradientMachine::createMemoryFrameInfo(
const
int
*
starts
=
input
.
sequenceStartPositions
->
getData
(
false
);
const
int
*
starts
=
input
.
sequenceStartPositions
->
getData
(
false
);
for
(
size_t
i
=
0
;
i
<
numSequences
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
numSequences
;
++
i
)
{
// memory info adopt info of inlinks[0]
// memory info adopt info of inlinks[0]
int
seqId
=
s
td
::
get
<
2
>
(
seqLengthAndStart_
[
0
][
i
])
;
int
seqId
=
s
eqInfos_
[
0
][
i
].
seqId
;
for
(
int
k
=
starts
[
seqId
];
k
<
starts
[
seqId
+
1
];
++
k
)
{
for
(
int
k
=
starts
[
seqId
];
k
<
starts
[
seqId
+
1
];
++
k
)
{
allIds
.
push_back
(
k
);
allIds
.
push_back
(
k
);
}
}
...
@@ -713,7 +716,7 @@ void RecurrentGradientMachine::createMemoryFrameInfo(
...
@@ -713,7 +716,7 @@ void RecurrentGradientMachine::createMemoryFrameInfo(
}
else
{
// for scatterAgentLayer
}
else
{
// for scatterAgentLayer
for
(
size_t
i
=
0
;
i
<
numSequences
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
numSequences
;
++
i
)
{
allIds
.
push_back
(
s
td
::
get
<
2
>
(
seqLengthAndStart_
[
0
][
i
])
);
allIds
.
push_back
(
s
eqInfos_
[
0
][
i
].
seqId
);
}
}
}
}
// copy and check scatterId
// copy and check scatterId
...
...
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
浏览文件 @
aeb2d848
...
@@ -337,11 +337,7 @@ protected:
...
@@ -337,11 +337,7 @@ protected:
// data) or has more than i subsequences (for subsequence data)
// data) or has more than i subsequences (for subsequence data)
std
::
vector
<
int
>
numSeqs_
;
std
::
vector
<
int
>
numSeqs_
;
// each inlinks has a "std::vector<std::tuple<int, int, int, int>>" denotes
std
::
vector
<
std
::
vector
<
Argument
::
SeqInfo
>>
seqInfos_
;
// its sequence info:
// if hasSubSeq, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
// else, tuple of (seqLength, seqStart, seqIndex, seqIndex)
std
::
vector
<
std
::
vector
<
std
::
tuple
<
int
,
int
,
int
,
int
>>>
seqLengthAndStart_
;
// the id of inlink which share info with outlinks
// the id of inlink which share info with outlinks
int
targetInfoInlinkId_
;
int
targetInfoInlinkId_
;
...
...
paddle/gserver/layers/PrintLayer.cpp
0 → 100644
浏览文件 @
aeb2d848
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
namespace
paddle
{
/**
 * Debugging layer whose forward pass logs the contents of each of its inputs
 * (see PrintLayer::forward). It contributes nothing to back-propagation:
 * backward() has an empty body.
 */
class PrintLayer : public Layer {
public:
  /// Forwards the layer configuration to the Layer base class unchanged.
  explicit PrintLayer(const LayerConfig& layerConfig) : Layer(layerConfig) {}

  /// Defined out of line; logs every populated field of every input.
  void forward(PassType passType);

  /// Intentionally empty: the callback is ignored.
  void backward(const UpdateCallback& /* callback */) {}
};
/**
 * Runs the base-class forward step, then walks over every input layer and
 * writes whichever of its fields are set to the INFO log:
 *   - value                      -> "value matrix"
 *   - ids                        -> "ids vector"
 *   - sequenceStartPositions     -> "sequence pos vector"
 *   - subSequenceStartPositions  -> "sub-sequence pos vector"
 * Each field is first rendered into a string stream and then emitted as a
 * single log record prefixed with the name of the input layer.
 */
void PrintLayer::forward(PassType passType) {
  Layer::forward(passType);

  const size_t numInputs = inputLayers_.size();
  for (size_t idx = 0; idx < numInputs; ++idx) {
    const auto& input = getInput(idx);
    const std::string& layerName = inputLayers_[idx]->getName();

    if (input.value) {
      std::ostringstream valueText;
      input.value->print(valueText);
      LOG(INFO) << "layer=" << layerName << " value matrix:\n"
                << valueText.str();
    }

    if (input.ids) {
      std::ostringstream idsText;
      // Print the whole vector: the element count passed is its full size.
      input.ids->print(idsText, input.ids->getSize());
      LOG(INFO) << "layer=" << layerName << " ids vector:\n" << idsText.str();
    }

    if (auto seqStarts = input.sequenceStartPositions) {
      std::ostringstream seqText;
      // getVector(false) -- presumably selects the CPU copy; confirm against
      // the declaration of sequenceStartPositions' type.
      seqStarts->getVector(false)->print(seqText, seqStarts->getSize());
      LOG(INFO) << "layer=" << layerName << " sequence pos vector:\n"
                << seqText.str();
    }

    if (auto subSeqStarts = input.subSequenceStartPositions) {
      std::ostringstream subSeqText;
      subSeqStarts->getVector(false)->print(subSeqText,
                                            subSeqStarts->getSize());
      LOG(INFO) << "layer=" << layerName << " sub-sequence pos vector:\n"
                << subSeqText.str();
    }
  }
}
// Hooks PrintLayer up through the REGISTER_LAYER macro under the identifier
// `print`; this matches the 'print' layer type declared by the Python config
// code in this commit. NOTE(review): the macro is defined elsewhere -- confirm
// that its first argument is the type name used for lookup.
REGISTER_LAYER
(
print
,
PrintLayer
);
}
// namespace paddle
paddle/gserver/tests/sequence_nest_rnn.conf
浏览文件 @
aeb2d848
...
@@ -42,14 +42,16 @@ def outer_step(x):
...
@@ -42,14 +42,16 @@ def outer_step(x):
inner_mem
=
memory
(
name
=
"inner_rnn_state"
,
inner_mem
=
memory
(
name
=
"inner_rnn_state"
,
size
=
hidden_dim
,
size
=
hidden_dim
,
boot_layer
=
outer_mem
)
boot_layer
=
outer_mem
)
return
fc_layer
(
input
=[
y
,
inner_mem
],
out
=
fc_layer
(
input
=[
y
,
inner_mem
],
size
=
hidden_dim
,
size
=
hidden_dim
,
act
=
TanhActivation
(),
act
=
TanhActivation
(),
bias_attr
=
True
,
bias_attr
=
True
,
name
=
"inner_rnn_state"
)
name
=
"inner_rnn_state"
)
return
out
inner_rnn_output
=
recurrent_group
(
inner_rnn_output
=
recurrent_group
(
step
=
inner_step
,
step
=
inner_step
,
name
=
"inner"
,
input
=
x
)
input
=
x
)
last
=
last_seq
(
input
=
inner_rnn_output
,
name
=
"outer_rnn_state"
)
last
=
last_seq
(
input
=
inner_rnn_output
,
name
=
"outer_rnn_state"
)
...
@@ -60,11 +62,10 @@ def outer_step(x):
...
@@ -60,11 +62,10 @@ def outer_step(x):
return
inner_rnn_output
return
inner_rnn_output
out
=
recurrent_group
(
out
=
recurrent_group
(
name
=
"outer"
,
step
=
outer_step
,
step
=
outer_step
,
input
=
SubsequenceInput
(
emb
))
input
=
SubsequenceInput
(
emb
))
value_printer_evaluator
(
input
=
out
)
rep
=
last_seq
(
input
=
out
)
rep
=
last_seq
(
input
=
out
)
prob
=
fc_layer
(
size
=
label_dim
,
prob
=
fc_layer
(
size
=
label_dim
,
input
=
rep
,
input
=
rep
,
...
...
paddle/gserver/tests/sequence_rnn.conf
浏览文件 @
aeb2d848
...
@@ -35,18 +35,18 @@ emb = embedding_layer(input=data, size=word_dim)
...
@@ -35,18 +35,18 @@ emb = embedding_layer(input=data, size=word_dim)
def
step
(
y
):
def
step
(
y
):
mem
=
memory
(
name
=
"rnn_state"
,
size
=
hidden_dim
)
mem
=
memory
(
name
=
"rnn_state"
,
size
=
hidden_dim
)
return
fc_layer
(
input
=[
y
,
mem
],
out
=
fc_layer
(
input
=[
y
,
mem
],
size
=
hidden_dim
,
size
=
hidden_dim
,
act
=
TanhActivation
(),
act
=
TanhActivation
(),
bias_attr
=
True
,
bias_attr
=
True
,
name
=
"rnn_state"
)
name
=
"rnn_state"
)
return
out
out
=
recurrent_group
(
out
=
recurrent_group
(
name
=
"rnn"
,
step
=
step
,
step
=
step
,
input
=
emb
)
input
=
emb
)
value_printer_evaluator
(
input
=
out
)
rep
=
last_seq
(
input
=
out
)
rep
=
last_seq
(
input
=
out
)
prob
=
fc_layer
(
size
=
label_dim
,
prob
=
fc_layer
(
size
=
label_dim
,
input
=
rep
,
input
=
rep
,
...
...
paddle/gserver/tests/test_RecurrentGradientMachine.cpp
浏览文件 @
aeb2d848
...
@@ -92,7 +92,7 @@ void CalCost(const string& conf, const string& dir, real* cost,
...
@@ -92,7 +92,7 @@ void CalCost(const string& conf, const string& dir, real* cost,
rmDir
(
dir
.
c_str
());
rmDir
(
dir
.
c_str
());
}
}
void
test
(
const
string
&
conf1
,
const
string
&
conf2
)
{
void
test
(
const
string
&
conf1
,
const
string
&
conf2
,
double
eps
)
{
int
num_passes
=
5
;
int
num_passes
=
5
;
real
*
cost1
=
new
real
[
num_passes
];
real
*
cost1
=
new
real
[
num_passes
];
const
string
dir1
=
"gserver/tests/t1"
;
const
string
dir1
=
"gserver/tests/t1"
;
...
@@ -104,8 +104,9 @@ void test(const string& conf1, const string& conf2) {
...
@@ -104,8 +104,9 @@ void test(const string& conf1, const string& conf2) {
for
(
int
i
=
0
;
i
<
num_passes
;
i
++
)
{
for
(
int
i
=
0
;
i
<
num_passes
;
i
++
)
{
LOG
(
INFO
)
<<
"num_passes: "
<<
i
<<
", cost1="
<<
cost1
[
i
]
LOG
(
INFO
)
<<
"num_passes: "
<<
i
<<
", cost1="
<<
cost1
[
i
]
<<
", cost2="
<<
cost2
[
i
];
<<
", cost2="
<<
cost2
[
i
]
ASSERT_NEAR
(
cost1
[
i
],
cost2
[
i
],
1e-3
);
<<
", diff="
<<
std
::
abs
(
cost1
[
i
]
-
cost2
[
i
]);
ASSERT_NEAR
(
cost1
[
i
],
cost2
[
i
],
eps
);
}
}
delete
[]
cost1
;
delete
[]
cost1
;
delete
[]
cost2
;
delete
[]
cost2
;
...
@@ -113,12 +114,14 @@ void test(const string& conf1, const string& conf2) {
...
@@ -113,12 +114,14 @@ void test(const string& conf1, const string& conf2) {
TEST
(
RecurrentGradientMachine
,
HasSubSequence
)
{
TEST
(
RecurrentGradientMachine
,
HasSubSequence
)
{
test
(
"gserver/tests/sequence_layer_group.conf"
,
test
(
"gserver/tests/sequence_layer_group.conf"
,
"gserver/tests/sequence_nest_layer_group.conf"
);
"gserver/tests/sequence_nest_layer_group.conf"
,
1e-5
);
}
}
TEST
(
RecurrentGradientMachine
,
rnn
)
{
TEST
(
RecurrentGradientMachine
,
rnn
)
{
test
(
"gserver/tests/sequence_rnn.conf"
,
test
(
"gserver/tests/sequence_rnn.conf"
,
"gserver/tests/sequence_nest_rnn.conf"
);
"gserver/tests/sequence_nest_rnn.conf"
,
0
);
}
}
...
...
paddle/parameter/Argument.cpp
浏览文件 @
aeb2d848
...
@@ -477,51 +477,34 @@ void Argument::splitByDataId(const std::vector<Argument>& argus,
...
@@ -477,51 +477,34 @@ void Argument::splitByDataId(const std::vector<Argument>& argus,
}
}
}
}
void
Argument
::
getSeqLengthAndStart
(
void
Argument
::
getSeqInfo
(
std
::
vector
<
SeqInfo
>*
seqInfo
)
const
{
std
::
vector
<
std
::
tuple
<
int
,
int
,
int
,
int
>>*
seqLengthAndStart
,
int
*
maxSequenceLength
)
const
{
const
int
*
starts
=
sequenceStartPositions
->
getData
(
false
);
const
int
*
starts
=
sequenceStartPositions
->
getData
(
false
);
if
(
hasSubseq
())
{
const
int
*
subStarts
=
hasSubseq
()
size_t
numSubSequences
=
getNumSubSequences
();
?
subSequenceStartPositions
->
getData
(
false
)
:
nullptr
;
(
*
seqLengthAndStart
).
reserve
(
numSubSequences
);
size_t
numSequences
=
getNumSequences
();
const
int
*
subStarts
=
subSequenceStartPositions
->
getData
(
false
);
seqInfo
->
reserve
(
numSequences
);
int
seqIndex
=
0
;
int
subSeqEnd
=
0
;
int
subSeqIndex
=
0
;
for
(
size_t
i
=
0
;
i
<
numSequences
;
++
i
)
{
*
maxSequenceLength
=
0
;
SeqInfo
info
;
for
(
size_t
i
=
0
;
i
<
numSubSequences
;
++
i
)
{
info
.
seqStart
=
starts
[
i
];
if
(
subStarts
[
i
]
==
starts
[
seqIndex
])
{
info
.
subLevelLength
=
starts
[
i
+
1
]
-
starts
[
i
];
subSeqIndex
=
0
;
info
.
seqId
=
i
;
(
*
seqLengthAndStart
)
if
(
hasSubseq
())
{
.
push_back
(
std
::
make_tuple
<
int
,
int
,
int
,
int
>
(
info
.
subSeqStart
=
subSeqEnd
;
subStarts
[
i
+
1
]
-
subStarts
[
i
],
(
int
)
subStarts
[
i
],
while
(
subStarts
[
subSeqEnd
]
<
starts
[
i
+
1
])
{
(
int
)
seqIndex
,
(
int
)
subSeqIndex
));
++
subSeqEnd
;
++
subSeqIndex
;
++
seqIndex
;
}
else
if
(
subStarts
[
i
]
<
starts
[
seqIndex
])
{
(
*
seqLengthAndStart
)
.
push_back
(
std
::
make_tuple
<
int
,
int
,
int
,
int
>
(
subStarts
[
i
+
1
]
-
subStarts
[
i
],
(
int
)
subStarts
[
i
],
(
int
)
seqIndex
-
1
,
(
int
)
subSeqIndex
));
++
subSeqIndex
;
}
}
// maxSequenceLength_ = 1 + max(subSeqIndex) in each Seq.
info
.
topLevelLength
=
subSeqEnd
-
info
.
subSeqStart
;
if
(
*
maxSequenceLength
<
std
::
get
<
3
>
((
*
seqLengthAndStart
)[
i
]))
}
else
{
*
maxSequenceLength
=
std
::
get
<
3
>
((
*
seqLengthAndStart
)[
i
]);
info
.
topLevelLength
=
info
.
subLevelLength
;
}
info
.
subSeqStart
=
0
;
// not used
*
maxSequenceLength
+=
1
;
}
else
{
size_t
numSequences
=
getNumSequences
();
(
*
seqLengthAndStart
).
reserve
(
numSequences
);
for
(
size_t
i
=
0
;
i
<
numSequences
;
++
i
)
{
(
*
seqLengthAndStart
)
.
push_back
(
std
::
make_tuple
<
int
,
int
,
int
,
int
>
(
starts
[
i
+
1
]
-
starts
[
i
],
(
int
)
starts
[
i
],
(
int
)
i
,
(
int
)
i
));
}
}
std
::
sort
((
*
seqLengthAndStart
).
begin
(),
(
*
seqLengthAndStart
).
end
(),
seqInfo
->
push_back
(
info
);
std
::
greater
<
std
::
tuple
<
int
,
int
,
int
,
int
>>
());
*
maxSequenceLength
=
std
::
get
<
0
>
((
*
seqLengthAndStart
)[
0
]);
}
}
std
::
sort
(
seqInfo
->
begin
(),
seqInfo
->
end
(),
[](
const
SeqInfo
&
a
,
const
SeqInfo
&
b
)
{
return
a
.
topLevelLength
>
b
.
topLevelLength
;
});
}
}
void
Argument
::
checkSubset
()
const
{
void
Argument
::
checkSubset
()
const
{
...
...
paddle/parameter/Argument.h
浏览文件 @
aeb2d848
...
@@ -253,21 +253,29 @@ struct Argument {
...
@@ -253,21 +253,29 @@ struct Argument {
static
void
splitByDataId
(
const
std
::
vector
<
Argument
>&
argus
,
static
void
splitByDataId
(
const
std
::
vector
<
Argument
>&
argus
,
std
::
vector
<
std
::
vector
<
Argument
>>*
arguGroups
);
std
::
vector
<
std
::
vector
<
Argument
>>*
arguGroups
);
// Per-sequence record produced by Argument::getSeqInfo(); one entry is
// appended for every sequence of the argument.
struct
SeqInfo
{
// Length at the top level:
//   - sequence data:    the length of the sequence
//   - subsequence data: the number of subsequences in the sequence
int
topLevelLength
;
// Start offset of this sequence, read from sequenceStartPositions.
int
seqStart
;
// Index of this sequence in the argument, assigned before getSeqInfo()
// sorts the records.
int
seqId
;
// Length at the sub level:
//   - sequence data:    equal to topLevelLength
//   - subsequence data: the sum of the lengths of all its subsequences
int
subLevelLength
;
// Only used for subsequence data: index of the first subsequence of this
// sequence in subSequenceStartPositions, i.e.
// subSequenceStartPositions[subSeqStart] == seqStart
int
subSeqStart
;
};
/*
/*
Get Sequence Length, startPositions and max Length according to input
Get SeqInfo for each sequence of this argument
1. For sequence data:
Elements in *seqInfo are sorted by topLevelLength in descending order
Each tuple is (seq_length, seq_start, seq_id, seq_id)
*/
The tuples are sorted according to seq_length or subseq_length
void
getSeqInfo
(
std
::
vector
<
SeqInfo
>*
segInfo
)
const
;
*maxSequenceLength is the maximal sequence length
2. For subsequence data:
Each tuple is (subseq_length, subseq_start, seq_id, subseq_id)
The tuples are not sorted. They are in the original order.
*maxSequenceLenth is the maximal number of subsequences in each sequence.
*/
void
getSeqLengthAndStart
(
std
::
vector
<
std
::
tuple
<
int
,
int
,
int
,
int
>>*
seqLengthAndStart
,
int
*
maxSequenceLength
)
const
;
/*
/*
Check Whether sequenceStartPositions is subset of
Check Whether sequenceStartPositions is subset of
subSequenceStartPositions.
subSequenceStartPositions.
...
...
python/paddle/trainer/config_parser.py
浏览文件 @
aeb2d848
...
@@ -1408,6 +1408,14 @@ class SelectiveFCLayer(LayerBase):
...
@@ -1408,6 +1408,14 @@ class SelectiveFCLayer(LayerBase):
input_index
,
psize
,
dims
,
sparse
,
format
)
input_index
,
psize
,
dims
,
sparse
,
format
)
self
.
create_bias_parameter
(
bias
,
self
.
config
.
size
)
self
.
create_bias_parameter
(
bias
,
self
.
config
.
size
)
@config_layer('print')
class PrintLayer(LayerBase):
    """
    Config-side declaration of the 'print' layer.

    The layer only forwards its name and inputs to LayerBase; it declares no
    parameters of its own.
    """

    def __init__(self, name, inputs):
        layer_type = 'print'
        # Third positional argument of LayerBase.__init__ -- presumably the
        # layer's output size; confirm against the LayerBase signature.
        third_arg = 0
        super(PrintLayer, self).__init__(name, layer_type, third_arg, inputs)
@
config_layer
(
'data'
)
@
config_layer
(
'data'
)
class
DataLayer
(
LayerBase
):
class
DataLayer
(
LayerBase
):
def
__init__
(
def
__init__
(
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
aeb2d848
...
@@ -21,7 +21,6 @@ from .evaluators import *
...
@@ -21,7 +21,6 @@ from .evaluators import *
from
.poolings
import
MaxPooling
,
AvgPooling
,
BasePoolingType
from
.poolings
import
MaxPooling
,
AvgPooling
,
BasePoolingType
from
.attrs
import
*
from
.attrs
import
*
from
.default_decorators
import
*
from
.default_decorators
import
*
try
:
try
:
import
cPickle
as
pickle
import
cPickle
as
pickle
except
ImportError
:
except
ImportError
:
...
@@ -52,7 +51,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
...
@@ -52,7 +51,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
'cross_entropy_with_selfnorm'
,
'cross_entropy'
,
'cross_entropy_with_selfnorm'
,
'cross_entropy'
,
'multi_binary_label_cross_entropy'
,
'multi_binary_label_cross_entropy'
,
'rank_cost'
,
'lambda_cost'
,
'huber_cost'
,
'rank_cost'
,
'lambda_cost'
,
'huber_cost'
,
'block_expand_layer'
,
'out_prod_layer'
,
'block_expand_layer'
,
'out_prod_layer'
,
'print_layer'
]
]
...
@@ -108,6 +107,8 @@ class LayerType(object):
...
@@ -108,6 +107,8 @@ class LayerType(object):
LINEAR_COMBINATION_LAYER
=
"convex_comb"
LINEAR_COMBINATION_LAYER
=
"convex_comb"
BLOCK_EXPAND
=
"blockexpand"
BLOCK_EXPAND
=
"blockexpand"
PRINT_LAYER
=
"print"
CTC_LAYER
=
"ctc"
CTC_LAYER
=
"ctc"
CRF_LAYER
=
"crf"
CRF_LAYER
=
"crf"
CRF_DECODING_LAYER
=
"crf_decoding"
CRF_DECODING_LAYER
=
"crf_decoding"
...
@@ -202,6 +203,25 @@ ERROR_CLIPPING = 'error_clipping_threshold'
...
@@ -202,6 +203,25 @@ ERROR_CLIPPING = 'error_clipping_threshold'
DROPOUT
=
'drop_rate'
DROPOUT
=
'drop_rate'
def check_input(input):
    """
    Check that input is a LayerOutput, or a list/tuple of LayerOutput, and
    normalize it into a list.

    A single LayerOutput is wrapped into a one-element list. A list or tuple
    is validated element by element and returned as a new list.

    :param input: The input layer. Could be a list/tuple of input layer.
    :type input: LayerOutput|list|tuple
    :return: list of LayerOutput
    :rtype: list of LayerOutput
    :raise AssertionError: if input is neither a LayerOutput nor a list/tuple
                           whose elements are all LayerOutput.
    """
    if isinstance(input, LayerOutput):
        # Wrap the instance that was passed in. The previous code returned
        # [LayerOutput], i.e. a list holding the class object itself.
        return [input]

    # Tuples are documented as accepted, so allow them alongside lists.
    assert isinstance(input, (list, tuple))
    for inp in input:
        assert isinstance(inp, LayerOutput)
    return list(input)
def
layer_support
(
*
attrs
):
def
layer_support
(
*
attrs
):
def
decorator
(
method
):
def
decorator
(
method
):
@
functools
.
wraps
(
method
)
@
functools
.
wraps
(
method
)
...
@@ -730,6 +750,27 @@ def fc_layer(input, size, act=None, name=None,
...
@@ -730,6 +750,27 @@ def fc_layer(input, size, act=None, name=None,
size
=
size
)
size
=
size
)
@wrap_name_default("print")
def print_layer(input, name=None):
    """
    Print the output value of input layers. This layer is useful for debugging.

    :param name: The Layer Name.
    :type name: basestring
    :param input: The input layer. Could be a list/tuple of input layer.
    :type input: LayerOutput|list|tuple
    :return: No return
    """
    # Normalize to a list before use. Previously the result of check_input()
    # was discarded, so a single LayerOutput (documented as accepted) reached
    # the comprehension below un-wrapped and could not be iterated.
    if isinstance(input, LayerOutput):
        input = [input]
    elif isinstance(input, tuple):
        input = list(input)
    # Validation only: at this point input is a list unless the caller passed
    # an unsupported type, in which case check_input raises AssertionError.
    check_input(input)

    Layer(
        name=name,
        type=LayerType.PRINT_LAYER,
        inputs=[l.name for l in input], )
    # The LayerOutput is constructed but deliberately not returned: the print
    # layer produces no output that another layer could consume.
    LayerOutput(name, LayerType.PRINT_LAYER, input)
@
wrap_name_default
(
"seq_pooling"
)
@
wrap_name_default
(
"seq_pooling"
)
@
wrap_bias_attr_default
(
has_bias
=
False
)
@
wrap_bias_attr_default
(
has_bias
=
False
)
@
wrap_param_default
([
'pooling_type'
],
default_factory
=
lambda
_
:
MaxPooling
())
@
wrap_param_default
([
'pooling_type'
],
default_factory
=
lambda
_
:
MaxPooling
())
...
...
python/paddle/trainer_config_helpers/tests/layers_test_config.py
浏览文件 @
aeb2d848
...
@@ -34,6 +34,8 @@ out = fc_layer(input=[cos1, cos3, linear_comb, z],
...
@@ -34,6 +34,8 @@ out = fc_layer(input=[cos1, cos3, linear_comb, z],
size
=
num_classes
,
size
=
num_classes
,
act
=
SoftmaxActivation
())
act
=
SoftmaxActivation
())
print_layer
(
input
=
[
out
])
outputs
(
classification_cost
(
out
,
data_layer
(
name
=
"label"
,
size
=
num_classes
)))
outputs
(
classification_cost
(
out
,
data_layer
(
name
=
"label"
,
size
=
num_classes
)))
# for ctc
# for ctc
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录