Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
b66af308
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
b66af308
编写于
8月 31, 2017
作者:
C
Cao Ying
提交者:
GitHub
8月 31, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3639 from lcy-seso/cross_entropy_over_beam
add a cross_entropy_over_beam layer.
上级
01f10e64
36f0aa73
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
1283 addition
and
10 deletion
+1283
-10
paddle/gserver/layers/CrossEntropyOverBeam.cpp
paddle/gserver/layers/CrossEntropyOverBeam.cpp
+393
-0
paddle/gserver/layers/CrossEntropyOverBeam.h
paddle/gserver/layers/CrossEntropyOverBeam.h
+135
-0
paddle/gserver/tests/CMakeLists.txt
paddle/gserver/tests/CMakeLists.txt
+7
-0
paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp
paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp
+353
-0
paddle/parameter/Argument.cpp
paddle/parameter/Argument.cpp
+1
-0
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+16
-0
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+120
-7
python/paddle/trainer_config_helpers/tests/configs/file_list.sh
.../paddle/trainer_config_helpers/tests/configs/file_list.sh
+1
-1
python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr
...ts/configs/protostr/test_cross_entropy_over_beam.protostr
+207
-0
python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py
...fig_helpers/tests/configs/test_cross_entropy_over_beam.py
+45
-0
python/paddle/v2/inference.py
python/paddle/v2/inference.py
+5
-2
未找到文件。
paddle/gserver/layers/CrossEntropyOverBeam.cpp
0 → 100644
浏览文件 @
b66af308
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "CrossEntropyOverBeam.h"
namespace
paddle
{
void
CostForOneSequence
::
calValidExpandStep
()
{
validExpansionCount_
=
0
;
goldAsExtraPath_
=
true
;
for
(
size_t
i
=
0
;
i
<
beams_
->
expansionCount
;
++
i
)
{
real
gold
=
static_cast
<
real
>
(
beams_
->
gold
[
i
]);
if
(
i
)
{
real
*
start
=
beams_
->
candidateIds
[
i
-
1
]
->
getData
();
goldRowIds_
[
i
]
=
std
::
count_if
(
start
,
start
+
goldRowIds_
[
i
-
1
]
*
beamSize_
+
goldColIds_
[
i
-
1
],
[](
const
real
&
val
)
{
return
val
!=
-
1.
;
});
}
else
{
goldRowIds_
[
i
]
=
0
;
}
real
*
start
=
beams_
->
candidateIds
[
i
]
->
getData
()
+
goldRowIds_
[
i
]
*
beamSize_
;
real
*
findEnd
=
std
::
find
(
start
,
start
+
beamSize_
,
gold
);
validExpansionCount_
++
;
if
(
start
+
beamSize_
==
findEnd
)
return
;
goldColIds_
[
i
]
=
findEnd
-
start
;
}
if
(
goldColIds_
[
beams_
->
expansionCount
-
1
]
!=
-
1
)
goldAsExtraPath_
=
false
;
}
size_t
CostForOneSequence
::
initLastExpansion
()
{
int
beamId
=
validExpansionCount_
-
1
;
const
MatrixPtr
candidates
=
beams_
->
candidateIds
[
beamId
];
size_t
height
=
candidates
->
getHeight
();
/* initialization the last expansion. */
size_t
pathCount
=
std
::
count_if
(
candidates
->
getData
(),
candidates
->
getData
()
+
height
*
beamSize_
,
[](
const
real
&
val
)
{
return
val
!=
-
1
;
});
/*
* if the gold sequence falls off the beam during search, add the gold
* sequence as the last path into the all expanded candidates.
*/
if
(
goldAsExtraPath_
)
goldIdsInFinalExpansion_
=
pathCount
++
;
pathRowIdsInEachBeam_
.
clear
();
pathRowIdsInEachBeam_
.
resize
(
validExpansionCount_
,
std
::
vector
<
int
>
(
pathCount
,
0
));
parentIdsInBeam_
.
clear
();
parentIdsInBeam_
.
resize
(
pathCount
,
0
);
if
(
goldAsExtraPath_
)
{
/* add gold sequence into the total expansion. */
pathRowIdsInEachBeam_
[
beamId
].
back
()
=
beams_
->
gold
[
beamId
]
+
getSeqStartPos
(
beamId
,
goldRowIds_
[
validExpansionCount_
-
1
]);
parentIdsInBeam_
.
back
()
=
goldRowIds_
[
validExpansionCount_
-
1
];
}
else
{
size_t
goldOffset
=
goldRowIds_
[
beamId
]
*
beamSize_
+
goldColIds_
[
beamId
];
goldIdsInFinalExpansion_
=
std
::
count_if
(
candidates
->
getData
(),
candidates
->
getData
()
+
goldOffset
,
[](
const
real
&
val
)
{
return
val
!=
-
1.
;
});
}
/*
* TODO(caoying): fix this, store the indices of selected candidate
* paths into Argument.ids
*/
real
*
ids
=
candidates
->
getData
();
size_t
curIdx
=
0
;
for
(
size_t
i
=
0
;
i
<
height
;
++
i
)
{
int
basePos
=
getSeqStartPos
(
beamId
,
i
);
for
(
size_t
j
=
0
;
j
<
beamSize_
;
++
j
)
{
int
id
=
ids
[
i
*
beamSize_
+
j
];
if
(
id
==
-
1
)
continue
;
pathRowIdsInEachBeam_
[
beamId
][
curIdx
]
=
id
+
basePos
;
parentIdsInBeam_
[
curIdx
++
]
=
i
;
}
}
return
pathCount
;
}
void
CostForOneSequence
::
constructTotalExpansion
()
{
/*
* construct the entire expanded beam by begining with the last search
* in which gold falls off the beam.
*/
size_t
totalPathCount
=
initLastExpansion
();
for
(
int
beamId
=
validExpansionCount_
-
2
;
beamId
>=
0
;
--
beamId
)
{
const
MatrixPtr
candidates
=
beams_
->
candidateIds
[
beamId
];
real
*
ids
=
candidates
->
getData
();
int
lastParentIdInBeam
=
-
1
;
int
basePos
=
-
1
;
for
(
size_t
i
=
0
;
i
<
(
goldAsExtraPath_
?
totalPathCount
-
1
:
totalPathCount
);
++
i
)
{
int
id
=
ids
[
parentIdsInBeam_
[
i
]];
int
parentRowId
=
std
::
div
(
parentIdsInBeam_
[
i
],
beamSize_
).
quot
;
if
(
parentIdsInBeam_
[
i
]
!=
lastParentIdInBeam
)
basePos
=
getSeqStartPos
(
beamId
,
parentRowId
);
pathRowIdsInEachBeam_
[
beamId
][
i
]
=
id
+
basePos
;
lastParentIdInBeam
=
parentIdsInBeam_
[
i
];
parentIdsInBeam_
[
i
]
=
parentRowId
;
if
(
goldAsExtraPath_
)
pathRowIdsInEachBeam_
[
beamId
][
totalPathCount
-
1
]
=
beams_
->
gold
[
beamId
]
+
getSeqStartPos
(
beamId
,
goldRowIds_
[
beamId
]);
}
}
}
/*
 * Compute the cost of one sequence.
 *
 * For every valid expansion the scores of the rows listed in
 * pathRowIdsInEachBeam_ are selected and accumulated per path, softmax is
 * applied over the accumulated path scores, and the negative log of the
 * entry at goldIdsInFinalExpansion_ is returned.
 */
real CostForOneSequence::globallyNormalizedScore() {
  expandedPathScores_.resize(validExpansionCount_);

  Matrix::resizeOrCreate(
      softmaxOut_, 1, pathRowIdsInEachBeam_[0].size(), false, false);
  softmaxOut_->zeroMem();

  /* column-shaped view over the memory of softmaxOut_, used to accumulate. */
  MatrixPtr pathScoreSum = Matrix::create(
      softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false);

  for (size_t step = 0; step < validExpansionCount_; ++step) {
    std::vector<int>& rows = pathRowIdsInEachBeam_[step];
    MatrixPtr& stepScores = expandedPathScores_[step];

    Matrix::resizeOrCreate(stepScores, rows.size(), 1, false, false);
    stepScores->zeroMem();

    IVectorPtr rowIds = IVector::create(rows.data(), rows.size(), false);
    stepScores->selectRows(*(beams_->scores[step]), *rowIds);
    pathScoreSum->add(*stepScores);
  }

  softmaxOut_->softmax(*softmaxOut_);
  const real goldProb = softmaxOut_->getData()[goldIdsInFinalExpansion_];
  return -std::log(goldProb);
}
/*
 * Forward pass for one sequence: find how far the gold sequence stays in the
 * beam, build all expanded paths, then return the globally normalized cost.
 */
real CostForOneSequence::forward() {
  calValidExpandStep();
  constructTotalExpansion();
  const real cost = globallyNormalizedScore();
  return cost;
}
void
CostForOneSequence
::
backward
()
{
/*
* when softmax layer is the output layer, and it is combined with
* cross-entropy as cost. The derivate with regard to softmax's input
* is simply:
*
* grad_i = softmax_out_i - target_i,
*
* and here hard label is used.
*/
softmaxOut_
->
getData
()[
goldIdsInFinalExpansion_
]
-=
1.
;
MatrixPtr
tmp
=
Matrix
::
create
(
softmaxOut_
->
getData
(),
softmaxOut_
->
getWidth
(),
1
,
false
,
false
);
for
(
size_t
i
=
0
;
i
<
validExpansionCount_
;
++
i
)
{
IVectorPtr
rowIds
=
IVector
::
create
(
pathRowIdsInEachBeam_
[
i
].
data
(),
pathRowIdsInEachBeam_
[
i
].
size
(),
false
);
/*
beams_->scoreGrad[i] has been intialized outside this class, this
class only keeps a pointer pointing to the original input gradients,
so here does not need to allocate or initalize the memory.
*/
tmp
->
addToRows
(
*
beams_
->
scoreGrad
[
i
],
*
rowIds
);
}
}
REGISTER_LAYER
(
cross_entropy_over_beam
,
CrossEntropyOverBeam
);
bool
CrossEntropyOverBeam
::
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
/* Initialize the basic parent class */
Layer
::
init
(
layerMap
,
parameterMap
);
CHECK_EQ
(
0U
,
inputLayers_
.
size
()
%
3
)
<<
"Error input number."
;
beamExpanCount_
=
inputLayers_
.
size
()
/
3
;
candidateScores_
.
resize
(
beamExpanCount_
);
candidateScoreGrad_
.
resize
(
beamExpanCount_
);
candidateInBeam_
.
resize
(
beamExpanCount_
);
goldSequence_
.
resize
(
beamExpanCount_
);
gradToInputs_
.
resize
(
beamExpanCount_
);
setNeedSequenceInfo
(
false
);
return
true
;
}
void
CrossEntropyOverBeam
::
checkInputs
()
{
batchSize_
=
0
;
for
(
size_t
i
=
0
;
i
<
beamExpanCount_
;
++
i
)
{
const
Argument
&
scores
=
getInput
(
i
*
3
);
const
Argument
&
selCandidates
=
getInput
(
i
*
3
+
1
);
const
Argument
&
goldSeq
=
getInput
(
i
*
3
+
2
);
if
(
i
)
{
CHECK
(
scores
.
hasSubseq
())
<<
"input "
<<
i
<<
" "
<<
inputLayers_
[
i
*
3
]
->
getName
()
<<
" should be a nested sequence"
;
CHECK_EQ
(
getInputValue
(
i
*
3
+
1
)
->
getWidth
(),
beamSize_
);
CHECK_EQ
(
scores
.
getNumSequences
(),
batchSize_
);
CHECK_EQ
(
scores
.
getNumSubSequences
(),
selCandidates
.
getBatchSize
());
}
else
{
CHECK
(
scores
.
hasSeq
())
<<
"input "
<<
i
<<
" "
<<
inputLayers_
[
i
]
->
getName
()
<<
" should be a sequence"
;
batchSize_
=
scores
.
getNumSequences
();
beamSize_
=
getInputValue
(
i
*
3
+
1
)
->
getWidth
();
CHECK_EQ
(
batchSize_
,
selCandidates
.
getBatchSize
());
}
CHECK_EQ
(
1U
,
scores
.
value
->
getWidth
());
CHECK_EQ
(
batchSize_
,
goldSeq
.
getBatchSize
());
}
}
/*
 * Make every input available in CPU memory and compute, for each sequence
 * and each expansion, where its rows end in the batched candidate matrix.
 *
 * Inputs that live on GPU are copied into the CPU buffers owned by this
 * layer; inputs that are already on CPU are shared (the pointer is copied).
 * After this call beamSplitPos_[j][i] holds the cumulative number of rows of
 * expansion i that belong to sequences 0..j.
 */
void CrossEntropyOverBeam::copyInputsToCpu() {
  /*
   * NOTE: src is a const reference, so std::move(src) would silently fall
   * back to a copy; the shared pointer is therefore copied explicitly.
   */
  auto copyValue = [](const MatrixPtr& src, MatrixPtr& trg) {
    if (dynamic_cast<GpuMatrix*>(src.get())) {
      Matrix::resizeOrCreate(
          trg, src->getHeight(), src->getWidth(), false, false);
      trg->copyFrom(*src);
    } else {
      trg = src;
    }
  };

  auto copyIds = [](const IVectorPtr& src, IVectorPtr& trg) {
    if (dynamic_cast<GpuIVector*>(src.get())) {
      IVector::resizeOrCreate(trg, src->getSize(), false);
      trg->copyFrom(*src);
    } else {
      trg = src;
    }
  };

  beamSplitPos_.clear();
  beamSplitPos_.resize(batchSize_, std::vector<int>(beamExpanCount_, 0));

  for (size_t i = 0; i < beamExpanCount_; ++i) {
    copyValue(getInputValue(i * 3), candidateScores_[i]);
    copyValue(getInputValue(i * 3 + 1), candidateInBeam_[i]);
    copyIds(getInput(i * 3 + 2).ids, goldSequence_[i]);

    if (i) {
      ICpuGpuVectorPtr seqInfo = getInput(i * 3).sequenceStartPositions;
      const int* seqStarts = seqInfo->getMutableData(false);
      ICpuGpuVectorPtr subSeqInfo = getInput(i * 3).subSequenceStartPositions;
      const int* subSeqStarts = subSeqInfo->getMutableData(false);

      /*
       * walk over the sub-sequences; whenever a sub-sequence starts at a
       * sequence boundary, move to the next sequence and carry the running
       * count over, then count the sub-sequence for the current sequence.
       */
      size_t seqId = 1;
      for (size_t subSeqId = 0; subSeqId < subSeqInfo->getSize() - 1;
           ++subSeqId) {
        CHECK_LT(seqId, seqInfo->getSize());
        if (subSeqStarts[subSeqId] == seqStarts[seqId]) {
          beamSplitPos_[seqId][i] = beamSplitPos_[seqId - 1][i];
          seqId++;
        }
        beamSplitPos_[seqId - 1][i]++;
      }
    } else {
      /* the first expansion has exactly one row per sequence. */
      for (size_t j = 0; j < batchSize_; ++j) beamSplitPos_[j][i] = j + 1;
    }
  }
}
void
CrossEntropyOverBeam
::
splitBatchBeams
()
{
beamCosts_
.
resize
(
batchSize_
);
beamPerSeq_
.
resize
(
batchSize_
,
BeamExpansion
(
beamExpanCount_
));
for
(
size_t
i
=
0
;
i
<
beamExpanCount_
;
++
i
)
{
int
*
seqStarts
=
getInput
(
i
*
3
).
sequenceStartPositions
->
getMutableData
(
false
);
int
*
subSeqStarts
=
nullptr
;
int
maxLen
=
0
;
if
(
i
)
{
subSeqStarts
=
getInput
(
i
*
3
).
subSequenceStartPositions
->
getMutableData
(
false
);
maxLen
=
getInput
(
i
*
3
).
subSequenceStartPositions
->
getSize
()
-
1
;
}
else
{
maxLen
=
getInput
(
i
).
sequenceStartPositions
->
getSize
()
-
1
;
}
for
(
size_t
j
=
0
;
j
<
batchSize_
;
++
j
)
{
beamPerSeq_
[
j
].
scores
[
i
]
=
Matrix
::
create
(
candidateScores_
[
i
]
->
getData
()
+
seqStarts
[
j
],
seqStarts
[
j
+
1
]
-
seqStarts
[
j
],
1
,
false
,
false
);
beamPerSeq_
[
j
].
scoreGrad
[
i
]
=
Matrix
::
create
(
candidateScoreGrad_
[
i
]
->
getData
()
+
seqStarts
[
j
],
seqStarts
[
j
+
1
]
-
seqStarts
[
j
],
1
,
false
,
false
);
int
offset
=
j
?
beamSplitPos_
[
j
-
1
][
i
]
:
0
;
int
height
=
beamSplitPos_
[
j
][
i
]
-
(
j
?
beamSplitPos_
[
j
-
1
][
i
]
:
0
);
CHECK_GE
(
maxLen
,
offset
+
height
);
beamPerSeq_
[
j
].
seqInfo
[
i
]
=
IVector
::
create
(
(
i
?
subSeqStarts
:
seqStarts
)
+
offset
,
height
+
1
,
false
);
beamPerSeq_
[
j
].
candidateIds
[
i
]
=
Matrix
::
create
(
candidateInBeam_
[
i
]
->
getData
()
+
offset
*
beamSize_
,
height
,
beamSize_
,
false
,
false
);
beamPerSeq_
[
j
].
gold
[
i
]
=
goldSequence_
[
i
]
->
getData
()[
j
];
CHECK_LE
(
beamPerSeq_
[
j
].
gold
[
i
],
seqStarts
[
j
+
1
]
-
seqStarts
[
j
]);
}
}
}
void
CrossEntropyOverBeam
::
resizeOutput
()
{
Matrix
::
resizeOrCreate
(
output_
.
value
,
batchSize_
,
1
,
false
,
false
);
output_
.
value
->
zeroMem
();
for
(
size_t
i
=
0
;
i
<
beamExpanCount_
;
++
i
)
{
MatrixPtr
inGrad
=
getInputGrad
(
i
*
3
);
if
(
dynamic_cast
<
GpuMatrix
*>
(
inGrad
.
get
()))
{
Matrix
::
resizeOrCreate
(
candidateScoreGrad_
[
i
],
inGrad
->
getHeight
(),
inGrad
->
getWidth
(),
false
,
false
);
}
else
{
candidateScoreGrad_
[
i
]
=
std
::
move
(
inGrad
);
}
candidateScoreGrad_
[
i
]
->
zeroMem
();
}
}
void
CrossEntropyOverBeam
::
copyGradToGpu
(
size_t
copyCount
)
{
for
(
size_t
i
=
0
;
i
<
beamExpanCount_
;
++
i
)
{
if
(
dynamic_cast
<
GpuMatrix
*>
(
getInputGrad
(
i
*
3
).
get
()))
getInputGrad
(
i
*
3
)
->
copyFrom
(
*
candidateScoreGrad_
[
i
]);
if
(
i
==
copyCount
-
1
)
break
;
}
}
/*
 * Forward pass of the layer: validate the inputs, bring them to CPU, split
 * them per sequence and write the cost of each sequence into the output.
 */
void CrossEntropyOverBeam::forward(PassType passType) {
  Layer::forward(passType);

  checkInputs();
  copyInputsToCpu();

  resizeOutput();
  splitBatchBeams();

  MatrixPtr outputValue = getOutputValue();
  real* costs = outputValue->getData();
  for (size_t seq = 0; seq < batchSize_; ++seq) {
    CostForOneSequence& seqCost = beamCosts_[seq];
    /* each cost object gets its own copy of the per-sequence beams. */
    seqCost.setData(std::make_shared<BeamExpansion>(beamPerSeq_[seq]),
                    beamSize_);
    costs[seq] = seqCost.forward();
  }
}
void
CrossEntropyOverBeam
::
backward
(
const
UpdateCallback
&
callback
)
{
for
(
size_t
i
=
0
;
i
<
batchSize_
;
++
i
)
{
beamCosts_
[
i
].
backward
();
copyGradToGpu
(
beamCosts_
[
i
].
getValidExpansionCount
());
}
}
}
// namespace paddle
paddle/gserver/layers/CrossEntropyOverBeam.h
0 → 100644
浏览文件 @
b66af308
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "CrossEntropyOverBeam.h"
#include "Layer.h"
namespace
paddle
{
/*
 * This struct stores the beams in all search steps for a single sequence.
 * Every container holds one entry per search step.
 */
struct BeamExpansion {
  /* scores over all the candidates of each step. */
  std::vector<MatrixPtr> scores;
  /* start positions delimiting the candidate rows of each step. */
  std::vector<IVectorPtr> seqInfo;
  /* ids of the selected candidates of each step, -1 for an unused slot. */
  std::vector<MatrixPtr> candidateIds;
  /* the gold id of each step. */
  std::vector<int> gold;
  /* gradients with regard to scores. */
  std::vector<MatrixPtr> scoreGrad;

  size_t expansionCount;

  /* create storage for n search steps. */
  explicit BeamExpansion(int n)
      : scores(n),
        seqInfo(n),
        candidateIds(n),
        gold(n),
        scoreGrad(n),
        expansionCount(n) {}
};
using BeamExpansionPtr = std::shared_ptr<BeamExpansion>;
/*
 * Computes the cross entropy over the expanded beam for a single sequence
 * and propagates its gradient back to the candidate scores.
 */
class CostForOneSequence {
public:
  CostForOneSequence()
      : beamSize_(0),
        validExpansionCount_(0),
        goldAsExtraPath_(false),
        goldIdsInFinalExpansion_(0) {}

  /*
   * Attach the beams of one sequence and reset the per-step bookkeeping.
   * Must be called before forward().
   */
  void setData(const BeamExpansionPtr bPtr, size_t beamSize) {
    beams_ = bPtr;
    beamSize_ = beamSize;

    expandedPathScores_.clear();
    expandedPathScores_.resize(beams_->expansionCount);

    goldRowIds_.clear();
    goldRowIds_.resize(beams_->expansionCount, 0);
    /* -1 means the gold id has not been found in that step. */
    goldColIds_.clear();
    goldColIds_.resize(beams_->expansionCount, -1);
  }

  /* number of expansions visited by the last forward(). */
  size_t getValidExpansionCount() const { return validExpansionCount_; }

  real forward();
  void backward();

private:
  void calValidExpandStep();
  void constructTotalExpansion();
  size_t initLastExpansion();
  real globallyNormalizedScore();

  /*
   * Start position of row rowId in expansion beamId, relative to the start
   * of the first row of this sequence.
   */
  int getSeqStartPos(size_t beamId, size_t rowId) {
    CHECK_GT(beams_->seqInfo[beamId]->getSize() - 1, rowId);
    int* starts = beams_->seqInfo[beamId]->getData();
    return starts[rowId] - starts[0];
  }

  size_t beamSize_;
  size_t validExpansionCount_;
  /* true when the gold sequence has to be appended as an extra path. */
  bool goldAsExtraPath_;
  /* row and column of the gold candidate in each expansion. */
  std::vector<int> goldRowIds_;
  std::vector<int> goldColIds_;

  BeamExpansionPtr beams_;
  /* for each expansion, the row id every path goes through. */
  std::vector<std::vector<int>> pathRowIdsInEachBeam_;
  std::vector<int> parentIdsInBeam_;
  /* index of the gold path among all the expanded paths. */
  size_t goldIdsInFinalExpansion_;

  std::vector<MatrixPtr> expandedPathScores_;

  MatrixPtr softmaxOut_;
};
/*
 * Cost layer that computes the cross entropy over the beams expanded during
 * search. Its inputs come in triples, one triple per beam expansion:
 * candidate scores, selected candidates and the gold id.
 */
class CrossEntropyOverBeam : public Layer {
public:
  explicit CrossEntropyOverBeam(const LayerConfig& config)
      : Layer(config), beamExpanCount_(0), batchSize_(0), beamSize_(0) {}
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
  void forward(PassType passType) override;
  void backward(const UpdateCallback& callback) override;

private:
  void checkInputs();
  void copyInputsToCpu();
  void resizeOutput();
  void copyGradToGpu(size_t copyCount);
  void splitBatchBeams();

  /* number of beam expansions, i.e. number of input triples. */
  size_t beamExpanCount_;
  size_t batchSize_;
  size_t beamSize_;

  /*
   * the process of constructing beams is not friendly to GPU, currently, this
   * layer only runs on CPU, if any of its inputs is on GPU memory, then copy
   * it to CPU memory.
   */
  std::vector<MatrixPtr> candidateScores_;
  std::vector<MatrixPtr> candidateScoreGrad_;
  std::vector<MatrixPtr> candidateInBeam_;
  std::vector<MatrixPtr> gradToInputs_;
  std::vector<IVectorPtr> goldSequence_;
  std::vector<std::vector<int>> beamSplitPos_;

  /*
   * split entire batch of beams into beam per sequence and store the result
   * into this member.
   */
  std::vector<BeamExpansion> beamPerSeq_;
  /* beamCosts_ is used to propagate error in one sequence. */
  std::vector<CostForOneSequence> beamCosts_;
};
}
// namespace paddle
paddle/gserver/tests/CMakeLists.txt
浏览文件 @
b66af308
...
...
@@ -34,6 +34,13 @@ add_unittest_without_exec(test_CRFLayerGrad
add_test
(
NAME test_CRFLayerGrad
COMMAND test_CRFLayerGrad
)
################ test_CrossEntropyOverBeam ####################
add_unittest_without_exec
(
test_CrossEntropyOverBeam
test_CrossEntropyOverBeamGrad.cpp
LayerGradUtil.cpp
)
add_test
(
NAME test_CrossEntropyOverBeam
COMMAND test_CrossEntropyOverBeam
)
################ test_SeqSliceLayerGrad ####################
add_unittest_without_exec
(
test_SeqSliceLayerGrad
test_SeqSliceLayerGrad.cpp
...
...
paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp
0 → 100644
浏览文件 @
b66af308
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <random>
#include <sstream>
#include <gtest/gtest.h>
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/trainer/Trainer.h"
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
using
namespace
paddle
;
// NOLINT
DECLARE_int32
(
gpu_id
);
DECLARE_bool
(
thread_local_rand_use_global_seed
);
// upper bounds used when generating random test data.
const size_t MAX_SEQ_NUM = 23;
const size_t MAX_SEQ_LEN = 50;
const size_t MAX_BEAM_SIZE = 27;

// seed of the C random number generator, taken from the current time.
const size_t SEED = static_cast<size_t>(time(nullptr));
// Randomly generated data of one beam expansion for the whole batch.
struct SingleBeamExpansion {
  vector<int> seqStartPos;
  vector<int> subSeqStartPos;
  vector<real> candidateScores;

  // TODO(caoying): store this into Argument.ids
  vector<real> selectedIndices;

  vector<int> groundTruth;
  // non-zero when the gold label of a sequence is among the selected ids.
  vector<size_t> inBeam;
  // position of the gold label inside selectedIndices, -1 when unset.
  vector<int> rowIdxInBeam;
  vector<int> colIdxInBeam;

  // reset the ground-truth bookkeeping for n sequences.
  void resetGroundTruth(size_t n) {
    groundTruth.assign(n, -1);
    inBeam.assign(n, 0);
    rowIdxInBeam.assign(n, -1);
    colIdxInBeam.assign(n, -1);
  }
};
// Return rand() scaled by RAND_MAX, i.e. a float in [0, 1].
inline float randFloat() {
  const float sample = static_cast<float>(rand());
  return sample / static_cast<float>(RAND_MAX);
}
// Fill numbers[0, n) with samples drawn uniformly from [0, 1).
// The engine is default-constructed on every call, so every call produces
// the same sequence of values.
void genRand(real* numbers, size_t n) {
  default_random_engine generator;
  uniform_real_distribution<real> distribution(0.0, 1.0);
  for (real* cur = numbers; cur != numbers + n; ++cur)
    *cur = distribution(generator);
}
// Sample n distinct values from {0, 1, ..., range - 1} without replacement
// and return them in ascending order. When n equals range all the values are
// returned.
//
// std::random_shuffle is deprecated since C++14 and removed in C++17, so the
// shuffle uses an engine seeded from rand(); results therefore still depend
// on the seed passed to srand().
vector<real> randSampling(real range, int n) {
  CHECK_GE(range, n);
  vector<real> num(static_cast<size_t>(range));
  iota(begin(num), end(num), 0.);
  if (range == n) return num;

  default_random_engine engine(rand());
  shuffle(begin(num), end(num), engine);
  num.resize(n);
  sort(begin(num), end(num));
  return num;
}
// Generate the sequence layout and the candidate scores of one expansion.
//
// When the previous beam has selected candidates, every selected (non -1)
// candidate is expanded into one sub-sequence of random length in
// [1, MAX_SEQ_LEN], and sequence boundaries follow those of the previous
// beam. Otherwise this is the first expansion and a random number of
// sequences is generated. One random score is generated per candidate.
void genCandidateScores(bool hasSubseq,
                        size_t beamSize,
                        SingleBeamExpansion& prevBeam,
                        SingleBeamExpansion& curBeam) {
  vector<int>& seqStartPos = curBeam.seqStartPos;
  seqStartPos.resize(1, 0);
  vector<int>& subSeqStartPos = curBeam.subSeqStartPos;
  subSeqStartPos.resize(1, 0);

  srand(SEED);
  if (prevBeam.selectedIndices.size()) {
    if (prevBeam.subSeqStartPos.size() > 1) {
      int seqIdx = 1;
      // samples in previous beam are nested sequences.
      for (size_t i = 1; i < prevBeam.subSeqStartPos.size(); ++i) {
        for (size_t j = 0; j < beamSize; ++j) {
          // the first -1 ends the selected candidates of this row.
          if (prevBeam.selectedIndices[(i - 1) * beamSize + j] == -1.) break;
          subSeqStartPos.push_back(1 + (rand() % MAX_SEQ_LEN) +
                                   subSeqStartPos.back());
        }
        if (prevBeam.seqStartPos[seqIdx] == prevBeam.subSeqStartPos[i]) {
          seqStartPos.push_back(subSeqStartPos.back());
          seqIdx++;
        }
      }
    } else {
      // samples in previous beam are plain sequences: every beamSize slots
      // of selectedIndices belong to one sequence.
      for (size_t i = 0; i <= prevBeam.selectedIndices.size(); ++i) {
        if (i && i % beamSize == 0) {
          seqStartPos.push_back(subSeqStartPos.back());
          if (i == prevBeam.selectedIndices.size()) break;
        }
        if (prevBeam.selectedIndices[i] == -1.) continue;
        subSeqStartPos.push_back(subSeqStartPos.back() +
                                 (1 + (rand() % MAX_SEQ_LEN)));
      }
    }
  } else {
    // the first beam expansion
    int seqNum = 1 + (rand() % MAX_SEQ_NUM);
    for (int i = 0; i < seqNum; ++i) {
      if (hasSubseq) {
        // draw the number of sub-sequences once; evaluating rand() in the
        // loop condition would re-roll the bound on every iteration.
        const size_t subSeqNum = 1 + (rand() % MAX_SEQ_NUM);
        for (size_t j = 0; j < subSeqNum; ++j)
          subSeqStartPos.push_back(subSeqStartPos.back() +
                                   (1 + (rand() % MAX_SEQ_LEN)));
        seqStartPos.push_back(subSeqStartPos.back());
      } else {
        seqStartPos.push_back(seqStartPos.back() +
                              (1 + (rand() % MAX_SEQ_LEN)));
      }
    }
  }

  size_t totalSeqNum = hasSubseq ? subSeqStartPos.back() : seqStartPos.back();
  curBeam.candidateScores.resize(totalSeqNum, 0.);
  genRand(curBeam.candidateScores.data(), totalSeqNum);
}
void
genSelectedIndices
(
size_t
beamSize
,
vector
<
int
>&
seqStartPos
,
vector
<
real
>&
selectedIndices
)
{
size_t
selectedIdsCount
=
beamSize
*
(
seqStartPos
.
size
()
-
1
);
selectedIndices
.
resize
(
selectedIdsCount
,
-
1.
);
for
(
size_t
i
=
0
;
i
<
seqStartPos
.
size
()
-
1
;
++
i
)
{
int
seqLen
=
seqStartPos
[
i
+
1
]
-
seqStartPos
[
i
];
int
n
=
min
(
seqLen
,
static_cast
<
int
>
(
beamSize
));
vector
<
real
>
ids
=
randSampling
(
seqLen
,
n
);
memcpy
(
selectedIndices
.
data
()
+
i
*
beamSize
,
ids
.
data
(),
sizeof
(
real
)
*
ids
.
size
());
}
}
// Generate the gold label of every sequence in every beam expansion.
//
// beamExpansions[0] is not used. For each sequence the label is, by chance
// 0.5, forced to be one of the selected candidates; otherwise it is drawn at
// random and may or may not be in the beam. Once the gold label of a
// sequence is not in the beam, no label is generated for it in the later
// expansions.
void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions,
                    size_t beamSize) {
  const auto isSelected = [](const real& val) { return val != -1.; };

  SingleBeamExpansion& firstBeam = beamExpansions[1];
  const size_t seqNum = firstBeam.seqStartPos.size() - 1;
  for (size_t i = 2; i < beamExpansions.size(); ++i)
    CHECK_EQ(seqNum, beamExpansions[i].seqStartPos.size() - 1);

  srand(SEED);

  // initialize the first beam.
  firstBeam.resetGroundTruth(seqNum);
  for (size_t seq = 0; seq < seqNum; ++seq) {
    real* rowBeg = firstBeam.selectedIndices.data() + seq * beamSize;
    real* rowEnd = rowBeg + beamSize;

    if (randFloat() > 0.5) {
      /*
       * force the randomly generated label to fall in the beam by chance
       * 0.5. otherwise, when sequence length is relatively long and beam
       * size is relatively small, the gold sequence falls off the beam in
       * the first search.
       */
      const auto validCount = count_if(rowBeg, rowEnd, isSelected);
      firstBeam.colIdxInBeam[seq] = rand() % validCount;
      firstBeam.groundTruth[seq] =
          firstBeam.selectedIndices[seq * beamSize +
                                    firstBeam.colIdxInBeam[seq]];
      firstBeam.inBeam[seq] = 1;
    } else {
      const int label =
          rand() % (firstBeam.seqStartPos[seq + 1] - firstBeam.seqStartPos[seq]);
      firstBeam.groundTruth[seq] = label;

      real* lblPos = find(rowBeg, rowEnd, real(label));
      if (lblPos != rowEnd) {
        firstBeam.inBeam[seq] = 1;
        firstBeam.colIdxInBeam[seq] = lblPos - rowBeg;
      }
    }
    firstBeam.rowIdxInBeam[seq] = seq;
  }

  // iterate over each beam expansions
  for (size_t i = 2; i < beamExpansions.size(); ++i) {
    SingleBeamExpansion& curBeam = beamExpansions[i];
    SingleBeamExpansion& prevBeam = beamExpansions[i - 1];
    curBeam.resetGroundTruth(seqNum);

    // iterate over each sequence
    for (size_t seq = 0; seq < seqNum; ++seq) {
      if (!prevBeam.inBeam[seq]) continue;

      // gold sequence falls in the beam in previous search: its row in the
      // current beam is the number of selected candidates before it.
      real* prevIds = prevBeam.selectedIndices.data();
      const int offset = prevBeam.rowIdxInBeam[seq] * beamSize +
                         prevBeam.colIdxInBeam[seq];
      curBeam.rowIdxInBeam[seq] = count_if(prevIds, prevIds + offset, isSelected);

      if (randFloat() > 0.5) {
        // force the randomly generated label to fall in the beam by chance
        // 0.5.
        real* rowBeg = curBeam.selectedIndices.data() +
                       curBeam.rowIdxInBeam[seq] * beamSize;
        const auto validCount = count_if(rowBeg, rowBeg + beamSize, isSelected);
        const int col = rand() % validCount;
        curBeam.colIdxInBeam[seq] = col;
        curBeam.groundTruth[seq] = *(rowBeg + col);
        curBeam.inBeam[seq] = 1;
      } else {
        CHECK_LE(curBeam.rowIdxInBeam[seq] + 1,
                 curBeam.subSeqStartPos.size() - 1);
        const int start = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[seq]];
        const int end = curBeam.subSeqStartPos[curBeam.rowIdxInBeam[seq] + 1];
        CHECK_GT(size_t(end), size_t(start));
        const int label = rand() % (end - start);

        curBeam.groundTruth[seq] = label;
        real* rowBeg = curBeam.selectedIndices.data() +
                       curBeam.rowIdxInBeam[seq] * beamSize;
        real* rowEnd = rowBeg + beamSize;
        real* lblPos = find(rowBeg, rowEnd, static_cast<real>(label));
        if (lblPos != rowEnd) {
          curBeam.inBeam[seq] = 1;
          curBeam.colIdxInBeam[seq] = lblPos - rowBeg;
        }
      }
    }
  }
}
void
genOneBeam
(
size_t
beamSize
,
bool
hasSubseq
,
SingleBeamExpansion
&
prevBeam
,
SingleBeamExpansion
&
curBeam
)
{
genCandidateScores
(
hasSubseq
,
beamSize
,
prevBeam
,
curBeam
);
genSelectedIndices
(
beamSize
,
hasSubseq
?
curBeam
.
subSeqStartPos
:
curBeam
.
seqStartPos
,
curBeam
.
selectedIndices
);
}
// Generate expansionCount random beam expansions followed by their gold
// labels. The result has expansionCount + 1 entries; entry 0 is left empty.
void genRandomBeamExpansion(size_t expansionCount,
                            size_t beamSize,
                            vector<SingleBeamExpansion>& beamExpansions) {
  beamExpansions.clear();
  beamExpansions.resize(expansionCount + 1);

  // beamExpansions[0] is reserved.
  for (size_t step = 1; step <= expansionCount; ++step) {
    // every expansion after the first one is a nested sequence.
    const bool hasSubseq = step > 1;
    genOneBeam(
        beamSize, hasSubseq, beamExpansions[step - 1], beamExpansions[step]);
  }
  genGroundTruth(beamExpansions, beamSize);
}
// Build the configuration of a cross_entropy_over_beam layer from the
// generated beams (beams[0] is skipped) and run the gradient check on it.
// Every beam contributes three inputs: the candidate scores, the selected
// candidates and the ground truth.
void testCrossEntropyOverBeam(bool useGpu,
                              size_t beamSize,
                              vector<SingleBeamExpansion>& beams) {
  TestConfig config;
  config.layerConfig.set_type("cross_entropy_over_beam");

  size_t seqNum = 0;
  for (size_t i = 1; i < beams.size(); ++i) {
    const SingleBeamExpansion& beam = beams[i];
    // create scores for all the candidates
    MatrixPtr candidateScorePtr =
        Matrix::create(beam.candidateScores.size(), 1, false, false);
    candidateScorePtr->copyFrom(beam.candidateScores.data(),
                                beam.candidateScores.size());

    ostringstream paramName;
    paramName << "candidate_scores_" << i;

    if (beam.subSeqStartPos.size() > 1) {
      seqNum = beam.subSeqStartPos.size() - 1;
      config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA,
                                  paramName.str(),
                                  candidateScorePtr,
                                  beam.seqStartPos,
                                  beam.subSeqStartPos});
    } else {
      seqNum = beam.seqStartPos.size() - 1;
      config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA,
                                  paramName.str(),
                                  candidateScorePtr,
                                  beam.seqStartPos});
    }
    config.layerConfig.add_inputs();

    // create indices for the selected candidates
    MatrixPtr selectedCandidates =
        Matrix::create(seqNum, beamSize, false, false);
    selectedCandidates->copyFrom(beam.selectedIndices.data(),
                                 beam.selectedIndices.size());
    // str("") empties the buffer; clear() would only reset the error flags
    // and the names would be appended to each other.
    paramName.str("");
    paramName << "selected_candidates_" << i;
    config.inputDefs.push_back(
        {INPUT_SELF_DEFINE_DATA, paramName.str(), selectedCandidates});
    config.layerConfig.add_inputs();

    // create the ground truth
    paramName.str("");
    paramName << "label_" << i;
    config.inputDefs.push_back(
        {INPUT_SELF_DEFINE_DATA, paramName.str(), beam.groundTruth});
    config.layerConfig.add_inputs();
  }

  testLayerGrad(
      config, "cross_entropy_over_beam", seqNum, false, useGpu, false);
}
// Gradient check of the layer on randomly generated beams, first on CPU and
// then on GPU. The seed and the beam size are logged.
TEST(Layer, CrossEntropyOverBeam) {
  LOG(INFO) << "SEED = " << SEED;
  const size_t beamSize = 1 + rand() % MAX_BEAM_SIZE;
  LOG(INFO) << "beamSize = " << beamSize;

  // TODO(caoying): test with random beam expansions.
  const size_t expansionCount = 3;
  vector<SingleBeamExpansion> beams;
  genRandomBeamExpansion(expansionCount, beamSize, beams);

  const bool devices[] = {false, true};
  for (bool useGpu : devices) {
    testCrossEntropyOverBeam(useGpu, beamSize, beams);
  }
}
// Test entry point: initialize the runtime and the device, seed the random
// number generator with SEED, then run all the registered tests.
int main(int argc, char** argv) {
  initMain(argc, argv);
  hl_start();
  hl_init(FLAGS_gpu_id);

  FLAGS_thread_local_rand_use_global_seed = true;
  srand(SEED);

  testing::InitGoogleTest(&argc, argv);
  const int exitCode = RUN_ALL_TESTS();
  return exitCode;
}
paddle/parameter/Argument.cpp
浏览文件 @
b66af308
...
...
@@ -677,6 +677,7 @@ void Argument::reorganizeSeqInfo(
const
ICpuGpuVectorPtr
subSeqStartPos
,
std
::
vector
<
std
::
vector
<
int
>>&
reorganizedSeqInfo
)
{
CHECK
(
seqStartPos
);
reorganizedSeqInfo
.
clear
();
int
seqNum
=
seqStartPos
->
getSize
()
-
1
;
int
*
seqStarts
=
seqStartPos
->
getMutableData
(
false
);
...
...
python/paddle/trainer/config_parser.py
浏览文件 @
b66af308
...
...
@@ -1607,6 +1607,21 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase):
self
.
config
.
softmax_selfnorm_alpha
=
softmax_selfnorm_alpha
@config_layer('cross_entropy_over_beam')
class CrossEntropyOverBeamLayer(LayerBase):
    """Config of the cross_entropy_over_beam layer.

    The inputs are made up of several triples. The first input of every
    triple holds the scores over all candidate paths and its size must be 1.
    """

    def __init__(self, name, inputs, **xargs):
        config_assert(len(inputs) % 3 == 0, "Error input number.")
        super(CrossEntropyOverBeamLayer, self).__init__(
            name, 'cross_entropy_over_beam', 0, inputs, **xargs)
        # floor division keeps input_num an integer on both Python 2 and 3,
        # which range() requires.
        input_num = len(inputs) // 3
        for i in range(input_num):
            input_layer = self.get_input_layer(i * 3)
            config_assert(input_layer.size == 1, (
                "Inputs for this layer are made up of "
                "several triples, in which the first one is scores over "
                "all candidate paths, whose size should be equal to 1."))
@
config_layer
(
'fc'
)
class
FCLayer
(
LayerBase
):
layer_type
=
'fc'
...
...
@@ -2268,6 +2283,7 @@ def define_cost(class_name, cost_type):
define_cost
(
'MultiClassCrossEntropy'
,
'multi-class-cross-entropy'
)
define_cost
(
'CrossEntropyOverBeamCostLayer'
,
'cross_entropy_over_beam'
)
define_cost
(
'RankingCost'
,
'rank-cost'
)
define_cost
(
'AucValidation'
,
'auc-validation'
)
define_cost
(
'PnpairValidation'
,
'pnpair-validation'
)
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
b66af308
...
...
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
functools
import
collections
import
inspect
...
...
@@ -106,6 +105,8 @@ __all__ = [
'nce_layer'
,
'cross_entropy_with_selfnorm'
,
'cross_entropy'
,
'BeamInput'
,
'cross_entropy_over_beam'
,
'multi_binary_label_cross_entropy'
,
'sum_cost'
,
'rank_cost'
,
...
...
@@ -225,6 +226,7 @@ class LayerType(object):
HUBER_CLASSIFICATION
=
'huber_classification'
CROSS_ENTROPY
=
'multi-class-cross-entropy'
CROSS_ENTROPY_WITH_SELFNORM
=
'multi_class_cross_entropy_with_selfnorm'
CROSS_ENTROPY_OVER_BEAM
=
'cross_entropy_over_beam'
SOFT_BIN_CLASS_CROSS_ENTROPY
=
'soft_binary_class_cross_entropy'
MULTI_BIN_LABEL_CROSS_ENTROPY
=
'multi_binary_label_cross_entropy'
SUM_COST
=
'sum_cost'
...
...
@@ -4071,8 +4073,12 @@ def __cost_input__(input, label, weight=None):
"""
inputs and parents for cost layers.
"""
ipts
=
[
Input
(
input
.
name
),
Input
(
label
.
name
)]
parents
=
[
input
,
label
]
if
isinstance
(
input
,
LayerOutput
):
input
=
[
input
]
if
isinstance
(
label
,
LayerOutput
):
label
=
[
label
]
ipts
=
[
Input
(
ipt
.
name
)
for
ipt
in
(
input
+
label
)]
parents
=
[
ipt
for
ipt
in
(
input
+
label
)]
if
weight
is
not
None
:
assert
weight
.
size
==
1
ipts
.
append
(
Input
(
weight
.
name
))
...
...
@@ -5777,10 +5783,10 @@ def multi_binary_label_cross_entropy(input,
if
input
.
activation
is
None
or
\
not
isinstance
(
input
.
activation
,
SigmoidActivation
):
logger
.
log
(
logging
.
WARN
,
"%s is not recommend for multi_binary_label_cross_entropy's activation, "
"maybe the sigmoid is better"
%
repr
(
input
.
activation
))
logger
.
log
(
logging
.
WARN
,
(
"%s is not a recommended activation for "
"multi_binary_label_cross_entropy, sigmoid is better"
)
%
repr
(
input
.
activation
))
Layer
(
name
=
name
,
...
...
@@ -5795,6 +5801,113 @@ def multi_binary_label_cross_entropy(input,
size
=
1
)
class BeamInput(object):
    """
    One beam, as consumed by the cross_entropy_over_beam layer.

    A beam is a triple of layer outputs:

    - candidate_scores: scores over all candidates; its size must be 1.
    - selected_candidates: indices of the top k selected candidates.
    - gold: index of the ground truth (commonly called "gold").
    """

    def __init__(self, candidate_scores, selected_candidates, gold):
        assert isinstance(candidate_scores, LayerOutput)
        self.candidate_scores = candidate_scores
        assert candidate_scores.size == 1

        # The remaining two members are only type-checked, in declaration
        # order, and stored under their own names.
        for attr_name, layer in (("selected_candidates", selected_candidates),
                                 ("gold", gold)):
            assert isinstance(layer, LayerOutput)
            setattr(self, attr_name, layer)
@wrap_name_default()
@layer_support()
def cross_entropy_over_beam(input, name=None):
    """
    This layer is used in learning to search models, which solve complex
    joint prediction problems by learning to search through a
    problem-defined search space.

    Specifically, the learning to search process for this layer begins with
    searching a target sequence from a nested sequence. In the first search
    step, the top beam size sequences with highest scores, the indices of
    these top k sequences in the original nested sequence, and the ground
    truth (also called gold) together (a triple) make up the first beam.

    Then, several special positions, for example, start and end positions
    that define meaningful segments, are searched. In these searches, the top
    k positions with highest scores are selected, and then the sequences
    starting from the selected starts till the ends of the sequences (or a
    fixed position) are taken to search next.

    We call the possible top k results returned in one search the beam. This
    search process can be repeated for a pre-defined number of turns and
    leads to several beam expansions.

    Finally, the layer cross_entropy_over_beam takes all the beam expansions,
    which contain several candidate targets found along the multi-step
    search. cross_entropy_over_beam calculates cross entropy over the
    expanded beams, using all the candidates in the beam as the
    normalization factor.

    Note that, if gold falls off the beam at search step t, then the cost is
    calculated over the beam at step t.

    This cost layer always works together with kmax_sequence_score_layer,
    sub_nested_seq_layer, and sequence_slice_layer to trim the input to form
    a sub-search space.

    The example usage is:

    .. code-block:: python

       cost = cross_entropy_over_beam(input=[
           BeamInput(
               candidate_scores=beam1_candidates,
               selected_candidates=beam1_topk,
               gold=gold1),
           BeamInput(
               candidate_scores=beam2_candidates,
               selected_candidates=beam2_topk,
               gold=gold2),
       ])

    :param input: input beams for this layer; a single BeamInput or a
                  non-empty python list of BeamInput objects.
    :type input: BeamInput | list
    :param name: the name of this layer.
    :type name: basestring
    :return: LayerOutput object.
    :rtype: LayerOutput
    """

    if isinstance(input, BeamInput):
        input = [input]
    else:
        assert isinstance(input, list), (
            'input for cross_entropy_over_beam should be a python list '
            'of BeamInput object.')
        # An empty list would define a cost layer without any beam.
        assert len(input) > 0, (
            'input for cross_entropy_over_beam should contain at least '
            'one BeamInput object.')
        for ipt in input:
            assert isinstance(ipt, BeamInput), (
                'input for cross_entropy_over_beam '
                'should be a BeamInput object.')

    # Flatten every beam into the (scores, selected ids, gold) order.
    ipts = []
    parents = []
    for beam in input:
        parents += [beam.candidate_scores, beam.selected_candidates, beam.gold]
        ipts += [
            beam.candidate_scores.name, beam.selected_candidates.name,
            beam.gold.name
        ]

    Layer(name=name, type=LayerType.CROSS_ENTROPY_OVER_BEAM, inputs=ipts)
    # Tag the output with the same layer type the layer was created with.
    return LayerOutput(
        name, LayerType.CROSS_ENTROPY_OVER_BEAM, parents=parents, size=1)
@
wrap_name_default
()
@
layer_support
()
def
smooth_l1_cost
(
input
,
label
,
name
=
None
,
coeff
=
1.0
,
layer_attr
=
None
):
...
...
python/paddle/trainer_config_helpers/tests/configs/file_list.sh
浏览文件 @
b66af308
...
...
@@ -9,6 +9,6 @@ test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer
test_seq_slice_layer
)
test_seq_slice_layer
test_cross_entropy_over_beam
)
export
whole_configs
=(
test_split_datasource
)
python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr
0 → 100644
浏览文件 @
b66af308
type: "nn"
layers {
name: "sentence_states"
type: "data"
size: 32
active_type: ""
}
layers {
name: "sentence_scores"
type: "data"
size: 1
active_type: ""
}
layers {
name: "__kmax_sequence_score_layer_0__"
type: "kmax_seq_score"
active_type: ""
inputs {
input_layer_name: "sentence_scores"
}
beam_size: 5
}
layers {
name: "__sub_nested_seq_layer_0__"
type: "sub_nested_seq"
size: 32
active_type: ""
inputs {
input_layer_name: "sentence_states"
}
inputs {
input_layer_name: "__kmax_sequence_score_layer_0__"
}
}
layers {
name: "__fc_layer_0__"
type: "fc"
size: 1
active_type: ""
inputs {
input_layer_name: "__sub_nested_seq_layer_0__"
input_parameter_name: "___fc_layer_0__.w0"
}
bias_parameter_name: "___fc_layer_0__.wbias"
}
layers {
name: "__kmax_sequence_score_layer_1__"
type: "kmax_seq_score"
active_type: ""
inputs {
input_layer_name: "sentence_scores"
}
beam_size: 5
}
layers {
name: "__seq_slice_layer_0__"
type: "seq_slice"
size: 32
active_type: ""
inputs {
input_layer_name: "__sub_nested_seq_layer_0__"
}
inputs {
input_layer_name: "__kmax_sequence_score_layer_1__"
}
select_first: true
}
layers {
name: "__fc_layer_1__"
type: "fc"
size: 1
active_type: ""
inputs {
input_layer_name: "__seq_slice_layer_0__"
input_parameter_name: "___fc_layer_1__.w0"
}
bias_parameter_name: "___fc_layer_1__.wbias"
}
layers {
name: "__kmax_sequence_score_layer_2__"
type: "kmax_seq_score"
active_type: ""
inputs {
input_layer_name: "__fc_layer_1__"
}
beam_size: 5
}
layers {
name: "sentences_ids"
type: "data"
size: 1
active_type: ""
}
layers {
name: "start_ids"
type: "data"
size: 1
active_type: ""
}
layers {
name: "end_ids"
type: "data"
size: 1
active_type: ""
}
layers {
name: "__cross_entropy_over_beam_0__"
type: "cross_entropy_over_beam"
active_type: ""
inputs {
input_layer_name: "sentence_scores"
}
inputs {
input_layer_name: "__kmax_sequence_score_layer_0__"
}
inputs {
input_layer_name: "sentences_ids"
}
inputs {
input_layer_name: "__fc_layer_0__"
}
inputs {
input_layer_name: "__kmax_sequence_score_layer_1__"
}
inputs {
input_layer_name: "start_ids"
}
inputs {
input_layer_name: "__fc_layer_1__"
}
inputs {
input_layer_name: "__kmax_sequence_score_layer_2__"
}
inputs {
input_layer_name: "end_ids"
}
}
parameters {
name: "___fc_layer_0__.w0"
size: 32
initial_mean: 0.0
initial_std: 0.176776695297
dims: 32
dims: 1
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___fc_layer_0__.wbias"
size: 1
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 1
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___fc_layer_1__.w0"
size: 32
initial_mean: 0.0
initial_std: 0.176776695297
dims: 32
dims: 1
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___fc_layer_1__.wbias"
size: 1
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 1
initial_strategy: 0
initial_smart: false
}
input_layer_names: "sentence_scores"
input_layer_names: "sentences_ids"
input_layer_names: "sentence_states"
input_layer_names: "start_ids"
input_layer_names: "end_ids"
output_layer_names: "__cross_entropy_over_beam_0__"
sub_models {
name: "root"
layer_names: "sentence_states"
layer_names: "sentence_scores"
layer_names: "__kmax_sequence_score_layer_0__"
layer_names: "__sub_nested_seq_layer_0__"
layer_names: "__fc_layer_0__"
layer_names: "__kmax_sequence_score_layer_1__"
layer_names: "__seq_slice_layer_0__"
layer_names: "__fc_layer_1__"
layer_names: "__kmax_sequence_score_layer_2__"
layer_names: "sentences_ids"
layer_names: "start_ids"
layer_names: "end_ids"
layer_names: "__cross_entropy_over_beam_0__"
input_layer_names: "sentence_scores"
input_layer_names: "sentences_ids"
input_layer_names: "sentence_states"
input_layer_names: "start_ids"
input_layer_names: "end_ids"
output_layer_names: "__cross_entropy_over_beam_0__"
is_recurrent_layer_group: false
}
python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py
0 → 100644
浏览文件 @
b66af308
#!/usr/bin/env python
#coding=utf-8
# Config used to check the generated network description of a three-step
# beam search whose cost is cross_entropy_over_beam.
from paddle.trainer_config_helpers import *

beam_size = 5

# the first beam expansion: pick the top beam_size entries by sentence score.
sentence_states = data_layer(name="sentence_states", size=32)
sentence_scores = data_layer(name="sentence_scores", size=1)
topk_sentence_ids = kmax_sequence_score_layer(
    input=sentence_scores, beam_size=beam_size)

# the second beam expansion: score the selected sentences with a size-1 fc.
topk_sen = sub_nested_seq_layer(
    input=sentence_states, selected_indices=topk_sentence_ids)
start_pos_scores = fc_layer(input=topk_sen, size=1, act=LinearActivation())
# NOTE(review): this top-k is taken over sentence_scores, not over
# start_pos_scores computed just above -- possibly a copy-paste slip. The
# golden protostr records "sentence_scores" as this layer's input, so both
# would have to change together. TODO confirm the intent.
topk_start_pos_ids = kmax_sequence_score_layer(
    input=sentence_scores, beam_size=beam_size)

# the final beam expansion: slice from the selected starts (no explicit
# ends), score the slices, and pick the top beam_size of those scores.
topk_start_spans = seq_slice_layer(
    input=topk_sen, starts=topk_start_pos_ids, ends=None)
end_pos_scores = fc_layer(
    input=topk_start_spans, size=1, act=LinearActivation())
topk_end_pos_ids = kmax_sequence_score_layer(
    input=end_pos_scores, beam_size=beam_size)

# define the cost: one gold data layer per expansion, then one BeamInput
# triple (scores, selected ids, gold) per expansion.
sentence_idx = data_layer(name="sentences_ids", size=1)
start_idx = data_layer(name="start_ids", size=1)
end_idx = data_layer(name="end_ids", size=1)
cost = cross_entropy_over_beam(input=[
    BeamInput(
        candidate_scores=sentence_scores,
        selected_candidates=topk_sentence_ids,
        gold=sentence_idx), BeamInput(
            candidate_scores=start_pos_scores,
            selected_candidates=topk_start_pos_ids,
            gold=start_idx), BeamInput(
                candidate_scores=end_pos_scores,
                selected_candidates=topk_end_pos_ids,
                gold=end_idx)
])

outputs(cost)
python/paddle/v2/inference.py
浏览文件 @
b66af308
...
...
@@ -70,7 +70,7 @@ class Inference(object):
item
=
[
each_result
[
each_field
]
for
each_field
in
field
]
yield
item
def
infer
(
self
,
input
,
field
=
'value'
,
**
kwargs
):
def
infer
(
self
,
input
,
field
=
'value'
,
flatten_result
=
True
,
**
kwargs
):
"""
Infer a data by model.
:param input: input data batch. Should be python iterable object.
...
...
@@ -83,7 +83,10 @@ class Inference(object):
retv
=
[[]
for
i
in
xrange
(
len
(
result
))]
for
i
,
item
in
enumerate
(
result
):
retv
[
i
].
append
(
item
)
retv
=
[
numpy
.
concatenate
(
out
)
for
out
in
retv
]
if
flatten_result
:
retv
=
[
numpy
.
concatenate
(
out
)
for
out
in
retv
]
if
len
(
retv
)
==
1
:
return
retv
[
0
]
else
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录