Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
5e59ca7c
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5e59ca7c
编写于
8月 23, 2017
作者:
C
caoying03
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix config helper.
上级
25083de9
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
162 addition
and
62 deletion
+162
-62
paddle/gserver/layers/CrossEntropyOverBeam.cpp
paddle/gserver/layers/CrossEntropyOverBeam.cpp
+10
-0
paddle/gserver/layers/CrossEntropyOverBeam.h
paddle/gserver/layers/CrossEntropyOverBeam.h
+11
-5
paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp
paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp
+8
-14
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+6
-6
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+107
-22
python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr
...ts/configs/protostr/test_cross_entropy_over_beam.protostr
+8
-9
python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py
...fig_helpers/tests/configs/test_cross_entropy_over_beam.py
+12
-6
未找到文件。
paddle/gserver/layers/CrossEntropyOverBeam.cpp
浏览文件 @
5e59ca7c
...
...
@@ -161,7 +161,17 @@ real CostForOneSequence::forward() {
}
void
CostForOneSequence
::
backward
()
{
/*
* when softmax layer is the output layer, and it is combined with
* cross-entropy as cost. The derivative with regard to softmax's input
* is simply:
*
* grad_i = softmax_out_i - target_i,
*
* and here hard label is used.
*/
softmaxOut_
->
getData
()[
goldIdsInFinalExpansion_
]
-=
1.
;
MatrixPtr
tmp
=
Matrix
::
create
(
softmaxOut_
->
getData
(),
softmaxOut_
->
getWidth
(),
1
,
false
,
false
);
...
...
paddle/gserver/layers/CrossEntropyOverBeam.h
浏览文件 @
5e59ca7c
...
...
@@ -19,8 +19,8 @@ limitations under the License. */
namespace
paddle
{
/* This struct stores the beams in all search steps for a single sequence. */
struct
BeamExpansion
{
// store the entire beam expansion for a single sequence
std
::
vector
<
MatrixPtr
>
scores
;
std
::
vector
<
IVectorPtr
>
seqInfo
;
...
...
@@ -111,8 +111,11 @@ private:
size_t
batchSize_
;
size_t
beamSize_
;
// Currently, this layer only works on CPU, if its inputs is on GPU,
// copy them to CPU memory.
/*
* the process of constructing beams is not GPU-friendly, so currently this
* layer only runs on CPU. If any of its inputs is in GPU memory, it is
* copied to CPU memory.
*/
std
::
vector
<
MatrixPtr
>
candidateScores_
;
std
::
vector
<
MatrixPtr
>
candidateScoreGrad_
;
std
::
vector
<
MatrixPtr
>
candidateInBeam_
;
...
...
@@ -120,9 +123,12 @@ private:
std
::
vector
<
IVectorPtr
>
goldSequence_
;
std
::
vector
<
std
::
vector
<
int
>>
beamSplitPos_
;
// split entire bath of beams into beam per sequnence.
/*
* split the entire batch of beams into one beam per sequence and store the
* result into this member.
*/
std
::
vector
<
BeamExpansion
>
beamPerSeq_
;
// beamCosts_ is used to propagate error in one sequence.
/* beamCosts_ is used to propagate error in one sequence. */
std
::
vector
<
CostForOneSequence
>
beamCosts_
;
};
...
...
paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp
浏览文件 @
5e59ca7c
...
...
@@ -28,16 +28,10 @@ using namespace paddle; // NOLINT
DECLARE_int32
(
gpu_id
);
DECLARE_bool
(
thread_local_rand_use_global_seed
);
// const size_t MAX_SEQ_NUM = 5;
// const size_t MAX_SEQ_LEN = 10;
// const size_t MAX_BEAM_SIZE = 3;
const
size_t
MAX_SEQ_NUM
=
23
;
const
size_t
MAX_SEQ_LEN
=
50
;
const
size_t
MAX_BEAM_SIZE
=
27
;
// const size_t SEED = 1503391792;
// const size_t SEED = 1;
const
size_t
SEED
=
(
size_t
)(
time
(
NULL
));
struct
SingleBeamExpansion
{
...
...
@@ -176,10 +170,12 @@ void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions,
beam
.
resetGroundTruth
(
seqNum
);
for
(
size_t
i
=
0
;
i
<
seqNum
;
++
i
)
{
if
(
randFloat
()
>
0.5
)
{
// force the randomly generated label falls in the beam by chance 0.5.
// otherwise, when sequence length is relatively long and beam size is
// relatively small, the gold sequences falls off the beam at in
// the first search.
/*
* force the randomly generated label to fall in the beam with probability
* 0.5. otherwise, when the sequence length is relatively long and the beam
* size is relatively small, the gold sequence falls off the beam in the
* first search.
*/
real
*
begPos
=
beam
.
selectedIndices
.
data
()
+
i
*
beamSize
;
beam
.
colIdxInBeam
[
i
]
=
rand
()
%
count_if
(
begPos
,
begPos
+
beamSize
,
[](
const
real
&
val
)
{
...
...
@@ -222,9 +218,7 @@ void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions,
if
(
randFloat
()
>
0.5
)
{
// force the randomly generated label to fall in the beam with probability
// 0.5. otherwise, when the sequence length is relatively long and the beam
// size is relatively small, the gold sequence falls off the beam in the
// first search.
real
*
start
=
curBeam
.
selectedIndices
.
data
()
+
curBeam
.
rowIdxInBeam
[
j
]
*
beamSize
;
int
n
=
rand
()
%
count_if
(
start
,
start
+
beamSize
,
[](
const
real
&
val
)
{
...
...
@@ -339,7 +333,7 @@ TEST(Layer, CrossEntropyOverBeam) {
const
size_t
beamSize
=
1
+
rand
()
%
MAX_BEAM_SIZE
;
LOG
(
INFO
)
<<
"beamSize = "
<<
beamSize
;
// TODO(caoying): test with more beam expansions.
// TODO(caoying): test with random beam expansions.
const
size_t
expansionCount
=
3
;
vector
<
SingleBeamExpansion
>
beams
;
genRandomBeamExpansion
(
expansionCount
,
beamSize
,
beams
);
...
...
python/paddle/trainer/config_parser.py
浏览文件 @
5e59ca7c
...
...
@@ -1605,16 +1605,16 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase):
@
config_layer
(
'cross_entropy_over_beam'
)
class
CrossEntropyOverBeamLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
inputs
,
**
xargs
):
config_assert
(
len
(
inputs
)
%
3
==
0
,
"Error input number
s
."
)
config_assert
(
len
(
inputs
)
%
3
==
0
,
"Error input number."
)
super
(
CrossEntropyOverBeamLayer
,
self
).
__init__
(
name
,
'cross_entropy_over_beam'
,
0
,
inputs
,
**
xargs
)
input_num
=
len
(
inputs
)
/
3
for
i
in
range
(
input_num
):
input_layer
=
self
.
get_input_layer
(
i
*
2
)
config_assert
(
input_layer
.
size
==
1
,
"Inputs for this layer are made up of "
"several
pairs and the first one in a pair is scores fo
r "
"all
the candidates, so its size should be equal to 1."
)
input_layer
=
self
.
get_input_layer
(
i
*
3
)
config_assert
(
input_layer
.
size
==
1
,
(
"Inputs for this layer are made up of "
"several
triples, in which the first one is scores ove
r "
"all
candidate paths, whose size should be equal to 1."
)
)
@
config_layer
(
'fc'
)
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
5e59ca7c
...
...
@@ -103,6 +103,7 @@ __all__ = [
'nce_layer'
,
'cross_entropy_with_selfnorm'
,
'cross_entropy'
,
'BeamInput'
,
'cross_entropy_over_beam'
,
'multi_binary_label_cross_entropy'
,
'sum_cost'
,
...
...
@@ -5681,10 +5682,10 @@ def multi_binary_label_cross_entropy(input,
if
input
.
activation
is
None
or
\
not
isinstance
(
input
.
activation
,
SigmoidActivation
):
logger
.
log
(
logging
.
WARN
,
"%s is not recommend for multi_binary_label_cross_entropy's activation, "
"maybe the sigmoid is better"
%
repr
(
input
.
activation
))
logger
.
log
(
logging
.
WARN
,
(
"%s is not a recommended activation for "
"multi_binary_label_cross_entropy, sigmoid is better"
)
%
repr
(
input
.
activation
))
Layer
(
name
=
name
,
...
...
@@ -5699,26 +5700,110 @@ def multi_binary_label_cross_entropy(input,
size
=
1
)
class BeamInput(object):
    """
    Bundle the three layer outputs that describe one beam for the
    cross_entropy_over_beam layer.

    A beam is a triple:

    - candidate_scores: scores over all candidates (its size must be 1);
    - selected_candidates: indices of the top k selected candidates;
    - gold: index of the ground truth, commonly called gold.
    """

    def __init__(self, candidate_scores, selected_candidates, gold):
        # Validate everything up front, in the same order as the checks were
        # originally performed, so the first failing check is unchanged.
        assert isinstance(candidate_scores, LayerOutput)
        assert candidate_scores.size == 1
        assert isinstance(selected_candidates, LayerOutput)
        assert isinstance(gold, LayerOutput)

        self.candidate_scores = candidate_scores
        self.selected_candidates = selected_candidates
        self.gold = gold
@
wrap_name_default
()
@
layer_support
()
def
cross_entropy_over_beam
(
input
,
label
,
name
=
None
,
coeff
=
1.0
,
weight
=
None
):
"""
TODO(caoying) add comments.
def
cross_entropy_over_beam
(
input
,
name
=
None
):
"""
This layer is used in learning to search models, which is to solve complex
joint prediction problems based on learning to search through a
problem-defined search space.
assert
len
(
input
)
/
2
==
len
(
label
),
"Error input numbers."
for
i
in
range
(
0
,
len
(
input
),
2
):
assert
(
input
[
i
].
size
==
1
),
(
"Inputs for this layer are made up of "
"several pairs and the first one in a pair is scores for "
"all the candidates, so its size should be equal to 1."
)
Specifically, the learning to search process for this layer begins with
searching a target sequence from a nested sequence. In the first search
step, top beam size sequences with highest scores, indices of these top k
sequences in the original nested sequence, and the ground truth (also
called gold) altogether (a triple) make up the first beam.
ipts
,
parents
=
__cost_input__
(
input
,
label
,
weight
)
Layer
(
name
=
name
,
type
=
LayerType
.
CROSS_ENTROPY_OVER_BEAM
,
inputs
=
ipts
,
coeff
=
coeff
)
Then, several special positions, for example, start and end positions
that define meaningful segments are searched. In these searches, top k
positions with highest scores are selected, and then sequence, starting
from the selected starts till ends of the sequences (or a fixed position)
are taken to search next.
We call the possible top k results returned in one search the beam. This
search process can be repeated for pre-defined turns and leads to several
beam expansions.
Finally, the layer cross_entropy_over_beam takes all the beam expansions
which contain several candidate targets found along the multi-step search.
cross_entropy_over_beam calculates cross entropy over the expanded beams,
with all the candidates in the beam as the normalization factor.
Note that, if gold falls off the beam at search step t, then the cost is
calculated over the beam at step t.
This cost layer always works together with kmax_sequence_score_layer,
sub_nested_seq_layer, and sequence_slice_layer to trim the input to form a
sub-search space.
The example usage is:
.. code-block:: python
cost = cross_entropy_over_beam(input=[
BeamInput(
candidate_scores=beam1_candidates,
selected_candidates=beam1_topk,
gold=gold1),
BeamInput(
candidate_scores=beam2_candidates,
selected_candidates=beam2_topk,
gold=gold2),
])
:param input: input beams for this layer.
:type input: BeamInput or a list of BeamInput
:param name: the name of this layer.
:type name: basestring
:return: LayerOutput object.
:rtype: LayerOutput
"""
if
isinstance
(
input
,
BeamInput
):
input
=
[
input
]
else
:
assert
isinstance
(
input
,
list
),
(
'input for cross_entropy_over_beam shold be a python list '
'of BeamInput object.'
)
for
ipt
in
input
:
assert
isinstance
(
ipt
,
BeamInput
),
(
'input for cross_entropy_over_beam '
'should be a BeamInput object.'
)
ipts
=
[]
parents
=
[]
for
beam
in
input
:
parents
+=
[
beam
.
candidate_scores
,
beam
.
selected_candidates
,
beam
.
gold
]
ipts
+=
[
beam
.
candidate_scores
.
name
,
beam
.
selected_candidates
.
name
,
beam
.
gold
.
name
]
Layer
(
name
=
name
,
type
=
LayerType
.
CROSS_ENTROPY_OVER_BEAM
,
inputs
=
ipts
)
return
LayerOutput
(
name
,
LayerType
.
CROSS_ENTROPY
,
parents
=
parents
,
size
=
1
)
...
...
@@ -6247,11 +6332,11 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1):
@
wrap_bias_attr_default
()
def
scale_shift_layer
(
input
,
name
=
None
,
param_attr
=
None
,
bias_attr
=
None
):
"""
A layer applies a linear transformation to each element in each row of
the input matrix. For each element, the layer first re-scale it and then
A layer applies a linear transformation to each element in each row of
the input matrix. For each element, the layer first re-scale it and then
adds a bias to it.
This layer is very like the SlopeInterceptLayer, except the scale and
This layer is very like the SlopeInterceptLayer, except the scale and
bias are trainable.
.. math::
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/test_cross_entropy_over_beam.protostr
浏览文件 @
5e59ca7c
...
...
@@ -114,27 +114,26 @@ layers {
input_layer_name: "__kmax_sequence_score_layer_0__"
}
inputs {
input_layer_name: "
__fc_layer_0__
"
input_layer_name: "
sentences_ids
"
}
inputs {
input_layer_name: "__
kmax_sequence_score_layer_1
__"
input_layer_name: "__
fc_layer_0
__"
}
inputs {
input_layer_name: "__
fc
_layer_1__"
input_layer_name: "__
kmax_sequence_score
_layer_1__"
}
inputs {
input_layer_name: "
__kmax_sequence_score_layer_2__
"
input_layer_name: "
start_ids
"
}
inputs {
input_layer_name: "
sentences_ids
"
input_layer_name: "
__fc_layer_1__
"
}
inputs {
input_layer_name: "
start_ids
"
input_layer_name: "
__kmax_sequence_score_layer_2__
"
}
inputs {
input_layer_name: "end_ids"
}
coeff: 1.0
}
parameters {
name: "___fc_layer_0__.w0"
...
...
@@ -177,8 +176,8 @@ parameters {
initial_smart: false
}
input_layer_names: "sentence_scores"
input_layer_names: "sentence_states"
input_layer_names: "sentences_ids"
input_layer_names: "sentence_states"
input_layer_names: "start_ids"
input_layer_names: "end_ids"
output_layer_names: "__cross_entropy_over_beam_0__"
...
...
@@ -198,8 +197,8 @@ sub_models {
layer_names: "end_ids"
layer_names: "__cross_entropy_over_beam_0__"
input_layer_names: "sentence_scores"
input_layer_names: "sentence_states"
input_layer_names: "sentences_ids"
input_layer_names: "sentence_states"
input_layer_names: "start_ids"
input_layer_names: "end_ids"
output_layer_names: "__cross_entropy_over_beam_0__"
...
...
python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py
浏览文件 @
5e59ca7c
...
...
@@ -29,11 +29,17 @@ topk_end_pos_ids = kmax_sequence_score_layer(
sentence_idx
=
data_layer
(
name
=
"sentences_ids"
,
size
=
1
)
start_idx
=
data_layer
(
name
=
"start_ids"
,
size
=
1
)
end_idx
=
data_layer
(
name
=
"end_ids"
,
size
=
1
)
cost
=
cross_entropy_over_beam
(
input
=
[
sentence_scores
,
topk_sentence_ids
,
start_pos_scores
,
topk_start_pos_ids
,
end_pos_scores
,
topk_end_pos_ids
],
label
=
[
sentence_idx
,
start_idx
,
end_idx
])
cost
=
cross_entropy_over_beam
(
input
=
[
BeamInput
(
candidate_scores
=
sentence_scores
,
selected_candidates
=
topk_sentence_ids
,
gold
=
sentence_idx
),
BeamInput
(
candidate_scores
=
start_pos_scores
,
selected_candidates
=
topk_start_pos_ids
,
gold
=
start_idx
),
BeamInput
(
candidate_scores
=
end_pos_scores
,
selected_candidates
=
topk_end_pos_ids
,
gold
=
end_idx
)
])
outputs
(
cost
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录