Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
8f4ca2d1
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8f4ca2d1
编写于
8月 16, 2017
作者:
C
caoying03
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add implementations.
上级
a037b099
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
549 addition
and
59 deletion
+549
-59
paddle/gserver/layers/CrossEntropyOverBeam.cpp
paddle/gserver/layers/CrossEntropyOverBeam.cpp
+341
-3
paddle/gserver/layers/CrossEntropyOverBeam.h
paddle/gserver/layers/CrossEntropyOverBeam.h
+98
-0
paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp
paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp
+110
-56
未找到文件。
paddle/gserver/layers/CrossEntropyOverBeam.cpp
浏览文件 @
8f4ca2d1
...
...
@@ -16,6 +16,168 @@ limitations under the License. */
namespace
paddle
{
void
CostForOneSequence
::
calValidExpandStep
()
{
validExpansionCount_
=
0
;
goldAsExtraPath_
=
true
;
for
(
size_t
i
=
0
;
i
<
beams_
->
expansionCount
;
++
i
)
{
real
gold
=
static_cast
<
real
>
(
beams_
->
gold
[
i
]);
if
(
i
)
{
real
*
start
=
beams_
->
candidateIds
[
i
-
1
]
->
getData
();
goldRowIds_
[
i
]
=
std
::
count_if
(
start
,
start
+
goldRowIds_
[
i
-
1
]
*
beamSize_
+
goldColIds_
[
i
-
1
],
[](
const
real
&
val
)
{
return
val
!=
-
1.
;
});
}
else
goldRowIds_
[
i
]
=
0
;
real
*
start
=
beams_
->
candidateIds
[
i
]
->
getData
()
+
goldRowIds_
[
i
]
*
beamSize_
;
real
*
findEnd
=
std
::
find
(
start
,
start
+
beamSize_
,
gold
);
validExpansionCount_
++
;
if
(
start
+
beamSize_
==
findEnd
)
return
;
goldColIds_
[
i
]
=
findEnd
-
start
;
}
if
(
goldColIds_
[
beams_
->
expansionCount
-
1
]
!=
-
1
)
goldAsExtraPath_
=
false
;
}
size_t
CostForOneSequence
::
initLastExpansion
()
{
int
beamId
=
validExpansionCount_
-
1
;
const
MatrixPtr
candidates
=
beams_
->
candidateIds
[
beamId
];
size_t
height
=
candidates
->
getHeight
();
/* initialization the last expansion. */
size_t
pathCount
=
std
::
count_if
(
candidates
->
getData
(),
candidates
->
getData
()
+
height
*
beamSize_
,
[](
const
real
&
val
)
{
return
val
!=
-
1
;
});
/*
* if the gold sequence falls off the beam during search,
* add the gold sequence as the last path into all expanded paths.
*/
if
(
goldAsExtraPath_
)
goldIdsInFinalExpansion_
=
pathCount
++
;
pathRowIdsInEachBeam_
.
clear
();
pathRowIdsInEachBeam_
.
resize
(
validExpansionCount_
,
std
::
vector
<
int
>
(
pathCount
,
0
));
parentIdsInBeam_
.
clear
();
parentIdsInBeam_
.
resize
(
pathCount
,
0
);
if
(
goldAsExtraPath_
)
{
/* add gold sequence into the total expansion. */
pathRowIdsInEachBeam_
[
beamId
].
back
()
=
beams_
->
gold
[
beamId
]
+
getSeqStartPos
(
beamId
,
goldRowIds_
[
validExpansionCount_
-
1
]);
parentIdsInBeam_
.
back
()
=
goldRowIds_
[
validExpansionCount_
-
1
];
}
else
{
size_t
goldOffset
=
goldRowIds_
[
beamId
]
*
beamSize_
+
goldColIds_
[
beamId
];
goldIdsInFinalExpansion_
=
std
::
count_if
(
candidates
->
getData
(),
candidates
->
getData
()
+
goldOffset
,
[](
const
real
&
val
)
{
return
val
!=
-
1.
;
});
}
/*
* TODO(caoying): fix this, store the indices of selected candidate
* paths into Argument.ids
*/
real
*
ids
=
candidates
->
getData
();
size_t
curIdx
=
0
;
for
(
size_t
i
=
0
;
i
<
height
;
++
i
)
{
int
basePos
=
getSeqStartPos
(
beamId
,
i
);
for
(
size_t
j
=
0
;
j
<
beamSize_
;
++
j
)
{
int
id
=
ids
[
i
*
beamSize_
+
j
];
if
(
id
==
-
1
)
continue
;
pathRowIdsInEachBeam_
[
beamId
][
curIdx
]
=
id
+
basePos
;
parentIdsInBeam_
[
curIdx
++
]
=
i
;
}
}
return
pathCount
;
}
/*
 * Constructs the entire expanded beam, beginning with the last search step
 * (the one in which gold falls off the beam, or the final one) and walking
 * back to the first expansion.
 *
 * For every earlier expansion the row index of each path is written into
 * pathRowIdsInEachBeam_[beamId], and parentIdsInBeam_ is rewritten to the
 * parent row in that expansion so the next (earlier) iteration can use it.
 */
void CostForOneSequence::constructTotalExpansion() {
  size_t totalPathCount = initLastExpansion();

  /* The extra gold path (if any) occupies the last slot and is not traced
   * through parentIdsInBeam_. */
  const size_t searchedPathCount =
      goldAsExtraPath_ ? totalPathCount - 1 : totalPathCount;

  for (int beamId = validExpansionCount_ - 2; beamId >= 0; --beamId) {
    const MatrixPtr candidates = beams_->candidateIds[beamId];
    real* ids = candidates->getData();

    int lastParentIdInBeam = -1;
    int basePos = -1;
    for (size_t i = 0; i < searchedPathCount; ++i) {
      int id = ids[parentIdsInBeam_[i]];
      int parentRowId = std::div(parentIdsInBeam_[i], beamSize_).quot;
      /* only look the start position up again when the parent changes. */
      if (parentIdsInBeam_[i] != lastParentIdInBeam)
        basePos = getSeqStartPos(beamId, parentRowId);

      pathRowIdsInEachBeam_[beamId][i] = id + basePos;
      lastParentIdInBeam = parentIdsInBeam_[i];
      parentIdsInBeam_[i] = parentRowId;
    }

    /*
     * The gold path entry does not depend on the loop above, so it is set
     * exactly once per expansion; this also covers the case in which no
     * searched path exists and the loop body never runs.
     */
    if (goldAsExtraPath_)
      pathRowIdsInEachBeam_[beamId][totalPathCount - 1] =
          beams_->gold[beamId] + getSeqStartPos(beamId, goldRowIds_[beamId]);
  }
}
/*
 * Sums, for every path, the scores selected from each valid expansion,
 * applies softmax over all paths and returns the negative log of the entry
 * that belongs to the gold path.
 */
real CostForOneSequence::globallyNormalizedScore() {
  expandedPathScores_.resize(validExpansionCount_);

  const size_t pathNum = pathRowIdsInEachBeam_[0].size();
  Matrix::resizeOrCreate(softmaxOut_, 1, pathNum, false, false);
  softmaxOut_->zero();

  /* created on softmaxOut_'s data pointer with swapped dimensions;
   * presumably a view sharing that buffer -- TODO confirm */
  MatrixPtr accumulated = Matrix::create(
      softmaxOut_->getData(), softmaxOut_->getWidth(), 1, false, false);

  for (size_t step = 0; step < validExpansionCount_; ++step) {
    std::vector<int>& rows = pathRowIdsInEachBeam_[step];
    MatrixPtr& stepScores = expandedPathScores_[step];

    Matrix::resizeOrCreate(stepScores, rows.size(), 1, false, false);
    IVectorPtr rowIds = IVector::create(rows.data(), rows.size(), false);
    stepScores->selectRows(*(beams_->scores[step]), *rowIds);
    accumulated->add(*stepScores);
  }

  softmaxOut_->softmax(*softmaxOut_);
  const real goldProb = softmaxOut_->getData()[goldIdsInFinalExpansion_];
  return -std::log(goldProb);
}
/*
 * Runs the three forward stages in order (locate gold, build the expanded
 * paths, normalize globally) and returns the resulting cost.
 */
real CostForOneSequence::forward() {
  calValidExpandStep();
  constructTotalExpansion();
  const real cost = globallyNormalizedScore();
  return cost;
}
void
CostForOneSequence
::
backward
()
{
softmaxOut_
->
getData
()[
goldIdsInFinalExpansion_
]
-=
1.
;
MatrixPtr
tmp
=
Matrix
::
create
(
softmaxOut_
->
getData
(),
softmaxOut_
->
getWidth
(),
1
,
false
,
false
);
for
(
size_t
i
=
0
;
i
<
validExpansionCount_
;
++
i
)
{
IVectorPtr
rowIds
=
IVector
::
create
(
pathRowIdsInEachBeam_
[
i
].
data
(),
pathRowIdsInEachBeam_
[
i
].
size
(),
false
);
/*
beams_->scoreGrad[i] has been intialized outside this class, this
class only keeps a pointer pointing to the original input gradients,
so here does not need to allocate or initalize the memory.
*/
tmp
->
addToRows
(
*
beams_
->
scoreGrad
[
i
],
*
rowIds
);
}
}
// Associates the name cross_entropy_over_beam with the CrossEntropyOverBeam
// class (presumably the layer type string used in layer configs -- the
// REGISTER_LAYER macro itself is defined outside this view).
REGISTER_LAYER
(
cross_entropy_over_beam
,
CrossEntropyOverBeam
);
bool
CrossEntropyOverBeam
::
init
(
const
LayerMap
&
layerMap
,
...
...
@@ -24,13 +186,189 @@ bool CrossEntropyOverBeam::init(const LayerMap& layerMap,
Layer
::
init
(
layerMap
,
parameterMap
);
CHECK_EQ
(
0U
,
inputLayers_
.
size
()
%
3
)
<<
"Error input number."
;
setNeedSequenceInfo
(
false
);
beamExpanCount_
=
inputLayers_
.
size
()
/
3
;
candidateScores_
.
resize
(
beamExpanCount_
);
candidateScoreGrad_
.
resize
(
beamExpanCount_
);
candidateInBeam_
.
resize
(
beamExpanCount_
);
goldSequence_
.
resize
(
beamExpanCount_
);
gradToInputs_
.
resize
(
beamExpanCount_
);
setNeedSequenceInfo
(
false
);
return
true
;
}
// Empty definition: the body contains no statements.
// NOTE(review): a second, non-empty definition of
// CrossEntropyOverBeam::forward(PassType) appears later in this text; this
// stub looks like a removed line of the rendered diff -- confirm and drop it.
void
CrossEntropyOverBeam
::
forward
(
PassType
passType
)
{}
/*
 * Validates the layer inputs and derives batchSize_ and beamSize_.
 *
 * The inputs are read in groups of three per beam expansion i:
 *   input 3*i     : candidate scores,
 *   input 3*i + 1 : selected candidates,
 *   input 3*i + 2 : gold sequence.
 * batchSize_ and beamSize_ are taken from the first expansion; every later
 * expansion is checked against them.
 */
void CrossEntropyOverBeam::checkInputs() {
  batchSize_ = 0;
  for (size_t i = 0; i < beamExpanCount_; ++i) {
    const Argument& scores = getInput(i * 3);
    const Argument& selCandidates = getInput(i * 3 + 1);
    const Argument& goldSeq = getInput(i * 3 + 2);

    if (i) {
      CHECK(scores.hasSubseq())
          << "Beam expansions except the first one should be nested sequences";
      CHECK_EQ(getInputValue(i * 3 + 1)->getWidth(), beamSize_);
      CHECK_EQ(scores.getNumSequences(), batchSize_);
      CHECK_EQ(scores.getNumSubSequences(), selCandidates.getBatchSize());
    } else {
      CHECK(scores.hasSeq()) << "The first beam expansion should be a sequence";
      batchSize_ = scores.getNumSequences();
      beamSize_ = getInputValue(i * 3 + 1)->getWidth();
      CHECK_EQ(batchSize_, selCandidates.getBatchSize());
    }
    /* scores must be a single column; one gold entry per sequence. */
    CHECK_EQ(1U, scores.value->getWidth());
    CHECK_EQ(batchSize_, goldSeq.getBatchSize());
  }
}
/*
 * Makes the inputs of every beam expansion available through the members
 * candidateScores_, candidateInBeam_ and goldSequence_, and computes
 * beamSplitPos_.
 *
 * An input that is a GpuMatrix / GpuIVector is copied into a separately
 * sized target; any other input is shared by assigning the pointer.
 *
 * beamSplitPos_[seq][i] ends up as the cumulative number of rows of
 * expansion i that belong to sequences 0..seq (sequences for the first
 * expansion, sub-sequences for the later ones).
 */
void CrossEntropyOverBeam::copyInputsToCpu() {
  auto copyValue = [](const MatrixPtr& src, MatrixPtr& trg) {
    if (dynamic_cast<GpuMatrix*>(src.get())) {
      Matrix::resizeOrCreate(
          trg, src->getHeight(), src->getWidth(), false, false);
      trg->copyFrom(*src);
    } else {
      /* src is a const reference, so it can only be copied (shared). */
      trg = src;
    }
  };

  auto copyIds = [](const IVectorPtr& src, IVectorPtr& trg) {
    if (dynamic_cast<GpuIVector*>(src.get())) {
      IVector::resizeOrCreate(trg, src->getSize(), false);
      trg->copyFrom(*src);
    } else {
      trg = src;
    }
  };

  beamSplitPos_.clear();
  beamSplitPos_.resize(batchSize_, std::vector<int>(beamExpanCount_, 0));
  for (size_t i = 0; i < beamExpanCount_; ++i) {
    copyValue(getInputValue(i * 3), candidateScores_[i]);
    copyValue(getInputValue(i * 3 + 1), candidateInBeam_[i]);
    copyIds(getInput(i * 3 + 2).ids, goldSequence_[i]);

    if (i) {
      ICpuGpuVectorPtr seqInfo = getInput(i * 3).sequenceStartPositions;
      const int* seqStarts = seqInfo->getMutableData(false);
      ICpuGpuVectorPtr subSeqInfo = getInput(i * 3).subSequenceStartPositions;
      const int* subSeqStarts = subSeqInfo->getMutableData(false);

      size_t seqId = 1;
      for (size_t subSeqId = 0; subSeqId < subSeqInfo->getSize() - 1;
           ++subSeqId) {
        CHECK_LT(seqId, seqInfo->getSize());
        /* a sub-sequence that starts where sequence seqId starts opens the
         * next sequence: carry the running count over to it. */
        if (subSeqStarts[subSeqId] == seqStarts[seqId]) {
          beamSplitPos_[seqId][i] = beamSplitPos_[seqId - 1][i];
          seqId++;
        }
        beamSplitPos_[seqId - 1][i]++;
      }
    } else {
      /* first expansion: one row per sequence. */
      for (size_t j = 0; j < batchSize_; ++j) beamSplitPos_[j][i] = j + 1;
    }
  }
}
void
CrossEntropyOverBeam
::
splitBatchBeams
()
{
beamCosts_
.
resize
(
batchSize_
);
beamPerSeq_
.
resize
(
batchSize_
,
beamExpanCount_
);
for
(
size_t
i
=
0
;
i
<
beamExpanCount_
;
++
i
)
{
int
*
seqStarts
=
getInput
(
i
*
3
).
sequenceStartPositions
->
getMutableData
(
false
);
int
*
subSeqStarts
=
nullptr
;
int
maxLen
=
0
;
if
(
i
)
{
subSeqStarts
=
getInput
(
i
*
3
).
subSequenceStartPositions
->
getMutableData
(
false
);
maxLen
=
getInput
(
i
*
3
).
subSequenceStartPositions
->
getSize
()
-
1
;
}
else
maxLen
=
getInput
(
i
).
sequenceStartPositions
->
getSize
()
-
1
;
for
(
size_t
j
=
0
;
j
<
batchSize_
;
++
j
)
{
beamPerSeq_
[
j
].
scores
[
i
]
=
Matrix
::
create
(
candidateScores_
[
i
]
->
getData
()
+
seqStarts
[
j
],
seqStarts
[
j
+
1
]
-
seqStarts
[
j
],
1
,
false
,
false
);
beamPerSeq_
[
j
].
scoreGrad
[
i
]
=
Matrix
::
create
(
candidateScoreGrad_
[
i
]
->
getData
()
+
seqStarts
[
j
],
seqStarts
[
j
+
1
]
-
seqStarts
[
j
],
1
,
false
,
false
);
int
offset
=
j
?
beamSplitPos_
[
j
-
1
][
i
]
:
0
;
int
height
=
beamSplitPos_
[
j
][
i
]
-
(
j
?
beamSplitPos_
[
j
-
1
][
i
]
:
0
);
CHECK_GE
(
maxLen
,
offset
+
height
);
beamPerSeq_
[
j
].
seqInfo
[
i
]
=
IVector
::
create
(
(
i
?
subSeqStarts
:
seqStarts
)
+
offset
,
height
+
1
,
false
);
void
CrossEntropyOverBeam
::
backward
(
const
UpdateCallback
&
callback
)
{}
beamPerSeq_
[
j
].
candidateIds
[
i
]
=
Matrix
::
create
(
candidateInBeam_
[
i
]
->
getData
()
+
offset
*
beamSize_
,
height
,
beamSize_
,
false
,
false
);
beamPerSeq_
[
j
].
gold
[
i
]
=
goldSequence_
[
i
]
->
getData
()[
j
];
}
}
}
void
CrossEntropyOverBeam
::
resizeOutput
()
{
Matrix
::
resizeOrCreate
(
output_
.
value
,
batchSize_
,
1
,
false
,
false
);
output_
.
value
->
zero
();
for
(
size_t
i
=
0
;
i
<
beamExpanCount_
;
++
i
)
{
MatrixPtr
inGrad
=
getInputGrad
(
i
*
3
);
if
(
dynamic_cast
<
GpuMatrix
*>
(
inGrad
.
get
()))
{
Matrix
::
resizeOrCreate
(
candidateScoreGrad_
[
i
],
inGrad
->
getHeight
(),
inGrad
->
getWidth
(),
false
,
false
);
}
else
candidateScoreGrad_
[
i
]
=
std
::
move
(
inGrad
);
candidateScoreGrad_
[
i
]
->
zero
();
}
}
void
CrossEntropyOverBeam
::
copyGradToGpu
(
size_t
copyCount
)
{
for
(
size_t
i
=
0
;
i
<
beamExpanCount_
;
++
i
)
{
if
(
dynamic_cast
<
GpuMatrix
*>
(
getInputGrad
(
i
*
3
).
get
()))
getInputGrad
(
i
*
3
)
->
copyFrom
(
*
candidateScoreGrad_
[
i
]);
if
(
i
==
copyCount
-
1
)
break
;
}
}
/*
 * Forward pass: validates and gathers the inputs, prepares the output and
 * gradient buffers, splits the batch into per-sequence beams, and writes the
 * cost of each sequence into row i of the output value.
 */
void CrossEntropyOverBeam::forward(PassType passType) {
  Layer::forward(passType);

  checkInputs();
  copyInputsToCpu();

  resizeOutput();
  splitBatchBeams();

  MatrixPtr outputValue = getOutputValue();
  for (size_t i = 0; i < batchSize_; ++i) {
    /* make_shared already yields a temporary; no std::move is needed. */
    beamCosts_[i].setData(std::make_shared<BeamExpansion>(beamPerSeq_[i]),
                          beamSize_);
    outputValue->getData()[i] = beamCosts_[i].forward();
  }
}
/*
 * Backward pass: lets every sequence add its gradient to the shared
 * per-expansion gradient buffers, then copies those buffers back to the
 * input gradients that are GpuMatrix instances.
 *
 * The copy transfers whole per-expansion buffers, so it is issued once after
 * all sequences have contributed, covering the largest valid expansion count
 * seen in the batch, instead of once per sequence.
 */
void CrossEntropyOverBeam::backward(const UpdateCallback& callback) {
  size_t maxValidExpansions = 0;
  for (size_t i = 0; i < batchSize_; ++i) {
    beamCosts_[i].backward();
    const size_t validCount = beamCosts_[i].getValidExpansionCount();
    if (validCount > maxValidExpansions) maxValidExpansions = validCount;
  }
  if (maxValidExpansions) copyGradToGpu(maxValidExpansions);
}
}
// namespace paddle
paddle/gserver/layers/CrossEntropyOverBeam.h
浏览文件 @
8f4ca2d1
...
...
@@ -19,6 +19,79 @@ limitations under the License. */
namespace
paddle
{
// Stores the entire beam expansion for a single sequence: one entry per
// expansion step in each of the vectors below.
struct BeamExpansion {
  std::vector<MatrixPtr> scores;
  std::vector<IVectorPtr> seqInfo;

  std::vector<MatrixPtr> candidateIds;
  std::vector<int> gold;

  std::vector<MatrixPtr> scoreGrad;

  size_t expansionCount;

  // Sizes every per-expansion vector to n entries (null pointers for the
  // pointer vectors, zeros for gold).
  BeamExpansion(int n)
      : scores(static_cast<size_t>(n)),
        seqInfo(static_cast<size_t>(n)),
        candidateIds(static_cast<size_t>(n)),
        gold(static_cast<size_t>(n)),
        scoreGrad(static_cast<size_t>(n)),
        expansionCount(n) {}
};
// Shared-ownership handle to a BeamExpansion.
typedef
std
::
shared_ptr
<
BeamExpansion
>
BeamExpansionPtr
;
// Computes the cost (forward) and the score gradients (backward) of the
// beam expansions that belong to one sequence.
class CostForOneSequence {
public:
  CostForOneSequence()
      : beamSize_(0),
        validExpansionCount_(0),
        goldAsExtraPath_(false),
        goldIdsInFinalExpansion_(0) {}

  // Binds the beam expansions of one sequence and resets the per-expansion
  // bookkeeping: gold rows start at 0 and gold columns at -1 (not found).
  void setData(const BeamExpansionPtr bPtr, size_t beamSize) {
    beams_ = bPtr;
    beamSize_ = beamSize;

    expandedPathScores_.clear();
    expandedPathScores_.resize(beams_->expansionCount);

    goldRowIds_.clear();
    goldRowIds_.resize(beams_->expansionCount, 0);
    goldColIds_.clear();
    goldColIds_.resize(beams_->expansionCount, -1);
  }

  // Number of expansions examined by the last call to forward().
  size_t getValidExpansionCount() const { return validExpansionCount_; }

  real forward();
  void backward();

private:
  void calValidExpandStep();
  void constructTotalExpansion();
  size_t initLastExpansion();
  real globallyNormalizedScore();

  // Returns the start position of row rowId in expansion beamId, relative to
  // the first start position stored for this sequence. rowId must be smaller
  // than the number of stored start positions minus one.
  int getSeqStartPos(size_t beamId, size_t rowId) {
    CHECK_GT(beams_->seqInfo[beamId]->getSize() - 1, rowId);
    int* starts = beams_->seqInfo[beamId]->getData();
    return starts[rowId] - starts[0];
  }

  size_t beamSize_;
  size_t validExpansionCount_;
  // true when the gold sequence has to be appended as an extra path.
  bool goldAsExtraPath_;
  // row / column of the gold label in each expansion (column -1: not found).
  std::vector<int> goldRowIds_;
  std::vector<int> goldColIds_;

  BeamExpansionPtr beams_;
  // pathRowIdsInEachBeam_[i][p]: row of path p in the scores of expansion i.
  std::vector<std::vector<int>> pathRowIdsInEachBeam_;
  std::vector<int> parentIdsInBeam_;
  // index of the gold path among all expanded paths.
  size_t goldIdsInFinalExpansion_;

  std::vector<MatrixPtr> expandedPathScores_;
  MatrixPtr softmaxOut_;
};
class
CrossEntropyOverBeam
:
public
Layer
{
public:
explicit
CrossEntropyOverBeam
(
const
LayerConfig
&
config
)
:
Layer
(
config
)
{}
...
...
@@ -26,6 +99,31 @@ public:
const
ParameterMap
&
parameterMap
)
override
;
void
forward
(
PassType
passType
)
override
;
void
backward
(
const
UpdateCallback
&
callback
)
override
;
private:
void
checkInputs
();
void
copyInputsToCpu
();
void
resizeOutput
();
void
copyGradToGpu
(
size_t
copyCount
);
void
splitBatchBeams
();
size_t
beamExpanCount_
;
size_t
batchSize_
;
size_t
beamSize_
;
// Currently, this layer only works on CPU, if its inputs is on GPU,
// copy them to CPU memory.
std
::
vector
<
MatrixPtr
>
candidateScores_
;
std
::
vector
<
MatrixPtr
>
candidateScoreGrad_
;
std
::
vector
<
MatrixPtr
>
candidateInBeam_
;
std
::
vector
<
MatrixPtr
>
gradToInputs_
;
std
::
vector
<
IVectorPtr
>
goldSequence_
;
std
::
vector
<
std
::
vector
<
int
>>
beamSplitPos_
;
// split entire bath of beams into beam per sequnence.
std
::
vector
<
BeamExpansion
>
beamPerSeq_
;
// beamCosts_ is used to propagate error in one sequence.
std
::
vector
<
CostForOneSequence
>
beamCosts_
;
};
}
// namespace paddle
paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp
浏览文件 @
8f4ca2d1
...
...
@@ -28,9 +28,17 @@ using namespace paddle; // NOLINT
DECLARE_int32
(
gpu_id
);
DECLARE_bool
(
thread_local_rand_use_global_seed
);
const
size_t
MAX_SEQ_NUM
=
10
;
const
size_t
MAX_SEQ_LEN
=
27
;
const
size_t
MAX_BEAM_SIZE
=
10
;
// const size_t MAX_SEQ_NUM = 5;
// const size_t MAX_SEQ_LEN = 10;
// const size_t MAX_BEAM_SIZE = 3;
const
size_t
MAX_SEQ_NUM
=
23
;
const
size_t
MAX_SEQ_LEN
=
50
;
const
size_t
MAX_BEAM_SIZE
=
27
;
// const size_t SEED = 1503391792;
// const size_t SEED = 1;
const
size_t
SEED
=
(
size_t
)(
time
(
NULL
));
struct
SingleBeamExpansion
{
vector
<
int
>
seqStartPos
;
...
...
@@ -43,11 +51,30 @@ struct SingleBeamExpansion {
vector
<
int
>
groundTruth
;
vector
<
size_t
>
inBeam
;
vector
<
int
>
rowIdxInBeam
;
vector
<
int
>
colIdxInBeam
;
// Resets the ground-truth bookkeeping to n entries each: labels and
// row/column indices become -1, the in-beam flags become 0.
void resetGroundTruth(size_t n) {
  groundTruth.assign(n, -1);
  inBeam.assign(n, 0);
  rowIdxInBeam.assign(n, -1);
  colIdxInBeam.assign(n, -1);
}
};
// Returns rand() scaled by RAND_MAX, i.e. a float in [0, 1].
inline float randFloat() {
  const float numerator = static_cast<float>(rand());
  const float denominator = static_cast<float>(RAND_MAX);
  return numerator / denominator;
}
// Fills numbers[0..n) with samples drawn from a uniform distribution
// constructed with the bounds (0.0, 1.0). The engine is default-constructed
// on every call.
void genRand(real* numbers, size_t n) {
  default_random_engine generator;
  // only one declaration of `distribution` may exist in this scope; the
  // sample type matches the element type of `numbers`.
  uniform_real_distribution<real> distribution(0.0, 1.0);
  for (size_t i = 0; i < n; ++i) numbers[i] = distribution(generator);
}
...
...
@@ -72,8 +99,7 @@ void genCandidateScores(bool hasSubseq,
vector
<
int
>&
subSeqStartPos
=
curBeam
.
subSeqStartPos
;
subSeqStartPos
.
resize
(
1
,
0
);
srand
((
size_t
)(
time
(
NULL
)));
// srand(1);
srand
(
SEED
);
if
(
prevBeam
.
selectedIndices
.
size
())
{
if
(
prevBeam
.
subSeqStartPos
.
size
()
>
1
)
{
int
seqIdx
=
1
;
...
...
@@ -81,7 +107,6 @@ void genCandidateScores(bool hasSubseq,
for
(
size_t
i
=
1
;
i
<
prevBeam
.
subSeqStartPos
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
beamSize
;
++
j
)
{
if
(
prevBeam
.
selectedIndices
[(
i
-
1
)
*
beamSize
+
j
]
==
-
1.
)
break
;
for
(
size_t
k
=
0
;
k
<
beamSize
;
++
k
)
subSeqStartPos
.
push_back
(
1
+
(
rand
()
%
MAX_SEQ_LEN
)
+
subSeqStartPos
.
back
());
}
...
...
@@ -91,7 +116,6 @@ void genCandidateScores(bool hasSubseq,
}
}
}
else
{
// samples in previous beam are sequences.
for
(
size_t
i
=
0
;
i
<=
prevBeam
.
selectedIndices
.
size
();
++
i
)
{
if
(
i
&&
i
%
beamSize
==
0
)
{
seqStartPos
.
push_back
(
subSeqStartPos
.
back
());
...
...
@@ -141,27 +165,41 @@ void genSelectedIndices(size_t beamSize,
void
genGroundTruth
(
vector
<
SingleBeamExpansion
>&
beamExpansions
,
size_t
beamSize
)
{
size_t
seqNum
=
beamExpansions
[
1
].
seqStartPos
.
size
()
-
1
;
SingleBeamExpansion
&
beam
=
beamExpansions
[
1
];
size_t
seqNum
=
beam
.
seqStartPos
.
size
()
-
1
;
for
(
size_t
i
=
2
;
i
<
beamExpansions
.
size
();
++
i
)
CHECK_EQ
(
seqNum
,
beamExpansions
[
i
-
1
].
seqStartPos
.
size
()
-
1
);
CHECK_EQ
(
seqNum
,
beamExpansions
[
i
].
seqStartPos
.
size
()
-
1
);
// srand(1);
srand
((
size_t
)(
time
(
NULL
)));
srand
(
SEED
);
// initialize the first beam.
SingleBeamExpansion
&
beam
=
beamExpansions
[
1
];
beam
.
groundTruth
.
resize
(
seqNum
,
0
);
beam
.
inBeam
.
resize
(
seqNum
,
0
);
beam
.
rowIdxInBeam
.
resize
(
seqNum
,
-
1
);
auto
begPos
=
beam
.
selectedIndices
.
begin
();
beam
.
resetGroundTruth
(
seqNum
);
for
(
size_t
i
=
0
;
i
<
seqNum
;
++
i
)
{
int
seqLen
=
beam
.
seqStartPos
[
i
+
1
]
-
beam
.
seqStartPos
[
i
];
int
label
=
rand
()
%
seqLen
;
auto
endPos
=
begPos
+
beamSize
;
if
(
randFloat
()
>
0.5
)
{
// force the randomly generated label falls in the beam by chance 0.5.
// otherwise, when sequence length is relatively long and beam size is
// relatively small, the gold sequences falls off the beam at in
// the first search.
real
*
begPos
=
beam
.
selectedIndices
.
data
()
+
i
*
beamSize
;
beam
.
colIdxInBeam
[
i
]
=
rand
()
%
count_if
(
begPos
,
begPos
+
beamSize
,
[](
const
real
&
val
)
{
return
val
!=
-
1.
;
});
beam
.
groundTruth
[
i
]
=
beam
.
selectedIndices
[
i
*
beamSize
+
beam
.
colIdxInBeam
[
i
]];
beam
.
inBeam
[
i
]
=
1
;
}
else
{
int
label
=
rand
()
%
(
beam
.
seqStartPos
[
i
+
1
]
-
beam
.
seqStartPos
[
i
]);
beam
.
groundTruth
[
i
]
=
label
;
if
(
find
(
begPos
,
endPos
,
real
(
label
))
!=
endPos
)
beam
.
inBeam
[
i
]
=
1
;
begPos
=
endPos
;
real
*
begPos
=
beam
.
selectedIndices
.
data
()
+
i
*
beamSize
;
real
*
endPos
=
begPos
+
beamSize
;
real
*
lblPos
=
find
(
begPos
,
endPos
,
real
(
label
));
if
(
lblPos
!=
endPos
)
{
beam
.
inBeam
[
i
]
=
1
;
beam
.
colIdxInBeam
[
i
]
=
lblPos
-
begPos
;
}
}
beam
.
rowIdxInBeam
[
i
]
=
i
;
}
...
...
@@ -169,22 +207,33 @@ void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions,
for
(
size_t
i
=
2
;
i
<
beamExpansions
.
size
();
++
i
)
{
SingleBeamExpansion
&
curBeam
=
beamExpansions
[
i
];
SingleBeamExpansion
&
prevBeam
=
beamExpansions
[
i
-
1
];
curBeam
.
groundTruth
.
resize
(
seqNum
,
0
);
curBeam
.
inBeam
.
resize
(
seqNum
,
0
);
curBeam
.
rowIdxInBeam
.
resize
(
seqNum
,
-
1
);
curBeam
.
resetGroundTruth
(
seqNum
);
// iterate over each sequence
for
(
size_t
j
=
0
;
j
<
seqNum
;
++
j
)
{
if
(
prevBeam
.
inBeam
[
j
])
{
// gold sequence falls in the beam in previous search.
auto
begPos
=
prevBeam
.
selectedIndices
.
begin
();
auto
endPos
=
begPos
+
prevBeam
.
rowIdxInBeam
[
j
]
*
beamSize
;
size_t
totalExpansion
=
prevBeam
.
rowIdxInBeam
[
j
]
*
beamSize
-
count
(
begPos
,
endPos
,
-
1.
);
curBeam
.
rowIdxInBeam
[
j
]
=
totalExpansion
+
prevBeam
.
groundTruth
[
j
];
if
(
!
prevBeam
.
inBeam
[
j
])
continue
;
// gold sequence falls in the beam in previous search.
real
*
begPos
=
prevBeam
.
selectedIndices
.
data
();
int
offset
=
prevBeam
.
rowIdxInBeam
[
j
]
*
beamSize
+
prevBeam
.
colIdxInBeam
[
j
];
curBeam
.
rowIdxInBeam
[
j
]
=
count_if
(
begPos
,
begPos
+
offset
,
[](
const
real
&
val
)
{
return
val
!=
-
1.
;
});
if
(
randFloat
()
>
0.5
)
{
// force the randomly generated label falls in the beam by chance 0.5.
// otherwise, when sequence length is relatively long and beam size is
// relatively small, the gold sequences falls off the beam at in
// the first search.
real
*
start
=
curBeam
.
selectedIndices
.
data
()
+
curBeam
.
rowIdxInBeam
[
j
]
*
beamSize
;
int
n
=
rand
()
%
count_if
(
start
,
start
+
beamSize
,
[](
const
real
&
val
)
{
return
val
!=
-
1.
;
});
curBeam
.
colIdxInBeam
[
j
]
=
n
;
curBeam
.
groundTruth
[
j
]
=
*
(
start
+
n
);
curBeam
.
inBeam
[
j
]
=
1
;
}
else
{
CHECK_LE
(
curBeam
.
rowIdxInBeam
[
j
]
+
1
,
curBeam
.
subSeqStartPos
.
size
()
-
1
);
int
start
=
curBeam
.
subSeqStartPos
[
curBeam
.
rowIdxInBeam
[
j
]];
...
...
@@ -193,16 +242,14 @@ void genGroundTruth(vector<SingleBeamExpansion>& beamExpansions,
int
label
=
rand
()
%
(
end
-
start
);
curBeam
.
groundTruth
[
j
]
=
label
;
auto
findBeg
=
curBeam
.
selectedIndices
.
begin
()
+
curBeam
.
rowIdxInBeam
[
j
]
*
beamSize
;
auto
findEnd
=
findBeg
+
beamSize
;
if
(
find
(
findBeg
,
findEnd
,
real
(
label
))
!=
findEnd
)
real
*
findBeg
=
curBeam
.
selectedIndices
.
data
()
+
curBeam
.
rowIdxInBeam
[
j
]
*
beamSize
;
real
*
lblPos
=
find
(
findBeg
,
findBeg
+
beamSize
,
static_cast
<
real
>
(
label
));
if
(
lblPos
!=
(
findBeg
+
beamSize
))
{
curBeam
.
inBeam
[
j
]
=
1
;
}
else
{
// in previous search, gold sequence has fallen off the beam,
// the beam search stops, here use -1 as a dummy label.
// It will not used in calculation the cost.
beamExpansions
[
i
].
groundTruth
[
j
]
=
-
1
;
curBeam
.
colIdxInBeam
[
j
]
=
lblPos
-
findBeg
;
}
}
}
}
...
...
@@ -230,15 +277,12 @@ void genRandomBeamExpansion(size_t expansionCount,
genGroundTruth
(
beamExpansions
,
beamSize
);
}
void
testCrossEntropyOverBeam
(
bool
useGpu
)
{
void
testCrossEntropyOverBeam
(
bool
useGpu
,
size_t
beamSize
,
vector
<
SingleBeamExpansion
>&
beams
)
{
TestConfig
config
;
config
.
layerConfig
.
set_type
(
"cross_entropy_over_beam"
);
const
size_t
expansionCount
=
3
;
const
size_t
beamSize
=
MAX_BEAM_SIZE
;
vector
<
SingleBeamExpansion
>
beams
;
genRandomBeamExpansion
(
expansionCount
,
beamSize
,
beams
);
size_t
seqNum
=
0
;
for
(
size_t
i
=
1
;
i
<
beams
.
size
();
++
i
)
{
const
SingleBeamExpansion
&
beam
=
beams
[
i
];
...
...
@@ -291,7 +335,17 @@ void testCrossEntropyOverBeam(bool useGpu) {
}
// Generates one random set of beam expansions and runs the layer test on it
// with useGpu = false and then useGpu = true. SEED and the chosen beam size
// are logged.
TEST(Layer, CrossEntropyOverBeam) {
  LOG(INFO) << "SEED = " << SEED;
  const size_t beamSize = 1 + rand() % MAX_BEAM_SIZE;
  LOG(INFO) << "beamSize = " << beamSize;

  // TODO(caoying): test with more beam expansions.
  const size_t expansionCount = 3;
  vector<SingleBeamExpansion> beams;
  genRandomBeamExpansion(expansionCount, beamSize, beams);

  for (bool useGpu : {false, true})
    testCrossEntropyOverBeam(useGpu, beamSize, beams);
}
int
main
(
int
argc
,
char
**
argv
)
{
...
...
@@ -299,7 +353,7 @@ int main(int argc, char** argv) {
hl_start
();
hl_init
(
FLAGS_gpu_id
);
FLAGS_thread_local_rand_use_global_seed
=
true
;
srand
(
1
);
srand
(
SEED
);
testing
::
InitGoogleTest
(
&
argc
,
argv
);
return
RUN_ALL_TESTS
();
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录