Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
7c8acd4f
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7c8acd4f
编写于
8月 23, 2017
作者:
C
Cao Ying
提交者:
GitHub
8月 23, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2871 from lcy-seso/print_attention_weight
enable layer group to output a sequence inside it during generation.
上级
14f791f2
3bf44002
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
146 addition
and
64 deletion
+146
-64
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+78
-32
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+38
-9
paddle/parameter/Argument.cpp
paddle/parameter/Argument.cpp
+29
-23
paddle/parameter/Argument.h
paddle/parameter/Argument.h
+1
-0
未找到文件。
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
浏览文件 @
7c8acd4f
...
...
@@ -1012,11 +1012,6 @@ void RecurrentGradientMachine::generateSequence() {
/* width */
resultNum
,
false
,
/* useGpu */
false
);
Matrix
::
resizeOrCreate
(
generator_
.
outArg
.
value
,
/* height */
maxGenWordCount
,
/* width */
1
,
false
,
/* useGpu */
false
);
}
ICpuGpuVector
::
resizeOrCreate
(
generator_
.
outArg
.
sequenceStartPositions
,
numSequences
+
1
,
...
...
@@ -1026,7 +1021,7 @@ void RecurrentGradientMachine::generateSequence() {
}
else
{
oneWaySearch
(
numSequences
);
}
if
(
dataArgsSize_
)
createDataOutlink
(
batchMachineIdVec_
);
if
(
dataArgsSize_
)
createDataOutlink
();
size_t
size
=
generator_
.
ids
.
size
();
generator_
.
outArg
.
ids
->
resize
(
size
);
...
...
@@ -1106,6 +1101,7 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
}
batchMachineIdVec_
.
clear
();
batchMachineStartPos_
.
clear
();
int
*
starts
=
generator_
.
outArg
.
sequenceStartPositions
->
getMutableData
(
false
);
starts
[
0
]
=
0
;
generator_
.
ids
.
clear
();
...
...
@@ -1312,13 +1308,20 @@ void RecurrentGradientMachine::fillGenOutputs() {
finalPaths_
[
i
].
resize
(
minFinalPathsSize
);
}
batchMachineIdVec_
.
clear
();
generator_
.
ids
.
clear
();
int
*
starts
=
generator_
.
outArg
.
sequenceStartPositions
->
getMutableData
(
false
);
starts
[
0
]
=
0
;
if
(
numResults
>
1
)
{
real
*
probs
=
generator_
.
outArg
.
in
->
getData
();
int
idsProbSaveSize
=
0
;
for
(
auto
inSeq
:
finalPaths_
)
{
for
(
auto
path
:
inSeq
)
idsProbSaveSize
+=
path
.
ids
.
size
();
idsProbSaveSize
+=
inSeq
.
size
();
}
Matrix
::
resizeOrCreate
(
generator_
.
outArg
.
value
,
idsProbSaveSize
,
1
,
false
,
false
);
real
*
idsProb
=
generator_
.
outArg
.
value
->
getData
();
real
*
probs
=
generator_
.
outArg
.
in
->
getData
();
size_t
curPos
=
0
;
for
(
size_t
i
=
0
;
i
<
finalPaths_
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
finalPaths_
[
i
].
size
();
++
j
)
{
...
...
@@ -1333,24 +1336,16 @@ void RecurrentGradientMachine::fillGenOutputs() {
curPos
+=
genLen
;
idsProb
[
curPos
++
]
=
-
1.0
;
probs
[
i
*
numResults
+
j
]
=
path
.
logProb
;
if
(
!
j
&&
dataArgsSize_
)
{
// in beam search, here only reserved the top 1 generated result
// for out_links that are not the generated word indices.
batchMachineIdVec_
.
insert
(
batchMachineIdVec_
.
end
(),
path
.
machineIdVec
.
begin
(),
path
.
machineIdVec
.
end
());
}
}
starts
[
i
+
1
]
=
generator_
.
ids
.
size
();
}
}
else
{
for
(
size_t
i
=
0
;
i
<
finalPaths_
.
size
();
++
i
)
{
CHECK
(
!
finalPaths_
[
i
].
empty
());
generator_
.
ids
.
insert
(
generator_
.
ids
.
begin
(),
finalPaths_
[
i
][
0
].
ids
.
begin
(),
finalPaths_
[
i
][
0
]
.
ids
.
end
());
starts
[
i
+
1
]
=
starts
[
i
]
+
finalPaths_
[
i
][
0
]
.
ids
.
size
();
Path
&
path
=
finalPaths_
[
i
][
0
];
generator_
.
ids
.
insert
(
generator_
.
ids
.
begin
(),
path
.
ids
.
begin
(),
path
.
ids
.
end
());
starts
[
i
+
1
]
=
starts
[
i
]
+
path
.
ids
.
size
();
}
}
}
...
...
@@ -1364,25 +1359,76 @@ void RecurrentGradientMachine::copyDataOutlinkFrame(size_t machineCur) {
}
}
void
RecurrentGradientMachine
::
createDataOutlink
(
std
::
vector
<
int
>&
machineIdVec
)
{
size_t
seqNum
=
getBeamSize
()
>
1UL
?
finalPaths_
.
size
()
:
finalPaths_
[
0
].
size
();
std
::
vector
<
int
>
starts
(
seqNum
+
1
,
0
);
for
(
size_t
i
=
0
;
i
<
seqNum
;
++
i
)
{
size_t
seqLen
=
getBeamSize
()
>
1UL
?
finalPaths_
[
i
][
0
].
ids
.
size
()
:
finalPaths_
[
0
][
i
].
ids
.
size
();
starts
[
i
+
1
]
=
starts
[
i
]
+
seqLen
;
void
RecurrentGradientMachine
::
createDataOutlinkSelRowsInfo
(
bool
isSeq
,
std
::
vector
<
Argument
>&
outArgs
)
{
batchMachineIdVec_
.
clear
();
size_t
seqIdx
=
0
;
for
(
size_t
i
=
0
;
i
<
finalPaths_
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
finalPaths_
[
i
].
size
();
++
j
)
{
std
::
vector
<
int
>&
machineIdVec
=
finalPaths_
[
i
][
j
].
machineIdVec
;
if
(
isSeq
)
{
for
(
size_t
i
=
0
;
i
<
machineIdVec
.
size
();
++
i
)
{
size_t
rowId
=
machineIdVec
[
i
];
int
*
seqPos
=
outArgs
[
i
].
sequenceStartPositions
->
getMutableData
(
false
);
batchMachineIdVec_
.
push_back
(
seqPos
[
rowId
]);
}
}
else
{
batchMachineIdVec_
.
insert
(
batchMachineIdVec_
.
end
(),
machineIdVec
.
begin
(),
machineIdVec
.
end
());
}
seqIdx
++
;
}
}
}
void
RecurrentGradientMachine
::
createDataOutlinkCopySizeInfo
(
bool
isSeq
,
std
::
vector
<
Argument
>&
outArgs
,
std
::
vector
<
int
>&
copySize
)
{
size_t
totalSeqNum
=
std
::
accumulate
(
finalPaths_
.
begin
(),
finalPaths_
.
end
(),
0UL
,
[](
size_t
a
,
const
std
::
vector
<
Path
>&
b
)
{
return
a
+
b
.
size
();
});
copySize
.
resize
(
totalSeqNum
,
1
);
batchMachineStartPos_
.
resize
(
totalSeqNum
+
1
,
0
);
if
(
isSeq
)
{
ICpuGpuVectorPtr
inputSeqStartPos
=
outArgs
[
0
].
sequenceStartPositions
;
CHECK_EQ
(
static_cast
<
size_t
>
(
inputSeqStartPos
->
getSize
()
-
1
),
getBeamSize
()
>
1
?
finalPaths_
.
size
()
:
finalPaths_
[
0
].
size
());
int
*
starts
=
inputSeqStartPos
->
getMutableData
(
false
);
int
seqId
=
0
;
for
(
int
i
=
0
;
i
<
finalPaths_
.
size
();
++
i
)
{
for
(
int
j
=
0
;
j
<
finalPaths_
[
i
].
size
();
++
j
)
{
copySize
[
seqId
]
=
getBeamSize
()
>
1
?
starts
[
i
+
1
]
-
starts
[
i
]
:
starts
[
j
+
1
]
-
starts
[
j
];
batchMachineStartPos_
[
seqId
+
1
]
=
batchMachineStartPos_
[
seqId
]
+
finalPaths_
[
i
][
j
].
ids
.
size
();
seqId
++
;
}
}
}
else
{
for
(
size_t
i
=
0
;
i
<
finalPaths_
[
0
].
size
();
++
i
)
batchMachineStartPos_
[
i
+
1
]
=
batchMachineStartPos_
[
i
]
+
finalPaths_
[
0
][
i
].
ids
.
size
();
}
}
void
RecurrentGradientMachine
::
createDataOutlink
()
{
for
(
size_t
i
=
0
;
i
<
dataArgsSize_
;
i
++
)
{
bool
isSeq
=
dataArgsFrame_
[
i
][
0
].
hasSeq
();
std
::
vector
<
int
>
copySize
;
createDataOutlinkCopySizeInfo
(
isSeq
,
dataArgsFrame_
[
i
],
copySize
);
createDataOutlinkSelRowsInfo
(
isSeq
,
dataArgsFrame_
[
i
]);
dataArgs_
[
i
].
concat
(
dataArgsFrame_
[
i
],
machineIdVec
,
starts
,
batchMachineIdVec_
,
batchMachineStartPos_
,
copySize
,
useGpu_
,
HPPL_STREAM_1
,
PASS_TEST
);
auto
dataAgent
=
dynamic_cast
<
DataLayer
*>
(
outFrameLines_
[
i
+
1
].
agentLayer
.
get
());
CHECK_NOTNULL
(
dataAgent
);
...
...
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
浏览文件 @
7c8acd4f
...
...
@@ -190,7 +190,7 @@ public:
std
::
vector
<
int
>
ids
;
/**
* @brief idsProb, log probability of each generated word
s
.
* @brief idsProb, log probability of each generated word.
*/
std
::
vector
<
real
>
idsProb
;
...
...
@@ -472,15 +472,43 @@ private:
void
copyDataOutlinkFrame
(
size_t
machineCur
);
/*
* @brief In generation, if the layer group has more than 1 outlink, outlinks
* except the first one are data outlinks. This function creates the data
* outlinks.
* @note In beam search, only one generated sequence with the hightest log
* probabilites are retained.
* @param machineIdVec : select a row of output matrix in each frame
* that the generation process expanded.
* @brief In generation, if the layer group has more than 1 outlink, outlink
* except the first one is a data outlink. In RecurrentLayerGroup, each time
* step is a separate Network, outputs of a layer inside the
* RecurrentLayerGroup are stored in separate Arguments. If one layer is
* specified as an outlink of RecurrentLayerGroup. This function will
* collect outputs in each time step of each generated sequence which are
* dispersed in separate Arguments to form a new single Argument as output of
* RecurrentLayerGroup.
*/
void
createDataOutlink
(
std
::
vector
<
int
>&
machineIdVec
);
void
createDataOutlink
();
/*
* @brief decide to select how many rows from the Matrix stored the forward
* pass results from a start position.
*
* @param isSeq: a flag indicating whetehr the layer to be output of the
* RecurrentGradientMachine is a sequence or not
* @param outArgs: all of the the returned Arguments of the forward pass
* during the generation process.
* @param copySize: the returned result, number of rows to select from the
* Matrix stored the forward pass results from a start position.
*/
void
createDataOutlinkCopySizeInfo
(
bool
isSeq
,
std
::
vector
<
Argument
>&
outArgs
,
std
::
vector
<
int
>&
copySize
);
/*
* @brief decide index of the start row for each time step of a generated
* sequence in Matrix stored the entire beam search batch's forward pass
* results.
*
* @param isSeq: a flag indicating whether the layer to be output of the
* RecurrentGradientMachine is a sequence or not
* @param outArgs: all of the returned Arguments of the forward pass
* during the generation process.
*/
void
createDataOutlinkSelRowsInfo
(
bool
isSeq
,
std
::
vector
<
Argument
>&
outArgs
);
/*
* @brief used in beam search, connect previous frame to form recurrent link
...
...
@@ -543,6 +571,7 @@ private:
std
::
vector
<
int
>
topIds_
;
std
::
vector
<
int
>
seqIds_
;
std
::
vector
<
int
>
batchMachineIdVec_
;
std
::
vector
<
int
>
batchMachineStartPos_
;
std
::
vector
<
std
::
vector
<
Path
>>
finalPaths_
;
std
::
vector
<
real
>
minFinalPathLogProb_
;
BeamSearchControlCallbacks
*
beamSearchCtrlCallbacks_
;
...
...
paddle/parameter/Argument.cpp
浏览文件 @
7c8acd4f
...
...
@@ -276,17 +276,21 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src,
void
Argument
::
concat
(
const
std
::
vector
<
Argument
>&
args
,
const
std
::
vector
<
int
>&
selectRows
,
const
std
::
vector
<
int
>&
seqStartPos
,
const
std
::
vector
<
int
>&
copySize
,
bool
useGpu
,
hl_stream_t
stream
,
PassType
passType
)
{
CHECK
(
!
subSequenceStartPositions
)
<<
"undefined behavior for subsequence positions"
;
size_t
batchSize
=
selectRows
.
size
();
size_t
batchSize
=
0
;
for
(
size_t
i
=
0
;
i
<
copySize
.
size
();
++
i
)
batchSize
+=
copySize
[
i
]
*
(
seqStartPos
[
i
+
1
]
-
seqStartPos
[
i
]);
auto
copyArg
=
[
batchSize
,
stream
](
MatrixPtr
&
dst
,
MatrixPtr
src
,
int
s
tartRow
,
int
pos
,
int
desS
tartRow
,
int
srcStartRow
,
int
size
,
bool
useGpu
)
{
if
(
!
src
)
{
...
...
@@ -300,14 +304,14 @@ void Argument::concat(const std::vector<Argument>& args,
dst
->
resize
(
batchSize
,
width
);
}
MatrixPtr
tmpMatrix
=
dst
->
subMatrix
(
s
tartRow
,
size
);
tmpMatrix
->
copyFrom
(
*
src
->
subMatrix
(
pos
,
size
),
stream
);
MatrixPtr
tmpMatrix
=
dst
->
subMatrix
(
desS
tartRow
,
size
);
tmpMatrix
->
copyFrom
(
*
src
->
subMatrix
(
srcStartRow
,
size
),
stream
);
};
auto
copyIds
=
[
batchSize
,
stream
](
IVectorPtr
&
dst
,
const
IVectorPtr
&
src
,
int
s
tartRow
,
int
pos
,
int
desS
tartRow
,
int
srcStartRow
,
int
size
,
bool
useGpu
)
{
if
(
!
src
)
{
...
...
@@ -315,13 +319,14 @@ void Argument::concat(const std::vector<Argument>& args,
return
;
}
IVector
::
resizeOrCreate
(
dst
,
batchSize
,
useGpu
);
dst
->
subVec
(
startRow
,
size
)
->
copyFrom
(
*
src
->
subVec
(
pos
,
size
),
stream
);
dst
->
subVec
(
desStartRow
,
size
)
->
copyFrom
(
*
src
->
subVec
(
srcStartRow
,
size
),
stream
);
};
auto
copyStrs
=
[
batchSize
,
stream
](
SVectorPtr
&
dst
,
const
SVectorPtr
&
src
,
int
s
tartRow
,
int
pos
,
int
desS
tartRow
,
int
srcStartRow
,
int
size
,
bool
useGpu
)
{
if
(
!
src
)
{
...
...
@@ -333,30 +338,31 @@ void Argument::concat(const std::vector<Argument>& args,
}
else
{
dst
->
resize
(
batchSize
);
}
std
::
copy
(
src
->
begin
()
+
pos
,
src
->
begin
()
+
pos
+
size
,
dst
->
begin
()
+
startRow
);
std
::
copy
(
src
->
begin
()
+
srcStartRow
,
src
->
begin
()
+
srcStartRow
+
size
,
dst
->
begin
()
+
desStartRow
);
};
dataId
=
args
[
0
].
dataId
;
CHECK_NE
(
seqStartPos
.
size
(),
0UL
);
size_t
sampleNum
=
seqStartPos
.
size
()
-
1
;
for
(
size_t
i
=
0
;
i
<
sampleNum
;
++
i
)
{
int
desStartRow
=
0
;
for
(
size_t
i
=
0
;
i
<
copySize
.
size
()
;
++
i
)
{
int
startPos
=
seqStartPos
[
i
];
int
endPos
=
seqStartPos
[
i
+
1
];
CHECK_GE
(
args
.
size
(),
static_cast
<
size_t
>
(
endPos
-
startPos
));
for
(
int
j
=
startPos
;
j
<
endPos
;
++
j
)
{
const
Argument
&
arg
=
args
[
j
-
startPos
];
CHECK_EQ
(
arg
.
dataId
,
dataId
)
<<
"Arguments in concat should have"
<<
" same dataId"
;
const
int
copySize
=
1
;
const
int
rowIdx
=
selectRows
[
j
];
copyArg
(
in
,
arg
.
in
,
j
,
rowIdx
,
copySize
,
useGpu
);
copyArg
(
value
,
arg
.
value
,
j
,
rowIdx
,
copySize
,
useGpu
);
CHECK_EQ
(
arg
.
dataId
,
dataId
)
<<
"Arguments to concatenate should have "
<<
"the same dataId."
;
const
int
srcStartRow
=
selectRows
[
j
];
copyArg
(
in
,
arg
.
in
,
desStartRow
,
srcStartRow
,
copySize
[
i
],
useGpu
);
copyArg
(
value
,
arg
.
value
,
desStartRow
,
srcStartRow
,
copySize
[
i
],
useGpu
);
if
(
passType
!=
PASS_TEST
)
{
copyArg
(
grad
,
arg
.
grad
,
j
,
rowIdx
,
copySize
,
useGpu
);
copyArg
(
grad
,
arg
.
grad
,
desStartRow
,
srcStartRow
,
copySize
[
i
]
,
useGpu
);
}
copyIds
(
ids
,
arg
.
ids
,
j
,
rowIdx
,
copySize
,
useGpu
);
copyStrs
(
strs
,
arg
.
strs
,
j
,
rowIdx
,
copySize
,
useGpu
);
copyIds
(
ids
,
arg
.
ids
,
desStartRow
,
srcStartRow
,
copySize
[
i
],
useGpu
);
copyStrs
(
strs
,
arg
.
strs
,
desStartRow
,
srcStartRow
,
copySize
[
i
],
useGpu
);
desStartRow
+=
copySize
[
i
];
}
}
ICpuGpuVector
::
resizeOrCreate
(
...
...
paddle/parameter/Argument.h
浏览文件 @
7c8acd4f
...
...
@@ -240,6 +240,7 @@ struct Argument {
void
concat
(
const
std
::
vector
<
Argument
>&
args
,
const
std
::
vector
<
int
>&
selectRows
,
const
std
::
vector
<
int
>&
seqStartPos
,
const
std
::
vector
<
int
>&
copySize
,
bool
useGpu
,
hl_stream_t
stream
,
PassType
passType
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录