Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
6fa84d40
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6fa84d40
编写于
6月 22, 2017
作者:
C
Cao Ying
提交者:
GitHub
6月 22, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2521 from emailweixu/fix_rnn_gen
Fix bugs for rnn generation.
上级
65aa8941
b6910529
变更
10
显示空白变更内容
内联
并排
Showing
10 changed file
with
114 addition
and
86 deletion
+114
-86
paddle/gserver/gradientmachines/NeuralNetwork.cpp
paddle/gserver/gradientmachines/NeuralNetwork.cpp
+5
-4
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+11
-5
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+1
-0
paddle/gserver/layers/AgentLayer.cpp
paddle/gserver/layers/AgentLayer.cpp
+66
-64
paddle/gserver/layers/AgentLayer.h
paddle/gserver/layers/AgentLayer.h
+6
-1
paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf
paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf
+4
-4
paddle/trainer/tests/sample_trainer_rnn_gen.conf
paddle/trainer/tests/sample_trainer_rnn_gen.conf
+3
-3
paddle/utils/CustomStackTrace.h
paddle/utils/CustomStackTrace.h
+5
-1
paddle/utils/tests/test_CustomStackTrace.cpp
paddle/utils/tests/test_CustomStackTrace.cpp
+0
-1
python/paddle/v2/layer.py
python/paddle/v2/layer.py
+13
-3
未找到文件。
paddle/gserver/gradientmachines/NeuralNetwork.cpp
浏览文件 @
6fa84d40
...
...
@@ -241,11 +241,14 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs,
dataLayers_
[
i
]
->
setData
(
inArgs
[
i
]);
}
gLayerStackTrace
.
set_stage
(
true
);
{
for
(
auto
&
layer
:
layers_
)
{
REGISTER_TIMER_INFO
(
"ForwardTimer"
,
layer
->
getName
().
c_str
());
gLayerStackTrace
.
push
(
layer
->
getName
());
layer
->
forward
(
passType
);
gLayerStackTrace
.
pop
(
layer
->
getName
());
}
}
...
...
@@ -254,9 +257,6 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs,
for
(
auto
&
layer
:
outputLayers_
)
{
outArgs
->
push_back
(
layer
->
getOutput
());
}
if
(
passType
==
PASS_TEST
)
{
gLayerStackTrace
.
clear
();
}
}
void
NeuralNetwork
::
resetState
()
{
...
...
@@ -283,9 +283,10 @@ void NeuralNetwork::getState(MachineState& machineState) {
}
void
NeuralNetwork
::
backward
(
const
UpdateCallback
&
callback
)
{
gLayerStackTrace
.
pop
(
""
);
// tell layer trace is during backward.
gLayerStackTrace
.
set_stage
(
false
);
FOR_EACH_R
(
layer
,
layers_
)
{
REGISTER_TIMER_INFO
(
"BackwardTimer"
,
(
*
layer
)
->
getName
().
c_str
());
gLayerStackTrace
.
push
((
*
layer
)
->
getName
());
if
((
*
layer
)
->
needGradient
())
{
(
*
layer
)
->
backward
(
callback
);
}
...
...
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
浏览文件 @
6fa84d40
...
...
@@ -208,6 +208,7 @@ void RecurrentGradientMachine::init(
});
CHECK
(
subModelConfig
!=
config
.
sub_models
().
end
());
reversed_
=
subModelConfig
->
reversed
();
generating_
=
subModelConfig
->
has_generator
();
inFrameLines_
.
resize
(
subModelConfig
->
in_links_size
());
for
(
size_t
i
=
0
;
i
<
inFrameLines_
.
size
();
++
i
)
{
...
...
@@ -538,7 +539,7 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
The outputs are outFramesLines_[i].agentLayer
*/
if
(
inFrameLines_
.
empty
()
&&
passType
==
PASS_TEST
)
{
if
(
generating_
)
{
generateSequence
();
return
;
}
// else forward..
...
...
@@ -569,6 +570,9 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
}
void
RecurrentGradientMachine
::
backward
(
const
UpdateCallback
&
callback
)
{
if
(
generating_
)
{
return
;
}
REGISTER_TIMER_INFO
(
"RecurrentBwTime"
,
"RecurrentBwTime"
);
AsyncGpuBlock
asyncGpuBlock
;
for
(
int
i
=
maxSequenceLength_
-
1
;
i
>=
0
;
--
i
)
{
...
...
@@ -1321,11 +1325,10 @@ void RecurrentGradientMachine::fillGenOutputs() {
batchMachineIdVec_
.
clear
();
generator_
.
ids
.
clear
();
int
*
starts
=
generator_
.
outArg
.
sequenceStartPositions
->
getMutableData
(
false
);
starts
[
0
]
=
0
;
if
(
numResults
>
1
)
{
real
*
probs
=
generator_
.
outArg
.
in
->
getData
();
int
*
starts
=
generator_
.
outArg
.
sequenceStartPositions
->
getMutableData
(
false
);
starts
[
0
]
=
0
;
for
(
size_t
i
=
0
;
i
<
finalPaths_
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
finalPaths_
[
i
].
size
();
++
j
)
{
Path
&
path
=
finalPaths_
[
i
][
j
];
...
...
@@ -1348,7 +1351,10 @@ void RecurrentGradientMachine::fillGenOutputs() {
}
else
{
for
(
size_t
i
=
0
;
i
<
finalPaths_
.
size
();
++
i
)
{
CHECK
(
!
finalPaths_
[
i
].
empty
());
generator_
.
ids
=
finalPaths_
[
i
][
0
].
ids
;
generator_
.
ids
.
insert
(
generator_
.
ids
.
begin
(),
finalPaths_
[
i
][
0
].
ids
.
begin
(),
finalPaths_
[
i
][
0
].
ids
.
end
());
starts
[
i
+
1
]
=
starts
[
i
]
+
finalPaths_
[
i
][
0
].
ids
.
size
();
}
}
}
...
...
paddle/gserver/gradientmachines/RecurrentGradientMachine.h
浏览文件 @
6fa84d40
...
...
@@ -414,6 +414,7 @@ protected:
std
::
vector
<
int
>
ids
;
// store generated sequences
Argument
outArg
;
// final output argument
};
bool
generating_
;
Generator
generator_
;
std
::
vector
<
std
::
unique_ptr
<
NeuralNetwork
>>
frames_
;
...
...
paddle/gserver/layers/AgentLayer.cpp
浏览文件 @
6fa84d40
...
...
@@ -109,6 +109,40 @@ void GatherAgentLayer::forwardValue(PassType passType) {
}
}
namespace {

/**
 * @brief Scatter the elements of srcVec into destVec.
 *
 * For every position i, performs dest[index[i]] = src[i].
 *
 * @param srcVec   values to scatter.
 * @param indexVec destination position for each value; must have the same
 *                 size as srcVec (enforced with CHECK_EQ).
 * @param destVec  vector that receives the values. The positions read from
 *                 indexVec are used as-is; no range check is done here.
 */
void copyElements(const IVector& srcVec,
                  const IVector& indexVec,
                  IVector& destVec) {
  CHECK_EQ(srcVec.getSize(), indexVec.getSize());

  const int* values = srcVec.getData();
  const int* positions = indexVec.getData();
  int* out = destVec.getData();

  const int count = indexVec.getSize();
  for (int k = 0; k < count; ++k) {
    const int target = positions[k];
    out[target] = values[k];
  }
}

}  // namespace
/**
 * @brief Gather the label ids produced by every real layer into output_.ids.
 *
 * output_.ids is sized to allIds_. For real layer n, a vector is created over
 * allIds_ starting at offset idIndex_[n] with as many entries as that layer
 * has label ids; those entries are then used as destination positions when
 * copying the layer's ids into output_.ids (see copyElements).
 *
 * Returns without doing anything when the first real layer has no label
 * output. Only the first layer is checked this way.
 *
 * @param passType not used by this function.
 */
void GatherAgentLayer::forwardIds(PassType passType) {
  if (!realLayers_[0]->getOutputLabel()) return;

  IVector::resizeOrCreate(output_.ids, allIds_->getSize(), useGpu_);
  IVectorPtr gathered = output_.ids;

  // NOTE(review): idsVec_ is sized from idIndex_ but indexed by real-layer
  // number below; this presumably relies on both having the same length.
  idsVec_.resize(idIndex_.size());

  const size_t numLayers = realLayers_.size();
  for (size_t n = 0; n < numLayers; ++n) {
    const IVectorPtr& layerIds = realLayers_[n]->getOutputLabel();

    // Destination positions for this layer live inside allIds_.
    auto* positions = allIds_->getData() + idIndex_[n];
    idsVec_[n] = IVector::create(positions,
                                 /* size */ layerIds->getSize(),
                                 useGpu_);

    execViaCpu(&copyElements, *layerIds, *idsVec_[n], *gathered);
  }
}
void
GatherAgentLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
(
void
)
callback
;
const
MatrixPtr
&
outputGrad
=
getOutputGrad
();
...
...
@@ -136,23 +170,22 @@ void ScatterAgentLayer::forward(PassType passType) {
CHECK_EQ
(
realLayer_
->
getDeviceId
(),
this
->
getDeviceId
());
int
width
=
this
->
getSize
();
if
(
selectionMode_
)
{
forwardWithSelection
(
passType
);
}
else
{
if
(
realOutArg_
.
hasSeq
())
{
forwardSequence
(
passType
);
}
else
if
(
realOutArg_
.
value
||
realOutArg_
.
ids
)
{
output_
.
subArgFrom
(
realOutArg_
,
/* offset */
idIndex_
,
idSize_
,
width
,
useGpu_
,
/* trans */
false
,
/* seqFlag */
true
,
/* seqStart */
seqStartPosIndex_
,
/* seqSize */
numSequences_
);
}
else
{
output_
.
subArgFrom
(
realOutArg_
,
/* offset */
idIndex_
,
idSize_
,
width
,
useGpu_
);
}
else
{
// used in generation
if
(
realLayer_
->
getOutput
().
ids
)
{
IVector
::
resizeOrCreate
(
output_
.
ids
,
ids_
->
getSize
(),
useGpu_
);
output_
.
ids
->
selectFrom
(
*
realLayer_
->
getOutput
().
ids
,
*
ids_
);
}
if
(
realLayer_
->
getOutput
().
value
)
{
int
height
=
ids_
->
getSize
();
resetOutput
(
height
,
width
);
const
MatrixPtr
&
outV
=
getOutputValue
();
const
MatrixPtr
&
realV
=
realLayer_
->
getOutputValue
();
outV
->
selectRows
(
*
realV
,
*
ids_
);
}
}
}
...
...
@@ -160,6 +193,8 @@ void ScatterAgentLayer::forward(PassType passType) {
void
ScatterAgentLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
(
void
)
callback
;
CHECK
(
!
selectionMode_
);
const
MatrixPtr
&
outputGrad
=
realOutArg_
.
grad
;
const
MatrixPtr
&
realGrad
=
realLayer_
->
getOutputGrad
();
if
(
realGrad
)
{
...
...
@@ -174,42 +209,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
REGISTER_LAYER
(
gather_agent
,
GatherAgentLayer
);
REGISTER_LAYER
(
scatter_agent
,
ScatterAgentLayer
);
/**
 * @brief Concatenate the label ids of all real layers into output_.ids and
 *        record where each layer's ids start.
 *
 * Requires output_.subSequenceStartPositions to be set; otherwise logs
 * "Not implemented" at FATAL level. Returns early when the first real layer
 * has no label output.
 *
 * @param passType not used by this function.
 */
void GatherAgentLayer::forwardIds(PassType passType) {
  IVectorPtr idReal = realLayers_[0]->getOutputLabel();
  if (!idReal) return;

  if (!output_.subSequenceStartPositions) {
    LOG(FATAL) << "Not implemented";
    return;
  }

  int* starts = output_.subSequenceStartPositions->getMutableData(false);
  const size_t numLayers = realLayers_.size();

  // Gather generator.idsVec.
  // A trailing -1 marks a beam search generation result: keep only the first
  // result of every layer.
  const bool isBeamSearchResult =
      idReal->getData()[idReal->getSize() - 1] == -1;
  if (isBeamSearchResult) {
    for (size_t n = 0; n < numLayers; ++n) {
      // The first element stores the size of the first result.
      idReal = realLayers_[n]->getOutputLabel();
      idReal->subVecFrom(*idReal, 1, idReal->getData()[0]);
    }
  }

  // Running total of id counts gives each layer's start offset.
  int height = 0;
  for (size_t n = 0; n < numLayers; ++n) {
    CHECK(realLayers_[n]->getOutputLabel());
    starts[n] = height;
    height += realLayers_[n]->getOutputLabel()->getSize();
  }
  starts[numLayers] = height;
  output_.sequenceStartPositions->getMutableData(false)[1] = height;

  IVector::resizeOrCreate(output_.ids, height, false);
  for (size_t n = 0; n < numLayers; ++n) {
    const int begin = starts[n];
    const int length = starts[n + 1] - begin;
    output_.ids->subVec(begin, length)
        ->copyFrom(*realLayers_[n]->getOutputLabel());
  }
}
void
ScatterAgentLayer
::
forwardSequence
(
PassType
passType
)
{
void
ScatterAgentLayer
::
forwardWithSelection
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
CHECK_EQ
(
realLayer_
->
getDeviceId
(),
this
->
getDeviceId
());
...
...
@@ -220,17 +220,19 @@ void ScatterAgentLayer::forwardSequence(PassType passType) {
AsyncGpuBlock
asyncGpuBlock
;
REGISTER_TIMER_INFO
(
"SequenceAgentLayerForward"
,
getName
().
c_str
());
if
(
realOutArg_
.
value
||
realOutArg_
.
ids
)
{
CHECK
(
realOutArg_
.
sequenceStartPositions
);
output_
.
subArgFrom
(
realOutArg_
,
/* offset */
idIndex_
,
idSize_
,
width
,
useGpu_
,
/* trans */
false
,
/* seqFlag */
true
,
/* seqStart */
seqStartPosIndex_
,
/* seqSize */
numSequences_
);
if
(
!
input
.
hasSeq
())
{
if
(
realLayer_
->
getOutput
().
ids
)
{
IVector
::
resizeOrCreate
(
output_
.
ids
,
ids_
->
getSize
(),
useGpu_
);
output_
.
ids
->
selectFrom
(
*
realLayer_
->
getOutput
().
ids
,
*
ids_
);
}
if
(
realLayer_
->
getOutput
().
value
)
{
int
height
=
ids_
->
getSize
();
resetOutput
(
height
,
width
);
const
MatrixPtr
&
outV
=
getOutputValue
();
const
MatrixPtr
&
realV
=
realLayer_
->
getOutputValue
();
outV
->
selectRows
(
*
realV
,
*
ids_
);
}
}
else
{
// Putting the generation logic here is really an ugly hack!
// used in generation
...
...
paddle/gserver/layers/AgentLayer.h
浏览文件 @
6fa84d40
...
...
@@ -110,6 +110,9 @@ protected:
// of real layer.
ICpuGpuVectorPtr
inputStartPos_
;
// true for setRealLayer, false for setRealLayerAndOutput
bool
selectionMode_
;
public:
explicit
ScatterAgentLayer
(
const
LayerConfig
&
config
)
:
Layer
(
config
)
{}
...
...
@@ -137,6 +140,7 @@ public:
}
else
{
cpuIds_
=
ids_
;
}
selectionMode_
=
true
;
}
// set real layer and output, [idIndex, idIndex + idSize) of *ids*
...
...
@@ -153,6 +157,7 @@ public:
idIndex_
=
idIndex
;
idSize_
=
idSize
;
handleBackward_
=
handleBackward
;
selectionMode_
=
false
;
}
void
setSequenceStartPositions
(
const
ICpuGpuVectorPtr
&
sequenceStartPositions
,
...
...
@@ -166,7 +171,7 @@ public:
void
forward
(
PassType
passType
)
override
;
void
backward
(
const
UpdateCallback
&
callback
)
override
;
void
forward
Sequence
(
PassType
passType
);
void
forward
WithSelection
(
PassType
passType
);
};
}
// namespace paddle
paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf
浏览文件 @
6fa84d40
...
...
@@ -53,7 +53,7 @@ def outer_step(dummy_data):
bos_id
=
0
,
eos_id
=
num_words
-
1
,
beam_size
=
2
if
beam_flag
else
1
,
num_results_per_sample
=
2
if
beam_flag
else
1
,
num_results_per_sample
=
1
,
max_length
=
10
)
return
beam_gen
...
...
paddle/trainer/tests/sample_trainer_rnn_gen.conf
浏览文件 @
6fa84d40
paddle/utils/CustomStackTrace.h
浏览文件 @
6fa84d40
...
...
@@ -55,13 +55,17 @@ public:
* Else, just set status to popping.
*/
/**
 * @brief Switch the trace to popping state and drop the top entry when it
 *        matches item.
 *
 * The pushing() flag is always cleared. The stack is only modified when it
 * is non-empty and its top element equals item; otherwise it is left
 * untouched.
 *
 * @param item the entry expected on top of the current stack.
 */
void pop(const T& item) {
  pushing() = false;
  auto& s = this->stack();
  // Guard against an empty stack: calling top() on an empty std::stack is
  // undefined behaviour (assumes stack() yields a std::stack-like container
  // that provides empty()).
  if (!s.empty() && item == s.top()) {
    s.pop();
  }
}
/**
 * @brief Record which stage of computation is currently running.
 *
 * @param isForward value stored into the pushing() flag: true for the
 *                  forward stage, false for the backward stage.
 */
void set_stage(bool isForward) {
  auto&& stageFlag = pushing();
  stageFlag = isForward;
}
/**
* @brief clear current thread stack.
*/
...
...
paddle/utils/tests/test_CustomStackTrace.cpp
浏览文件 @
6fa84d40
...
...
@@ -72,7 +72,6 @@ TEST(CustomStackTrace, normalTrain) {
for
(
size_t
i
=
0
;
i
<
layerSize
;
++
i
)
{
tracer
.
push
(
"layer_"
+
paddle
::
str
::
to_string
(
i
));
}
tracer
.
pop
(
""
);
for
(
size_t
i
=
0
;
i
<
layerSize
;
++
i
)
{
tracer
.
pop
(
"layer_"
+
paddle
::
str
::
to_string
(
layerSize
-
1
-
i
));
}
...
...
python/paddle/v2/layer.py
浏览文件 @
6fa84d40
...
...
@@ -45,12 +45,12 @@ __all__ = ['data', 'parse_network']
def __need_to_keep__(name):
    """Return True when `name` is one of the names in the keep list below.

    :param name: name to look up.
    :type name: str
    :return: True if `name` is in the keep list, False otherwise.
    :rtype: bool
    """
    # Each entry needs its own comma: two adjacent string literals are
    # concatenated by Python, which would turn a repeated 'layer_support'
    # into the single entry 'layer_supportlayer_support'.
    return name in [
        'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType',
        'layer_support', 'BaseGeneratedInput'
    ]
def __need_to_wrap__(name):
    """Return True unless `name` is in the exclusion list below.

    :param name: name to look up.
    :type name: str
    :return: False for 'AggregateLevel', 'ExpandLevel' and
             'BaseGeneratedInput'; True for every other name.
    :rtype: bool
    """
    # A single return: the list that includes 'BaseGeneratedInput' used to
    # sit in a second, unreachable return statement.
    return name not in ['AggregateLevel', 'ExpandLevel', 'BaseGeneratedInput']
def
__convert_name__
(
inname
):
...
...
@@ -199,6 +199,15 @@ def __get_used_submodels__(layer_names):
return
submodel_names
def __get_submodel_data_out_links__():
    """Collect the out-link names of all sub-models that refer to data layers.

    Walks every sub-model in cp.g_config.model_config.sub_models and keeps the
    link_name of each out-link whose entry in cp.g_layer_map has type 'data'.

    :return: names of the matching out-links.
    :rtype: set
    """
    return set(
        out_link.link_name
        for sub_model in cp.g_config.model_config.sub_models
        for out_link in sub_model.out_links
        if cp.g_layer_map[out_link.link_name].type == 'data')
def
__get_used_evaluators__
(
layer_names
):
evaluator_names
=
set
()
for
e
in
cp
.
g_config
.
model_config
.
evaluators
:
...
...
@@ -264,6 +273,7 @@ def parse_network(output_layers, extra_layers=None):
submodel_names
=
__get_used_submodels__
(
layer_names
)
submodel_names
.
add
(
'root'
)
evaluator_names
=
__get_used_evaluators__
(
layer_names
)
data_out_links
=
__get_submodel_data_out_links__
()
input_layer_names
=
set
()
output_layer_names
=
set
()
...
...
@@ -279,7 +289,7 @@ def parse_network(output_layers, extra_layers=None):
continue
model_config
.
layers
.
extend
([
l
])
if
l
.
type
==
'data'
:
if
l
.
name
in
model_config
.
output_layer_name
s
:
if
l
.
name
in
data_out_link
s
:
"""
In text generation, the outlink to save the generated word
indices is a data_layer defined in recurrent_group. This
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录