PaddlePaddle / PaddleDetection
Commit 6fa84d40

Authored Jun 22, 2017 by Cao Ying; committed via GitHub on Jun 22, 2017.

Merge pull request #2521 from emailweixu/fix_rnn_gen

Fix bugs for rnn generation.

Parents: 65aa8941, b6910529
Showing 10 changed files with 114 additions and 86 deletions (+114 −86).
- paddle/gserver/gradientmachines/NeuralNetwork.cpp (+5 −4)
- paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp (+11 −5)
- paddle/gserver/gradientmachines/RecurrentGradientMachine.h (+1 −0)
- paddle/gserver/layers/AgentLayer.cpp (+66 −64)
- paddle/gserver/layers/AgentLayer.h (+6 −1)
- paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf (+4 −4)
- paddle/trainer/tests/sample_trainer_rnn_gen.conf (+3 −3)
- paddle/utils/CustomStackTrace.h (+5 −1)
- paddle/utils/tests/test_CustomStackTrace.cpp (+0 −1)
- python/paddle/v2/layer.py (+13 −3)
paddle/gserver/gradientmachines/NeuralNetwork.cpp

```diff
@@ -241,11 +241,14 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs,
     dataLayers_[i]->setData(inArgs[i]);
   }
 
+  gLayerStackTrace.set_stage(true);
   {
     for (auto& layer : layers_) {
       REGISTER_TIMER_INFO("ForwardTimer", layer->getName().c_str());
       gLayerStackTrace.push(layer->getName());
       layer->forward(passType);
+      gLayerStackTrace.pop(layer->getName());
     }
   }
@@ -254,9 +257,6 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs,
   for (auto& layer : outputLayers_) {
     outArgs->push_back(layer->getOutput());
   }
-  if (passType == PASS_TEST) {
-    gLayerStackTrace.clear();
-  }
 }
 
 void NeuralNetwork::resetState() {
@@ -283,9 +283,10 @@ void NeuralNetwork::getState(MachineState& machineState) {
 }
 
 void NeuralNetwork::backward(const UpdateCallback& callback) {
-  gLayerStackTrace.pop("");  // tell layer trace is during backward.
+  gLayerStackTrace.set_stage(false);
   FOR_EACH_R(layer, layers_) {
     REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str());
     gLayerStackTrace.push((*layer)->getName());
     if ((*layer)->needGradient()) {
       (*layer)->backward(callback);
     }
```
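With this change forward() pops each layer from the trace as soon as its forward pass finishes, and backward() flags the stage explicitly instead of popping an empty sentinel, so the trace stays balanced and the PASS_TEST clear() becomes unnecessary. The following is a minimal standalone sketch of that push/pop discipline (a toy LayerTrace class with made-up layer names, not Paddle's CustomStackTrace):

```cpp
#include <cassert>
#include <iostream>
#include <stack>
#include <string>
#include <vector>

// Toy layer trace illustrating the discipline the new code follows:
// push a layer's name before running it, pop it right after it finishes,
// and flag the stage explicitly at the start of forward/backward.
class LayerTrace {
public:
  void set_stage(bool isForward) { forward_ = isForward; }
  bool inForward() const { return forward_; }
  void push(const std::string& name) { stack_.push(name); }
  void pop(const std::string& name) {
    // A mismatch here would mean a layer crashed or was never popped.
    assert(!stack_.empty() && stack_.top() == name);
    stack_.pop();
  }
  bool empty() const { return stack_.empty(); }

private:
  std::stack<std::string> stack_;
  bool forward_ = true;
};

int main() {
  LayerTrace trace;
  const std::vector<std::string> layers = {"embedding", "rnn", "softmax"};

  trace.set_stage(true);  // forward pass
  for (const auto& name : layers) {
    trace.push(name);
    // ... layer->forward(passType) would run here ...
    trace.pop(name);  // balanced: a clean pass leaves the trace empty
  }
  // Nothing lingers, so no special clear() is needed for PASS_TEST.
  assert(trace.empty() && trace.inForward());

  trace.set_stage(false);  // backward pass, instead of pop("")
  std::cout << "trace balanced; stage is "
            << (trace.inForward() ? "forward" : "backward") << "\n";
  return 0;
}
```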
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp

```diff
@@ -208,6 +208,7 @@ void RecurrentGradientMachine::init(
   });
   CHECK(subModelConfig != config.sub_models().end());
   reversed_ = subModelConfig->reversed();
+  generating_ = subModelConfig->has_generator();
 
   inFrameLines_.resize(subModelConfig->in_links_size());
   for (size_t i = 0; i < inFrameLines_.size(); ++i) {
@@ -538,7 +539,7 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
      The outputs are outFramesLines_[i].agentLayer
   */
 
-  if (inFrameLines_.empty() && passType == PASS_TEST) {
+  if (generating_) {
     generateSequence();
     return;
   }  // else forward..
@@ -569,6 +570,9 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
 }
 
 void RecurrentGradientMachine::backward(const UpdateCallback& callback) {
+  if (generating_) {
+    return;
+  }
   REGISTER_TIMER_INFO("RecurrentBwTime", "RecurrentBwTime");
   AsyncGpuBlock asyncGpuBlock;
   for (int i = maxSequenceLength_ - 1; i >= 0; --i) {
@@ -1321,11 +1325,10 @@ void RecurrentGradientMachine::fillGenOutputs() {
   batchMachineIdVec_.clear();
   generator_.ids.clear();
+  int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
+  starts[0] = 0;
   if (numResults > 1) {
     real* probs = generator_.outArg.in->getData();
-    int* starts =
-        generator_.outArg.sequenceStartPositions->getMutableData(false);
-    starts[0] = 0;
     for (size_t i = 0; i < finalPaths_.size(); ++i) {
       for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
         Path& path = finalPaths_[i][j];
@@ -1348,7 +1351,10 @@ void RecurrentGradientMachine::fillGenOutputs() {
   } else {
     for (size_t i = 0; i < finalPaths_.size(); ++i) {
       CHECK(!finalPaths_[i].empty());
-      generator_.ids = finalPaths_[i][0].ids;
+      generator_.ids.insert(generator_.ids.begin(),
+                            finalPaths_[i][0].ids.begin(),
+                            finalPaths_[i][0].ids.end());
+      starts[i + 1] = starts[i] + finalPaths_[i][0].ids.size();
     }
   }
 }
```
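The fillGenOutputs() hunks hoist the sequence-start bookkeeping out of the numResults > 1 branch, so the single-result path now also fills starts while it concatenates each sample's best path into generator_.ids. Below is a minimal standalone sketch of that offset arithmetic (starts[0] = 0, starts[i+1] = starts[i] + length of sequence i) using made-up data; it illustrates the bookkeeping, it is not Paddle's code:

```cpp
#include <cstdio>
#include <vector>

int main() {
  // Hypothetical generated id sequences, one per input sample.
  std::vector<std::vector<int>> finalIds = {{3, 7, 1}, {5, 2}, {9, 4, 4, 1}};

  // Flatten the ids and record where each sequence starts, the same
  // bookkeeping fillGenOutputs() performs: starts[0] = 0 and
  // starts[i + 1] = starts[i] + length of sequence i.
  std::vector<int> ids;
  std::vector<int> starts(finalIds.size() + 1);
  starts[0] = 0;
  for (size_t i = 0; i < finalIds.size(); ++i) {
    ids.insert(ids.end(), finalIds[i].begin(), finalIds[i].end());
    starts[i + 1] = starts[i] + static_cast<int>(finalIds[i].size());
  }

  // Sequence i occupies the half-open range ids[starts[i] .. starts[i+1]).
  for (size_t i = 0; i < finalIds.size(); ++i) {
    std::printf("seq %zu: [%d, %d)\n", i, starts[i], starts[i + 1]);
  }
  return 0;  // prints [0, 3), [3, 5), [5, 9)
}
```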
paddle/gserver/gradientmachines/RecurrentGradientMachine.h

```diff
@@ -414,6 +414,7 @@ protected:
     std::vector<int> ids;  // store generated sequences
     Argument outArg;       // final output argument
   };
+  bool generating_;
   Generator generator_;
 
   std::vector<std::unique_ptr<NeuralNetwork>> frames_;
```
paddle/gserver/layers/AgentLayer.cpp

```diff
@@ -109,6 +109,40 @@ void GatherAgentLayer::forwardValue(PassType passType) {
   }
 }
 
+namespace {
+
+// dest[index[i]] <- src[i] for each i
+void copyElements(const IVector& srcVec,
+                  const IVector& indexVec,
+                  IVector& destVec) {
+  const int* src = srcVec.getData();
+  const int* index = indexVec.getData();
+  int* dest = destVec.getData();
+  int len = indexVec.getSize();
+  CHECK_EQ(srcVec.getSize(), indexVec.getSize());
+  for (int i = 0; i < len; ++i) {
+    dest[index[i]] = src[i];
+  }
+}
+}
+
+void GatherAgentLayer::forwardIds(PassType passType) {
+  IVectorPtr realId = realLayers_[0]->getOutputLabel();
+  if (!realId) return;
+
+  IVector::resizeOrCreate(output_.ids, allIds_->getSize(), useGpu_);
+  IVectorPtr outId = output_.ids;
+  idsVec_.resize(idIndex_.size());
+
+  for (size_t i = 0; i < realLayers_.size(); ++i) {
+    const IVectorPtr& realId = realLayers_[i]->getOutputLabel();
+    idsVec_[i] = IVector::create(allIds_->getData() + idIndex_[i],
+                                 /* size */ realId->getSize(),
+                                 useGpu_);
+    execViaCpu(&copyElements, *realId, *idsVec_[i], *outId);
+  }
+}
+
 void GatherAgentLayer::backward(const UpdateCallback& callback) {
   (void)callback;
   const MatrixPtr& outputGrad = getOutputGrad();
@@ -136,23 +170,22 @@ void ScatterAgentLayer::forward(PassType passType) {
   CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
 
   int width = this->getSize();
-  if (realOutArg_.hasSeq()) {
-    forwardSequence(passType);
-  } else if (realOutArg_.value || realOutArg_.ids) {
-    output_.subArgFrom(
-        realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_);
-  } else {  // used in generation
-    if (realLayer_->getOutput().ids) {
-      IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_);
-      output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_);
-    }
-    if (realLayer_->getOutput().value) {
-      int height = ids_->getSize();
-      resetOutput(height, width);
-      const MatrixPtr& outV = getOutputValue();
-      const MatrixPtr& realV = realLayer_->getOutputValue();
-      outV->selectRows(*realV, *ids_);
+  if (selectionMode_) {
+    forwardWithSelection(passType);
+  } else {
+    if (realOutArg_.hasSeq()) {
+      output_.subArgFrom(realOutArg_,
+                         /* offset */ idIndex_,
+                         idSize_,
+                         width,
+                         useGpu_,
+                         /* trans */ false,
+                         /* seqFlag */ true,
+                         /* seqStart */ seqStartPosIndex_,
+                         /* seqSize */ numSequences_);
+    } else {
+      output_.subArgFrom(
+          realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_);
     }
   }
@@ -160,6 +193,8 @@ void ScatterAgentLayer::forward(PassType passType) {
 void ScatterAgentLayer::backward(const UpdateCallback& callback) {
   (void)callback;
 
+  CHECK(!selectionMode_);
+
   const MatrixPtr& outputGrad = realOutArg_.grad;
   const MatrixPtr& realGrad = realLayer_->getOutputGrad();
   if (realGrad) {
@@ -174,42 +209,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
 REGISTER_LAYER(gather_agent, GatherAgentLayer);
 REGISTER_LAYER(scatter_agent, ScatterAgentLayer);
 
-void GatherAgentLayer::forwardIds(PassType passType) {
-  int height = 0;
-  IVectorPtr idReal = realLayers_[0]->getOutputLabel();
-  if (!idReal) return;
-  if (output_.subSequenceStartPositions) {
-    int* starts = output_.subSequenceStartPositions->getMutableData(false);
-    // Gather generator.idsVec
-    // if is beam search generation result. Get first result.
-    if (idReal->getData()[idReal->getSize() - 1] == -1) {
-      for (size_t i = 0; i < realLayers_.size(); ++i) {
-        // The first element stores first result size
-        idReal = realLayers_[i]->getOutputLabel();
-        idReal->subVecFrom(*idReal, 1, idReal->getData()[0]);
-      }
-    }
-    for (size_t i = 0; i < realLayers_.size(); ++i) {
-      CHECK(realLayers_[i]->getOutputLabel());
-      starts[i] = height;
-      height += realLayers_[i]->getOutputLabel()->getSize();
-    }
-    starts[realLayers_.size()] = height;
-    output_.sequenceStartPositions->getMutableData(false)[1] = height;
-    IVector::resizeOrCreate(output_.ids, height, false);
-    for (size_t i = 0; i < realLayers_.size(); ++i) {
-      output_.ids->subVec(starts[i], starts[i + 1] - starts[i])
-          ->copyFrom(*realLayers_[i]->getOutputLabel());
-    }
-  } else {
-    LOG(FATAL) << "Not implemented";
-  }
-}
-
-void ScatterAgentLayer::forwardSequence(PassType passType) {
+void ScatterAgentLayer::forwardWithSelection(PassType passType) {
   Layer::forward(passType);
   CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
@@ -220,17 +220,19 @@ void ScatterAgentLayer::forwardSequence(PassType passType) {
   AsyncGpuBlock asyncGpuBlock;
   REGISTER_TIMER_INFO("SequenceAgentLayerForward", getName().c_str());
 
-  if (realOutArg_.value || realOutArg_.ids) {
-    CHECK(realOutArg_.sequenceStartPositions);
-    output_.subArgFrom(realOutArg_,
-                       /* offset */ idIndex_,
-                       idSize_,
-                       width,
-                       useGpu_,
-                       /* trans */ false,
-                       /* seqFlag */ true,
-                       /* seqStart */ seqStartPosIndex_,
-                       /* seqSize */ numSequences_);
+  if (!input.hasSeq()) {
+    if (realLayer_->getOutput().ids) {
+      IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_);
+      output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_);
+    }
+    if (realLayer_->getOutput().value) {
+      int height = ids_->getSize();
+      resetOutput(height, width);
+      const MatrixPtr& outV = getOutputValue();
+      const MatrixPtr& realV = realLayer_->getOutputValue();
+      outV->selectRows(*realV, *ids_);
+    }
   } else {
+    // Putting the generation logic here is really an ugly hack!
     // used in generation
```
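The new copyElements() helper factors out the scatter pattern dest[index[i]] = src[i] so that GatherAgentLayer::forwardIds() can route each real layer's generated ids into its slot of the concatenated output through execViaCpu. A self-contained sketch of the same pattern, with plain std::vector standing in for paddle's IVector (the two-layer interleaved slot layout here is hypothetical):

```cpp
#include <cassert>
#include <vector>

// Scatter-by-index: write src[i] into dest at position index[i].
// Same contract as the copyElements() added above.
void copyElements(const std::vector<int>& src,
                  const std::vector<int>& index,
                  std::vector<int>& dest) {
  assert(src.size() == index.size());
  for (size_t i = 0; i < index.size(); ++i) {
    dest[index[i]] = src[i];
  }
}

int main() {
  std::vector<int> dest(6, -1);
  // Two "real layers" each own a disjoint set of slots in the output,
  // the way idIndex_ carves allIds_ into per-layer index ranges.
  copyElements({10, 11, 12}, {0, 2, 4}, dest);  // layer 0 -> even slots
  copyElements({20, 21, 22}, {1, 3, 5}, dest);  // layer 1 -> odd slots
  assert((dest == std::vector<int>{10, 20, 11, 21, 12, 22}));
  return 0;
}
```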
paddle/gserver/layers/AgentLayer.h

```diff
@@ -110,6 +110,9 @@ protected:
   // of real layer.
   ICpuGpuVectorPtr inputStartPos_;
 
+  // true for setRealLayer, false for setRealLayerAndOutput
+  bool selectionMode_;
+
 public:
   explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {}
@@ -137,6 +140,7 @@ public:
     } else {
       cpuIds_ = ids_;
     }
+    selectionMode_ = true;
   }
 
   // set real layer and output, [idIndex, idIndex + idSize) of *ids*
@@ -153,6 +157,7 @@ public:
     idIndex_ = idIndex;
     idSize_ = idSize;
     handleBackward_ = handleBackward;
+    selectionMode_ = false;
   }
 
   void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions,
@@ -166,7 +171,7 @@ public:
   void forward(PassType passType) override;
   void backward(const UpdateCallback& callback) override;
 
-  void forwardSequence(PassType passType);
+  void forwardWithSelection(PassType passType);
 };
 
 }  // namespace paddle
```
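selectionMode_ records at setup time which entry point configured the layer, so forward() can dispatch on a single flag instead of inferring the mode from which members happen to be populated. A toy sketch of that dispatch shape (the class and method bodies here are illustrative, not the real API):

```cpp
#include <iostream>

// Sketch of the flag-based dispatch ScatterAgentLayer adopts: each setup
// entry point records the mode, and forward() branches on the flag.
class ScatterSketch {
public:
  void setRealLayer(/* ... */) { selectionMode_ = true; }
  void setRealLayerAndOutput(/* ... */) { selectionMode_ = false; }

  void forward() {
    if (selectionMode_) {
      forwardWithSelection();  // row-selection path, used in generation
    } else {
      forwardSlice();  // plain sub-argument slice of the real output
    }
  }

private:
  void forwardWithSelection() { std::cout << "selection path\n"; }
  void forwardSlice() { std::cout << "slice path\n"; }
  bool selectionMode_ = false;
};

int main() {
  ScatterSketch s;
  s.setRealLayer();
  s.forward();  // selection path
  s.setRealLayerAndOutput();
  s.forward();  // slice path
  return 0;
}
```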
paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf

```diff
@@ -35,7 +35,7 @@ def outer_step(dummy_data):
                                   embedding_size=num_words)]
 
     def inner_step(dummy_memory, predict_word):
         # simplified RNN for testing
         with mixed_layer(size=num_words) as layer:
             layer += full_matrix_projection(input=predict_word,
@@ -46,15 +46,15 @@ def outer_step(dummy_data):
                                             param_attr=ParamAttr(name="wordvec"))
         return out
 
     beam_gen = beam_search(name="rnn_gen",
                            step=inner_step,
                            input=gen_inputs,
                            bos_id=0,
                            eos_id=num_words-1,
                            beam_size=2 if beam_flag else 1,
-                           num_results_per_sample=2 if beam_flag else 1,
+                           num_results_per_sample=1,
                            max_length=10)
     return beam_gen
 
 beam_gen_concat = recurrent_group(name="rnn_gen_concat",
```
paddle/trainer/tests/sample_trainer_rnn_gen.conf

```diff
@@ -33,7 +33,7 @@ gen_inputs = [StaticInput(input=dummy_data, size=2),
                               embedding_size=num_words)]
 
 def step(dummy_memory, predict_word):
     # simplified RNN for testing
     with mixed_layer(size=num_words) as layer:
         layer += full_matrix_projection(input=predict_word,
@@ -44,7 +44,7 @@ def step(dummy_memory, predict_word):
                                         param_attr=ParamAttr(name="wordvec"))
     return out
 
 beam_gen = beam_search(name="rnn_gen",
                        step=step,
                        input=gen_inputs,
@@ -52,7 +52,7 @@ beam_gen = beam_search(name="rnn_gen",
                        eos_id=num_words-1,
                        beam_size=2 if beam_flag else 1,
                        num_results_per_sample=2 if beam_flag else 1,
                        max_length=10)
 
 seqtext_printer_evaluator(input=beam_gen,
                           id_input=sent_id,
```
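Both test configs drive beam_search with beam_size and num_results_per_sample toggled by beam_flag. The following toy beam search (made-up scores over a fixed three-token vocabulary; not Paddle's generator) shows what those knobs control: beam_size caps how many unfinished paths survive each step, and num_results_per_sample caps how many finished paths are returned per input:

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

// A candidate output sequence and its accumulated log-probability.
struct Path {
  std::vector<int> ids;
  double score = 0.0;
};

int main() {
  const int eosId = 2, beamSize = 2, numResultsPerSample = 2, maxLength = 4;
  // Fake per-token log-probabilities, independent of history.
  const double logProb[3] = {-0.5, -1.0, -1.5};

  std::vector<Path> beam(1);  // start from the empty (bos) path
  std::vector<Path> finished;
  for (int step = 0; step < maxLength && !beam.empty(); ++step) {
    std::vector<Path> expanded;
    for (const Path& p : beam) {
      for (int w = 0; w < 3; ++w) {
        Path q = p;
        q.ids.push_back(w);
        q.score += logProb[w];
        if (w == eosId) {
          finished.push_back(q);  // ended paths leave the beam
        } else {
          expanded.push_back(q);
        }
      }
    }
    auto byScore = [](const Path& a, const Path& b) { return a.score > b.score; };
    std::sort(expanded.begin(), expanded.end(), byScore);
    if ((int)expanded.size() > beamSize) expanded.resize(beamSize);  // beam_size
    beam = expanded;
  }
  for (Path& p : beam) finished.push_back(p);  // paths cut off at max_length
  std::sort(finished.begin(), finished.end(),
            [](const Path& a, const Path& b) { return a.score > b.score; });
  // num_results_per_sample: how many finished paths are reported.
  int n = std::min(numResultsPerSample, (int)finished.size());
  for (int i = 0; i < n; ++i) {
    std::printf("result %d: score %.2f, length %zu\n", i, finished[i].score,
                finished[i].ids.size());
  }
  return 0;
}
```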
paddle/utils/CustomStackTrace.h

```diff
@@ -55,13 +55,17 @@ public:
    * Else, just set status to popping.
    */
   void pop(const T& item) {
     pushing() = false;
     auto& s = this->stack();
     if (item == s.top()) {
       s.pop();
     }
   }
 
+  /**
+   * @brief Indicate whether we are at forward or backward stage of computation
+   */
+  void set_stage(bool isForward) { pushing() = isForward; }
+
   /**
    * @brief clear current thread stack.
    */
```
paddle/utils/tests/test_CustomStackTrace.cpp

```diff
@@ -72,7 +72,6 @@ TEST(CustomStackTrace, normalTrain) {
   for (size_t i = 0; i < layerSize; ++i) {
     tracer.push("layer_" + paddle::str::to_string(i));
   }
-  tracer.pop("");
   for (size_t i = 0; i < layerSize; ++i) {
     tracer.pop("layer_" + paddle::str::to_string(layerSize - 1 - i));
   }
```
python/paddle/v2/layer.py

```diff
@@ -45,12 +45,12 @@ __all__ = ['data', 'parse_network']
 def __need_to_keep__(name):
     return name in [
         'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType',
-        'layer_support'
+        'layer_support', 'BaseGeneratedInput'
     ]
 
 
 def __need_to_wrap__(name):
-    return name not in ['AggregateLevel', 'ExpandLevel']
+    return name not in ['AggregateLevel', 'ExpandLevel', 'BaseGeneratedInput']
 
 
 def __convert_name__(inname):
@@ -199,6 +199,15 @@ def __get_used_submodels__(layer_names):
     return submodel_names
 
 
+def __get_submodel_data_out_links__():
+    data_links = set()
+    for submodel in cp.g_config.model_config.sub_models:
+        for link in submodel.out_links:
+            if cp.g_layer_map[link.link_name].type == 'data':
+                data_links.add(link.link_name)
+    return data_links
+
+
 def __get_used_evaluators__(layer_names):
     evaluator_names = set()
     for e in cp.g_config.model_config.evaluators:
@@ -264,6 +273,7 @@ def parse_network(output_layers, extra_layers=None):
     submodel_names = __get_used_submodels__(layer_names)
     submodel_names.add('root')
     evaluator_names = __get_used_evaluators__(layer_names)
+    data_out_links = __get_submodel_data_out_links__()
     input_layer_names = set()
     output_layer_names = set()
@@ -279,7 +289,7 @@ def parse_network(output_layers, extra_layers=None):
             continue
         model_config.layers.extend([l])
         if l.type == 'data':
-            if l.name in model_config.output_layer_names:
+            if l.name in data_out_links:
                 """
                 In text generation, the outlink to save the generated word
                 indices is a data_layer defined in recurrent_group. This
```