机器未来 / Paddle (forked from PaddlePaddle / Paddle)
Commit e732bdd4
Authored Jun 22, 2017 by Yi Wang

Merge branch 'develop' of https://github.com/paddlepaddle/paddle into memory_design

Parents: 0a92908b 6b3c33d0
Showing 12 changed files with 153 additions and 107 deletions (+153 −107)
.travis.yml                                                   +1   −0
paddle/gserver/gradientmachines/NeuralNetwork.cpp             +39  −5
paddle/gserver/gradientmachines/NeuralNetwork.h               +2   −0
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp  +13  −23
paddle/gserver/gradientmachines/RecurrentGradientMachine.h    +1   −2
paddle/gserver/layers/AgentLayer.cpp                          +66  −64
paddle/gserver/layers/AgentLayer.h                            +6   −1
paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf         +4   −4
paddle/trainer/tests/sample_trainer_rnn_gen.conf              +3   −3
paddle/utils/CustomStackTrace.h                               +5   −1
paddle/utils/tests/test_CustomStackTrace.cpp                  +0   −1
python/paddle/v2/layer.py                                     +13  −3
.travis.yml

@@ ... @@
+group: deprecated-2017Q2
 language: cpp
 cache:
   directories:
...
paddle/gserver/gradientmachines/NeuralNetwork.cpp

...
@@ -241,11 +241,14 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs,
     dataLayers_[i]->setData(inArgs[i]);
   }
 
+  gLayerStackTrace.set_stage(true);
+
   {
     for (auto& layer : layers_) {
       REGISTER_TIMER_INFO("ForwardTimer", layer->getName().c_str());
       gLayerStackTrace.push(layer->getName());
       layer->forward(passType);
+      gLayerStackTrace.pop(layer->getName());
     }
   }
...
@@ -254,9 +257,6 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs,
   for (auto& layer : outputLayers_) {
     outArgs->push_back(layer->getOutput());
   }
-  if (passType == PASS_TEST) {
-    gLayerStackTrace.clear();
-  }
 }
 
 void NeuralNetwork::resetState() {
...
@@ -283,9 +283,10 @@ void NeuralNetwork::getState(MachineState& machineState) {
 }
 
 void NeuralNetwork::backward(const UpdateCallback& callback) {
-  gLayerStackTrace.pop("");  // tell layer trace is during backward.
+  gLayerStackTrace.set_stage(false);
   FOR_EACH_R(layer, layers_) {
     REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str());
+    gLayerStackTrace.push((*layer)->getName());
     if ((*layer)->needGradient()) {
       (*layer)->backward(callback);
     }
...
@@ -320,7 +321,7 @@ public:
     }
   }
 
-  virtual void eval(const NeuralNetwork& nn) {
+  virtual void eval(const NeuralNetwork& nn) override {
     for (auto& evaluator : evaluators_) {
       evaluator->eval(nn);
     }
...
@@ -395,6 +396,30 @@ private:
   }
 };
 
+class SubnetEvaluator : public CombinedEvaluator {
+public:
+  SubnetEvaluator(const std::string& layerName,
+                  std::unique_ptr<Evaluator>&& evaluator)
+      : layerName_(layerName) {
+    addEvaluator(std::move(evaluator));
+  }
+  virtual void eval(const NeuralNetwork& nn) override {
+    const LayerPtr& layer = nn.getLayer(layerName_);
+    CHECK(layer) << "Nonexisted layer: " << layerName_ << " in submodel "
+                 << nn.getName();
+    bool accessed = false;
+    layer->accessSubNetwork([this, &accessed](NeuralNetwork& subnet) {
+      subnet.eval(evaluators_[0].get());
+      accessed = true;
+    });
+    CHECK(accessed) << "There is no subnetwork for layer " << layerName_
+                    << " in submodel " << nn.getName();
+  }
+
+protected:
+  std::string layerName_;
+};
+
 Evaluator* NeuralNetwork::makeEvaluator() const {
   CombinedEvaluator* combinedEvaluator = new CombinedEvaluator();
   auto subModelConfig = std::find_if(config_.sub_models().begin(),
...
@@ -421,6 +446,15 @@ Evaluator* NeuralNetwork::makeEvaluator() const {
         combinedEvaluator->addEvaluator(std::move(evaluator));
       }
     }
+    for (auto& layer : layers_) {
+      layer->accessSubNetwork(
+          [layer, combinedEvaluator](NeuralNetwork& subnet) {
+            std::unique_ptr<Evaluator> subEvaluator(new SubnetEvaluator(
+                layer->getName(),
+                std::unique_ptr<Evaluator>(subnet.makeEvaluator())));
+            combinedEvaluator->addEvaluator(std::move(subEvaluator));
+          });
+    }
   } else {
     for (const EvaluatorConfig& evalConfig : config_.evaluators()) {
       std::unique_ptr<Evaluator> evaluator(Evaluator::create(evalConfig));
...
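The SubnetEvaluator added above composes with CombinedEvaluator so that makeEvaluator() can recurse into sub-networks reached through accessSubNetwork. The following is a minimal, self-contained sketch of that composite pattern (simplified stand-in types, not Paddle's actual classes):

    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>

    struct Net {
      std::string name;
    };

    struct Evaluator {
      virtual ~Evaluator() = default;
      virtual void eval(const Net& net) = 0;
    };

    struct PrintEvaluator : Evaluator {
      void eval(const Net& net) override {
        std::cout << "evaluating " << net.name << "\n";
      }
    };

    // Fans one eval() call out to every child evaluator.
    struct CombinedEvaluator : Evaluator {
      std::vector<std::unique_ptr<Evaluator>> children;
      void add(std::unique_ptr<Evaluator> e) { children.push_back(std::move(e)); }
      void eval(const Net& net) override {
        for (auto& c : children) c->eval(net);
      }
    };

    // Redirects eval() from the enclosing network into a sub-network, the way
    // the commit's SubnetEvaluator re-targets its wrapped evaluator.
    struct SubnetEvaluator : Evaluator {
      const Net* subnet;
      std::unique_ptr<Evaluator> inner;
      SubnetEvaluator(const Net* s, std::unique_ptr<Evaluator> e)
          : subnet(s), inner(std::move(e)) {}
      void eval(const Net&) override { inner->eval(*subnet); }
    };

    int main() {
      Net root{"root"}, sub{"rnn_step"};
      CombinedEvaluator top;
      top.add(std::unique_ptr<Evaluator>(new PrintEvaluator));
      top.add(std::unique_ptr<Evaluator>(
          new SubnetEvaluator(&sub, std::unique_ptr<Evaluator>(new PrintEvaluator))));
      top.eval(root);  // prints "evaluating root" then "evaluating rnn_step"
      return 0;
    }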
paddle/gserver/gradientmachines/NeuralNetwork.h

...
@@ -129,6 +129,8 @@ public:
   static NeuralNetwork* newNeuralNetwork(const std::string& name = "",
                                          NeuralNetwork* rootNetwork = nullptr);
 
+  const std::string& getName() const { return subModelName_; }
+
 protected:
   /**
    * The constructor of NeuralNetwork.
...
paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp

...
@@ -208,6 +208,7 @@ void RecurrentGradientMachine::init(
   });
   CHECK(subModelConfig != config.sub_models().end());
   reversed_ = subModelConfig->reversed();
+  generating_ = subModelConfig->has_generator();
 
   inFrameLines_.resize(subModelConfig->in_links_size());
   for (size_t i = 0; i < inFrameLines_.size(); ++i) {
...
@@ -287,10 +288,6 @@ void RecurrentGradientMachine::init(
       parameterIds_.push_back(para->getID());
     }
   }
-
-  if (subModelConfig->evaluator_names_size() > 0) {
-    evaluator_.reset(frames_[0]->makeEvaluator());
-  }
 }
 
 void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) {
...
@@ -538,7 +535,7 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
     The outputs are outFramesLines_[i].agentLayer
   */
 
-  if (inFrameLines_.empty() && passType == PASS_TEST) {
+  if (generating_) {
     generateSequence();
     return;
   }  // else forward..
...
@@ -561,14 +558,14 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
     std::vector<Argument> outArgs;
     frames_[i]->forward(inArgs, &outArgs, passType);
   }
-  if (evaluator_ && passType == PASS_TEST) {
-    this->eval(evaluator_.get());
-  }
 
   reorganizeOutput(passType);
 }
 
 void RecurrentGradientMachine::backward(const UpdateCallback& callback) {
+  if (generating_) {
+    return;
+  }
   REGISTER_TIMER_INFO("RecurrentBwTime", "RecurrentBwTime");
   AsyncGpuBlock asyncGpuBlock;
   for (int i = maxSequenceLength_ - 1; i >= 0; --i) {
...
@@ -577,11 +574,6 @@ void RecurrentGradientMachine::backward(const UpdateCallback& callback) {
   for (auto& memoryFrameLine : memoryFrameLines_) {
     memoryFrameLine.bootLayer->backward(nullptr);
   }
-
-  // call printers here so the gradient can be printed
-  if (evaluator_) {
-    this->eval(evaluator_.get());
-  }
 }
 
 void RecurrentGradientMachine::forwardBackward(
...
@@ -595,9 +587,9 @@ void RecurrentGradientMachine::forwardBackward(
 void RecurrentGradientMachine::eval(Evaluator* evaluator) const {
   // call printers frame by frame
   for (int i = 0; i < maxSequenceLength_; ++i) {
-    LOG(INFO) << "Recurrent Layer Group eval frame " << i << " begin";
+    VLOG(2) << "Recurrent Layer Group eval frame " << i << " begin";
     evaluator->eval(*(frames_[i].get()));
-    LOG(INFO) << "Recurrent Layer Group eval frame " << i << " end";
+    VLOG(2) << "Recurrent Layer Group eval frame " << i << " end";
   }
 }
...
@@ -1093,10 +1085,6 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
     copyDataOutlinkFrame(machineCur);
 
-    // call value printer
-    if (evaluator_) {
-      evaluator_->eval(*(frames_[machineCur].get()));
-    }
     // check eos
     const IVectorPtr& eosVec =
         eosFrameLine_->layers[machineCur]->getOutput().ids;
...
@@ -1321,11 +1309,10 @@ void RecurrentGradientMachine::fillGenOutputs() {
   batchMachineIdVec_.clear();
   generator_.ids.clear();
+  int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
+  starts[0] = 0;
   if (numResults > 1) {
     real* probs = generator_.outArg.in->getData();
-    int* starts =
-        generator_.outArg.sequenceStartPositions->getMutableData(false);
-    starts[0] = 0;
     for (size_t i = 0; i < finalPaths_.size(); ++i) {
       for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
         Path& path = finalPaths_[i][j];
...
@@ -1348,7 +1335,10 @@ void RecurrentGradientMachine::fillGenOutputs() {
   } else {
     for (size_t i = 0; i < finalPaths_.size(); ++i) {
       CHECK(!finalPaths_[i].empty());
-      generator_.ids = finalPaths_[i][0].ids;
+      generator_.ids.insert(generator_.ids.begin(),
+                            finalPaths_[i][0].ids.begin(),
+                            finalPaths_[i][0].ids.end());
+      starts[i + 1] = starts[i] + finalPaths_[i][0].ids.size();
     }
   }
 }
...
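fillGenOutputs() now records sequence boundaries in both branches with the same recurrence: starts[i + 1] = starts[i] + length of sequence i. A standalone illustration of the offset arithmetic, with hypothetical lengths:

    // Illustration only: with three generated sequences of lengths 3, 5, and 2,
    // starts becomes {0, 3, 8, 10}; sequence i occupies ids[starts[i] .. starts[i+1]).
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> lengths = {3, 5, 2};  // hypothetical per-sequence lengths
      std::vector<int> starts(lengths.size() + 1, 0);
      for (size_t i = 0; i < lengths.size(); ++i) {
        starts[i + 1] = starts[i] + lengths[i];  // same recurrence as the diff
      }
      for (int s : starts) std::printf("%d ", s);  // prints: 0 3 8 10
      std::printf("\n");
      return 0;
    }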
paddle/gserver/gradientmachines/RecurrentGradientMachine.h

...
@@ -414,6 +414,7 @@ protected:
     std::vector<int> ids;  // store generated sequences
     Argument outArg;       // final output argument
   };
+  bool generating_;
   Generator generator_;
 
   std::vector<std::unique_ptr<NeuralNetwork>> frames_;
...
@@ -428,8 +429,6 @@ protected:
   std::vector<int>
       parameterIds_;  // parameters actually used by this Layer Group
 
-  // frame printers in this layer group
-  std::unique_ptr<Evaluator> evaluator_;
-
   // store final argument of outFrameLines_
   std::vector<Argument> dataArgs_;
   // store each frame's output argument of outFrameLines_
...
paddle/gserver/layers/AgentLayer.cpp

...
@@ -109,6 +109,40 @@ void GatherAgentLayer::forwardValue(PassType passType) {
   }
 }
 
+namespace {
+
+// dest[index[i]] <- src[i] for each i
+void copyElements(const IVector& srcVec,
+                  const IVector& indexVec,
+                  IVector& destVec) {
+  const int* src = srcVec.getData();
+  const int* index = indexVec.getData();
+  int* dest = destVec.getData();
+  int len = indexVec.getSize();
+  CHECK_EQ(srcVec.getSize(), indexVec.getSize());
+  for (int i = 0; i < len; ++i) {
+    dest[index[i]] = src[i];
+  }
+}
+}
+
+void GatherAgentLayer::forwardIds(PassType passType) {
+  IVectorPtr realId = realLayers_[0]->getOutputLabel();
+  if (!realId) return;
+
+  IVector::resizeOrCreate(output_.ids, allIds_->getSize(), useGpu_);
+  IVectorPtr outId = output_.ids;
+  idsVec_.resize(idIndex_.size());
+
+  for (size_t i = 0; i < realLayers_.size(); ++i) {
+    const IVectorPtr& realId = realLayers_[i]->getOutputLabel();
+    idsVec_[i] = IVector::create(allIds_->getData() + idIndex_[i],
+                                 /* size */ realId->getSize(),
+                                 useGpu_);
+    execViaCpu(&copyElements, *realId, *idsVec_[i], *outId);
+  }
+}
+
 void GatherAgentLayer::backward(const UpdateCallback& callback) {
   (void)callback;
   const MatrixPtr& outputGrad = getOutputGrad();
...
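The new copyElements() is a scatter copy: element i of the source lands at slot index[i] of the destination, which is how forwardIds() merges each real layer's ids into output_.ids at the offsets recorded in idIndex_. A standalone sketch over plain arrays (illustration only; the real code goes through IVector and execViaCpu):

    #include <cassert>
    #include <cstdio>

    // Same loop as copyElements above, over plain arrays: dest[index[i]] = src[i].
    void scatterCopy(const int* src, const int* index, int* dest, int len) {
      for (int i = 0; i < len; ++i) {
        dest[index[i]] = src[i];
      }
    }

    int main() {
      int src[] = {10, 20, 30};
      int index[] = {2, 0, 1};  // hypothetical destination slots
      int dest[3] = {0, 0, 0};
      scatterCopy(src, index, dest, 3);
      assert(dest[0] == 20 && dest[1] == 30 && dest[2] == 10);
      std::printf("%d %d %d\n", dest[0], dest[1], dest[2]);  // prints: 20 30 10
      return 0;
    }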
@@ -136,23 +170,22 @@ void ScatterAgentLayer::forward(PassType passType) {
   CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
 
   int width = this->getSize();
-  if (realOutArg_.hasSeq()) {
-    forwardSequence(passType);
-  } else if (realOutArg_.value || realOutArg_.ids) {
-    output_.subArgFrom(
-        realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_);
-  } else {  // used in generation
-    if (realLayer_->getOutput().ids) {
-      IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_);
-      output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_);
-    }
-    if (realLayer_->getOutput().value) {
-      int height = ids_->getSize();
-      resetOutput(height, width);
-      const MatrixPtr& outV = getOutputValue();
-      const MatrixPtr& realV = realLayer_->getOutputValue();
-      outV->selectRows(*realV, *ids_);
-    }
+  if (selectionMode_) {
+    forwardWithSelection(passType);
+  } else {
+    if (realOutArg_.hasSeq()) {
+      output_.subArgFrom(realOutArg_,
+                         /* offset */ idIndex_,
+                         idSize_,
+                         width,
+                         useGpu_,
+                         /* trans */ false,
+                         /* seqFlag */ true,
+                         /* seqStart */ seqStartPosIndex_,
+                         /* seqSize */ numSequences_);
+    } else {
+      output_.subArgFrom(
+          realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_);
+    }
   }
 }
...
@@ -160,6 +193,8 @@ void ScatterAgentLayer::forward(PassType passType) {
 void ScatterAgentLayer::backward(const UpdateCallback& callback) {
   (void)callback;
 
+  CHECK(!selectionMode_);
+
   const MatrixPtr& outputGrad = realOutArg_.grad;
   const MatrixPtr& realGrad = realLayer_->getOutputGrad();
   if (realGrad) {
...
@@ -174,42 +209,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
 REGISTER_LAYER(gather_agent, GatherAgentLayer);
 REGISTER_LAYER(scatter_agent, ScatterAgentLayer);
 
-void GatherAgentLayer::forwardIds(PassType passType) {
-  int height = 0;
-  IVectorPtr idReal = realLayers_[0]->getOutputLabel();
-  if (!idReal) return;
-  if (output_.subSequenceStartPositions) {
-    int* starts = output_.subSequenceStartPositions->getMutableData(false);
-    // Gather generator.idsVec
-    // if is beam search generation result. Get first result.
-    if (idReal->getData()[idReal->getSize() - 1] == -1) {
-      for (size_t i = 0; i < realLayers_.size(); ++i) {
-        // The first element stores first result size
-        idReal = realLayers_[i]->getOutputLabel();
-        idReal->subVecFrom(*idReal, 1, idReal->getData()[0]);
-      }
-    }
-    for (size_t i = 0; i < realLayers_.size(); ++i) {
-      CHECK(realLayers_[i]->getOutputLabel());
-      starts[i] = height;
-      height += realLayers_[i]->getOutputLabel()->getSize();
-    }
-    starts[realLayers_.size()] = height;
-    output_.sequenceStartPositions->getMutableData(false)[1] = height;
-
-    IVector::resizeOrCreate(output_.ids, height, false);
-    for (size_t i = 0; i < realLayers_.size(); ++i) {
-      output_.ids->subVec(starts[i], starts[i + 1] - starts[i])
-          ->copyFrom(*realLayers_[i]->getOutputLabel());
-    }
-  } else {
-    LOG(FATAL) << "Not implemented";
-  }
-}
-
-void ScatterAgentLayer::forwardSequence(PassType passType) {
+void ScatterAgentLayer::forwardWithSelection(PassType passType) {
   Layer::forward(passType);
   CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
...
@@ -220,17 +220,19 @@ void ScatterAgentLayer::forwardSequence(PassType passType) {
   AsyncGpuBlock asyncGpuBlock;
   REGISTER_TIMER_INFO("SequenceAgentLayerForward", getName().c_str());
 
-  if (realOutArg_.value || realOutArg_.ids) {
-    CHECK(realOutArg_.sequenceStartPositions);
-    output_.subArgFrom(realOutArg_,
-                       /* offset */ idIndex_,
-                       idSize_,
-                       width,
-                       useGpu_,
-                       /* trans */ false,
-                       /* seqFlag */ true,
-                       /* seqStart */ seqStartPosIndex_,
-                       /* seqSize */ numSequences_);
+  if (!input.hasSeq()) {
+    if (realLayer_->getOutput().ids) {
+      IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_);
+      output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_);
+    }
+    if (realLayer_->getOutput().value) {
+      int height = ids_->getSize();
+      resetOutput(height, width);
+
+      const MatrixPtr& outV = getOutputValue();
+      const MatrixPtr& realV = realLayer_->getOutputValue();
+      outV->selectRows(*realV, *ids_);
+    }
   } else {
     // Putting the generation logic here is really an ugly hack!
     // used in generation
...
paddle/gserver/layers/AgentLayer.h

...
@@ -110,6 +110,9 @@ protected:
   // of real layer.
   ICpuGpuVectorPtr inputStartPos_;
 
+  // true for setRealLayer, false for setRealLayerAndOutput
+  bool selectionMode_;
+
 public:
   explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {}
...
@@ -137,6 +140,7 @@ public:
     } else {
       cpuIds_ = ids_;
     }
+    selectionMode_ = true;
   }
 
   // set real layer and output, [idIndex, idIndex + idSize) of *ids*
...
@@ -153,6 +157,7 @@ public:
     idIndex_ = idIndex;
     idSize_ = idSize;
     handleBackward_ = handleBackward;
+    selectionMode_ = false;
   }
 
   void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions,
...
@@ -166,7 +171,7 @@ public:
   void forward(PassType passType) override;
   void backward(const UpdateCallback& callback) override;
 
-  void forwardSequence(PassType passType);
+  void forwardWithSelection(PassType passType);
 };
 
 }  // namespace paddle
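With selectionMode_, ScatterAgentLayer::forward() dispatches on how the layer was initialized rather than on which output fields happen to be set: setRealLayer() selects rows by ids, setRealLayerAndOutput() exposes a sub-argument view. A minimal sketch of this initializer-sets-a-mode-flag pattern (hypothetical names, not Paddle's API):

    #include <iostream>

    class ModalLayer {
    public:
      void setRealLayer() { selectionMode_ = true; }            // selection inputs
      void setRealLayerAndOutput() { selectionMode_ = false; }  // sub-argument inputs
      void forward() {
        if (selectionMode_) {
          std::cout << "forwardWithSelection path\n";  // row selection by ids
        } else {
          std::cout << "subArgFrom path\n";            // view into the real output
        }
      }

    private:
      bool selectionMode_ = false;
    };

    int main() {
      ModalLayer layer;
      layer.setRealLayer();
      layer.forward();  // prints: forwardWithSelection path
      return 0;
    }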
paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf

...
@@ -35,7 +35,7 @@ def outer_step(dummy_data):
                                 embedding_size=num_words)]
 
     def inner_step(dummy_memory, predict_word):
         # simplified RNN for testing
         with mixed_layer(size=num_words) as layer:
             layer += full_matrix_projection(input=predict_word,
...
@@ -46,15 +46,15 @@ def outer_step(dummy_data):
                                     param_attr=ParamAttr(name="wordvec"))
         return out
 
     beam_gen = beam_search(name="rnn_gen",
                            step=inner_step,
                            input=gen_inputs,
                            bos_id=0,
                            eos_id=num_words-1,
                            beam_size=2 if beam_flag else 1,
-                           num_results_per_sample=2 if beam_flag else 1,
+                           num_results_per_sample=1,
                            max_length=10)
     return beam_gen
 
 beam_gen_concat = recurrent_group(name="rnn_gen_concat",
...
paddle/trainer/tests/sample_trainer_rnn_gen.conf

...
@@ -33,7 +33,7 @@ gen_inputs = [StaticInput(input=dummy_data, size=2),
               embedding_size=num_words)]
 
 def step(dummy_memory, predict_word):
     # simplified RNN for testing
     with mixed_layer(size=num_words) as layer:
         layer += full_matrix_projection(input=predict_word,
...
@@ -44,7 +44,7 @@ def step(dummy_memory, predict_word):
                                 param_attr=ParamAttr(name="wordvec"))
     return out
 
 beam_gen = beam_search(name="rnn_gen",
                        step=step,
                        input=gen_inputs,
...
@@ -52,7 +52,7 @@ beam_gen = beam_search(name="rnn_gen",
                        eos_id=num_words-1,
                        beam_size=2 if beam_flag else 1,
                        num_results_per_sample=2 if beam_flag else 1,
                        max_length=10)
 
 seqtext_printer_evaluator(input=beam_gen,
                           id_input=sent_id,
...
paddle/utils/CustomStackTrace.h

...
@@ -55,13 +55,17 @@ public:
    * Else, just set status to popping.
    */
   void pop(const T& item) {
-    pushing() = false;
     auto& s = this->stack();
     if (item == s.top()) {
       s.pop();
     }
   }
 
+  /**
+   * @brief Indicate whether we are at forward or backward stage of computation
+   */
+  void set_stage(bool isForward) { pushing() = isForward; }
+
   /**
    * @brief clear current thread stack.
    */
...
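After this change pop() no longer clears the pushing flag as a side effect; callers mark the stage explicitly, as NeuralNetwork::forward() (set_stage(true)) and backward() (set_stage(false)) do above. A minimal, self-contained sketch of that calling convention (simplified class, not Paddle's real template):

    #include <iostream>
    #include <stack>
    #include <string>

    class MiniStackTrace {
    public:
      void push(const std::string& item) { stack_.push(item); }
      // Pop only when the top matches; mirrors the tolerant pop() in the diff.
      void pop(const std::string& item) {
        if (!stack_.empty() && stack_.top() == item) stack_.pop();
      }
      void set_stage(bool isForward) { pushing_ = isForward; }
      void dump() const {
        std::cout << (pushing_ ? "forward" : "backward") << " stage, depth "
                  << stack_.size() << "\n";
      }

    private:
      std::stack<std::string> stack_;
      bool pushing_ = true;
    };

    int main() {
      MiniStackTrace trace;
      trace.set_stage(true);   // forward pass
      trace.push("fc1");
      trace.pop("fc1");        // balanced push/pop per layer
      trace.set_stage(false);  // backward pass
      trace.push("fc1");
      trace.dump();            // prints: backward stage, depth 1
      return 0;
    }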
paddle/utils/tests/test_CustomStackTrace.cpp

...
@@ -72,7 +72,6 @@ TEST(CustomStackTrace, normalTrain) {
     for (size_t i = 0; i < layerSize; ++i) {
       tracer.push("layer_" + paddle::str::to_string(i));
     }
-    tracer.pop("");
     for (size_t i = 0; i < layerSize; ++i) {
       tracer.pop("layer_" + paddle::str::to_string(layerSize - 1 - i));
     }
...
python/paddle/v2/layer.py

...
@@ -45,12 +45,12 @@ __all__ = ['data', 'parse_network']
 def __need_to_keep__(name):
     return name in [
         'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType',
-        'layer_support'
+        'layer_support', 'BaseGeneratedInput'
     ]
 
 
 def __need_to_wrap__(name):
-    return name not in ['AggregateLevel', 'ExpandLevel']
+    return name not in ['AggregateLevel', 'ExpandLevel', 'BaseGeneratedInput']
 
 
 def __convert_name__(inname):
...
@@ -199,6 +199,15 @@ def __get_used_submodels__(layer_names):
     return submodel_names
 
 
+def __get_submodel_data_out_links__():
+    data_links = set()
+    for submodel in cp.g_config.model_config.sub_models:
+        for link in submodel.out_links:
+            if cp.g_layer_map[link.link_name].type == 'data':
+                data_links.add(link.link_name)
+    return data_links
+
+
 def __get_used_evaluators__(layer_names):
     evaluator_names = set()
     for e in cp.g_config.model_config.evaluators:
...
@@ -264,6 +273,7 @@ def parse_network(output_layers, extra_layers=None):
     submodel_names = __get_used_submodels__(layer_names)
     submodel_names.add('root')
     evaluator_names = __get_used_evaluators__(layer_names)
+    data_out_links = __get_submodel_data_out_links__()
     input_layer_names = set()
     output_layer_names = set()
...
@@ -279,7 +289,7 @@ def parse_network(output_layers, extra_layers=None):
             continue
         model_config.layers.extend([l])
         if l.type == 'data':
-            if l.name in model_config.output_layer_names:
+            if l.name in data_out_links:
                 """
                 In text generation, the outlink to save the generated word
                 indices is a data_layer defined in recurrent_group. This
...