Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
e1f57bfd
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
1 年多 前同步成功
通知
696
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e1f57bfd
编写于
10月 19, 2016
作者:
L
luotao1
提交者:
emailweixu
10月 19, 2016
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add base class for seqlastin/max/average layer (#187)
上级
76fb74dc
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
188 addition
and
226 deletion
+188
-226
paddle/gserver/layers/AverageLayer.cpp
paddle/gserver/layers/AverageLayer.cpp
+8
-66
paddle/gserver/layers/AverageLayer.h
paddle/gserver/layers/AverageLayer.h
+8
-11
paddle/gserver/layers/MaxLayer.cpp
paddle/gserver/layers/MaxLayer.cpp
+8
-71
paddle/gserver/layers/MaxLayer.h
paddle/gserver/layers/MaxLayer.h
+10
-9
paddle/gserver/layers/SequenceLastInstanceLayer.cpp
paddle/gserver/layers/SequenceLastInstanceLayer.cpp
+13
-69
paddle/gserver/layers/SequencePoolLayer.cpp
paddle/gserver/layers/SequencePoolLayer.cpp
+84
-0
paddle/gserver/layers/SequencePoolLayer.h
paddle/gserver/layers/SequencePoolLayer.h
+57
-0
未找到文件。
paddle/gserver/layers/AverageLayer.cpp
浏览文件 @
e1f57bfd
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "AverageLayer.h"
#include "AverageLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Logging.h"
...
@@ -25,13 +24,8 @@ REGISTER_LAYER(average, AverageLayer);
...
@@ -25,13 +24,8 @@ REGISTER_LAYER(average, AverageLayer);
bool
AverageLayer
::
init
(
const
LayerMap
&
layerMap
,
bool
AverageLayer
::
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
const
ParameterMap
&
parameterMap
)
{
/* Initialize the basic parent class */
SequencePoolLayer
::
init
(
layerMap
,
parameterMap
);
Layer
::
init
(
layerMap
,
parameterMap
);
/* initialize biases_ */
if
(
biasParameter_
.
get
()
!=
NULL
)
{
biases_
=
std
::
unique_ptr
<
Weight
>
(
new
Weight
(
1
,
getSize
(),
biasParameter_
));
}
dataMtx_
=
Matrix
::
create
(
nullptr
,
1
,
1
,
false
,
useGpu_
);
dataMtx_
=
Matrix
::
create
(
nullptr
,
1
,
1
,
false
,
useGpu_
);
outMtx_
=
Matrix
::
create
(
nullptr
,
1
,
getSize
(),
false
,
useGpu_
);
outMtx_
=
Matrix
::
create
(
nullptr
,
1
,
getSize
(),
false
,
useGpu_
);
// average strategy
// average strategy
...
@@ -44,57 +38,15 @@ bool AverageLayer::init(const LayerMap& layerMap,
...
@@ -44,57 +38,15 @@ bool AverageLayer::init(const LayerMap& layerMap,
}
else
{
}
else
{
LOG
(
FATAL
)
<<
"Unknown average strategy: "
<<
config_
.
average_strategy
();
LOG
(
FATAL
)
<<
"Unknown average strategy: "
<<
config_
.
average_strategy
();
}
}
// transform to which sequence type
if
(
config_
.
trans_type
()
==
"non-seq"
)
{
type_
=
kNonSeq
;
}
else
if
(
config_
.
trans_type
()
==
"seq"
)
{
type_
=
kSeq
;
}
else
{
LOG
(
FATAL
)
<<
"Unknown trans_type: "
<<
config_
.
trans_type
();
}
setNeedSequenceInfo
(
false
);
return
true
;
return
true
;
}
}
void
AverageLayer
::
forward
(
PassType
passType
)
{
void
AverageLayer
::
forward
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
SequencePoolLayer
::
forward
(
passType
);
// average layer should have exactly 1 input
CHECK_EQ
(
1U
,
inputLayers_
.
size
());
size_t
dim
=
getSize
();
const
Argument
&
input
=
getInput
(
0
);
CHECK
(
input
.
sequenceStartPositions
);
if
(
type_
)
{
CHECK
(
input
.
subSequenceStartPositions
)
<<
"when trans_type = seq, input must hasSubseq"
;
}
int64_t
newBatchSize
=
type_
?
input
.
getNumSubSequences
()
:
input
.
getNumSequences
();
ICpuGpuVectorPtr
startPositions
=
type_
?
input
.
subSequenceStartPositions
:
input
.
sequenceStartPositions
;
const
int
*
starts
=
startPositions
->
getData
(
false
);
size_t
numSequences
=
startPositions
->
getSize
()
-
1
;
// check
CHECK_EQ
(
numSequences
,
(
size_t
)
newBatchSize
);
CHECK_EQ
(
starts
[
numSequences
],
input
.
getBatchSize
());
CHECK_EQ
(
dim
,
input
.
value
->
getWidth
());
resetOutput
(
newBatchSize
,
dim
);
auto
startsPos
=
startPositions
->
getVector
(
useGpu_
);
MatrixPtr
inputValue
=
getInputValue
(
0
);
MatrixPtr
inputValue
=
getInputValue
(
0
);
getOutputValue
()
->
sequenceAvgForward
(
*
inputValue
,
*
startsPos
,
mode_
);
getOutputValue
()
->
sequenceAvgForward
(
*
inputValue
,
*
startPositions_
->
getVector
(
useGpu_
),
mode_
);
/* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
* thus, in this case, output_ has no sequenceStartPositions.
* If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
* case, we should compute the new sequenceStartPositions.
*/
if
(
type_
)
{
output_
.
degradeSequence
(
input
,
useGpu_
);
}
/* add the bias-vector AFTER average operation */
/* add the bias-vector AFTER average operation */
if
(
biases_
.
get
()
!=
NULL
)
{
if
(
biases_
.
get
()
!=
NULL
)
{
...
@@ -106,26 +58,16 @@ void AverageLayer::forward(PassType passType) {
...
@@ -106,26 +58,16 @@ void AverageLayer::forward(PassType passType) {
}
}
void
AverageLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
void
AverageLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
const
Argument
&
input
=
getInput
(
0
);
SequencePoolLayer
::
backward
(
callback
);
ICpuGpuVectorPtr
startPositions
=
type_
?
input
.
subSequenceStartPositions
:
input
.
sequenceStartPositions
;
const
int
*
starts
=
startPositions
->
getData
(
false
);
/* Do derivation */
{
backwardActivation
();
}
if
(
biases_
&&
biases_
->
getWGrad
())
{
biases_
->
getWGrad
()
->
collectBias
(
*
getOutputGrad
(),
1
);
// Increasing the number of gradient
biases_
->
getParameterPtr
()
->
incUpdate
(
callback
);
}
const
int
*
starts
=
startPositions_
->
getData
(
false
);
MatrixPtr
grad
=
getInputGrad
(
0
);
MatrixPtr
grad
=
getInputGrad
(
0
);
if
(
grad
)
{
if
(
grad
)
{
size_t
dim
=
getSize
();
size_t
dim
=
getSize
();
real
*
gradientData
=
getInputGrad
(
0
)
->
getData
();
real
*
gradientData
=
getInputGrad
(
0
)
->
getData
();
real
*
gradient
=
getOutputGrad
()
->
getData
();
real
*
gradient
=
getOutputGrad
()
->
getData
();
size_t
numSequences
=
startPositions
->
getSize
()
-
1
;
size_t
numSequences
=
startPositions
_
->
getSize
()
-
1
;
for
(
size_t
sequenceId
=
0
;
sequenceId
<
numSequences
;
++
sequenceId
)
{
for
(
size_t
sequenceId
=
0
;
sequenceId
<
numSequences
;
++
sequenceId
)
{
// TODO(Dangqingqing) optimization for GPU
// TODO(Dangqingqing) optimization for GPU
int
sequenceLength
=
starts
[
sequenceId
+
1
]
-
starts
[
sequenceId
];
int
sequenceLength
=
starts
[
sequenceId
+
1
]
-
starts
[
sequenceId
];
...
...
paddle/gserver/layers/AverageLayer.h
浏览文件 @
e1f57bfd
...
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include "Layer.h"
#include "
SequencePool
Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/Matrix.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -23,20 +22,21 @@ namespace paddle {
...
@@ -23,20 +22,21 @@ namespace paddle {
/**
/**
* A layer for "internal average" for sequence input.
* A layer for "internal average" for sequence input.
* Input: one or more sequences. Each sequence contains some instances.
* Input: one or more sequences. Each sequence contains some instances.
* If
Averag
eLevel = kNonSeq:
* If
Sequenc
eLevel = kNonSeq:
* Output: output size is the number of input sequences (NOT input instances)
* Output: output size is the number of input sequences (NOT input instances)
* output[i] = average_{for each instance in this sequence}{input[i]}
* output[i] = average_{for each instance in this sequence}{input[i]}
* If
Averag
eLevel = kSeq:
* If
Sequenc
eLevel = kSeq:
* Check input sequence must has sub-sequence
* Check input sequence must has sub-sequence
* Output: output size is the number of input sub-sequences
* Output: output size is the number of input sub-sequences
* output[i] = average_{for each instance in this sub-sequence}{input[i]}
* output[i] = average_{for each instance in this sub-sequence}{input[i]}
*
* The config file api is pooling_layer.
*/
*/
class
AverageLayer
:
public
SequencePoolLayer
{
class
AverageLayer
:
public
Layer
{
public:
public:
enum
AverageStrategy
{
kAverage
=
0
,
kSum
=
1
,
kAverageSquareRootN
=
2
};
enum
AverageStrategy
{
kAverage
=
0
,
kSum
=
1
,
kAverageSquareRootN
=
2
};
e
num
AverageLevel
{
kNonSeq
=
0
,
kSeq
=
1
};
e
xplicit
AverageLayer
(
const
LayerConfig
&
config
)
explicit
AverageLayer
(
const
LayerConfig
&
config
)
:
Layer
(
config
)
{}
:
SequencePool
Layer
(
config
)
{}
~
AverageLayer
()
{}
~
AverageLayer
()
{}
...
@@ -46,11 +46,8 @@ public:
...
@@ -46,11 +46,8 @@ public:
void
backward
(
const
UpdateCallback
&
callback
=
nullptr
);
void
backward
(
const
UpdateCallback
&
callback
=
nullptr
);
protected:
protected:
std
::
unique_ptr
<
Weight
>
biases_
;
MatrixPtr
outMtx_
;
MatrixPtr
outMtx_
;
MatrixPtr
dataMtx_
;
MatrixPtr
dataMtx_
;
int
mode_
;
int
mode_
;
int
type_
;
};
};
}
// namespace paddle
}
// namespace paddle
paddle/gserver/layers/MaxLayer.cpp
浏览文件 @
e1f57bfd
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "MaxLayer.h"
#include "MaxLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "paddle/utils/Stat.h"
...
@@ -21,55 +20,11 @@ namespace paddle {
...
@@ -21,55 +20,11 @@ namespace paddle {
REGISTER_LAYER
(
max
,
MaxLayer
);
REGISTER_LAYER
(
max
,
MaxLayer
);
bool
MaxLayer
::
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
/* Initialize the basic parent class */
Layer
::
init
(
layerMap
,
parameterMap
);
/* initialize biases_ */
if
(
biasParameter_
.
get
()
!=
NULL
)
{
biases_
=
std
::
unique_ptr
<
Weight
>
(
new
Weight
(
1
,
getSize
(),
biasParameter_
));
}
// transform to which sequence type
if
(
config_
.
trans_type
()
==
"non-seq"
)
{
type_
=
kNonSeq
;
}
else
if
(
config_
.
trans_type
()
==
"seq"
)
{
type_
=
kSeq
;
}
else
{
LOG
(
FATAL
)
<<
"Unknown trans_type: "
<<
config_
.
trans_type
();
}
setNeedSequenceInfo
(
false
);
return
true
;
}
void
MaxLayer
::
forward
(
PassType
passType
)
{
void
MaxLayer
::
forward
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
SequencePoolLayer
::
forward
(
passType
);
// max layer should have exactly 1 input
CHECK_EQ
(
1U
,
inputLayers_
.
size
());
size_t
dim
=
getSize
();
const
Argument
&
input
=
getInput
(
0
);
int64_t
newBatchSize
=
type_
?
input
.
getNumSubSequences
()
:
input
.
getNumSequences
();
ICpuGpuVectorPtr
startPositions
=
type_
?
input
.
subSequenceStartPositions
:
input
.
sequenceStartPositions
;
auto
starts
=
startPositions
->
getVector
(
useGpu_
);
size_t
numSequences
=
startPositions
->
getSize
()
-
1
;
CHECK_EQ
(
dim
,
input
.
value
->
getWidth
());
IVector
::
resizeOrCreate
(
maxIndex_
,
newBatchSize_
*
getSize
(),
CHECK_EQ
(
numSequences
,
(
size_t
)
newBatchSize
);
useGpu
(
deviceId_
));
CHECK_EQ
(
startPositions
->
getData
(
false
)[
numSequences
],
input
.
getBatchSize
());
if
(
type_
)
{
// when trans_type = seq, input must hasSubseq
CHECK_EQ
(
input
.
hasSubseq
(),
1UL
);
}
// reset output: resize to "num of sequences", not "batch size".
resetOutput
(
newBatchSize
,
dim
);
IVector
::
resizeOrCreate
(
maxIndex_
,
newBatchSize
*
dim
,
useGpu
(
deviceId_
));
maxIndex_
->
zeroMem
();
maxIndex_
->
zeroMem
();
MatrixPtr
inputValue
=
getInputValue
(
0
);
MatrixPtr
inputValue
=
getInputValue
(
0
);
...
@@ -77,16 +32,8 @@ void MaxLayer::forward(PassType passType) {
...
@@ -77,16 +32,8 @@ void MaxLayer::forward(PassType passType) {
{
{
REGISTER_TIMER_INFO
(
"MaxLayerForward"
,
getName
().
c_str
());
REGISTER_TIMER_INFO
(
"MaxLayerForward"
,
getName
().
c_str
());
outputValue
->
maxSequenceForward
(
*
inputValue
,
*
starts
,
*
maxIndex_
);
outputValue
->
maxSequenceForward
(
}
*
inputValue
,
*
startPositions_
->
getVector
(
useGpu_
),
*
maxIndex_
);
/* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
* thus, in this case, output_ has no cpuSequenceStartPositions.
* If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
* case, we should compute the new cpuSequenceStartPositions.
*/
if
(
type_
)
{
output_
.
degradeSequence
(
input
,
useGpu_
);
}
}
if
(
config_
.
output_max_index
())
{
if
(
config_
.
output_max_index
())
{
...
@@ -104,24 +51,14 @@ void MaxLayer::forward(PassType passType) {
...
@@ -104,24 +51,14 @@ void MaxLayer::forward(PassType passType) {
void
MaxLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
void
MaxLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
CHECK
(
!
config_
.
output_max_index
())
CHECK
(
!
config_
.
output_max_index
())
<<
"backward is not available when output_max_index is set"
;
<<
"backward is not available when output_max_index is set"
;
/* Do derivation */
{
backwardActivation
();
}
SequencePoolLayer
::
backward
(
callback
);
if
(
biases_
&&
biases_
->
getWGrad
())
{
biases_
->
getWGrad
()
->
collectBias
(
*
getOutputGrad
(),
1
);
// Increasing the number of gradient
biases_
->
getParameterPtr
()
->
incUpdate
(
callback
);
}
MatrixPtr
inputGrad
=
getInputGrad
(
0
);
MatrixPtr
inputGrad
=
getInputGrad
(
0
);
MatrixPtr
outputGrad
=
getOutputGrad
();
MatrixPtr
outputGrad
=
getOutputGrad
();
if
(
inputGrad
)
{
if
(
inputGrad
)
{
ICpuGpuVectorPtr
starts
=
type_
?
getInput
(
0
).
subSequenceStartPositions
:
getInput
(
0
).
sequenceStartPositions
;
REGISTER_TIMER_INFO
(
"MaxLayerBackward"
,
getName
().
c_str
());
REGISTER_TIMER_INFO
(
"MaxLayerBackward"
,
getName
().
c_str
());
inputGrad
->
maxSequenceBackward
(
*
outputGrad
,
inputGrad
->
maxSequenceBackward
(
*
(
starts
->
getVector
(
useGpu_
)),
*
maxIndex_
);
*
outputGrad
,
*
(
startPositions_
->
getVector
(
useGpu_
)),
*
maxIndex_
);
}
}
}
}
...
...
paddle/gserver/layers/MaxLayer.h
浏览文件 @
e1f57bfd
...
@@ -15,7 +15,7 @@ limitations under the License. */
...
@@ -15,7 +15,7 @@ limitations under the License. */
#pragma once
#pragma once
#include "Layer.h"
#include "
SequencePool
Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/ThreadLocal.h"
#include "paddle/utils/ThreadLocal.h"
...
@@ -24,29 +24,30 @@ namespace paddle {
...
@@ -24,29 +24,30 @@ namespace paddle {
/**
/**
* A layer for "internal max" for sequence input.
* A layer for "internal max" for sequence input.
* Input: one or more sequences. Each sequence contains some instances.
* Input: one or more sequences. Each sequence contains some instances.
* If
Max
Level = kNonSeq:
* If
Sequence
Level = kNonSeq:
* Output: output size is the number of input sequences (NOT input instances)
* Output: output size is the number of input sequences (NOT input instances)
* output[i] = max_{for each instance in this sequence}{input[i]}
* output[i] = max_{for each instance in this sequence}{input[i]}
* If
Max
Level = kSeq:
* If
Sequence
Level = kSeq:
* Check input sequence must has sub-sequence
* Check input sequence must has sub-sequence
* Output: output size is the number of input sub-sequences
* Output: output size is the number of input sub-sequences
* output[i] = max_{for each instance in this sub-sequence}{input[i]}
* output[i] = max_{for each instance in this sub-sequence}{input[i]}
*
* The config file api is pooling_layer.
*/
*/
class
MaxLayer
:
public
Layer
{
class
MaxLayer
:
public
SequencePool
Layer
{
protected:
protected:
std
::
unique_ptr
<
Weight
>
biases_
;
// maxIndex_[i][j] = k : the value at (i, j) is from input[k].
// maxIndex_[i][j] = k : the value at (i, j) is from input[k].
IVectorPtr
maxIndex_
;
IVectorPtr
maxIndex_
;
int
type_
;
public:
public:
explicit
MaxLayer
(
const
LayerConfig
&
config
)
:
Layer
(
config
)
{}
explicit
MaxLayer
(
const
LayerConfig
&
config
)
:
SequencePoolLayer
(
config
)
{}
enum
MaxLevel
{
kNonSeq
=
0
,
kSeq
=
1
};
~
MaxLayer
()
{}
~
MaxLayer
()
{}
bool
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
);
bool
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
return
SequencePoolLayer
::
init
(
layerMap
,
parameterMap
);
}
void
forward
(
PassType
passType
);
void
forward
(
PassType
passType
);
void
backward
(
const
UpdateCallback
&
callback
=
nullptr
);
void
backward
(
const
UpdateCallback
&
callback
=
nullptr
);
...
...
paddle/gserver/layers/SequenceLastInstanceLayer.cpp
浏览文件 @
e1f57bfd
...
@@ -15,7 +15,7 @@ limitations under the License. */
...
@@ -15,7 +15,7 @@ limitations under the License. */
#include "paddle/utils/Logging.h"
#include "paddle/utils/Logging.h"
#include "Layer.h"
#include "
SequencePool
Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/Stat.h"
#include "paddle/utils/Stat.h"
...
@@ -30,19 +30,18 @@ namespace paddle {
...
@@ -30,19 +30,18 @@ namespace paddle {
* Check input sequence must has sub-sequence
* Check input sequence must has sub-sequence
* Output: a sequence containing only the last instance of each sub-sequence
* Output: a sequence containing only the last instance of each sub-sequence
* of the input sequence
* of the input sequence
*
* The config file api is last_seq and first_seq.
*/
*/
class
SequenceLastInstanceLayer
:
public
Layer
{
class
SequenceLastInstanceLayer
:
public
SequencePool
Layer
{
protected:
protected:
std
::
unique_ptr
<
Weight
>
biases_
;
MatrixPtr
tmpSrc_
;
MatrixPtr
tmpSrc_
;
MatrixPtr
tmpDest_
;
MatrixPtr
tmpDest_
;
enum
SequenceLevel
{
kNonSeq
=
0
,
kSeq
=
1
};
int
type_
;
public:
public:
explicit
SequenceLastInstanceLayer
(
const
LayerConfig
&
config
)
explicit
SequenceLastInstanceLayer
(
const
LayerConfig
&
config
)
:
Layer
(
config
)
{}
:
SequencePool
Layer
(
config
)
{}
~
SequenceLastInstanceLayer
()
{}
~
SequenceLastInstanceLayer
()
{}
...
@@ -56,56 +55,20 @@ REGISTER_LAYER(seqlastins, SequenceLastInstanceLayer);
...
@@ -56,56 +55,20 @@ REGISTER_LAYER(seqlastins, SequenceLastInstanceLayer);
bool
SequenceLastInstanceLayer
::
init
(
const
LayerMap
&
layerMap
,
bool
SequenceLastInstanceLayer
::
init
(
const
LayerMap
&
layerMap
,
const
ParameterMap
&
parameterMap
)
{
const
ParameterMap
&
parameterMap
)
{
/* Initialize the basic parent class */
SequencePoolLayer
::
init
(
layerMap
,
parameterMap
);
Layer
::
init
(
layerMap
,
parameterMap
);
// seqlastins layer should have exactly 1 input
CHECK_EQ
(
1U
,
inputLayers_
.
size
());
/* initialize biases_ */
if
(
biasParameter_
.
get
()
!=
NULL
)
{
biases_
=
std
::
unique_ptr
<
Weight
>
(
new
Weight
(
1
,
getSize
(),
biasParameter_
));
}
tmpSrc_
=
tmpSrc_
=
Matrix
::
create
(
nullptr
,
/* height= */
1
,
1
,
/* trans= */
false
,
useGpu_
);
Matrix
::
create
(
nullptr
,
/* height= */
1
,
1
,
/* trans= */
false
,
useGpu_
);
tmpDest_
=
tmpDest_
=
Matrix
::
create
(
nullptr
,
/* height= */
1
,
1
,
/* trans= */
false
,
useGpu_
);
Matrix
::
create
(
nullptr
,
/* height= */
1
,
1
,
/* trans= */
false
,
useGpu_
);
// transform to which sequence type
if
(
config_
.
trans_type
()
==
"non-seq"
)
{
type_
=
kNonSeq
;
}
else
if
(
config_
.
trans_type
()
==
"seq"
)
{
type_
=
kSeq
;
}
else
{
LOG
(
FATAL
)
<<
"Unknown trans_type: "
<<
config_
.
trans_type
();
}
setNeedSequenceInfo
(
false
);
return
true
;
return
true
;
}
}
void
SequenceLastInstanceLayer
::
forward
(
PassType
passType
)
{
void
SequenceLastInstanceLayer
::
forward
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
SequencePoolLayer
::
forward
(
passType
);
size_t
dim
=
getSize
();
const
Argument
&
input
=
getInput
(
0
);
// check
const
int
*
starts
=
startPositions_
->
getData
(
false
);
CHECK
(
input
.
sequenceStartPositions
);
if
(
type_
)
{
CHECK
(
input
.
subSequenceStartPositions
)
<<
"when trans_type = seq, input must hasSubseq"
;
}
auto
startPositions
=
type_
?
input
.
subSequenceStartPositions
->
getVector
(
false
)
:
input
.
sequenceStartPositions
->
getVector
(
false
);
size_t
height
=
type_
?
input
.
getNumSubSequences
()
:
input
.
getNumSequences
();
CHECK_EQ
(
dim
,
input
.
value
->
getWidth
());
CHECK_EQ
(
startPositions
->
getData
()[
height
],
input
.
getBatchSize
());
CHECK_EQ
(
height
,
startPositions
->
getSize
()
-
1
);
reserveOutput
(
height
,
dim
);
const
int
*
starts
=
startPositions
->
getData
();
MatrixPtr
inputValue
=
getInputValue
(
0
);
MatrixPtr
inputValue
=
getInputValue
(
0
);
MatrixPtr
outputValue
=
getOutputValue
();
MatrixPtr
outputValue
=
getOutputValue
();
...
@@ -113,21 +76,13 @@ void SequenceLastInstanceLayer::forward(PassType passType) {
...
@@ -113,21 +76,13 @@ void SequenceLastInstanceLayer::forward(PassType passType) {
AsyncGpuBlock
asyncGpuBlock
;
AsyncGpuBlock
asyncGpuBlock
;
REGISTER_TIMER_INFO
(
"SequenceLastInstanceLayerForward"
,
getName
().
c_str
());
REGISTER_TIMER_INFO
(
"SequenceLastInstanceLayerForward"
,
getName
().
c_str
());
for
(
size_t
seqId
=
0
;
seqId
<
height
;
++
seqId
)
{
for
(
size_t
seqId
=
0
;
seqId
<
newBatchSize_
;
++
seqId
)
{
int
insId
=
int
insId
=
config_
.
select_first
()
?
starts
[
seqId
]
:
starts
[
seqId
+
1
]
-
1
;
config_
.
select_first
()
?
starts
[
seqId
]
:
starts
[
seqId
+
1
]
-
1
;
outputValue
->
subMatrix
(
seqId
,
1
,
tmpDest_
)
outputValue
->
subMatrix
(
seqId
,
1
,
tmpDest_
)
->
assign
(
*
(
inputValue
->
subMatrix
(
insId
,
1
,
tmpSrc_
)));
->
assign
(
*
(
inputValue
->
subMatrix
(
insId
,
1
,
tmpSrc_
)));
}
}
/* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
* thus, in this case, output_ has no sequenceStartPositions.
* If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
* case, we should compute the new sequenceStartPositions.
*/
if
(
type_
)
{
output_
.
degradeSequence
(
input
,
useGpu_
);
}
}
}
if
(
biases_
.
get
()
!=
NULL
)
{
if
(
biases_
.
get
()
!=
NULL
)
{
...
@@ -139,23 +94,12 @@ void SequenceLastInstanceLayer::forward(PassType passType) {
...
@@ -139,23 +94,12 @@ void SequenceLastInstanceLayer::forward(PassType passType) {
}
}
void
SequenceLastInstanceLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
void
SequenceLastInstanceLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
/* activation, should set to 'linear' in most cases */
SequencePoolLayer
::
backward
(
callback
);
backwardActivation
();
if
(
biases_
&&
biases_
->
getWGrad
())
{
biases_
->
getWGrad
()
->
collectBias
(
*
getOutputGrad
(),
1
);
// Increasing the number of gradient
biases_
->
getParameterPtr
()
->
incUpdate
(
callback
);
}
MatrixPtr
inputGrad
=
getInputGrad
(
0
);
MatrixPtr
inputGrad
=
getInputGrad
(
0
);
MatrixPtr
outputGrad
=
getOutputGrad
();
MatrixPtr
outputGrad
=
getOutputGrad
();
auto
startPositions
=
const
int
*
starts
=
startPositions_
->
getData
(
false
);
type_
?
getInput
(
0
).
subSequenceStartPositions
->
getVector
(
false
)
size_t
numSequences
=
startPositions_
->
getSize
()
-
1
;
:
getInput
(
0
).
sequenceStartPositions
->
getVector
(
false
);
const
int
*
starts
=
startPositions
->
getData
();
size_t
numSequences
=
startPositions
->
getSize
()
-
1
;
if
(
inputGrad
)
{
if
(
inputGrad
)
{
AsyncGpuBlock
asyncGpuBlock
;
AsyncGpuBlock
asyncGpuBlock
;
...
...
paddle/gserver/layers/SequencePoolLayer.cpp
0 → 100644
浏览文件 @
e1f57bfd
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "SequencePoolLayer.h"
namespace
paddle
{
/**
 * Common initialization for the sequence pooling layers.
 *
 * Runs Layer::init(), requires exactly one input layer, creates the bias
 * weight when a bias parameter is configured, and decodes the configured
 * trans_type ("non-seq" or "seq") into type_.
 *
 * @return always true; an unrecognized trans_type is reported via LOG(FATAL).
 */
bool SequencePoolLayer::init(const LayerMap& layerMap,
                             const ParameterMap& parameterMap) {
  // Base-class setup comes first.
  Layer::init(layerMap, parameterMap);

  // A pooling layer (seqlastins/max/average) consumes exactly one input.
  CHECK_EQ(1U, inputLayers_.size());

  // The bias is optional: one row, as wide as the layer output.
  if (biasParameter_.get() != nullptr) {
    biases_.reset(new Weight(1, getSize(), biasParameter_));
  }

  // Decide which sequence level the input is pooled down to.
  const auto& transType = config_.trans_type();
  if (transType == "seq") {
    type_ = kSeq;
  } else if (transType == "non-seq") {
    type_ = kNonSeq;
  } else {
    LOG(FATAL) << "Unknown trans_type: " << transType;
  }

  setNeedSequenceInfo(false);
  return true;
}
/**
 * Shared forward step for the sequence pooling layers.
 *
 * Selects the start-position vector that matches type_ (sub-sequence starts
 * when type_ is kSeq, sequence starts otherwise), validates it against the
 * input, caches it in startPositions_ together with newBatchSize_, and resets
 * the output to newBatchSize_ rows of getSize() columns. Derived layers read
 * startPositions_ / newBatchSize_ afterwards to do the actual pooling.
 *
 * All validation failures are reported through CHECK / CHECK_EQ.
 */
void SequencePoolLayer::forward(PassType passType) {
  Layer::forward(passType);

  const Argument& input = getInput(0);

  // Validate the sub-sequence information up front: it is dereferenced below,
  // so checking it later would crash before this message could be printed.
  if (type_) {
    CHECK(input.subSequenceStartPositions)
        << "when trans_type = seq, input must hasSubseq";
  }

  newBatchSize_ = type_ ? input.getNumSubSequences() : input.getNumSequences();
  size_t dim = getSize();
  // The layer does not change the feature width.
  CHECK_EQ(dim, input.value->getWidth());

  startPositions_ =
      type_ ? input.subSequenceStartPositions : input.sequenceStartPositions;
  // Guard the non-seq path as well: the input must carry sequence info.
  CHECK(startPositions_);

  auto starts = startPositions_->getVector(false);
  // Confirm the vector holds newBatchSize_ + 1 entries before indexing
  // element newBatchSize_, otherwise the read below could be out of bounds.
  CHECK_EQ(newBatchSize_, starts->getSize() - 1);
  // The last start position must equal the number of input instances.
  CHECK_EQ(starts->getData()[newBatchSize_], input.getBatchSize());

  // Output has one row per (sub-)sequence, not one per input instance.
  resetOutput(newBatchSize_, dim);

  /* If type_ = kNonSeq, both seq has or not has sub-seq degrade to a non-seq,
   * thus, in this case, output_ has no sequenceStartPositions.
   * If type_ = kSeq, seq has sub-seq degrades to a seq, thus, only in this
   * case, we should compute the new sequenceStartPositions.
   */
  if (type_) {
    output_.degradeSequence(input, useGpu_);
  }
}
/**
 * Shared backward step for the sequence pooling layers.
 *
 * Back-propagates through the activation and, when a bias with a gradient
 * buffer exists, accumulates the output gradient into the bias gradient and
 * notifies the parameter through incUpdate(callback). Propagating the
 * gradient to the input is left to the derived layers.
 */
void SequencePoolLayer::backward(const UpdateCallback& callback) {
  // Derivative of the activation function.
  backwardActivation();

  // Nothing more to do without a trainable bias.
  if (!biases_ || !biases_->getWGrad()) {
    return;
  }

  biases_->getWGrad()->collectBias(*getOutputGrad(), 1);

  // Tell the parameter that one more gradient contribution has arrived.
  biases_->getParameterPtr()->incUpdate(callback);
}
}
// namespace paddle
paddle/gserver/layers/SequencePoolLayer.h
0 → 100644
浏览文件 @
e1f57bfd
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Layer.h"
#include "paddle/math/Matrix.h"
namespace
paddle
{
/**
 * A base layer for SequenceLastInstanceLayer/AverageLayer/MaxLayer.
 *
 * Input: one or more sequences. Each sequence contains some instances.
 * If SequenceLevel = kNonSeq:
 *    Output: output size is the number of input sequences (NOT input instances)
 *    output[i] = seqlastin/average/max_{for each instance in this
 * sequence}{input[i]}
 * If SequenceLevel = kSeq:
 *    The input sequence must have sub-sequences (checked in forward()).
 *    Output: output size is the number of input sub-sequences
 *    output[i] = seqlastin/average/max_{for each instance in this
 * sub-sequence}{input[i]}
 *
 * The config file api is pooling_layer (average/max) and
 * last_seq / first_seq (last instance).
 */
class SequencePoolLayer : public Layer {
protected:
  /// Sequence level the input is pooled down to.
  enum SequenceLevel { kNonSeq = 0, kSeq = 1 };

  /// One of SequenceLevel; assigned by init() from the configured trans_type.
  /// Given a defined value so it is never read while indeterminate.
  int type_ = kNonSeq;

  /// Optional bias; created by init() only when a bias parameter exists.
  std::unique_ptr<Weight> biases_;

  /// Number of output rows; assigned by forward() to the number of
  /// sequences (kNonSeq) or sub-sequences (kSeq) of the input.
  size_t newBatchSize_ = 0;

  /// Start positions matching type_; assigned by forward() and read by the
  /// derived layers in their own forward()/backward().
  ICpuGpuVectorPtr startPositions_;

public:
  explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {}

  virtual ~SequencePoolLayer() {}

  /// Common setup: base init, single-input check, bias and trans_type.
  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

  /// Validates the input and prepares newBatchSize_, startPositions_ and the
  /// output buffer for the derived layer.
  void forward(PassType passType);

  /// Back-propagates the activation and accumulates the bias gradient.
  void backward(const UpdateCallback& callback = nullptr);
};
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录