Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
65969dad
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 2 年 前同步成功
通知
708
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
65969dad
编写于
6月 16, 2017
作者:
Y
yangyaming
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add DetectionOutputLayer and MultiBoxLossLayer.
上级
14c0e71d
变更
12
显示空白变更内容
内联
并排
Showing
12 changed file
with
1239 addition
and
1 deletion
+1239
-1
paddle/gserver/layers/DetectionOutputLayer.cpp
paddle/gserver/layers/DetectionOutputLayer.cpp
+154
-0
paddle/gserver/layers/DetectionOutputLayer.h
paddle/gserver/layers/DetectionOutputLayer.h
+81
-0
paddle/gserver/layers/MultiBoxLossLayer.cpp
paddle/gserver/layers/MultiBoxLossLayer.cpp
+365
-0
paddle/gserver/layers/MultiBoxLossLayer.h
paddle/gserver/layers/MultiBoxLossLayer.h
+103
-0
paddle/gserver/tests/CMakeLists.txt
paddle/gserver/tests/CMakeLists.txt
+7
-0
paddle/gserver/tests/LayerGradUtil.cpp
paddle/gserver/tests/LayerGradUtil.cpp
+25
-0
paddle/gserver/tests/LayerGradUtil.h
paddle/gserver/tests/LayerGradUtil.h
+17
-1
paddle/gserver/tests/test_DetectionOutput.cpp
paddle/gserver/tests/test_DetectionOutput.cpp
+191
-0
paddle/gserver/tests/test_LayerGrad.cpp
paddle/gserver/tests/test_LayerGrad.cpp
+64
-0
proto/ModelConfig.proto
proto/ModelConfig.proto
+25
-0
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+46
-0
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+161
-0
未找到文件。
paddle/gserver/layers/DetectionOutputLayer.cpp
0 → 100644
浏览文件 @
65969dad
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "DetectionOutputLayer.h"
namespace
paddle
{
// Associates the type name `detection_output` with DetectionOutputLayer.
// NOTE(review): presumably this is what lets a layer config whose type is
// "detection_output" instantiate this class (the unit test in
// test_DetectionOutput.cpp sets exactly that type) -- confirm against the
// REGISTER_LAYER macro definition.
REGISTER_LAYER
(
detection_output
,
DetectionOutputLayer
);
/**
 * Initializes the base layer and caches the detection-output settings stored
 * in the configuration of the first input.
 *
 * @param layerMap      forwarded unchanged to Layer::init.
 * @param parameterMap  forwarded unchanged to Layer::init.
 * @return false when Layer::init reports failure, true otherwise.
 */
bool DetectionOutputLayer::init(const LayerMap& layerMap,
                                const ParameterMap& parameterMap) {
  // Do not go on reading the configuration if the base set-up failed;
  // previously the result of Layer::init was silently discarded.
  if (!Layer::init(layerMap, parameterMap)) {
    return false;
  }

  // All settings of this layer live on the config of input 0.
  const auto& layerConf = config_.inputs(0).detection_output_conf();
  numClasses_ = layerConf.num_classes();
  inputNum_ = layerConf.input_num();
  nmsThreshold_ = layerConf.nms_threshold();
  confidenceThreshold_ = layerConf.confidence_threshold();
  nmsTopK_ = layerConf.nms_top_k();
  keepTopK_ = layerConf.keep_top_k();
  backgroundId_ = layerConf.background_id();
  return true;
}
/**
 * Forward pass of the detection output layer.
 *
 * Steps, in order:
 *  1. Concatenate all location inputs and all confidence inputs into one
 *     buffer each, permuting every input from NCHW to NHWC.
 *  2. When running on GPU, copy the buffers and the prior-box values into
 *     CPU matrices; everything after this point reads the CPU copies.
 *  3. Apply softmax to the confidence buffer (rows of numClasses_ scores).
 *  4. Decode one bounding box per (image, prior) pair from the prior box,
 *     its variances and the predicted location values.
 *  5. Select the detections to keep (getDetectionIndices) and write them to
 *     the output matrix, which has 7 columns per kept detection.
 *
 * @param passType forwarded to Layer::forward.
 */
void DetectionOutputLayer::forward(PassType passType) {
  Layer::forward(passType);
  size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight();

  // Total number of elements over all location / confidence inputs.
  locSizeSum_ = 0;
  confSizeSum_ = 0;
  for (size_t n = 0; n < inputNum_; ++n) {
    locSizeSum_ += getInputValue(*getLocInputLayer(n))->getElementCnt();
    confSizeSum_ += getInputValue(*getConfInputLayer(n))->getElementCnt();
  }

  Matrix::resizeOrCreate(locTmpBuffer_, 1, locSizeSum_, false, useGpu_);
  Matrix::resizeOrCreate(confTmpBuffer_,
                         confSizeSum_ / numClasses_,
                         numClasses_,
                         false,
                         useGpu_);
  locBuffer_ = locTmpBuffer_;
  confBuffer_ = confTmpBuffer_;

  // Append every input to the concatenated buffers (NCHW -> NHWC).
  size_t locOffset = 0;
  size_t confOffset = 0;
  const auto& layerConf = config_.inputs(0).detection_output_conf();
  for (size_t n = 0; n < inputNum_; ++n) {
    const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n));
    const MatrixPtr inConf = getInputValue(*getConfInputLayer(n));

    // Fall back to the configured frame size when the input carries none.
    size_t height = getInput(*getLocInputLayer(n)).getFrameHeight();
    if (!height) height = layerConf.height();
    size_t width = getInput(*getLocInputLayer(n)).getFrameWidth();
    if (!width) width = layerConf.width();

    locOffset += appendWithPermute(*inLoc,
                                   height,
                                   width,
                                   locSizeSum_,
                                   locOffset,
                                   batchSize,
                                   *locBuffer_,
                                   kNCHWToNHWC);
    confOffset += appendWithPermute(*inConf,
                                    height,
                                    width,
                                    confSizeSum_,
                                    confOffset,
                                    batchSize,
                                    *confBuffer_,
                                    kNCHWToNHWC);
  }
  CHECK_EQ(locOffset, locSizeSum_ / batchSize);
  CHECK_EQ(confOffset, confSizeSum_ / batchSize);

  // The decoding / selection below runs on CPU data, so mirror GPU buffers.
  MatrixPtr priorValue;
  if (useGpu_) {
    Matrix::resizeOrCreate(locCpuBuffer_, 1, locSizeSum_, false, false);
    Matrix::resizeOrCreate(
        confCpuBuffer_, confSizeSum_ / numClasses_, numClasses_, false, false);
    MatrixPtr priorTmpValue = getInputValue(*getPriorBoxLayer());
    Matrix::resizeOrCreate(
        priorCpuValue_, 1, priorTmpValue->getElementCnt(), false, false);

    locCpuBuffer_->copyFrom(*locTmpBuffer_);
    confCpuBuffer_->copyFrom(*confTmpBuffer_);
    priorCpuValue_->copyFrom(*priorTmpValue);

    locBuffer_ = locCpuBuffer_;
    confBuffer_ = confCpuBuffer_;
    priorValue = priorCpuValue_;
  } else {
    priorValue = getInputValue(*getPriorBoxLayer());
  }
  confBuffer_->softmax(*confBuffer_);

  // Each prior occupies 8 values in the prior-box data.
  size_t numPriors = priorValue->getElementCnt() / 8;

  // Raw pointers are taken only now, after the GPU/CPU buffer switch above.
  real* priorData = priorValue->getData();
  real* locData = locBuffer_->getData();

  // Decode the boxes directly into their final slot: the per-image vector is
  // created inside allDecodedBBoxes instead of being built separately and
  // then deep-copied, and its capacity is reserved up front.
  vector<vector<NormalizedBBox>> allDecodedBBoxes;
  allDecodedBBoxes.reserve(batchSize);
  for (size_t n = 0; n < batchSize; ++n) {
    allDecodedBBoxes.emplace_back();
    vector<NormalizedBBox>& decodedBBoxes = allDecodedBBoxes.back();
    decodedBBoxes.reserve(numPriors);
    for (size_t i = 0; i < numPriors; ++i) {
      real* priorEntry = priorData + i * 8;
      real* locPred = locData + n * numPriors * 4 + i * 4;

      vector<NormalizedBBox> priorBBoxVec;
      getBBoxFromPriorData(priorEntry, 1, priorBBoxVec);
      vector<vector<real>> priorBBoxVar;
      getBBoxVarFromPriorData(priorEntry, 1, priorBBoxVar);

      // The 4 predicted location values belonging to this prior.
      vector<real> locPredData(locPred, locPred + 4);
      decodedBBoxes.push_back(
          decodeBBoxWithVar(priorBBoxVec[0], priorBBoxVar[0], locPredData));
    }
  }

  // Choose which detections survive, then emit one 7-column row for each.
  vector<map<size_t, vector<size_t>>> allIndices;
  size_t numKept = getDetectionIndices(confBuffer_->getData(),
                                       numPriors,
                                       numClasses_,
                                       backgroundId_,
                                       batchSize,
                                       confidenceThreshold_,
                                       nmsTopK_,
                                       nmsThreshold_,
                                       keepTopK_,
                                       allDecodedBBoxes,
                                       &allIndices);

  resetOutput(numKept, 7);
  MatrixPtr outV = getOutputValue();
  getDetectionOutput(confBuffer_->getData(),
                     numKept,
                     numPriors,
                     numClasses_,
                     batchSize,
                     allIndices,
                     allDecodedBBoxes,
                     *outV);
}
}
// namespace paddle
paddle/gserver/layers/DetectionOutputLayer.h
0 → 100644
浏览文件 @
65969dad
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <map>
#include <vector>
#include "DetectionUtil.h"
#include "Layer.h"
using
std
::
vector
;
using
std
::
map
;
using
std
::
pair
;
namespace
paddle
{
/**
* The detection output layer for an SSD detection task. This layer applies
* non-maximum suppression to all predicted bounding boxes and keeps the
* top-K bounding boxes.
* - Input: This layer needs three input layers: the first input layer
* is the priorbox layer. The remaining two input layers are convolution
* layers that generate the bbox location offsets and the classification
* confidences.
* - Output: The predicted bounding box locations.
*/
class DetectionOutputLayer : public Layer {
public:
  explicit DetectionOutputLayer(const LayerConfig& config) : Layer(config) {}

  // Reads the detection-output settings from the layer configuration.
  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

  // Produces the kept detections; see the definition for the exact steps.
  void forward(PassType passType);

  // Intentionally empty: this layer does not propagate any gradient.
  void backward(const UpdateCallback& callback = nullptr) {}

protected:
  // Input layout: [priorbox, loc_0 .. loc_{inputNum_-1},
  //                conf_0 .. conf_{inputNum_-1}].
  inline LayerPtr getPriorBoxLayer() { return inputLayers_[0]; }

  inline LayerPtr getLocInputLayer(size_t index) {
    return inputLayers_[1 + index];
  }

  inline LayerPtr getConfInputLayer(size_t index) {
    return inputLayers_[1 + inputNum_ + index];
  }

private:
  // Settings copied from the configuration in init(). They are given default
  // values so that the object never holds indeterminate scalars.
  size_t numClasses_ = 0;  // number of classes
  size_t inputNum_ = 0;    // number of input layers
  real nmsThreshold_ = 0;
  real confidenceThreshold_ = 0;
  size_t nmsTopK_ = 0;
  size_t keepTopK_ = 0;
  size_t backgroundId_ = 0;

  // Total element counts of all location / confidence inputs; recomputed on
  // every forward().
  size_t locSizeSum_ = 0;
  size_t confSizeSum_ = 0;

  // Buffers currently in use by forward(): they alias either the *Tmp*
  // buffers or, when running on GPU, the *Cpu* copies.
  MatrixPtr locBuffer_;
  MatrixPtr confBuffer_;
  // Concatenated inputs, allocated on the layer's own device.
  MatrixPtr locTmpBuffer_;
  MatrixPtr confTmpBuffer_;
  // CPU copies, only filled when the layer runs on GPU.
  MatrixPtr priorCpuValue_;
  MatrixPtr locCpuBuffer_;
  MatrixPtr confCpuBuffer_;
};
}
// namespace paddle
paddle/gserver/layers/MultiBoxLossLayer.cpp
0 → 100644
浏览文件 @
65969dad
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MultiBoxLossLayer.h"
#include <float.h>
#include <vector>
#include "DataLayer.h"
using
std
::
vector
;
using
std
::
map
;
using
std
::
pair
;
namespace
paddle
{
// Associates the type name `multibox_loss` with MultiBoxLossLayer.
// NOTE(review): presumably this is what lets a layer config whose type is
// "multibox_loss" instantiate this class (the gradient test sets exactly that
// type) -- confirm against the REGISTER_LAYER macro definition.
REGISTER_LAYER
(
multibox_loss
,
MultiBoxLossLayer
);
/**
 * Initializes the base layer and caches the multibox-loss settings stored in
 * the configuration of the first input.
 *
 * @param layerMap      forwarded unchanged to Layer::init.
 * @param parameterMap  forwarded unchanged to Layer::init.
 * @return false when Layer::init reports failure, true otherwise.
 */
bool MultiBoxLossLayer::init(const LayerMap& layerMap,
                             const ParameterMap& parameterMap) {
  // Do not go on reading the configuration if the base set-up failed;
  // previously the result of Layer::init was silently discarded.
  if (!Layer::init(layerMap, parameterMap)) {
    return false;
  }

  // Bind by reference: a plain `auto` here copied the whole config message.
  const auto& layerConf = config_.inputs(0).multibox_loss_conf();
  numClasses_ = layerConf.num_classes();
  inputNum_ = layerConf.input_num();
  overlapThreshold_ = layerConf.overlap_threshold();
  negPosRatio_ = layerConf.neg_pos_ratio();
  negOverlap_ = layerConf.neg_overlap();
  backgroundId_ = layerConf.background_id();
  return true;
}
/**
 * Forward pass of the multibox loss.
 *
 * Steps, in order:
 *  1. Concatenate all location inputs and all confidence inputs into one
 *     buffer each, permuting every input from NCHW to NHWC.
 *  2. When running on GPU, copy the buffers, the prior boxes and the labels
 *     into CPU matrices; everything after this point reads the CPU copies.
 *  3. Match priors to ground-truth boxes and pick negative samples
 *     (generateMatchIndices); results are kept in allMatchIndices_ /
 *     allNegIndices_ for backward().
 *  4. Location loss: smooth L1 between the predictions of matched priors and
 *     the encoded ground truth, divided by the number of matches.
 *  5. Confidence loss: softmax + one-hot cross entropy over matched priors
 *     and negatives, divided by the number of matches.
 *  6. Write locLoss_ + confLoss_ into every row of the output.
 *
 * @param passType forwarded to Layer::forward.
 */
void MultiBoxLossLayer::forward(PassType passType) {
  Layer::forward(passType);
  size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight();
  resetOutput(batchSize, 1);

  // Total number of elements over all location / confidence inputs.
  locSizeSum_ = 0;
  confSizeSum_ = 0;
  for (size_t n = 0; n < inputNum_; ++n) {
    locSizeSum_ += getInputValue(*getLocInputLayer(n))->getElementCnt();
    confSizeSum_ += getInputValue(*getConfInputLayer(n))->getElementCnt();
  }

  // locBuffer layout:
  // | xmin1 | ymin1 | xmax1 | ymax1 | xmin2 ......
  Matrix::resizeOrCreate(locTmpBuffer_, 1, locSizeSum_, false, useGpu_);
  locBuffer_ = locTmpBuffer_;

  // confBuffer layout:
  // | class1 score | class2 score | ... | classN score | class1 score | ......
  Matrix::resizeOrCreate(confTmpBuffer_, 1, confSizeSum_, false, useGpu_);
  confBuffer_ = confTmpBuffer_;

  // Concatenate the location data and the confidence score data.
  size_t locOffset = 0;
  size_t confOffset = 0;
  const auto& layerConf = config_.inputs(0).multibox_loss_conf();
  for (size_t n = 0; n < inputNum_; ++n) {
    const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n));
    const MatrixPtr inConf = getInputValue(*getConfInputLayer(n));

    // Fall back to the configured frame size when the input carries none.
    size_t height = getInput(*getLocInputLayer(n)).getFrameHeight();
    if (!height) height = layerConf.height();
    size_t width = getInput(*getLocInputLayer(n)).getFrameWidth();
    if (!width) width = layerConf.width();

    locOffset += appendWithPermute(*inLoc,
                                   height,
                                   width,
                                   locSizeSum_,
                                   locOffset,
                                   batchSize,
                                   *locBuffer_,
                                   kNCHWToNHWC);
    confOffset += appendWithPermute(*inConf,
                                    height,
                                    width,
                                    confSizeSum_,
                                    confOffset,
                                    batchSize,
                                    *confBuffer_,
                                    kNCHWToNHWC);
  }
  CHECK_EQ(locOffset, locSizeSum_ / batchSize);
  CHECK_EQ(confOffset, confSizeSum_ / batchSize);

  // priorValue layout:
  // | xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var
  // | xmin2 | ......
  MatrixPtr priorValue;

  // labelValue layout:
  // | class1_1 | xmin1_1 | ymin1_1 | xmax1_1 | ymax1_1 | difficult1_1 | ......
  MatrixPtr labelValue;

  // Copy data from GPU to CPU if the layer runs on GPU.
  if (useGpu_) {
    Matrix::resizeOrCreate(locCpuBuffer_, 1, locSizeSum_, false, false);
    Matrix::resizeOrCreate(confCpuBuffer_, 1, confSizeSum_, false, false);
    MatrixPtr priorTmpValue = getInputValue(*getPriorBoxLayer());
    Matrix::resizeOrCreate(
        priorCpuValue_, 1, priorTmpValue->getElementCnt(), false, false);
    MatrixPtr labelTmpValue = getInputValue(*getLabelLayer());
    Matrix::resizeOrCreate(labelCpuValue_,
                           labelTmpValue->getHeight(),
                           labelTmpValue->getWidth(),
                           false,
                           false);

    locCpuBuffer_->copyFrom(*locTmpBuffer_);
    confCpuBuffer_->copyFrom(*confTmpBuffer_);
    priorCpuValue_->copyFrom(*priorTmpValue);
    labelCpuValue_->copyFrom(*labelTmpValue);

    locBuffer_ = locCpuBuffer_;
    confBuffer_ = confCpuBuffer_;
    priorValue = priorCpuValue_;
    labelValue = labelCpuValue_;
  } else {
    priorValue = getInputValue(*getPriorBoxLayer());
    labelValue = getInputValue(*getLabelLayer());
  }

  // Get max scores for each prior bbox. Used in negative mining.
  vector<vector<real>> allMaxConfScore;
  numPriors_ = priorValue->getElementCnt() / 8;
  getMaxConfidenceScores(confBuffer_->getData(),
                         batchSize,
                         numPriors_,
                         numClasses_,
                         backgroundId_,
                         &allMaxConfScore);

  // Match prior bboxes to ground-truth bboxes.
  Argument label = getInput(*getLabelLayer());
  const int* labelIndex = label.sequenceStartPositions->getData(false);
  size_t seqNum = label.getNumSequences();
  numMatches_ = 0;
  numNegs_ = 0;
  allMatchIndices_.clear();
  allNegIndices_.clear();

  pair<size_t, size_t> retPair = generateMatchIndices(*priorValue,
                                                      numPriors_,
                                                      *labelValue,
                                                      labelIndex,
                                                      seqNum,
                                                      allMaxConfScore,
                                                      batchSize,
                                                      overlapThreshold_,
                                                      negOverlap_,
                                                      negPosRatio_,
                                                      &allMatchIndices_,
                                                      &allNegIndices_);
  numMatches_ = retPair.first;
  numNegs_ = retPair.second;

  // Raw pointers are taken only now, after the GPU/CPU buffer switch above.
  real* priorData = priorValue->getData();
  real* labelData = labelValue->getData();
  real* locData = locBuffer_->getData();
  real* confData = confBuffer_->getData();

  // BBox location smooth L1 loss.
  locLoss_ = 0.0;
  if (numMatches_ >= 1) {
    MatrixPtr locLossOutput;
    Matrix::resizeOrCreate(locLossOutput, numMatches_ * 4, 1, false, false);
    Matrix::resizeOrCreate(locGTData_, numMatches_ * 4, 1, false, false);
    Matrix::resizeOrCreate(locDiff_, numMatches_ * 4, 1, false, false);
    locDiff_->zeroMem();

    real* locDiffData = locDiff_->getData();
    const size_t locSizePerSample = locBuffer_->getElementCnt() / batchSize;

    vector<real> locGTData;
    locGTData.reserve(numMatches_ * 4);
    size_t count = 0;
    for (size_t n = 0; n < batchSize; ++n) {
      for (size_t i = 0; i < numPriors_; ++i) {
        const int gtIdx = allMatchIndices_[n][i];
        if (gtIdx == -1) continue;  // this prior matches no ground truth

        // Gather the 4 predicted location values of the matched prior.
        // locDiff_ holds the raw predictions here; backward() overwrites it
        // with the gradient.
        real* locPred = locData + n * locSizePerSample + i * 4;
        for (size_t k = 0; k < 4; ++k) {
          locDiffData[count++] = locPred[k];
        }

        // Encode the matched ground-truth box relative to the prior box.
        real* priorEntry = priorData + i * 8;
        vector<NormalizedBBox> priorBBoxVec;
        getBBoxFromPriorData(priorEntry, 1, priorBBoxVec);
        vector<vector<real>> priorBBoxVar;
        getBBoxVarFromPriorData(priorEntry, 1, priorBBoxVar);

        // Each label row has 6 values; labelIndex[n] is the first row of
        // sample n.
        size_t labelOffset = (labelIndex[n] + gtIdx) * 6;
        vector<NormalizedBBox> gtBBoxVec;
        getBBoxFromLabelData(labelData + labelOffset, 1, gtBBoxVec);

        vector<real> gtEncode;
        encodeBBoxWithVar(
            priorBBoxVec[0], priorBBoxVar[0], gtBBoxVec[0], gtEncode);
        locGTData.insert(locGTData.end(), gtEncode.begin(), gtEncode.end());
      }
    }
    locGTData_->copyFrom(locGTData.data(), numMatches_ * 4);
    locLossOutput->smoothL1(*locDiff_, *locGTData_, 0.0);
    locLoss_ = locLossOutput->getSum() / numMatches_;
  }

  // BBox confidence softmax loss.
  confLoss_ = 0;
  numConf_ = numMatches_ + numNegs_;
  if (numConf_ >= 1) {
    Matrix::resizeOrCreate(confProb_, numConf_, numClasses_, false, false);
    IVector::resizeOrCreate(confGTData_, numConf_, false);
    confProb_->zeroMem();

    real* confProbData = confProb_->getData();
    int* confLabelData = confGTData_->getData();

    // Row `count` of confProb_ receives the class scores of the count-th
    // selected prior; confGTData_[count] receives its target class.
    // (An unused vector that used to collect the same scores was removed.)
    size_t count = 0;
    for (size_t n = 0; n < batchSize; ++n) {
      // Matched (positive) priors: the target is the ground-truth class.
      for (size_t i = 0; i < numPriors_; ++i) {
        if (allMatchIndices_[n][i] == -1) continue;
        size_t labelOffset = (labelIndex[n] + allMatchIndices_[n][i]) * 6;
        const int gtLabel = (labelData + labelOffset)[0];
        confLabelData[count] = gtLabel;

        size_t confOffset = n * numPriors_ * numClasses_ + i * numClasses_;
        for (size_t j = 0; j < numClasses_; ++j) {
          confProbData[count * numClasses_ + j] = (confData + confOffset)[j];
        }
        ++count;
      }
      // Negative mining samples: the target is the background class.
      for (size_t i = 0; i < allNegIndices_[n].size(); ++i) {
        confLabelData[count] = backgroundId_;
        size_t confOffset = n * numPriors_ * numClasses_ +
                            allNegIndices_[n][i] * numClasses_;
        for (size_t j = 0; j < numClasses_; ++j) {
          confProbData[count * numClasses_ + j] = (confData + confOffset)[j];
        }
        ++count;
      }
    }
    confProb_->softmax(*confProb_);

    MatrixPtr confLossOutput;
    Matrix::resizeOrCreate(confLossOutput, numConf_, 1, false, false);
    confLossOutput->oneHotCrossEntropy(*confProb_, *confGTData_);
    // NOTE(review): this divides by numMatches_, not numConf_. If negatives
    // can ever be selected while numMatches_ == 0, the result is not finite
    // -- confirm that generateMatchIndices rules that out.
    confLoss_ = confLossOutput->getSum() / numMatches_;
  }

  // Every sample of the batch reports the same total loss.
  real loss = locLoss_ + confLoss_;
  MatrixPtr outV = getOutputValue();
  vector<real> tmp(batchSize, loss);
  outV->copyFrom(tmp.data(), batchSize);
}
void
MultiBoxLossLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
size_t
batchSize
=
getInputValue
(
*
getLocInputLayer
(
0
))
->
getHeight
();
locBuffer_
->
zeroMem
();
confBuffer_
->
zeroMem
();
// Back propagate on location prediction
if
(
numMatches_
>=
1
)
{
MatrixPtr
locDiffBuffer
;
Matrix
::
resizeOrCreate
(
locDiffBuffer
,
numMatches_
*
4
,
1
,
false
,
false
);
locDiffBuffer
->
smoothL1Bp
(
*
locDiff_
,
*
locGTData_
,
0.0
);
locDiff_
->
copyFrom
(
*
locDiffBuffer
);
// scale gradient
for
(
size_t
i
=
0
;
i
<
numMatches_
*
4
;
++
i
)
locDiff_
->
getData
()[
i
]
*=
(
1.
/
numMatches_
);
// Copy gradient back
size_t
count
=
0
;
for
(
size_t
n
=
0
;
n
<
batchSize
;
++
n
)
for
(
size_t
i
=
0
;
i
<
numPriors_
;
++
i
)
{
if
(
allMatchIndices_
[
n
][
i
]
==
-
1
)
continue
;
real
*
locDiffData
=
locBuffer_
->
getData
()
+
n
*
numPriors_
*
4
+
i
*
4
;
locDiffData
[
0
]
=
(
locDiff_
->
getData
()
+
count
*
4
)[
0
];
locDiffData
[
1
]
=
(
locDiff_
->
getData
()
+
count
*
4
)[
1
];
locDiffData
[
2
]
=
(
locDiff_
->
getData
()
+
count
*
4
)[
2
];
locDiffData
[
3
]
=
(
locDiff_
->
getData
()
+
count
*
4
)[
3
];
++
count
;
}
CHECK_EQ
(
count
,
numMatches_
);
}
if
(
numConf_
>=
1
)
{
for
(
size_t
i
=
0
;
i
<
numConf_
;
++
i
)
confProb_
->
getData
()[
i
*
numClasses_
+
confGTData_
->
getData
()[
i
]]
-=
1
;
for
(
size_t
i
=
0
;
i
<
numConf_
*
numClasses_
;
++
i
)
confProb_
->
getData
()[
i
]
*=
(
1.
/
numMatches_
);
size_t
count
=
0
;
for
(
size_t
n
=
0
;
n
<
batchSize
;
++
n
)
{
for
(
size_t
i
=
0
;
i
<
numPriors_
;
++
i
)
{
if
(
allMatchIndices_
[
n
][
i
]
==
-
1
)
continue
;
real
*
confDiffData
=
confBuffer_
->
getData
()
+
n
*
numPriors_
*
numClasses_
+
i
*
numClasses_
;
for
(
size_t
j
=
0
;
j
<
numClasses_
;
++
j
)
confDiffData
[
j
]
=
(
confProb_
->
getData
()
+
count
*
numClasses_
)[
j
];
++
count
;
}
for
(
size_t
i
=
0
;
i
<
allNegIndices_
[
n
].
size
();
++
i
)
{
int
idx
=
allNegIndices_
[
n
][
i
];
real
*
confDiffData
=
confBuffer_
->
getData
()
+
n
*
numPriors_
*
numClasses_
+
idx
*
numClasses_
;
for
(
size_t
j
=
0
;
j
<
numClasses_
;
++
j
)
confDiffData
[
j
]
=
(
confProb_
->
getData
()
+
count
*
numClasses_
)[
j
];
++
count
;
}
}
CHECK_EQ
(
count
,
numConf_
);
}
if
(
useGpu_
)
{
locTmpBuffer_
->
copyFrom
(
*
locCpuBuffer_
);
confTmpBuffer_
->
copyFrom
(
*
confCpuBuffer_
);
locBuffer_
=
locTmpBuffer_
;
confBuffer_
=
confTmpBuffer_
;
}
// copy back
size_t
locOffset
=
0
;
size_t
confOffset
=
0
;
auto
layerConf
=
config_
.
inputs
(
0
).
multibox_loss_conf
();
for
(
size_t
n
=
0
;
n
<
inputNum_
;
++
n
)
{
const
MatrixPtr
inLocG
=
getInputGrad
(
*
getLocInputLayer
(
n
));
const
MatrixPtr
inConfG
=
getInputGrad
(
*
getConfInputLayer
(
n
));
size_t
height
=
getInput
(
*
getLocInputLayer
(
n
)).
getFrameHeight
();
if
(
!
height
)
height
=
layerConf
.
height
();
size_t
width
=
getInput
(
*
getLocInputLayer
(
n
)).
getFrameWidth
();
if
(
!
width
)
width
=
layerConf
.
width
();
// NHWC to NCHW
MatrixPtr
locGBuffer
;
Matrix
::
resizeOrCreate
(
locGBuffer
,
inLocG
->
getHeight
(),
inLocG
->
getWidth
(),
false
,
useGpu_
);
MatrixPtr
confGBuffer
;
Matrix
::
resizeOrCreate
(
confGBuffer
,
inConfG
->
getHeight
(),
inConfG
->
getWidth
(),
false
,
useGpu_
);
locOffset
+=
decomposeWithPermute
(
*
locBuffer_
,
height
,
width
,
locSizeSum_
,
locOffset
,
batchSize
,
*
locGBuffer
,
kNHWCToNCHW
);
inLocG
->
add
(
*
locGBuffer
);
confOffset
+=
decomposeWithPermute
(
*
confBuffer_
,
height
,
width
,
confSizeSum_
,
confOffset
,
batchSize
,
*
confGBuffer
,
kNHWCToNCHW
);
inConfG
->
add
(
*
confGBuffer
);
}
CHECK_EQ
(
locOffset
,
locSizeSum_
/
batchSize
);
CHECK_EQ
(
confOffset
,
confSizeSum_
/
batchSize
);
}
}
// namespace paddle
paddle/gserver/layers/MultiBoxLossLayer.h
0 → 100644
浏览文件 @
65969dad
/* copyright (c) 2016 paddlepaddle authors. all rights reserve.
licensed under the apache license, version 2.0 (the "license");
you may not use this file except in compliance with the license.
you may obtain a copy of the license at
http://www.apache.org/licenses/license-2.0
unless required by applicable law or agreed to in writing, software
distributed under the license is distributed on an "as is" basis,
without warranties or conditions of any kind, either express or implied.
see the license for the specific language governing permissions and
limitations under the license. */
#pragma once
#include <vector>
#include "CostLayer.h"
#include "DataLayer.h"
#include "DetectionUtil.h"
#include "Layer.h"
using
std
::
vector
;
using
std
::
pair
;
namespace
paddle
{
/**
* The multibox loss layer for an SSD detection task.
* The loss is composed of the location loss and the confidence loss.
* The location loss is a smooth L1 loss and the confidence loss is
* a softmax loss.
* - Input: This layer needs four input layers: the first input layer
* is the priorbox layer and the second layer is a label layer.
* The remaining two input layers are convolution layers that generate
* the bbox location offsets and the classification confidences.
* - Output: The Single Shot Multibox Detection loss value.
* Reference:
* Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
* Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector
*/
class MultiBoxLossLayer : public CostLayer {
public:
  explicit MultiBoxLossLayer(const LayerConfig& config) : CostLayer(config) {}

  // Reads the multibox-loss settings from the layer configuration.
  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

  // Computes the loss and keeps the matching results for backward().
  void forward(PassType passType);

  // Adds the loss gradient to the location / confidence input gradients.
  void backward(const UpdateCallback& callback = nullptr);

  // Intentionally empty: forward() and backward() above do all the work.
  void forwardImp(Matrix& output, Argument& label, Matrix& cost) {}

  void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {}

protected:
  // Input layout: [priorbox, label, loc_0 .. loc_{inputNum_-1},
  //                conf_0 .. conf_{inputNum_-1}].
  inline LayerPtr getPriorBoxLayer() { return inputLayers_[0]; }

  inline LayerPtr getLabelLayer() { return inputLayers_[1]; }

  inline LayerPtr getLocInputLayer(size_t index) {
    return inputLayers_[2 + index];
  }

  inline LayerPtr getConfInputLayer(size_t index) {
    return inputLayers_[2 + inputNum_ + index];
  }

protected:
  // Settings copied from the configuration in init(). They are given default
  // values so that the object never holds indeterminate scalars.
  size_t numClasses_ = 0;
  real overlapThreshold_ = 0;
  real negPosRatio_ = 0;
  real negOverlap_ = 0;
  size_t inputNum_ = 0;
  size_t backgroundId_ = 0;

  // Results of the most recent forward(); backward() reads the counters.
  real locLoss_ = 0;
  real confLoss_ = 0;

  size_t numPriors_ = 0;
  size_t numMatches_ = 0;  // matched (positive) priors in the batch
  size_t numNegs_ = 0;     // selected negative priors in the batch
  size_t numConf_ = 0;     // numMatches_ + numNegs_
  size_t locSizeSum_ = 0;
  size_t confSizeSum_ = 0;

  // Per sample: for every prior the matched ground-truth index, or -1.
  vector<vector<int>> allMatchIndices_;
  // Per sample: the prior indices selected as negatives.
  vector<vector<int>> allNegIndices_;
  // Targets of the location / confidence losses.
  MatrixPtr locGTData_;
  IVectorPtr confGTData_;

  // Buffers currently in use: they alias either the *Tmp* buffers or, when
  // running on GPU, the *Cpu* copies.
  MatrixPtr locBuffer_;
  MatrixPtr confBuffer_;
  // Predictions gathered in forward(), overwritten by gradients in backward().
  MatrixPtr locDiff_;
  MatrixPtr confProb_;

  // CPU copies, only filled when the layer runs on GPU.
  MatrixPtr labelCpuValue_;
  MatrixPtr priorCpuValue_;
  MatrixPtr locCpuBuffer_;
  MatrixPtr confCpuBuffer_;
  // Concatenated inputs, allocated on the layer's own device.
  MatrixPtr locTmpBuffer_;
  MatrixPtr confTmpBuffer_;
};
}
// namespace paddle
paddle/gserver/tests/CMakeLists.txt
浏览文件 @
65969dad
...
...
@@ -45,6 +45,13 @@ add_unittest_without_exec(test_PriorBox
add_test
(
NAME test_PriorBox
COMMAND test_PriorBox
)
################# test_DetectionOutput #######################
add_unittest_without_exec
(
test_DetectionOutput
test_DetectionOutput.cpp
LayerGradUtil.cpp
)
add_test
(
NAME test_DetectionOutput
COMMAND test_DetectionOutput
)
################# test_ConvUnify #######################
add_unittest_without_exec
(
test_ConvUnify
test_ConvUnify.cpp
...
...
paddle/gserver/tests/LayerGradUtil.cpp
浏览文件 @
65969dad
...
...
@@ -387,6 +387,31 @@ void initDataLayer(TestConfig testConf,
data
.
value
->
sigmoid
(
*
data
.
value
);
data
.
grad
->
zeroMem
();
break
;
case
INPUT_SELF_DEFINE_DATA
:
{
size_t
height
=
testConf
.
inputDefs
[
i
].
selfDefinedData
->
getHeight
();
size_t
width
=
testConf
.
inputDefs
[
i
].
selfDefinedData
->
getWidth
();
CHECK_GT
(
static_cast
<
int
>
(
height
),
0
);
CHECK_GT
(
static_cast
<
int
>
(
width
),
0
);
data
.
value
=
Matrix
::
create
(
height
,
width
,
false
,
useGpu
);
data
.
grad
=
Matrix
::
create
(
height
,
width
,
false
,
useGpu
);
data
.
value
->
copyFrom
(
*
testConf
.
inputDefs
[
i
].
selfDefinedData
);
data
.
grad
->
zeroMem
();
const
std
::
vector
<
int
>&
labelSeqStartPositions
=
testConf
.
inputDefs
[
i
].
labelSeqStartPositions
;
if
(
labelSeqStartPositions
.
size
()
!=
0
)
{
CHECK
(
!
sequenceStartPositions
);
CHECK_GE
(
static_cast
<
int
>
(
labelSeqStartPositions
.
size
()),
2
);
sequenceStartPositions
=
ICpuGpuVector
::
create
(
labelSeqStartPositions
.
size
(),
useGpu
);
sequenceStartPositions
->
copyFrom
(
labelSeqStartPositions
.
data
(),
labelSeqStartPositions
.
size
(),
useGpu
);
data
.
sequenceStartPositions
=
sequenceStartPositions
;
}
break
;
}
default:
LOG
(
FATAL
)
<<
" unknown inputType "
;
return
;
...
...
paddle/gserver/tests/LayerGradUtil.h
浏览文件 @
65969dad
...
...
@@ -32,6 +32,7 @@ enum InputType {
INPUT_SPARSE_NON_VALUE_DATA
,
INPUT_SPARSE_FLOAT_VALUE_DATA
,
INPUT_DENSE_DIM_DATA
,
// using sequence length to init dense data
INPUT_SELF_DEFINE_DATA
,
// support customizing for input value
};
struct
ParaSparse
{
...
...
@@ -66,6 +67,7 @@ struct InputDef {
bool
isStatic
;
std
::
vector
<
int
>
labelInitValue
;
std
::
vector
<
int
>
labelSeqStartPositions
;
MatrixPtr
selfDefinedData
;
InputDef
(
InputType
type
,
string
nameIn
,
size_t
dimIn
,
size_t
sizeIn
)
{
inputType
=
type
;
...
...
@@ -76,6 +78,20 @@ struct InputDef {
isStatic
=
false
;
}
// Constructor for an input whose value is supplied by the caller.
// Stores the given matrix and the (optional, default empty) label sequence
// start positions; dim and paraSize are set to 0 and isStatic to false.
// NOTE(review): intended for use with INPUT_SELF_DEFINE_DATA, but the type is
// not checked here -- confirm at the call sites.
InputDef
(
InputType
type
,
string
nameIn
,
MatrixPtr
selfDefinedData
,
std
::
vector
<
int
>
selfDefinedSeqStartPos
=
{})
:
labelSeqStartPositions
(
selfDefinedSeqStartPos
),
// The parameter shadows the member of the same name; inside the
// parentheses the name refers to the parameter.
selfDefinedData
(
selfDefinedData
)
{
inputType
=
type
;
name
=
nameIn
;
dim
=
0
;
// NOTE(review): presumably resets the sparse description to an empty
// format string -- confirm against the ParaSparse definition.
sparse
=
{
""
};
paraSize
=
0
;
isStatic
=
false
;
}
InputDef
(
InputType
type
,
string
nameIn
,
size_t
dimIn
,
...
...
paddle/gserver/tests/test_DetectionOutput.cpp
0 → 100644
浏览文件 @
65969dad
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
using
namespace
paddle
;
// NOLINT
using
namespace
std
;
// NOLINT
// Do one forward pass of the detection output layer and check that its
// output matches the given result
// Builds a detection_output layer fed by three data layers (prior boxes,
// location predictions, confidence predictions), runs one forward pass and
// compares the layer output with `result`.
//
// inputLoc / inputConf / inputPriorBox: values copied into the data layers.
// feature_map_width / feature_map_height: written to the layer config.
// nms_threshold: NMS threshold written to the layer config.
// use_gpu: passed on to initDataLayer.
// result: expected output of the layer.
void doOneDetectionOutputTest(MatrixPtr& inputLoc,
                              MatrixPtr& inputConf,
                              MatrixPtr& inputPriorBox,
                              size_t feature_map_width,
                              size_t feature_map_height,
                              real nms_threshold,
                              bool use_gpu,
                              MatrixPtr& result) {
  // Describe the layer under test: one config entry per input, with the
  // detection settings attached to the first one.
  TestConfig testConfig;
  auto& layerConfig = testConfig.layerConfig;
  layerConfig.set_type("detection_output");
  LayerInputConfig* firstInput = layerConfig.add_inputs();
  layerConfig.add_inputs();
  layerConfig.add_inputs();

  DetectionOutputConfig* detConf = firstInput->mutable_detection_output_conf();
  detConf->set_width(feature_map_width);
  detConf->set_height(feature_map_height);
  detConf->set_nms_threshold(nms_threshold);
  detConf->set_num_classes(2);
  detConf->set_nms_top_k(20);
  detConf->set_keep_top_k(10);
  detConf->set_background_id(0);
  detConf->set_confidence_threshold(0.01);
  detConf->set_input_num(1);

  // Input order expected by the layer: prior boxes, locations, confidences.
  testConfig.inputDefs.push_back({INPUT_DATA_TARGET, "priorbox", 32, 0});
  testConfig.inputDefs.push_back({INPUT_DATA, "input_loc", 16, 0});
  testConfig.inputDefs.push_back({INPUT_DATA, "input_conf", 8, 0});

  // Create the data layers and overwrite their values with the given inputs.
  std::vector<DataLayerPtr> dataLayers;
  LayerMap layerMap;
  vector<Argument> datas;
  initDataLayer(testConfig,
                &dataLayers,
                &datas,
                &layerMap,
                "priorbox",
                1,
                false,
                use_gpu);

  const MatrixPtr* sources[] = {&inputPriorBox, &inputLoc, &inputConf};
  for (size_t i = 0; i < 3; ++i) {
    dataLayers[i]->getOutputValue()->copyFrom(**sources[i]);
  }

  // Create the layer under test, run it once and check its output.
  std::vector<ParameterPtr> parameters;
  LayerPtr detectionOutputLayer;
  initTestLayer(testConfig, &layerMap, &parameters, &detectionOutputLayer);
  detectionOutputLayer->forward(PASS_GC);
  checkMatrixEqual(detectionOutputLayer->getOutputValue(), result);
}
// Forward test of the detection output layer: the same inputs are run with
// two NMS thresholds on CPU and, unless built CPU-only, again on GPU.
TEST(Layer, detectionOutputLayerFwd) {
  // Shared input data: 16 location values, 8 confidence values and 32
  // prior-box values.
  real inputLocData[] = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                         0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1};
  real inputConfData[] = {0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6};
  real inputPriorBoxData[] = {0.1, 0.1, 0.5, 0.5, 0.1, 0.1, 0.2, 0.2,
                              0.2, 0.2, 0.6, 0.6, 0.1, 0.1, 0.2, 0.2,
                              0.3, 0.3, 0.7, 0.7, 0.1, 0.1, 0.2, 0.2,
                              0.4, 0.4, 0.8, 0.8, 0.1, 0.1, 0.2, 0.2};
  // Expected output with NMS threshold 0.01: one row of 7 values.
  real resultData[] = {
      0, 1, 0.68997443, 0.099959746, 0.099959746, 0.50804031, 0.50804031};
  // Expected output with NMS threshold 0.2: two rows of 7 values.
  real resultData2[] = {0,          1,          0.68997443, 0.099959746,
                        0.099959746, 0.50804031, 0.50804031, 0,
                        1,          0.59868765, 0.29995975, 0.29995975,
                        0.70804024, 0.70804024};

  bool useGpu = false;

  // CPU case 1.
  MatrixPtr inputLoc = Matrix::create(1, 16, false, useGpu);
  MatrixPtr inputConf = Matrix::create(1, 8, false, useGpu);
  MatrixPtr inputPriorBox = Matrix::create(1, 32, false, useGpu);
  inputLoc->setData(inputLocData);
  inputConf->setData(inputConfData);
  inputPriorBox->setData(inputPriorBoxData);

  real nmsThreshold = 0.01;
  MatrixPtr expectedCpu1 = Matrix::create(1, 7, false, useGpu);
  expectedCpu1->setData(resultData);
  doOneDetectionOutputTest(inputLoc,
                           inputConf,
                           inputPriorBox,
                           /* feature_map_width */ 1,
                           /* feature_map_height */ 1,
                           nmsThreshold,
                           useGpu,
                           expectedCpu1);

  // CPU case 2.
  nmsThreshold = 0.2;
  MatrixPtr expectedCpu2 = Matrix::create(2, 7, false, useGpu);
  expectedCpu2->setData(resultData2);
  doOneDetectionOutputTest(inputLoc,
                           inputConf,
                           inputPriorBox,
                           /* feature_map_width */ 1,
                           /* feature_map_height */ 1,
                           nmsThreshold,
                           useGpu,
                           expectedCpu2);

#ifndef PADDLE_ONLY_CPU
  // GPU case 1.
  useGpu = true;
  inputLoc = Matrix::create(1, 16, false, useGpu);
  inputConf = Matrix::create(1, 8, false, useGpu);
  inputPriorBox = Matrix::create(1, 32, false, useGpu);
  inputLoc->copyFrom(inputLocData, 16);
  inputConf->copyFrom(inputConfData, 8);
  inputPriorBox->copyFrom(inputPriorBoxData, 32);

  nmsThreshold = 0.01;
  MatrixPtr expectedGpu1 = Matrix::create(1, 7, false, useGpu);
  expectedGpu1->copyFrom(resultData, 7);
  doOneDetectionOutputTest(inputLoc,
                           inputConf,
                           inputPriorBox,
                           /* feature_map_width */ 1,
                           /* feature_map_height */ 1,
                           nmsThreshold,
                           useGpu,
                           expectedGpu1);

  // GPU case 2.
  nmsThreshold = 0.2;
  MatrixPtr expectedGpu2 = Matrix::create(2, 7, false, useGpu);
  expectedGpu2->copyFrom(resultData2, 14);
  doOneDetectionOutputTest(inputLoc,
                           inputConf,
                           inputPriorBox,
                           /* feature_map_width */ 1,
                           /* feature_map_height */ 1,
                           nmsThreshold,
                           useGpu,
                           expectedGpu2);
#endif
}
// Test binary entry point: initialise GoogleTest, then the project runtime via
// initMain, and run every registered test.
int main(int argc, char** argv) {
  // InitGoogleTest takes &argc so it can strip the flags it recognises before
  // initMain looks at the command line.
  testing::InitGoogleTest(&argc, argv);
  initMain(argc, argv);
  const int exitCode = RUN_ALL_TESTS();
  return exitCode;
}
paddle/gserver/tests/test_LayerGrad.cpp
浏览文件 @
65969dad
...
...
@@ -1689,6 +1689,70 @@ TEST(Layer, smooth_l1) {
}
}
// Gradient check for the "multibox_loss" layer.
//
// The prior-box and label inputs are supplied as self-defined matrices whose
// leading entries are overwritten with fixed coordinates; the location and
// confidence predictions are ordinary generated data inputs.
TEST(Layer, multibox_loss) {
  TestConfig config;
  config.layerConfig.set_type("multibox_loss");
  config.biasSize = 0;

  // The layer's hyper-parameters are stored on its first input.
  LayerInputConfig* firstInput = config.layerConfig.add_inputs();
  MultiBoxLossConfig* lossConf = firstInput->mutable_multibox_loss_conf();
  lossConf->set_num_classes(21);
  lossConf->set_input_num(1);
  lossConf->set_overlap_threshold(0.5);
  lossConf->set_neg_pos_ratio(3);
  lossConf->set_neg_overlap(0.5);
  lossConf->set_background_id(0);
  lossConf->set_height(3);
  lossConf->set_width(3);

  // Ground-truth labels: gtNum rows of 6 values, first filled with
  // sigmoid(uniform - 0.5) noise.
  const size_t gtNum = 1;
  MatrixPtr labelValue = Matrix::create(gtNum, 6, false, false);
  labelValue->randomizeUniform();
  labelValue->add(-0.5);
  labelValue->sigmoid(*labelValue);
  real* labelData = labelValue->getData();
  const size_t labelWidth = labelValue->getWidth();
  for (size_t row = 0; row < gtNum; ++row) {
    real* label = labelData + row * labelWidth;
    // Column 0 becomes a random value in [1, 20]; columns 1-4 are pinned to
    // fixed values. The last column keeps its random value.
    label[0] = std::rand() % 20 + 1;
    label[1] = 0.400259;
    label[2] = 0.377857;
    label[3] = 0.525712;
    label[4] = 0.519368;
  }

  // Sequence start positions 0, 1, ..., gtNum.
  vector<int> seqStartPositions(gtNum + 1, 0);
  for (size_t pos = 1; pos < gtNum + 1; ++pos) {
    seqStartPositions[pos] = pos;
  }

  // Ensure at least one matched bbox: the first 8 prior-box values are fixed,
  // the rest stay random.
  MatrixPtr priorValue = Matrix::create(1, 72, false, false);
  priorValue->randomizeUniform();
  priorValue->add(-0.5);
  priorValue->sigmoid(*priorValue);
  real* priorData = priorValue->getData();
  const real firstPrior[] = {0.424811,
                             0.397059,
                             0.538905,
                             0.447091,
                             0.425720,
                             0.515228,
                             0.519452,
                             0.591065};
  for (size_t k = 0; k < sizeof(firstPrior) / sizeof(firstPrior[0]); ++k) {
    priorData[k] = firstPrior[k];
  }

  // Input order: prior boxes, labels, location predictions, confidence
  // predictions.
  config.inputDefs.push_back(
      {INPUT_SELF_DEFINE_DATA, "priorbox", priorValue, {}});
  config.inputDefs.push_back(
      {INPUT_SELF_DEFINE_DATA, "label", labelValue, seqStartPositions});
  config.inputDefs.push_back({INPUT_DATA, "locPred", 36, 0});
  config.inputDefs.push_back({INPUT_DATA, "confPred", 189, 0});
  // One add_inputs() per remaining input (the first was added above).
  config.layerConfig.add_inputs();
  config.layerConfig.add_inputs();
  config.layerConfig.add_inputs();

  for (bool useGpu : {false, true}) {
    testLayerGrad(config, "multibox_loss", 1, false, useGpu, false);
  }
}
TEST
(
Layer
,
TransLayer
)
{
TestConfig
config
;
const
int
height
=
128
;
...
...
proto/ModelConfig.proto
浏览文件 @
65969dad
...
...
@@ -266,6 +266,29 @@ message PadConfig {
repeated
uint32
pad_w
=
4
;
}
// Settings for the "multibox_loss" layer. The message is attached to a layer
// input through LayerInputConfig.multibox_loss_conf.
message
MultiBoxLossConfig
{
// Number of classes.
required
uint32
num_classes
=
1
;
// Overlap threshold.
required
float
overlap_threshold
=
2
;
// Ratio of negative boxes to positive boxes.
required
float
neg_pos_ratio
=
3
;
// Overlap threshold for negative boxes.
required
float
neg_overlap
=
4
;
// Index of the background class.
required
uint32
background_id
=
5
;
// The Python config helper sets this to the number of location-prediction
// inputs, which it asserts equals the number of confidence inputs.
required
uint32
input_num
=
6
;
// NOTE(review): meaning is not stated in this schema; presumably the input
// feature map height. TODO confirm against the layer implementation.
optional
uint32
height
=
7
[
default
=
1
];
// NOTE(review): presumably the input feature map width. TODO confirm.
optional
uint32
width
=
8
[
default
=
1
];
}
// Settings for the "detection_output" layer. The message is attached to a
// layer input through LayerInputConfig.detection_output_conf.
message
DetectionOutputConfig
{
// Number of classes.
required
uint32
num_classes
=
1
;
// Non-maximum suppression threshold.
required
float
nms_threshold
=
2
;
// Number of boxes kept in the NMS output.
required
uint32
nms_top_k
=
3
;
// Index of the background class.
required
uint32
background_id
=
4
;
// The Python config helper sets this to the number of location-prediction
// inputs, which it asserts equals the number of confidence inputs.
required
uint32
input_num
=
5
;
// Number of boxes kept in the layer's output.
required
uint32
keep_top_k
=
6
;
// Classification confidence threshold.
required
float
confidence_threshold
=
7
;
// NOTE(review): meaning is not stated in this schema; presumably the input
// feature map height. TODO confirm against the layer implementation.
optional
uint32
height
=
8
[
default
=
1
];
// NOTE(review): presumably the input feature map width. TODO confirm.
optional
uint32
width
=
9
[
default
=
1
];
}
message
LayerInputConfig
{
required
string
input_layer_name
=
1
;
optional
string
input_parameter_name
=
2
;
...
...
@@ -284,6 +307,8 @@ message LayerInputConfig {
optional
PriorBoxConfig
priorbox_conf
=
13
;
optional
PadConfig
pad_conf
=
14
;
optional
RowConvConfig
row_conv_conf
=
15
;
optional
MultiBoxLossConfig
multibox_loss_conf
=
16
;
optional
DetectionOutputConfig
detection_output_conf
=
17
;
}
message
LayerConfig
{
...
...
python/paddle/trainer/config_parser.py
浏览文件 @
65969dad
...
...
@@ -1676,6 +1676,52 @@ class PriorBoxLayer(LayerBase):
self
.
config
.
size
=
size
@config_layer('multibox_loss')
class MultiBoxLossLayer(LayerBase):
    """Config-parser entry for the 'multibox_loss' layer type.

    Checks the input count, then stores the loss settings in the
    multibox_loss_conf of the first input. The layer size is fixed to 1.
    """

    def __init__(self, name, inputs, input_num, num_classes, overlap_threshold,
                 neg_pos_ratio, neg_overlap, background_id):
        super(MultiBoxLossLayer, self).__init__(name, 'multibox_loss', 0,
                                                inputs)
        # input_num location inputs + input_num confidence inputs + 2 others.
        expected_input_count = input_num * 2 + 2
        config_assert(
            len(inputs) == expected_input_count,
            'MultiBoxLossLayer does not have enough inputs')
        config_assert(num_classes > background_id,
                      'Classes number must greater than background ID')

        # Every setting is written to the first input's config.
        loss_conf = self.config.inputs[0].multibox_loss_conf
        loss_conf.num_classes = num_classes
        loss_conf.overlap_threshold = overlap_threshold
        loss_conf.neg_pos_ratio = neg_pos_ratio
        loss_conf.neg_overlap = neg_overlap
        loss_conf.background_id = background_id
        loss_conf.input_num = input_num
        self.config.size = 1
@config_layer('detection_output')
class DetectionOutputLayer(LayerBase):
    """Config-parser entry for the 'detection_output' layer type.

    Checks the input count, then stores the detection settings in the
    detection_output_conf of the first input. The layer size is set from the
    ``size`` argument.
    """

    def __init__(self, name, inputs, size, input_num, num_classes,
                 nms_threshold, nms_top_k, keep_top_k, confidence_threshold,
                 background_id):
        super(DetectionOutputLayer, self).__init__(name, 'detection_output', 0,
                                                   inputs)
        # input_num location inputs + input_num confidence inputs + 1 other.
        expected_input_count = input_num * 2 + 1
        config_assert(
            len(inputs) == expected_input_count,
            'DetectionOutputLayer does not have enough inputs')
        config_assert(num_classes > background_id,
                      'Classes number must greater than background ID')

        # Every setting is written to the first input's config.
        det_conf = self.config.inputs[0].detection_output_conf
        det_conf.num_classes = num_classes
        det_conf.nms_threshold = nms_threshold
        det_conf.nms_top_k = nms_top_k
        det_conf.keep_top_k = keep_top_k
        det_conf.confidence_threshold = confidence_threshold
        det_conf.background_id = background_id
        det_conf.input_num = input_num
        self.config.size = size
@
config_layer
(
'data'
)
class
DataLayer
(
LayerBase
):
def
__init__
(
self
,
name
,
size
,
height
=
None
,
width
=
None
,
device
=
None
):
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
65969dad
...
...
@@ -115,6 +115,8 @@ __all__ = [
'print_layer'
,
'priorbox_layer'
,
'cross_channel_norm_layer'
,
'multibox_loss_layer'
,
'detection_output_layer'
,
'spp_layer'
,
'pad_layer'
,
'eos_layer'
,
...
...
@@ -195,6 +197,8 @@ class LayerType(object):
PRINT_LAYER
=
'print'
PRIORBOX_LAYER
=
'priorbox'
MULTIBOX_LOSS_LAYER
=
'multibox_loss'
DETECTION_OUTPUT_LAYER
=
'detection_output'
CTC_LAYER
=
'ctc'
WARP_CTC_LAYER
=
'warp_ctc'
...
...
@@ -1052,6 +1056,163 @@ def priorbox_layer(input,
size
=
size
)
@wrap_name_default("multibox_loss")
def multibox_loss_layer(input_loc,
                        input_conf,
                        priorbox,
                        label,
                        num_classes,
                        overlap_threshold=0.5,
                        neg_pos_ratio=3.0,
                        neg_overlap=0.5,
                        background_id=0,
                        name=None):
    """
    Compute the location loss and the confidence loss for SSD.

    ``input_loc`` and ``input_conf`` may each be a single LayerOutput or a
    list/tuple of LayerOutput; both must contain the same number of layers.

    :param name: The layer name.
    :type name: basestring
    :param input_loc: The predicted locations.
    :type input_loc: LayerOutput | list | tuple
    :param input_conf: The predicted confidences.
    :type input_conf: LayerOutput | list | tuple
    :param priorbox: The prior box locations and variances.
    :type priorbox: LayerOutput
    :param label: The input label.
    :type label: LayerOutput
    :param num_classes: The number of classes.
    :type num_classes: int
    :param overlap_threshold: The overlap threshold.
    :type overlap_threshold: float
    :param neg_pos_ratio: The ratio of negative boxes to positive boxes.
    :type neg_pos_ratio: float
    :param neg_overlap: The overlap threshold for negative boxes.
    :type neg_overlap: float
    :param background_id: The background class index.
    :type background_id: int
    :return: LayerOutput of size 1.
    """
    # A lone LayerOutput is wrapped so both inputs are handled as sequences.
    if isinstance(input_loc, LayerOutput):
        input_loc = [input_loc]
    assert isinstance(input_loc, collections.Sequence)  # list or tuple
    for loc_layer in input_loc:
        assert isinstance(loc_layer, LayerOutput)
    input_loc_num = len(input_loc)

    if isinstance(input_conf, LayerOutput):
        input_conf = [input_conf]
    assert isinstance(input_conf, collections.Sequence)  # list or tuple
    for conf_layer in input_conf:
        assert isinstance(conf_layer, LayerOutput)
    input_conf_num = len(input_conf)

    # Check the input layer number.
    assert input_loc_num == input_conf_num

    # Input order: prior box, label, all locations, all confidences.
    inputs = [priorbox.name, label.name]
    inputs += [loc_layer.name for loc_layer in input_loc]
    inputs += [conf_layer.name for conf_layer in input_conf]

    parents = [priorbox, label] + list(input_loc) + list(input_conf)

    Layer(
        name=name,
        type=LayerType.MULTIBOX_LOSS_LAYER,
        inputs=inputs,
        input_num=input_loc_num,
        num_classes=num_classes,
        overlap_threshold=overlap_threshold,
        neg_pos_ratio=neg_pos_ratio,
        neg_overlap=neg_overlap,
        background_id=background_id)
    return LayerOutput(
        name, LayerType.MULTIBOX_LOSS_LAYER, parents=parents, size=1)
@wrap_name_default("detection_output")
def detection_output_layer(input_loc,
                           input_conf,
                           priorbox,
                           num_classes,
                           nms_threshold=0.45,
                           nms_top_k=400,
                           keep_top_k=200,
                           confidence_threshold=0.01,
                           background_id=0,
                           name=None):
    """
    Apply NMS to the output of the network and compute the predicted bounding
    box locations.

    ``input_loc`` and ``input_conf`` may each be a single LayerOutput or a
    list/tuple of LayerOutput; both must contain the same number of layers.

    :param name: The layer name.
    :type name: basestring
    :param input_loc: The predicted locations.
    :type input_loc: LayerOutput | list | tuple
    :param input_conf: The predicted confidences.
    :type input_conf: LayerOutput | list | tuple
    :param priorbox: The prior box locations and variances.
    :type priorbox: LayerOutput
    :param num_classes: The number of classes.
    :type num_classes: int
    :param nms_threshold: The non-maximum suppression threshold.
    :type nms_threshold: float
    :param nms_top_k: The number of boxes kept in the NMS output.
    :type nms_top_k: int
    :param keep_top_k: The number of boxes kept in the layer's output.
    :type keep_top_k: int
    :param confidence_threshold: The classification confidence threshold.
    :type confidence_threshold: float
    :param background_id: The background class index.
    :type background_id: int
    :return: LayerOutput of size ``keep_top_k * 7``.
    """
    # A lone LayerOutput is wrapped so both inputs are handled as sequences.
    if isinstance(input_loc, LayerOutput):
        input_loc = [input_loc]
    assert isinstance(input_loc, collections.Sequence)  # list or tuple
    for loc_layer in input_loc:
        assert isinstance(loc_layer, LayerOutput)
    input_loc_num = len(input_loc)

    if isinstance(input_conf, LayerOutput):
        input_conf = [input_conf]
    assert isinstance(input_conf, collections.Sequence)  # list or tuple
    for conf_layer in input_conf:
        assert isinstance(conf_layer, LayerOutput)
    input_conf_num = len(input_conf)

    # Check the input layer number.
    assert input_loc_num == input_conf_num

    # Input order: prior box, all locations, all confidences.
    inputs = [priorbox.name]
    inputs += [loc_layer.name for loc_layer in input_loc]
    inputs += [conf_layer.name for conf_layer in input_conf]

    parents = [priorbox] + list(input_loc) + list(input_conf)

    # The output size is 7 values for each of the keep_top_k boxes.
    size = keep_top_k * 7

    Layer(
        name=name,
        type=LayerType.DETECTION_OUTPUT_LAYER,
        inputs=inputs,
        size=size,
        input_num=input_loc_num,
        num_classes=num_classes,
        nms_threshold=nms_threshold,
        nms_top_k=nms_top_k,
        keep_top_k=keep_top_k,
        confidence_threshold=confidence_threshold,
        background_id=background_id)
    return LayerOutput(
        name, LayerType.DETECTION_OUTPUT_LAYER, parents=parents, size=size)
@
wrap_name_default
(
"cross_channel_norm"
)
def
cross_channel_norm_layer
(
input
,
name
=
None
,
param_attr
=
None
):
"""
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录