Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
a3ada68a
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a3ada68a
编写于
6月 16, 2017
作者:
Z
zlx
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into improve_pruning
上级
fc9e3e4b
14c0e71d
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
883 addition
and
0 deletion
+883
-0
paddle/gserver/layers/DetectionUtil.cpp
paddle/gserver/layers/DetectionUtil.cpp
+576
-0
paddle/gserver/layers/DetectionUtil.h
paddle/gserver/layers/DetectionUtil.h
+307
-0
未找到文件。
paddle/gserver/layers/DetectionUtil.cpp
0 → 100644
浏览文件 @
a3ada68a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "DetectionUtil.h"
namespace
paddle
{
size_t
appendWithPermute
(
const
Matrix
&
inMatrix
,
size_t
height
,
size_t
width
,
size_t
outTotalSize
,
size_t
outOffset
,
size_t
batchSize
,
Matrix
&
outMatrix
,
PermMode
permMode
)
{
CHECK_EQ
(
inMatrix
.
useGpu
(),
outMatrix
.
useGpu
());
bool
useGpu
=
inMatrix
.
useGpu
();
if
(
permMode
==
kNCHWToNHWC
)
{
size_t
inElementCnt
=
inMatrix
.
getElementCnt
();
size_t
channels
=
inElementCnt
/
(
height
*
width
*
batchSize
);
size_t
imgSize
=
height
*
width
;
for
(
size_t
i
=
0
;
i
<
batchSize
;
++
i
)
{
size_t
offset
=
i
*
(
outTotalSize
/
batchSize
)
+
outOffset
;
const
MatrixPtr
inTmp
=
Matrix
::
create
(
const_cast
<
real
*>
(
inMatrix
.
getData
())
+
i
*
channels
*
imgSize
,
channels
,
imgSize
,
false
,
useGpu
);
MatrixPtr
outTmp
=
Matrix
::
create
(
const_cast
<
real
*>
(
outMatrix
.
getData
())
+
offset
,
imgSize
,
channels
,
false
,
useGpu
);
inTmp
->
transpose
(
outTmp
,
false
);
}
return
channels
*
imgSize
;
}
else
{
LOG
(
FATAL
)
<<
"Unkown permute mode"
;
}
}
size_t
decomposeWithPermute
(
const
Matrix
&
inMatrix
,
size_t
height
,
size_t
width
,
size_t
inTotalSize
,
size_t
inOffset
,
size_t
batchSize
,
Matrix
&
outMatrix
,
PermMode
permMode
)
{
CHECK_EQ
(
inMatrix
.
useGpu
(),
outMatrix
.
useGpu
());
bool
useGpu
=
inMatrix
.
useGpu
();
if
(
permMode
==
kNHWCToNCHW
)
{
size_t
outElementCnt
=
outMatrix
.
getElementCnt
();
size_t
channels
=
outElementCnt
/
(
height
*
width
*
batchSize
);
size_t
imgSize
=
height
*
width
;
for
(
size_t
i
=
0
;
i
<
batchSize
;
++
i
)
{
size_t
offset
=
i
*
(
inTotalSize
/
batchSize
)
+
inOffset
;
const
MatrixPtr
inTmp
=
Matrix
::
create
(
const_cast
<
real
*>
(
inMatrix
.
getData
())
+
offset
,
imgSize
,
channels
,
false
,
useGpu
);
MatrixPtr
outTmp
=
Matrix
::
create
(
const_cast
<
real
*>
(
outMatrix
.
getData
())
+
i
*
channels
*
imgSize
,
channels
,
imgSize
,
false
,
useGpu
);
inTmp
->
transpose
(
outTmp
,
false
);
}
return
channels
*
imgSize
;
}
else
{
LOG
(
FATAL
)
<<
"Unkown permute mode"
;
}
}
/**
 * @brief Compute the Jaccard overlap (intersection over union) of two bboxes.
 *
 * Returns 0 when the boxes are disjoint. Also returns 0 when the union area is
 * not positive (for example two coincident zero-area boxes), instead of
 * evaluating 0 / 0 and handing NaN to callers that compare the result against
 * a threshold.
 *
 * @param bbox1 The first bbox
 * @param bbox2 The second bbox
 * @return intersection area divided by union area, or 0 as described above
 */
real jaccardOverlap(const NormalizedBBox& bbox1, const NormalizedBBox& bbox2) {
  const bool disjoint = bbox2.xMin > bbox1.xMax || bbox2.xMax < bbox1.xMin ||
                        bbox2.yMin > bbox1.yMax || bbox2.yMax < bbox1.yMin;
  if (disjoint) {
    return 0.0;
  }

  const real interXMin = std::max(bbox1.xMin, bbox2.xMin);
  const real interYMin = std::max(bbox1.yMin, bbox2.yMin);
  const real interXMax = std::min(bbox1.xMax, bbox2.xMax);
  const real interYMax = std::min(bbox1.yMax, bbox2.yMax);

  const real interArea = (interXMax - interXMin) * (interYMax - interYMin);
  const real unionArea = bbox1.getArea() + bbox2.getArea() - interArea;
  // Degenerate boxes can touch without covering any area; avoid 0 / 0.
  if (unionArea <= 0) {
    return 0.0;
  }
  return interArea / unionArea;
}
void
encodeBBoxWithVar
(
const
NormalizedBBox
&
priorBBox
,
const
vector
<
real
>&
priorBBoxVar
,
const
NormalizedBBox
&
gtBBox
,
vector
<
real
>&
outVec
)
{
real
priorBBoxWidth
=
priorBBox
.
getWidth
();
real
priorBBoxHeight
=
priorBBox
.
getHeight
();
real
priorBBoxCenterX
=
priorBBox
.
getCenterX
();
real
priorBBoxCenterY
=
priorBBox
.
getCenterY
();
real
gtBBoxWidth
=
gtBBox
.
getWidth
();
real
gtBBoxHeight
=
gtBBox
.
getHeight
();
real
gtBBoxCenterX
=
gtBBox
.
getCenterX
();
real
gtBBoxCenterY
=
gtBBox
.
getCenterY
();
outVec
.
clear
();
outVec
.
push_back
((
gtBBoxCenterX
-
priorBBoxCenterX
)
/
priorBBoxWidth
/
priorBBoxVar
[
0
]);
outVec
.
push_back
((
gtBBoxCenterY
-
priorBBoxCenterY
)
/
priorBBoxHeight
/
priorBBoxVar
[
1
]);
outVec
.
push_back
(
std
::
log
(
std
::
fabs
(
gtBBoxWidth
/
priorBBoxWidth
))
/
priorBBoxVar
[
2
]);
outVec
.
push_back
(
std
::
log
(
std
::
fabs
(
gtBBoxHeight
/
priorBBoxHeight
))
/
priorBBoxVar
[
3
]);
}
/**
 * @brief Decode predicted location offsets into a bbox, relative to a prior
 *        bbox and its variances.
 *
 * center = priorBBoxVar[k] * locPredData[k] * prior size + prior center
 *          (k = 0 for x / width, k = 1 for y / height)
 * size   = exp(priorBBoxVar[k] * locPredData[k]) * prior size
 *          (k = 2 for width, k = 3 for height)
 *
 * The returned box has isDifficult set to false; previously the flag was left
 * uninitialised and then copied around with the box.
 *
 * @param priorBBox    Prior bbox to be decoded
 * @param priorBBoxVar Variance parameters of the prior bbox (indices 0..3 are
 *                     read)
 * @param locPredData  Offset parameters (indices 0..3 are read)
 * @return the decoded bbox (not clipped)
 */
NormalizedBBox decodeBBoxWithVar(const NormalizedBBox& priorBBox,
                                 const vector<real>& priorBBoxVar,
                                 const vector<real>& locPredData) {
  const real priorBBoxWidth = priorBBox.getWidth();
  const real priorBBoxHeight = priorBBox.getHeight();
  const real priorBBoxCenterX = priorBBox.getCenterX();
  const real priorBBoxCenterY = priorBBox.getCenterY();

  const real decodedBBoxCenterX =
      priorBBoxVar[0] * locPredData[0] * priorBBoxWidth + priorBBoxCenterX;
  const real decodedBBoxCenterY =
      priorBBoxVar[1] * locPredData[1] * priorBBoxHeight + priorBBoxCenterY;
  const real decodedBBoxWidth =
      std::exp(priorBBoxVar[2] * locPredData[2]) * priorBBoxWidth;
  const real decodedBBoxHeight =
      std::exp(priorBBoxVar[3] * locPredData[3]) * priorBBoxHeight;

  NormalizedBBox decodedBBox;
  decodedBBox.xMin = decodedBBoxCenterX - decodedBBoxWidth / 2;
  decodedBBox.yMin = decodedBBoxCenterY - decodedBBoxHeight / 2;
  decodedBBox.xMax = decodedBBoxCenterX + decodedBBoxWidth / 2;
  decodedBBox.yMax = decodedBBoxCenterY + decodedBBoxHeight / 2;
  // A decoded prediction carries no difficulty label; give the flag a
  // defined value so the box can be copied safely.
  decodedBBox.isDifficult = false;
  return decodedBBox;
}
void
getBBoxFromPriorData
(
const
real
*
priorData
,
const
size_t
numBBoxes
,
vector
<
NormalizedBBox
>&
bboxVec
)
{
size_t
outOffset
=
bboxVec
.
size
();
bboxVec
.
resize
(
bboxVec
.
size
()
+
numBBoxes
);
for
(
size_t
i
=
0
;
i
<
numBBoxes
;
++
i
)
{
NormalizedBBox
bbox
;
bbox
.
xMin
=
*
(
priorData
+
i
*
8
);
bbox
.
yMin
=
*
(
priorData
+
i
*
8
+
1
);
bbox
.
xMax
=
*
(
priorData
+
i
*
8
+
2
);
bbox
.
yMax
=
*
(
priorData
+
i
*
8
+
3
);
bboxVec
[
outOffset
+
i
]
=
bbox
;
}
}
void
getBBoxVarFromPriorData
(
const
real
*
priorData
,
const
size_t
num
,
vector
<
vector
<
real
>>&
varVec
)
{
size_t
outOffset
=
varVec
.
size
();
varVec
.
resize
(
varVec
.
size
()
+
num
);
for
(
size_t
i
=
0
;
i
<
num
;
++
i
)
{
vector
<
real
>
var
;
var
.
push_back
(
*
(
priorData
+
i
*
8
+
4
));
var
.
push_back
(
*
(
priorData
+
i
*
8
+
5
));
var
.
push_back
(
*
(
priorData
+
i
*
8
+
6
));
var
.
push_back
(
*
(
priorData
+
i
*
8
+
7
));
varVec
[
outOffset
+
i
]
=
var
;
}
}
void
getBBoxFromLabelData
(
const
real
*
labelData
,
const
size_t
numBBoxes
,
vector
<
NormalizedBBox
>&
bboxVec
)
{
size_t
outOffset
=
bboxVec
.
size
();
bboxVec
.
resize
(
bboxVec
.
size
()
+
numBBoxes
);
for
(
size_t
i
=
0
;
i
<
numBBoxes
;
++
i
)
{
NormalizedBBox
bbox
;
bbox
.
xMin
=
*
(
labelData
+
i
*
6
+
1
);
bbox
.
yMin
=
*
(
labelData
+
i
*
6
+
2
);
bbox
.
xMax
=
*
(
labelData
+
i
*
6
+
3
);
bbox
.
yMax
=
*
(
labelData
+
i
*
6
+
4
);
real
isDifficult
=
*
(
labelData
+
i
*
6
+
5
);
if
(
std
::
abs
(
isDifficult
-
0.0
)
<
1e-6
)
bbox
.
isDifficult
=
false
;
else
bbox
.
isDifficult
=
true
;
bboxVec
[
outOffset
+
i
]
=
bbox
;
}
}
void
getBBoxFromDetectData
(
const
real
*
detectData
,
const
size_t
numBBoxes
,
vector
<
real
>&
labelVec
,
vector
<
real
>&
scoreVec
,
vector
<
NormalizedBBox
>&
bboxVec
)
{
size_t
outOffset
=
bboxVec
.
size
();
labelVec
.
resize
(
outOffset
+
numBBoxes
);
scoreVec
.
resize
(
outOffset
+
numBBoxes
);
bboxVec
.
resize
(
outOffset
+
numBBoxes
);
for
(
size_t
i
=
0
;
i
<
numBBoxes
;
++
i
)
{
labelVec
[
outOffset
+
i
]
=
*
(
detectData
+
i
*
7
+
1
);
scoreVec
[
outOffset
+
i
]
=
*
(
detectData
+
i
*
7
+
2
);
NormalizedBBox
bbox
;
bbox
.
xMin
=
*
(
detectData
+
i
*
7
+
3
);
bbox
.
yMin
=
*
(
detectData
+
i
*
7
+
4
);
bbox
.
xMax
=
*
(
detectData
+
i
*
7
+
5
);
bbox
.
yMax
=
*
(
detectData
+
i
*
7
+
6
);
bboxVec
[
outOffset
+
i
]
=
bbox
;
}
}
void
matchBBox
(
const
vector
<
NormalizedBBox
>&
priorBBoxes
,
const
vector
<
NormalizedBBox
>&
gtBBoxes
,
real
overlapThreshold
,
vector
<
int
>*
matchIndices
,
vector
<
real
>*
matchOverlaps
)
{
map
<
size_t
,
map
<
size_t
,
real
>>
overlaps
;
size_t
numPriors
=
priorBBoxes
.
size
();
size_t
numGTs
=
gtBBoxes
.
size
();
matchIndices
->
clear
();
matchIndices
->
resize
(
numPriors
,
-
1
);
matchOverlaps
->
clear
();
matchOverlaps
->
resize
(
numPriors
,
0.0
);
// Store the positive overlap between predictions and ground truth
for
(
size_t
i
=
0
;
i
<
numPriors
;
++
i
)
{
for
(
size_t
j
=
0
;
j
<
numGTs
;
++
j
)
{
real
overlap
=
jaccardOverlap
(
priorBBoxes
[
i
],
gtBBoxes
[
j
]);
if
(
overlap
>
1e-6
)
{
(
*
matchOverlaps
)[
i
]
=
std
::
max
((
*
matchOverlaps
)[
i
],
overlap
);
overlaps
[
i
][
j
]
=
overlap
;
}
}
}
// Bipartite matching
vector
<
int
>
gtPool
;
for
(
size_t
i
=
0
;
i
<
numGTs
;
++
i
)
{
gtPool
.
push_back
(
i
);
}
while
(
gtPool
.
size
()
>
0
)
{
// Find the most overlapped gt and corresponding predictions
int
maxPriorIdx
=
-
1
;
int
maxGTIdx
=
-
1
;
real
maxOverlap
=
-
1.0
;
for
(
map
<
size_t
,
map
<
size_t
,
real
>>::
iterator
it
=
overlaps
.
begin
();
it
!=
overlaps
.
end
();
++
it
)
{
size_t
i
=
it
->
first
;
if
((
*
matchIndices
)[
i
]
!=
-
1
)
{
// The prediction already has matched ground truth or is ignored
continue
;
}
for
(
size_t
p
=
0
;
p
<
gtPool
.
size
();
++
p
)
{
int
j
=
gtPool
[
p
];
if
(
it
->
second
.
find
(
j
)
==
it
->
second
.
end
())
{
// No overlap between the i-th prediction and j-th ground truth
continue
;
}
// Find the maximum overlapped pair
if
(
it
->
second
[
j
]
>
maxOverlap
)
{
maxPriorIdx
=
(
int
)
i
;
maxGTIdx
=
(
int
)
j
;
maxOverlap
=
it
->
second
[
j
];
}
}
}
if
(
maxPriorIdx
==
-
1
)
{
break
;
}
else
{
(
*
matchIndices
)[
maxPriorIdx
]
=
maxGTIdx
;
(
*
matchOverlaps
)[
maxPriorIdx
]
=
maxOverlap
;
gtPool
.
erase
(
std
::
find
(
gtPool
.
begin
(),
gtPool
.
end
(),
maxGTIdx
));
}
}
// Get most overlaped for the rest prediction bboxes
for
(
map
<
size_t
,
map
<
size_t
,
real
>>::
iterator
it
=
overlaps
.
begin
();
it
!=
overlaps
.
end
();
++
it
)
{
size_t
i
=
it
->
first
;
if
((
*
matchIndices
)[
i
]
!=
-
1
)
{
// The prediction already has matched ground truth or is ignored
continue
;
}
int
maxGTIdx
=
-
1
;
real
maxOverlap
=
-
1
;
for
(
size_t
j
=
0
;
j
<
numGTs
;
++
j
)
{
if
(
it
->
second
.
find
(
j
)
==
it
->
second
.
end
())
{
// No overlap between the i-th prediction and j-th ground truth
continue
;
}
// Find the maximum overlapped pair
real
overlap
=
it
->
second
[
j
];
if
(
overlap
>
maxOverlap
&&
overlap
>=
overlapThreshold
)
{
maxGTIdx
=
j
;
maxOverlap
=
overlap
;
}
}
if
(
maxGTIdx
!=
-
1
)
{
(
*
matchIndices
)[
i
]
=
maxGTIdx
;
(
*
matchOverlaps
)[
i
]
=
maxOverlap
;
}
}
}
/**
 * @brief For every image, match prior bboxes to ground truth and select
 *        negative priors by confidence score.
 *
 * All images share the prior bboxes read from priorValue. For image n:
 *   - the ground-truth count is gtStartPosPtr[n + 1] - gtStartPosPtr[n] when
 *     n < seqNum, otherwise 0;
 *   - with no ground truth, an all -1 match vector and an empty negative list
 *     are appended and the image adds nothing to the totals;
 *   - otherwise matchBBox() produces the matches; unmatched priors whose
 *     overlap is below negOverlapThreshold are negative candidates, sorted by
 *     descending maxConfScore[n][i], and the first
 *     min(numPos * negPosRatio, candidate count) are kept.
 *
 * @param matchIndicesVecPtr Receives one match vector per image (appended)
 * @param negIndicesVecPtr   Receives one negative index list per image
 *                           (appended)
 * @return (total matched priors, total selected negatives) over the batch
 */
pair<size_t, size_t> generateMatchIndices(
    const Matrix& priorValue,
    const size_t numPriorBBoxes,
    const Matrix& gtValue,
    const int* gtStartPosPtr,
    const size_t seqNum,
    const vector<vector<real>>& maxConfScore,
    const size_t batchSize,
    const real overlapThreshold,
    const real negOverlapThreshold,
    const size_t negPosRatio,
    vector<vector<int>>* matchIndicesVecPtr,
    vector<vector<int>>* negIndicesVecPtr) {
  // share same prior bboxes
  vector<NormalizedBBox> priorBBoxes;
  getBBoxFromPriorData(priorValue.getData(), numPriorBBoxes, priorBBoxes);

  size_t totalPos = 0;
  size_t totalNeg = 0;
  for (size_t n = 0; n < batchSize; ++n) {
    vector<int> matchIndices(numPriorBBoxes, -1);
    vector<int> negIndices;
    vector<real> matchOverlaps(numPriorBBoxes, 0.0);

    size_t numGTBBoxes = 0;
    if (n < seqNum) {
      numGTBBoxes = gtStartPosPtr[n + 1] - gtStartPosPtr[n];
    }
    if (numGTBBoxes == 0) {
      matchIndicesVecPtr->push_back(matchIndices);
      negIndicesVecPtr->push_back(negIndices);
      continue;
    }

    vector<NormalizedBBox> gtBBoxes;
    getBBoxFromLabelData(
        gtValue.getData() + gtStartPosPtr[n] * 6, numGTBBoxes, gtBBoxes);
    matchBBox(
        priorBBoxes, gtBBoxes, overlapThreshold, &matchIndices, &matchOverlaps);

    // Count positives and gather negative candidates in one pass.
    size_t numPos = 0;
    vector<pair<real, size_t>> scoresIndices;
    for (size_t i = 0; i < matchIndices.size(); ++i) {
      if (matchIndices[i] != -1) {
        ++numPos;
      } else if (matchOverlaps[i] < negOverlapThreshold) {
        scoresIndices.push_back(std::make_pair(maxConfScore[n][i], i));
      }
    }
    totalPos += numPos;

    const size_t numNeg = std::min(static_cast<size_t>(numPos * negPosRatio),
                                   scoresIndices.size());
    std::sort(scoresIndices.begin(),
              scoresIndices.end(),
              sortScorePairDescend<size_t>);
    for (size_t k = 0; k < numNeg; ++k) {
      negIndices.push_back(scoresIndices[k].second);
    }
    totalNeg += numNeg;

    matchIndicesVecPtr->push_back(matchIndices);
    negIndicesVecPtr->push_back(negIndices);
  }
  return std::make_pair(totalPos, totalNeg);
}
void
getMaxConfidenceScores
(
const
real
*
confData
,
const
size_t
batchSize
,
const
size_t
numPriorBBoxes
,
const
size_t
numClasses
,
const
size_t
backgroundId
,
vector
<
vector
<
real
>>*
maxConfScoreVecPtr
)
{
maxConfScoreVecPtr
->
clear
();
for
(
size_t
i
=
0
;
i
<
batchSize
;
++
i
)
{
vector
<
real
>
maxConfScore
;
for
(
size_t
j
=
0
;
j
<
numPriorBBoxes
;
++
j
)
{
int
offset
=
j
*
numClasses
;
real
maxVal
=
-
FLT_MAX
;
real
maxPosVal
=
-
FLT_MAX
;
real
maxScore
=
0.0
;
for
(
size_t
c
=
0
;
c
<
numClasses
;
++
c
)
{
maxVal
=
std
::
max
<
real
>
(
confData
[
offset
+
c
],
maxVal
);
if
(
c
!=
backgroundId
)
maxPosVal
=
std
::
max
<
real
>
(
confData
[
offset
+
c
],
maxPosVal
);
}
real
sum
=
0.0
;
for
(
size_t
c
=
0
;
c
<
numClasses
;
++
c
)
sum
+=
std
::
exp
(
confData
[
offset
+
c
]
-
maxVal
);
maxScore
=
std
::
exp
(
maxPosVal
-
maxVal
)
/
sum
;
maxConfScore
.
push_back
(
maxScore
);
}
confData
+=
numPriorBBoxes
*
numClasses
;
maxConfScoreVecPtr
->
push_back
(
maxConfScore
);
}
}
/**
 * @brief Comparator that orders (score, payload) pairs by descending score.
 *        The payload does not take part in the comparison.
 *
 * @return true when pair1's score is greater than pair2's score
 */
template <typename T>
bool sortScorePairDescend(const pair<real, T>& pair1,
                          const pair<real, T>& pair2) {
  return pair2.first < pair1.first;
}
/**
 * @brief Specialization for (score, NormalizedBBox) pairs: orders by
 *        descending score, ignoring the bbox.
 *
 * @return true when pair1's score is greater than pair2's score
 */
template <>
bool sortScorePairDescend(const pair<real, NormalizedBBox>& pair1,
                          const pair<real, NormalizedBBox>& pair2) {
  return pair2.first < pair1.first;
}
void
applyNMSFast
(
const
vector
<
NormalizedBBox
>&
bboxes
,
const
real
*
confScoreData
,
size_t
classIdx
,
size_t
topK
,
real
confThreshold
,
real
nmsThreshold
,
size_t
numPriorBBoxes
,
size_t
numClasses
,
vector
<
size_t
>*
indices
)
{
vector
<
pair
<
real
,
size_t
>>
scores
;
for
(
size_t
i
=
0
;
i
<
numPriorBBoxes
;
++
i
)
{
size_t
confOffset
=
i
*
numClasses
+
classIdx
;
if
(
confScoreData
[
confOffset
]
>
confThreshold
)
scores
.
push_back
(
std
::
make_pair
(
confScoreData
[
confOffset
],
i
));
}
std
::
stable_sort
(
scores
.
begin
(),
scores
.
end
(),
sortScorePairDescend
<
size_t
>
);
if
(
topK
>
0
&&
topK
<
scores
.
size
())
scores
.
resize
(
topK
);
while
(
scores
.
size
()
>
0
)
{
const
size_t
idx
=
scores
.
front
().
second
;
bool
keep
=
true
;
for
(
size_t
i
=
0
;
i
<
indices
->
size
();
++
i
)
{
if
(
keep
)
{
const
size_t
savedIdx
=
(
*
indices
)[
i
];
real
overlap
=
jaccardOverlap
(
bboxes
[
idx
],
bboxes
[
savedIdx
]);
keep
=
overlap
<=
nmsThreshold
;
}
else
{
break
;
}
}
if
(
keep
)
indices
->
push_back
(
idx
);
scores
.
erase
(
scores
.
begin
());
}
}
size_t
getDetectionIndices
(
const
real
*
confData
,
const
size_t
numPriorBBoxes
,
const
size_t
numClasses
,
const
size_t
backgroundId
,
const
size_t
batchSize
,
const
size_t
confThreshold
,
const
size_t
nmsTopK
,
const
real
nmsThreshold
,
const
size_t
keepTopK
,
const
vector
<
vector
<
NormalizedBBox
>>&
allDecodedBBoxes
,
vector
<
map
<
size_t
,
vector
<
size_t
>>>*
allDetectionIndices
)
{
size_t
totalKeepNum
=
0
;
for
(
size_t
n
=
0
;
n
<
batchSize
;
++
n
)
{
const
vector
<
NormalizedBBox
>&
decodedBBoxes
=
allDecodedBBoxes
[
n
];
size_t
numDetected
=
0
;
map
<
size_t
,
vector
<
size_t
>>
indices
;
size_t
confOffset
=
n
*
numPriorBBoxes
*
numClasses
;
for
(
size_t
c
=
0
;
c
<
numClasses
;
++
c
)
{
if
(
c
==
backgroundId
)
continue
;
applyNMSFast
(
decodedBBoxes
,
confData
+
confOffset
,
c
,
nmsTopK
,
confThreshold
,
nmsThreshold
,
numPriorBBoxes
,
numClasses
,
&
(
indices
[
c
]));
numDetected
+=
indices
[
c
].
size
();
}
if
(
keepTopK
>
0
&&
numDetected
>
keepTopK
)
{
vector
<
pair
<
real
,
pair
<
size_t
,
size_t
>>>
scoreIndexPairs
;
for
(
size_t
c
=
0
;
c
<
numClasses
;
++
c
)
{
const
vector
<
size_t
>&
labelIndices
=
indices
[
c
];
for
(
size_t
i
=
0
;
i
<
labelIndices
.
size
();
++
i
)
{
size_t
idx
=
labelIndices
[
i
];
scoreIndexPairs
.
push_back
(
std
::
make_pair
((
confData
+
confOffset
)[
idx
*
numClasses
+
c
],
std
::
make_pair
(
c
,
idx
)));
}
}
std
::
sort
(
scoreIndexPairs
.
begin
(),
scoreIndexPairs
.
end
(),
sortScorePairDescend
<
pair
<
size_t
,
size_t
>>
);
scoreIndexPairs
.
resize
(
keepTopK
);
map
<
size_t
,
vector
<
size_t
>>
newIndices
;
for
(
size_t
i
=
0
;
i
<
scoreIndexPairs
.
size
();
++
i
)
{
size_t
label
=
scoreIndexPairs
[
i
].
second
.
first
;
size_t
idx
=
scoreIndexPairs
[
i
].
second
.
second
;
newIndices
[
label
].
push_back
(
idx
);
}
allDetectionIndices
->
push_back
(
newIndices
);
totalKeepNum
+=
keepTopK
;
}
else
{
allDetectionIndices
->
push_back
(
indices
);
totalKeepNum
+=
numDetected
;
}
}
return
totalKeepNum
;
}
void
getDetectionOutput
(
const
real
*
confData
,
const
size_t
numKept
,
const
size_t
numPriorBBoxes
,
const
size_t
numClasses
,
const
size_t
batchSize
,
const
vector
<
map
<
size_t
,
vector
<
size_t
>>>&
allIndices
,
const
vector
<
vector
<
NormalizedBBox
>>&
allDecodedBBoxes
,
Matrix
&
out
)
{
MatrixPtr
outBuffer
;
Matrix
::
resizeOrCreate
(
outBuffer
,
numKept
,
7
,
false
,
false
);
real
*
bufferData
=
outBuffer
->
getData
();
size_t
count
=
0
;
for
(
size_t
n
=
0
;
n
<
batchSize
;
++
n
)
{
for
(
map
<
size_t
,
vector
<
size_t
>>::
const_iterator
it
=
allIndices
[
n
].
begin
();
it
!=
allIndices
[
n
].
end
();
++
it
)
{
size_t
label
=
it
->
first
;
const
vector
<
size_t
>&
indices
=
it
->
second
;
const
vector
<
NormalizedBBox
>&
decodedBBoxes
=
allDecodedBBoxes
[
n
];
for
(
size_t
i
=
0
;
i
<
indices
.
size
();
++
i
)
{
size_t
idx
=
indices
[
i
];
size_t
confOffset
=
n
*
numPriorBBoxes
*
numClasses
+
idx
*
numClasses
;
bufferData
[
count
*
7
]
=
n
;
bufferData
[
count
*
7
+
1
]
=
label
;
bufferData
[
count
*
7
+
2
]
=
(
confData
+
confOffset
)[
label
];
NormalizedBBox
clippedBBox
=
clipBBox
(
decodedBBoxes
[
idx
]);
bufferData
[
count
*
7
+
3
]
=
clippedBBox
.
xMin
;
bufferData
[
count
*
7
+
4
]
=
clippedBBox
.
yMin
;
bufferData
[
count
*
7
+
5
]
=
clippedBBox
.
xMax
;
bufferData
[
count
*
7
+
6
]
=
clippedBBox
.
yMax
;
++
count
;
}
}
}
out
.
copyFrom
(
bufferData
,
numKept
*
7
);
}
/**
 * @brief Clamp every coordinate of a bbox to the range [0, 1].
 *
 * The result starts as a copy of the input, so isDifficult is carried over;
 * previously the flag of the returned box was left uninitialised.
 *
 * @param bbox BBox to clip
 * @return the clipped bbox
 */
NormalizedBBox clipBBox(const NormalizedBBox& bbox) {
  const real realOne = static_cast<real>(1.0);
  const real realZero = static_cast<real>(0.0);
  NormalizedBBox clippedBBox = bbox;
  clippedBBox.xMin = std::max(std::min(bbox.xMin, realOne), realZero);
  clippedBBox.yMin = std::max(std::min(bbox.yMin, realOne), realZero);
  clippedBBox.xMax = std::max(std::min(bbox.xMax, realOne), realZero);
  clippedBBox.yMax = std::max(std::min(bbox.yMax, realOne), realZero);
  return clippedBBox;
}
}
// namespace paddle
paddle/gserver/layers/DetectionUtil.h
0 → 100644
浏览文件 @
a3ada68a
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <float.h>
#include <algorithm>
#include <cmath>
#include <map>
#include <utility>
#include <vector>
#include "paddle/math/Matrix.h"
using
std
::
vector
;
using
std
::
pair
;
using
std
::
map
;
namespace
paddle
{
/**
 * @brief Axis-aligned bounding box described by its min / max corners.
 *
 * @tparam T coordinate type
 */
template <typename T>
struct BBoxBase {
  /// Build a box from its corners; isDifficult starts as false.
  BBoxBase(T xMin, T yMin, T xMax, T yMax)
      : xMin(xMin), yMin(yMin), xMax(xMax), yMax(yMax), isDifficult(false) {}

  /// Build an empty box: all coordinates value-initialised and isDifficult
  /// false. Previously every member was left uninitialised, so boxes whose
  /// flag was never assigned were copied around with an indeterminate bool.
  BBoxBase() : xMin(), yMin(), xMax(), yMax(), isDifficult(false) {}

  /// xMax - xMin
  T getWidth() const { return xMax - xMin; }

  /// yMax - yMin
  T getHeight() const { return yMax - yMin; }

  /// Midpoint of the box along x
  T getCenterX() const { return (xMin + xMax) / 2; }

  /// Midpoint of the box along y
  T getCenterY() const { return (yMin + yMax) / 2; }

  /// width * height
  T getArea() const { return getWidth() * getHeight(); }

  // coordinate of bounding box
  T xMin;
  T yMin;
  T xMax;
  T yMax;
  // whether difficult object (e.g. object with heavy occlusion is difficult)
  bool isDifficult;
};
/**
 * @brief Bounding box with coordinates of type real.
 */
struct NormalizedBBox : public BBoxBase<real> {
  /// Default-construct through the BBoxBase<real> default constructor.
  NormalizedBBox() : BBoxBase<real>() {}
};
/**
 * @brief Layout permutation selector used by appendWithPermute() and
 *        decomposeWithPermute().
 */
enum PermMode {
  kNCHWToNHWC = 0,  // channel-major samples to channel-last
  kNHWCToNCHW = 1   // channel-last samples to channel-major
};
/**
* @brief First permute input matrix then append to output matrix
*/
size_t
appendWithPermute
(
const
Matrix
&
inMatrix
,
size_t
height
,
size_t
width
,
size_t
outTotalSize
,
size_t
outOffset
,
size_t
batchSize
,
Matrix
&
outMatrix
,
PermMode
permMode
);
/**
 * @brief First permute input matrix then decompose to output
 *
 * For sample i the input is read at element offset
 * i * (inTotalSize / batchSize) + inOffset as an (height * width) x channels
 * matrix, and its transpose is written to outMatrix at element offset
 * i * channels * height * width. Only kNHWCToNCHW is supported.
 *
 * @param inMatrix    Packed input matrix
 * @param height      Feature map height
 * @param width       Feature map width
 * @param inTotalSize Total element count of the packed input
 * @param inOffset    Element offset of this part inside one packed sample
 * @param batchSize   Image number
 * @param outMatrix   Output matrix
 * @param permMode    Permutation mode
 * @return channels * height * width
 */
size_t decomposeWithPermute(const Matrix& inMatrix,
                            size_t height,
                            size_t width,
                            size_t inTotalSize,
                            size_t inOffset,
                            size_t batchSize,
                            Matrix& outMatrix,
                            PermMode permMode);
/**
* @brief Compute jaccard overlap between two bboxes.
* @param bbox1 The first bbox
* @param bbox2 The second bbox
*/
real
jaccardOverlap
(
const
NormalizedBBox
&
bbox1
,
const
NormalizedBBox
&
bbox2
);
/**
* @brief Compute offset parameters between prior bbox and ground truth bbox
* and variances of prior bbox are considered
* @param priorBBox Input prior bbox
* @param priorBBoxVar Variance parameters of prior bbox
* @param gtBBox Groundtruth bbox
* @param outVec Output vector
*/
void
encodeBBoxWithVar
(
const
NormalizedBBox
&
priorBBox
,
const
vector
<
real
>&
priorBBoxVar
,
const
NormalizedBBox
&
gtBBox
,
vector
<
real
>&
outVec
);
/**
* @brief Decode prior bbox with offset parameters
* and variances of prior bbox are considered
* @param priorBBox Prior bbox to be decoded
* @param priorBBoxVar Variance parameters of prior bbox
* @param locPredData Offset parameters
*/
NormalizedBBox
decodeBBoxWithVar
(
const
NormalizedBBox
&
priorBBox
,
const
vector
<
real
>&
priorBBoxVar
,
const
vector
<
real
>&
locPredData
);
/**
* @brief Extract bboxes from prior matrix, the layout is
* xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var ...
* @param priorData Matrix of prior value
* @param numBBoxes Number of bbox to be extracted
* @param bboxVec Append to the vector
*/
void
getBBoxFromPriorData
(
const
real
*
priorData
,
const
size_t
numBBoxes
,
vector
<
NormalizedBBox
>&
bboxVec
);
/**
* @brief Extract labels, scores and bboxes from detection matrix, the layout is
* imageId | label | score | xmin | ymin | xmax | ymax
* @param detectData Matrix of detection value
* @param numBBoxes Number of bbox to be extracted
* @param labelVec Label of bbox
* @param scoreVec Score of bbox
* @param bboxVec Append to the vector
*/
void
getBBoxFromDetectData
(
const
real
*
detectData
,
const
size_t
numBBoxes
,
vector
<
real
>&
labelVec
,
vector
<
real
>&
scoreVec
,
vector
<
NormalizedBBox
>&
bboxVec
);
/**
* @brief Extract variances from prior matrix, the layout is
* xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var ...
* @param priorData Matrix of prior value
* @param num Number to be extracted
* @param varVec Append to the vector
*/
void
getBBoxVarFromPriorData
(
const
real
*
priorData
,
const
size_t
num
,
vector
<
vector
<
real
>>&
varVec
);
/**
* @brief Extract bboxes from label matrix, the layout is
* class1_1 | xmin1_1 | ymin1_1 | xmax1_1 | ymax1_1 | difficult1_1 | ...
* @param labelData Matrix of label value
* @param numBBoxes Number to be extracted
* @param bboxVec Append to the vector
*/
void
getBBoxFromLabelData
(
const
real
*
labelData
,
const
size_t
numBBoxes
,
vector
<
NormalizedBBox
>&
bboxVec
);
/**
* @brief Match prior bbox to groundtruth bbox, the strategy is:
1. Find the most overlapped bbox pair (prior and groundtruth)
2. For rest of prior bboxes find the most overlapped groundtruth bbox
* @param priorBBoxes prior bbox
* @param gtBBoxes groundtruth bbox
* @param overlapThreshold Low boundary of overlap (judge whether matched)
* @param matchIndices For each prior bbox, groundtruth bbox index if matched
otherwise -1
* @param matchOverlaps For each prior bbox, overlap with its matched groundtruth bbox, or the largest overlap with any groundtruth bbox if unmatched
*/
void
matchBBox
(
const
vector
<
NormalizedBBox
>&
priorBBoxes
,
const
vector
<
NormalizedBBox
>&
gtBBoxes
,
real
overlapThreshold
,
vector
<
int
>*
matchIndices
,
vector
<
real
>*
matchOverlaps
);
/**
* @brief Generate positive bboxes and negative bboxes,
for each image the number of negative bboxes is at most negPosRatio times the number of positive bboxes
* @param priorValue Prior value
* @param numPriorBBoxes Number of prior bbox
* @param gtValue Groundtruth value
* @param gtStartPosPtr Since groundtruth value stored as sequence type,
this parameter indicates start position of each record
* @param seqNum Number of sequence
* @param maxConfScore Classification score for prior bbox, used to mine
negative examples
* @param batchSize Image number
* @param overlapThreshold Low boundary of overlap
* @param negOverlapThreshold Upper boundary of overlap (judge negative example)
* @param negPosRatio Control number of negative bboxes
* @param matchIndicesVecPtr Save indices of matched prior bbox
* @param negIndicesVecPtr Save indices of negative prior bbox
*/
pair
<
size_t
,
size_t
>
generateMatchIndices
(
const
Matrix
&
priorValue
,
const
size_t
numPriorBBoxes
,
const
Matrix
&
gtValue
,
const
int
*
gtStartPosPtr
,
const
size_t
seqNum
,
const
vector
<
vector
<
real
>>&
maxConfScore
,
const
size_t
batchSize
,
const
real
overlapThreshold
,
const
real
negOverlapThreshold
,
const
size_t
negPosRatio
,
vector
<
vector
<
int
>>*
matchIndicesVecPtr
,
vector
<
vector
<
int
>>*
negIndicesVecPtr
);
/**
* @brief Get max confidence score for each prior bbox
* @param confData Confidence scores, layout is
* class1 score | class2 score | ... | classN score ...
* @param batchSize Image number
* @param numPriorBBoxes Prior bbox number
* @param numClasses Classes number
* @param backgroundId Background id
* @param maxConfScoreVecPtr Output
*/
void
getMaxConfidenceScores
(
const
real
*
confData
,
const
size_t
batchSize
,
const
size_t
numPriorBBoxes
,
const
size_t
numClasses
,
const
size_t
backgroundId
,
vector
<
vector
<
real
>>*
maxConfScoreVecPtr
);
/**
 * @brief Comparator for sorting (score, payload) pairs by descending score
 * @param pair1 The first pair
 * @param pair2 The second pair
 * @return true if the score of pair1 is greater than the score of pair2
 */
template
<
typename
T
>
bool
sortScorePairDescend
(
const
pair
<
real
,
T
>&
pair1
,
const
pair
<
real
,
T
>&
pair2
);
/**
 * @brief Specialization of the descending-score comparator for
 * (score, NormalizedBBox) pairs
 * @param pair1 The first pair
 * @param pair2 The second pair
 * @return true if the score of pair1 is greater than the score of pair2
 */
template
<
>
bool
sortScorePairDescend
(
const
pair
<
real
,
NormalizedBBox
>&
pair1
,
const
pair
<
real
,
NormalizedBBox
>&
pair2
);
/**
* @brief Do NMS for bboxes to remove duplicated bboxes
* @param bboxes BBoxes to apply NMS
* @param confScoreData Confidence scores
* @param classIdx Class to do NMS
* @param topK Number to keep
* @param confThreshold Low boundary of confidence score
* @param nmsThreshold Threshold of overlap
* @param numPriorBBoxes Total number of prior bboxes
* @param numClasses Total class number
* @param indices Indices of high quality bboxes
*/
void
applyNMSFast
(
const
vector
<
NormalizedBBox
>&
bboxes
,
const
real
*
confScoreData
,
size_t
classIdx
,
size_t
topK
,
real
confThreshold
,
real
nmsThreshold
,
size_t
numPriorBBoxes
,
size_t
numClasses
,
vector
<
size_t
>*
indices
);
/**
* @brief Get detection results which satisfy requirements
* @param confData Confidence scores
* @param numPriorBBoxes Prior bbox number
* @param numClasses Class number
* @param backgroundId Background class
* @param batchSize Image number
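* @param confThreshold Threshold of class confidence (NOTE: declared as size_t, so a fractional threshold is truncated to an integer; confirm whether real was intended)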
* @param nmsTopK Used in NMS operation to keep top k bbox
* @param nmsThreshold Used in NMS, threshold of overlap
* @param keepTopK How many bboxes are kept in an image
* @param allDecodedBBoxes Decoded bboxes for all images
* @param allDetectionIndices Save detection bbox indices
*/
size_t
getDetectionIndices
(
const
real
*
confData
,
const
size_t
numPriorBBoxes
,
const
size_t
numClasses
,
const
size_t
backgroundId
,
const
size_t
batchSize
,
const
size_t
confThreshold
,
const
size_t
nmsTopK
,
const
real
nmsThreshold
,
const
size_t
keepTopK
,
const
vector
<
vector
<
NormalizedBBox
>>&
allDecodedBBoxes
,
vector
<
map
<
size_t
,
vector
<
size_t
>>>*
allDetectionIndices
);
/**
* @brief Get detection results
* @param confData Confidence scores
* @param numKept Number of detections to output (rows of the output matrix)
* @param numPriorBBoxes Prior bbox number
* @param numClasses Class number
* @param batchSize Image number
* @param allIndices Indices of predicted bboxes
* @param allDecodedBBoxes BBoxes decoded
* @param out Output matrix
* image number | label | confidence score | xMin | yMin | xMax | yMax
*/
void
getDetectionOutput
(
const
real
*
confData
,
const
size_t
numKept
,
const
size_t
numPriorBBoxes
,
const
size_t
numClasses
,
const
size_t
batchSize
,
const
vector
<
map
<
size_t
,
vector
<
size_t
>>>&
allIndices
,
const
vector
<
vector
<
NormalizedBBox
>>&
allDecodedBBoxes
,
Matrix
&
out
);
/**
 * @brief Clip the coordinates of a bbox to the range [0, 1]
 * @param bbox BBox to be clipped
 * @return The clipped bbox
 */
NormalizedBBox
clipBBox
(
const
NormalizedBBox
&
bbox
);
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录