机器未来 / Paddle (forked from PaddlePaddle / Paddle)

Commit e7e4f084
Authored on Dec 20, 2018 by dengkaipeng
Parent: bd6deb1a

    ignore pred overlap gt > 0.7. test=develop

Showing 5 changed files with 668 additions and 125 deletions (+668 -125):
paddle/fluid/operators/yolov3_loss_op.cc  (+25 -10)
paddle/fluid/operators/yolov3_loss_op.h  (+474 -82)
python/paddle/fluid/layers/detection.py  (+10 -4)
python/paddle/fluid/tests/test_detection.py  (+2 -2)
python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py  (+157 -27)
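The change named in the commit message is the ignore rule: a predicted box whose best IoU with any ground-truth box exceeds `ignore_thresh` (now defaulting to 0.7) is excluded from the objectness loss instead of being penalized as background. Below is a minimal numpy sketch of that rule, mirroring the reference implementation added in test_yolov3_loss_op.py; the function and array names here are illustrative, not part of the commit.

```python
import numpy as np

def build_objness_target(ious, ignore_thresh=0.7):
    # ious: [n, num_pred, num_gt] IoU between every predicted box and every GT box,
    # as computed by batch_xywh_box_iou in the updated unit test.
    n, num_pred, _ = ious.shape
    objness = np.zeros((n, num_pred))      # 0: still treated as a background sample
    ious_max = ious.max(axis=-1)           # best overlap with any GT box
    # Predictions overlapping some GT box by more than ignore_thresh are marked -1;
    # the objectness loss skips entries < 0 (neither positive nor negative sample).
    objness = np.where(ious_max > ignore_thresh, -np.ones_like(objness), objness)
    return objness
```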
paddle/fluid/operators/yolov3_loss_op.cc
```
@@ -35,13 +35,16 @@ class Yolov3LossOp : public framework::OperatorWithKernel {
   auto dim_gtlabel = ctx->GetInputDim("GTLabel");
   auto anchors = ctx->Attrs().Get<std::vector<int>>("anchors");
   int anchor_num = anchors.size() / 2;
+  auto anchor_mask = ctx->Attrs().Get<std::vector<int>>("anchor_mask");
+  int mask_num = anchor_mask.size();
   auto class_num = ctx->Attrs().Get<int>("class_num");
   PADDLE_ENFORCE_EQ(dim_x.size(), 4, "Input(X) should be a 4-D tensor.");
   PADDLE_ENFORCE_EQ(dim_x[2], dim_x[3],
                     "Input(X) dim[3] and dim[4] should be euqal.");
-  PADDLE_ENFORCE_EQ(dim_x[1], anchor_num * (5 + class_num),
-                    "Input(X) dim[1] should be equal to (anchor_number * (5 "
-                    "+ class_num)).");
+  PADDLE_ENFORCE_EQ(dim_x[1], mask_num * (5 + class_num),
+                    "Input(X) dim[1] should be equal to (anchor_mask_number * (5 "
+                    "+ class_num)).");
   PADDLE_ENFORCE_EQ(dim_gtbox.size(), 3, "Input(GTBox) should be a 3-D tensor");
   PADDLE_ENFORCE_EQ(dim_gtbox[2], 4, "Input(GTBox) dim[2] should be 5");
```
```
@@ -55,6 +58,11 @@ class Yolov3LossOp : public framework::OperatorWithKernel {
                       "Attr(anchors) length should be greater then 0.");
     PADDLE_ENFORCE_EQ(anchors.size() % 2, 0,
                       "Attr(anchors) length should be even integer.");
+    for (size_t i = 0; i < anchor_mask.size(); i++) {
+      PADDLE_ENFORCE_LT(anchor_mask[i], anchor_num,
+                        "Attr(anchor_mask) should not crossover Attr(anchors).");
+    }
     PADDLE_ENFORCE_GT(class_num, 0,
                       "Attr(class_num) should be an integer greater then 0.");
```
```
@@ -74,7 +82,7 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X",
-             "The input tensor of YOLO v3 loss operator, "
+             "The input tensor of YOLOv3 loss operator, "
              "This is a 4-D tensor with shape of [N, C, H, W]."
              "H and W should be same, and the second dimention(C) stores"
              "box locations, confidence score and classification one-hot"
```
```
@@ -99,13 +107,20 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
     AddAttr<int>("class_num", "The number of classes to predict.");
     AddAttr<std::vector<int>>("anchors",
                               "The anchor width and height, "
-                              "it will be parsed pair by pair.");
-    AddAttr<int>("input_size",
-                 "The input size of YOLOv3 net, "
-                 "generally this is set as 320, 416 or 608.")
-        .SetDefault(406);
+                              "it will be parsed pair by pair.")
+        .SetDefault(std::vector<int>{});
+    AddAttr<std::vector<int>>("anchor_mask",
+                              "The mask index of anchors used in "
+                              "current YOLOv3 loss calculation.")
+        .SetDefault(std::vector<int>{});
+    AddAttr<int>("downsample",
+                 "The downsample ratio from network input to YOLOv3 loss "
+                 "input, so 32, 16, 8 should be set for the first, second, "
+                 "and thrid YOLOv3 loss operators.")
+        .SetDefault(32);
     AddAttr<float>("ignore_thresh",
-                   "The ignore threshold to ignore confidence loss.");
+                   "The ignore threshold to ignore confidence loss.")
+        .SetDefault(0.7);
     AddComment(R"DOC(
          This operator generate yolov3 loss by given predict result and ground
          truth boxes.
```
paddle/fluid/operators/yolov3_loss_op.h
```
@@ -321,6 +321,182 @@ static void CalcYolov3LossGrad(T* input_grad_data, const Tensor& loss_grad,
                        obj_mask_data, n, an_num, grid_num, class_num,
                        class_num);
}

static int mask_index(std::vector<int> mask, int val) {
  for (int i = 0; i < mask.size(); i++) {
    if (mask[i] == val) {
      return i;
    }
  }
  return -1;
}

template <typename T>
struct Box {
  float x, y, w, h;
};

template <typename T>
static inline T sigmoid(T x) {
  return 1.0 / (1.0 + std::exp(-x));
}

template <typename T>
static inline void sigmoid_arrray(T* arr, int len) {
  for (int i = 0; i < len; i++) {
    arr[i] = sigmoid(arr[i]);
  }
}

template <typename T>
static inline Box<T> get_yolo_box(const T* x, std::vector<int> anchors, int i,
                                  int j, int an_idx, int grid_size,
                                  int input_size, int index, int stride) {
  Box<T> b;
  b.x = (i + sigmoid<T>(x[index])) / grid_size;
  b.y = (j + sigmoid<T>(x[index + stride])) / grid_size;
  b.w = std::exp(x[index + 2 * stride]) * anchors[2 * an_idx] / input_size;
  b.h = std::exp(x[index + 3 * stride]) * anchors[2 * an_idx + 1] / input_size;
  return b;
}

template <typename T>
static inline Box<T> get_gt_box(const T* gt, int batch, int max_boxes,
                                int idx) {
  Box<T> b;
  b.x = gt[(batch * max_boxes + idx) * 4];
  b.y = gt[(batch * max_boxes + idx) * 4 + 1];
  b.w = gt[(batch * max_boxes + idx) * 4 + 2];
  b.h = gt[(batch * max_boxes + idx) * 4 + 3];
  return b;
}

template <typename T>
static inline T overlap(T c1, T w1, T c2, T w2) {
  T l1 = c1 - w1 / 2.0;
  T l2 = c2 - w2 / 2.0;
  T left = l1 > l2 ? l1 : l2;
  T r1 = c1 + w1 / 2.0;
  T r2 = c2 + w2 / 2.0;
  T right = r1 < r2 ? r1 : r2;
  return right - left;
}

template <typename T>
static inline T box_iou(Box<T> b1, Box<T> b2) {
  T w = overlap(b1.x, b1.w, b2.x, b2.w);
  T h = overlap(b1.y, b1.h, b2.y, b2.h);
  T inter_area = (w < 0 || h < 0) ? 0.0 : w * h;
  T union_area = b1.w * b1.h + b2.w * b2.h - inter_area;
  return inter_area / union_area;
}

static inline int entry_index(int batch, int an_idx, int hw_idx, int an_num,
                              int an_stride, int stride, int entry) {
  return (batch * an_num + an_idx) * an_stride + entry * stride + hw_idx;
}

template <typename T>
static void CalcBoxLocationLoss(T* loss, const T* input, Box<T> gt,
                                std::vector<int> anchors, int an_idx,
                                int box_idx, int gi, int gj, int grid_size,
                                int input_size, int stride) {
  T tx = gt.x * grid_size - gi;
  T ty = gt.y * grid_size - gj;
  T tw = std::log(gt.w * input_size / anchors[2 * an_idx]);
  T th = std::log(gt.h * input_size / anchors[2 * an_idx + 1]);

  T scale = 2.0 - gt.w * gt.h;
  loss[0] += SCE<T>(input[box_idx], tx) * scale;
  loss[0] += SCE<T>(input[box_idx + stride], ty) * scale;
  loss[0] += L1Loss<T>(input[box_idx + 2 * stride], tw) * scale;
  loss[0] += L1Loss<T>(input[box_idx + 3 * stride], th) * scale;
}

template <typename T>
static void CalcBoxLocationLossGrad(T* input_grad, const T loss,
                                    const T* input, Box<T> gt,
                                    std::vector<int> anchors, int an_idx,
                                    int box_idx, int gi, int gj, int grid_size,
                                    int input_size, int stride) {
  T tx = gt.x * grid_size - gi;
  T ty = gt.y * grid_size - gj;
  T tw = std::log(gt.w * input_size / anchors[2 * an_idx]);
  T th = std::log(gt.h * input_size / anchors[2 * an_idx + 1]);

  T scale = 2.0 - gt.w * gt.h;
  input_grad[box_idx] = SCEGrad<T>(input[box_idx], tx) * scale * loss;
  input_grad[box_idx + stride] =
      SCEGrad<T>(input[box_idx + stride], ty) * scale * loss;
  input_grad[box_idx + 2 * stride] =
      L1LossGrad<T>(input[box_idx + 2 * stride], tw) * scale * loss;
  input_grad[box_idx + 3 * stride] =
      L1LossGrad<T>(input[box_idx + 3 * stride], th) * scale * loss;
}

template <typename T>
static inline void CalcLabelLoss(T* loss, const T* input, const int index,
                                 const int label, const int class_num,
                                 const int stride) {
  for (int i = 0; i < class_num; i++) {
    loss[0] += SCE<T>(input[index + i * stride], (i == label) ? 1.0 : 0.0);
  }
}

template <typename T>
static inline void CalcLabelLossGrad(T* input_grad, const T loss,
                                     const T* input, const int index,
                                     const int label, const int class_num,
                                     const int stride) {
  for (int i = 0; i < class_num; i++) {
    input_grad[index + i * stride] =
        SCEGrad<T>(input[index + i * stride], (i == label) ? 1.0 : 0.0) * loss;
  }
}

template <typename T>
static inline void CalcObjnessLoss(T* loss, const T* input, const int* objness,
                                   const int n, const int an_num, const int h,
                                   const int w, const int stride,
                                   const int an_stride) {
  for (int i = 0; i < n; i++) {
    for (int j = 0; j < an_num; j++) {
      for (int k = 0; k < h; k++) {
        for (int l = 0; l < w; l++) {
          int obj = objness[k * w + l];
          if (obj >= 0) {
            loss[i] += SCE<T>(input[k * w + l], static_cast<T>(obj));
          }
        }
      }
      objness += stride;
      input += an_stride;
    }
  }
}

template <typename T>
static inline void CalcObjnessLossGrad(T* input_grad, const T* loss,
                                       const T* input, const int* objness,
                                       const int n, const int an_num,
                                       const int h, const int w,
                                       const int stride, const int an_stride) {
  for (int i = 0; i < n; i++) {
    for (int j = 0; j < an_num; j++) {
      for (int k = 0; k < h; k++) {
        for (int l = 0; l < w; l++) {
          int obj = objness[k * w + l];
          if (obj >= 0) {
            input_grad[k * w + l] =
                SCEGrad<T>(input[k * w + l], static_cast<T>(obj)) * loss[i];
          }
        }
      }
      objness += stride;
      input += an_stride;
      input_grad += an_stride;
    }
  }
}

template <typename T>
class Yolov3LossKernel : public framework::OpKernel<T> {
 public:
```
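The helpers above decode one raw prediction into a normalized box and score it against ground truth. The following is a rough numpy rendering of `get_yolo_box` and `box_iou` for a single cell; the flat `index`/`stride` addressing of the C++ version is replaced by explicit scalar arguments, and the names in this sketch are illustrative.

```python
import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

def get_yolo_box(tx, ty, tw, th, i, j, anchor_w, anchor_h, grid_size, input_size):
    # Mirrors the C++ get_yolo_box: cell offset plus sigmoid for the center,
    # exponential times the anchor size for width/height, normalized to [0, 1].
    x = (i + sigmoid(tx)) / grid_size
    y = (j + sigmoid(ty)) / grid_size
    w = np.exp(tw) * anchor_w / input_size
    h = np.exp(th) * anchor_h / input_size
    return x, y, w, h

def box_iou(b1, b2):
    # Boxes are (cx, cy, w, h); mirrors overlap()/box_iou() in yolov3_loss_op.h.
    def overlap(c1, w1, c2, w2):
        left = max(c1 - w1 / 2.0, c2 - w2 / 2.0)
        right = min(c1 + w1 / 2.0, c2 + w2 / 2.0)
        return right - left
    w = overlap(b1[0], b1[2], b2[0], b2[2])
    h = overlap(b1[1], b1[3], b2[1], b2[3])
    inter = 0.0 if (w < 0 or h < 0) else w * h
    return inter / (b1[2] * b1[3] + b2[2] * b2[3] - inter)
```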
```
@@ -330,55 +506,158 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
    auto* gt_label = ctx.Input<Tensor>("GTLabel");
    auto* loss = ctx.Output<Tensor>("Loss");
    auto anchors = ctx.Attr<std::vector<int>>("anchors");
    auto anchor_mask = ctx.Attr<std::vector<int>>("anchor_mask");
    int class_num = ctx.Attr<int>("class_num");
    int input_size = ctx.Attr<int>("input_size");
    float ignore_thresh = ctx.Attr<float>("ignore_thresh");
    int downsample = ctx.Attr<int>("downsample");

    const int n = input->dims()[0];
    const int h = input->dims()[2];
    const int w = input->dims()[3];
    const int an_num = anchors.size() / 2;
    const int mask_num = anchor_mask.size();
    const int b = gt_box->dims()[1];
    int input_size = downsample * h;

    Tensor conf_mask, obj_mask;
    Tensor tx, ty, tw, th, tweight, tconf, tclass;
    conf_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    obj_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    tx.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    ty.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    tw.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    th.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    tweight.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    tconf.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    tclass.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());

    math::SetConstant<platform::CPUDeviceContext, T> constant;
    constant(ctx.template device_context<platform::CPUDeviceContext>(),
             &conf_mask, static_cast<T>(1.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(),
             &obj_mask, static_cast<T>(0.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(), &tx,
             static_cast<T>(0.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(), &ty,
             static_cast<T>(0.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(), &tw,
             static_cast<T>(0.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(), &th,
             static_cast<T>(0.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(),
             &tweight, static_cast<T>(0.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(),
             &tconf, static_cast<T>(0.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(),
             &tclass, static_cast<T>(0.0));

    PreProcessGTBox<T>(*gt_box, *gt_label, ignore_thresh, anchors, input_size,
                       h, &conf_mask, &obj_mask, &tx, &ty, &tw, &th, &tweight,
                       &tconf, &tclass);

    const T* input_data = input->data<T>();
    const T* gt_box_data = gt_box->data<T>();
    const int* gt_label_data = gt_label->data<int>();
    T* loss_data = loss->mutable_data<T>({n}, ctx.GetPlace());
    memset(loss_data, 0, n * sizeof(T));
    CalcYolov3Loss<T>(loss_data, *input, tx, ty, tw, th, tweight, tconf,
                      tclass, conf_mask, obj_mask);
    memset(loss_data, 0, n * sizeof(int));

    Tensor objness;
    int* objness_data =
        objness.mutable_data<int>({n, mask_num, h, w}, ctx.GetPlace());
    memset(objness_data, 0, objness.numel() * sizeof(int));

    const int stride = h * w;
    const int an_stride = (class_num + 5) * stride;
    for (int i = 0; i < n; i++) {
      for (int j = 0; j < mask_num; j++) {
        for (int k = 0; k < h; k++) {
          for (int l = 0; l < w; l++) {
            int box_idx =
                entry_index(i, j, k * w + l, mask_num, an_stride, stride, 0);
            Box<T> pred = get_yolo_box(input_data, anchors, l, k,
                                       anchor_mask[j], h, input_size, box_idx,
                                       stride);
            T best_iou = 0;
            // int best_t = 0;
            for (int t = 0; t < b; t++) {
              if (isZero<T>(gt_box_data[i * b * 4 + t * 4]) &&
                  isZero<T>(gt_box_data[i * b * 4 + t * 4 + 1])) {
                continue;
              }
              Box<T> gt = get_gt_box(gt_box_data, i, b, t);
              T iou = box_iou(pred, gt);
              if (iou > best_iou) {
                best_iou = iou;
                // best_t = t;
              }
            }
            if (best_iou > ignore_thresh) {
              int obj_idx = (i * mask_num + j) * stride + k * w + l;
              objness_data[obj_idx] = -1;
            }
          }
        }
      }
      for (int t = 0; t < b; t++) {
        if (isZero<T>(gt_box_data[i * b * 4 + t * 4]) &&
            isZero<T>(gt_box_data[i * b * 4 + t * 4 + 1])) {
          continue;
        }
        Box<T> gt = get_gt_box(gt_box_data, i, b, t);
        int gi = static_cast<int>(gt.x * w);
        int gj = static_cast<int>(gt.y * h);
        Box<T> gt_shift = gt;
        gt_shift.x = 0.0;
        gt_shift.y = 0.0;
        T best_iou = 0.0;
        int best_n = 0;
        for (int an_idx = 0; an_idx < an_num; an_idx++) {
          Box<T> an_box;
          an_box.x = 0.0;
          an_box.y = 0.0;
          an_box.w = anchors[2 * an_idx] / static_cast<T>(input_size);
          an_box.h = anchors[2 * an_idx + 1] / static_cast<T>(input_size);
          float iou = box_iou<T>(an_box, gt_shift);
          // TO DO: iou > 0.5 ?
          if (iou > best_iou) {
            best_iou = iou;
            best_n = an_idx;
          }
        }

        int mask_idx = mask_index(anchor_mask, best_n);
        if (mask_idx >= 0) {
          int box_idx = entry_index(i, mask_idx, gj * w + gi, mask_num,
                                    an_stride, stride, 0);
          CalcBoxLocationLoss<T>(loss_data + i, input_data, gt, anchors,
                                 best_n, box_idx, gi, gj, h, input_size,
                                 stride);

          int obj_idx = (i * mask_num + mask_idx) * stride + gj * w + gi;
          objness_data[obj_idx] = 1;

          int label = gt_label_data[i * b + t];
          int label_idx = entry_index(i, mask_idx, gj * w + gi, mask_num,
                                      an_stride, stride, 5);
          CalcLabelLoss<T>(loss_data + i, input_data, label_idx, label,
                           class_num, stride);
        }
      }
    }

    CalcObjnessLoss<T>(loss_data, input_data + 4 * stride, objness_data, n,
                       mask_num, h, w, stride, an_stride);

    // Tensor conf_mask, obj_mask;
    // Tensor tx, ty, tw, th, tweight, tconf, tclass;
    // conf_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // obj_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // tx.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // ty.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // tw.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // th.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // tweight.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // tconf.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // tclass.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
    //
    // math::SetConstant<platform::CPUDeviceContext, T> constant;
    // constant(ctx.template device_context<platform::CPUDeviceContext>(),
    //          &conf_mask, static_cast<T>(1.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(),
    //          &obj_mask, static_cast<T>(0.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(), &tx,
    //          static_cast<T>(0.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(), &ty,
    //          static_cast<T>(0.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(), &tw,
    //          static_cast<T>(0.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(), &th,
    //          static_cast<T>(0.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(),
    //          &tweight, static_cast<T>(0.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(),
    //          &tconf,
    //          static_cast<T>(0.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(),
    //          &tclass,
    //          static_cast<T>(0.0));
    //
    // PreProcessGTBox<T>(*gt_box, *gt_label, ignore_thresh, anchors,
    //                    input_size,
    //                    h, &conf_mask, &obj_mask, &tx, &ty, &tw, &th,
    //                    &tweight,
    //                    &tconf, &tclass);
    //
    // T* loss_data = loss->mutable_data<T>({n}, ctx.GetPlace());
    // memset(loss_data, 0, n * sizeof(T));
    // CalcYolov3Loss<T>(loss_data, *input, tx, ty, tw, th, tweight, tconf,
    //                   tclass,
    //                   conf_mask, obj_mask);
  }
};
```
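After building the ignore mask, the forward kernel assigns each ground-truth box to the anchor with the highest IoU, computed with both boxes shifted to the origin, and only emits location and label loss when that anchor appears in `anchor_mask`. A compact Python sketch of that matching step under the same origin-shift simplification (the function name here is illustrative):

```python
def match_gt_to_anchor(gt_w, gt_h, anchors, anchor_mask, input_size):
    # gt_w, gt_h are normalized GT width/height; anchors is the flat
    # [w0, h0, w1, h1, ...] list, as in the kernel loop.
    best_iou, best_n = 0.0, 0
    for an_idx in range(len(anchors) // 2):
        an_w = anchors[2 * an_idx] / float(input_size)
        an_h = anchors[2 * an_idx + 1] / float(input_size)
        # Both boxes sit at the origin, so the intersection is min(w)*min(h).
        inter = min(gt_w, an_w) * min(gt_h, an_h)
        iou = inter / (gt_w * gt_h + an_w * an_h - inter)
        if iou > best_iou:
            best_iou, best_n = iou, an_idx
    # Only anchors listed in anchor_mask contribute to this loss level;
    # -1 plays the role of mask_index() returning "not in this mask".
    return anchor_mask.index(best_n) if best_n in anchor_mask else -1
```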
```
@@ -389,59 +668,172 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
    auto* input = ctx.Input<Tensor>("X");
    auto* gt_box = ctx.Input<Tensor>("GTBox");
    auto* gt_label = ctx.Input<Tensor>("GTLabel");
    auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
    auto* loss_grad = ctx.Input<Tensor>(framework::GradVarName("Loss"));
    auto anchors = ctx.Attr<std::vector<int>>("anchors");
    auto anchor_mask = ctx.Attr<std::vector<int>>("anchor_mask");
    int class_num = ctx.Attr<int>("class_num");
    float ignore_thresh = ctx.Attr<float>("ignore_thresh");
    auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
    auto* loss_grad = ctx.Input<Tensor>(framework::GradVarName("Loss"));
    int input_size = ctx.Attr<int>("input_size");
    int downsample = ctx.Attr<int>("downsample");

    const int n = input->dims()[0];
    const int c = input->dims()[1];
    const int h = input->dims()[2];
    const int w = input->dims()[3];
    const int an_num = anchors.size() / 2;

    Tensor conf_mask, obj_mask;
    Tensor tx, ty, tw, th, tweight, tconf, tclass;
    conf_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    obj_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    tx.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    ty.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    tw.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    th.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    tweight.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    tconf.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    tclass.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());

    math::SetConstant<platform::CPUDeviceContext, T> constant;
    constant(ctx.template device_context<platform::CPUDeviceContext>(),
             &conf_mask, static_cast<T>(1.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(),
             &obj_mask, static_cast<T>(0.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(), &tx,
             static_cast<T>(0.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(), &ty,
             static_cast<T>(0.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(), &tw,
             static_cast<T>(0.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(), &th,
             static_cast<T>(0.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(),
             &tweight, static_cast<T>(0.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(),
             &tconf, static_cast<T>(0.0));
    constant(ctx.template device_context<platform::CPUDeviceContext>(),
             &tclass, static_cast<T>(0.0));

    PreProcessGTBox<T>(*gt_box, *gt_label, ignore_thresh, anchors, input_size,
                       h, &conf_mask, &obj_mask, &tx, &ty, &tw, &th, &tweight,
                       &tconf, &tclass);

    const int mask_num = anchor_mask.size();
    const int b = gt_box->dims()[1];
    int input_size = downsample * h;

    const T* input_data = input->data<T>();
    const T* gt_box_data = gt_box->data<T>();
    const int* gt_label_data = gt_label->data<int>();
    const T* loss_grad_data = loss_grad->data<T>();
    T* input_grad_data =
        input_grad->mutable_data<T>({n, c, h, w}, ctx.GetPlace());
    CalcYolov3LossGrad<T>(input_grad_data, *loss_grad, *input, tx, ty, tw, th,
                          tweight, tconf, tclass, conf_mask, obj_mask);
    memset(input_grad_data, 0, input_grad->numel() * sizeof(T));

    Tensor objness;
    int* objness_data =
        objness.mutable_data<int>({n, mask_num, h, w}, ctx.GetPlace());
    memset(objness_data, 0, objness.numel() * sizeof(int));

    const int stride = h * w;
    const int an_stride = (class_num + 5) * stride;
    for (int i = 0; i < n; i++) {
      for (int j = 0; j < mask_num; j++) {
        for (int k = 0; k < h; k++) {
          for (int l = 0; l < w; l++) {
            int box_idx =
                entry_index(i, j, k * w + l, mask_num, an_stride, stride, 0);
            Box<T> pred = get_yolo_box(input_data, anchors, l, k,
                                       anchor_mask[j], h, input_size, box_idx,
                                       stride);
            T best_iou = 0;
            // int best_t = 0;
            for (int t = 0; t < b; t++) {
              if (isZero<T>(gt_box_data[i * b * 4 + t * 4]) &&
                  isZero<T>(gt_box_data[i * b * 4 + t * 4 + 1])) {
                continue;
              }
              Box<T> gt = get_gt_box(gt_box_data, i, b, t);
              T iou = box_iou(pred, gt);
              if (iou > best_iou) {
                best_iou = iou;
                // best_t = t;
              }
            }
            if (best_iou > ignore_thresh) {
              int obj_idx = (i * mask_num + j) * stride + k * w + l;
              objness_data[obj_idx] = -1;
            }
          }
        }
      }
      for (int t = 0; t < b; t++) {
        if (isZero<T>(gt_box_data[i * b * 4 + t * 4]) &&
            isZero<T>(gt_box_data[i * b * 4 + t * 4 + 1])) {
          continue;
        }
        Box<T> gt = get_gt_box(gt_box_data, i, b, t);
        int gi = static_cast<int>(gt.x * w);
        int gj = static_cast<int>(gt.y * h);
        Box<T> gt_shift = gt;
        gt_shift.x = 0.0;
        gt_shift.y = 0.0;
        T best_iou = 0.0;
        int best_n = 0;
        for (int an_idx = 0; an_idx < an_num; an_idx++) {
          Box<T> an_box;
          an_box.x = 0.0;
          an_box.y = 0.0;
          an_box.w = anchors[2 * an_idx] / static_cast<T>(input_size);
          an_box.h = anchors[2 * an_idx + 1] / static_cast<T>(input_size);
          float iou = box_iou<T>(an_box, gt_shift);
          // TO DO: iou > 0.5 ?
          if (iou > best_iou) {
            best_iou = iou;
            best_n = an_idx;
          }
        }

        int mask_idx = mask_index(anchor_mask, best_n);
        if (mask_idx >= 0) {
          int box_idx = entry_index(i, mask_idx, gj * w + gi, mask_num,
                                    an_stride, stride, 0);
          CalcBoxLocationLossGrad<T>(input_grad_data, loss_grad_data[i],
                                     input_data, gt, anchors, best_n, box_idx,
                                     gi, gj, h, input_size, stride);

          int obj_idx = (i * mask_num + mask_idx) * stride + gj * w + gi;
          objness_data[obj_idx] = 1;

          int label = gt_label_data[i * b + t];
          int label_idx = entry_index(i, mask_idx, gj * w + gi, mask_num,
                                      an_stride, stride, 5);
          CalcLabelLossGrad<T>(input_grad_data, loss_grad_data[i], input_data,
                               label_idx, label, class_num, stride);
        }
      }
    }

    CalcObjnessLossGrad<T>(input_grad_data + 4 * stride, loss_grad_data,
                           input_data + 4 * stride, objness_data, n, mask_num,
                           h, w, stride, an_stride);

    // const int n = input->dims()[0];
    // const int c = input->dims()[1];
    // const int h = input->dims()[2];
    // const int w = input->dims()[3];
    // const int an_num = anchors.size() / 2;
    //
    // Tensor conf_mask, obj_mask;
    // Tensor tx, ty, tw, th, tweight, tconf, tclass;
    // conf_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // obj_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // tx.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // ty.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // tw.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // th.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // tweight.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // tconf.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
    // tclass.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
    //
    // math::SetConstant<platform::CPUDeviceContext, T> constant;
    // constant(ctx.template device_context<platform::CPUDeviceContext>(),
    //          &conf_mask, static_cast<T>(1.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(),
    //          &obj_mask, static_cast<T>(0.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(), &tx,
    //          static_cast<T>(0.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(), &ty,
    //          static_cast<T>(0.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(), &tw,
    //          static_cast<T>(0.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(), &th,
    //          static_cast<T>(0.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(),
    //          &tweight, static_cast<T>(0.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(),
    //          &tconf,
    //          static_cast<T>(0.0));
    // constant(ctx.template device_context<platform::CPUDeviceContext>(),
    //          &tclass,
    //          static_cast<T>(0.0));
    //
    // PreProcessGTBox<T>(*gt_box, *gt_label, ignore_thresh, anchors,
    //                    input_size,
    //                    h, &conf_mask, &obj_mask, &tx, &ty, &tw, &th,
    //                    &tweight,
    //                    &tconf, &tclass);
    //
    // T* input_grad_data =
    //     input_grad->mutable_data<T>({n, c, h, w}, ctx.GetPlace());
    // CalcYolov3LossGrad<T>(input_grad_data, *loss_grad, *input, tx, ty, tw,
    //                       th,
    //                       tweight, tconf, tclass, conf_mask, obj_mask);
  }
};
```
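`SCE`, `SCEGrad`, `L1Loss` and `L1LossGrad` are defined earlier in yolov3_loss_op.h and are not part of this diff; the sketch below assumes the standard sigmoid-cross-entropy and L1 derivative forms, only to illustrate what the backward helpers multiply by the incoming per-sample loss gradient.

```python
import numpy as np

def sce_grad(x, label):
    # Assumed form: d/dx of sigmoid cross-entropy with logits is sigmoid(x) - label.
    return 1.0 / (1.0 + np.exp(-x)) - label

def l1_grad(x, target):
    # Assumed form: d/dx of |x - target| is the sign of the residual.
    return np.sign(x - target)

# CalcBoxLocationLossGrad scales these by the same scale = 2 - gt.w * gt.h factor
# used in the forward pass and by the per-sample loss gradient loss_grad_data[i].
```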
python/paddle/fluid/layers/detection.py
```
@@ -413,9 +413,10 @@ def yolov3_loss(x,
                 gtbox,
                 gtlabel,
                 anchors,
+                anchor_mask,
                 class_num,
                 ignore_thresh,
-                input_size,
+                downsample,
                 name=None):
     """
     ${comment}
```
```
@@ -430,9 +431,10 @@ def yolov3_loss(x,
         gtlabel (Variable): class id of ground truth boxes, shoud be ins shape
                             of [N, B].
         anchors (list|tuple): ${anchors_comment}
+        anchor_mask (list|tuple): ${anchor_mask_comment}
         class_num (int): ${class_num_comment}
         ignore_thresh (float): ${ignore_thresh_comment}
-        input_size (int): ${input_size_comment}
+        downsample (int): ${downsample_comment}
         name (string): the name of yolov3 loss

     Returns:
```
```
@@ -452,7 +454,8 @@ def yolov3_loss(x,
           x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32')
           gtbox = fluid.layers.data(name='gtbox', shape=[6, 5], dtype='float32')
           gtlabel = fluid.layers.data(name='gtlabel', shape=[6, 1], dtype='int32')
-          anchors = [10, 13, 16, 30, 33, 23]
+          anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
+          anchors = [0, 1, 2]
           loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, class_num=80
                                           anchors=anchors, ignore_thresh=0.5)
     """
```
```
@@ -466,6 +469,8 @@ def yolov3_loss(x,
         raise TypeError("Input gtlabel of yolov3_loss must be Variable")
     if not isinstance(anchors, list) and not isinstance(anchors, tuple):
         raise TypeError("Attr anchors of yolov3_loss must be list or tuple")
+    if not isinstance(anchor_mask, list) and not isinstance(anchor_mask, tuple):
+        raise TypeError("Attr anchor_mask of yolov3_loss must be list or tuple")
     if not isinstance(class_num, int):
         raise TypeError("Attr class_num of yolov3_loss must be an integer")
     if not isinstance(ignore_thresh, float):
```
```
@@ -480,9 +485,10 @@ def yolov3_loss(x,
     attrs = {
         "anchors": anchors,
+        "anchor_mask": anchor_mask,
         "class_num": class_num,
         "ignore_thresh": ignore_thresh,
-        "input_size": input_size,
+        "downsample": downsample,
     }

     helper.append_op(
```
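A hedged usage sketch of the updated Python API: `input_size` is gone, and callers now pass `anchor_mask` plus the `downsample` ratio of the loss level. The shapes below follow the unit tests rather than the partially stale docstring example above, and the chosen mask/values are illustrative.

```python
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32')
gtbox = fluid.layers.data(name='gtbox', shape=[6, 4], dtype='float32')
gtlabel = fluid.layers.data(name='gtlabel', shape=[6], dtype='int32')

# 9 anchors for the whole network; this loss level uses mask [6, 7, 8] and
# downsample=32, i.e. the coarsest YOLOv3 detection head (255 = 3 * (5 + 80)).
anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
           59, 119, 116, 90, 156, 198, 373, 326]
loss = fluid.layers.yolov3_loss(
    x=x,
    gtbox=gtbox,
    gtlabel=gtlabel,
    anchors=anchors,
    anchor_mask=[6, 7, 8],
    class_num=80,
    ignore_thresh=0.7,
    downsample=32)
```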
python/paddle/fluid/tests/test_detection.py
```
@@ -463,8 +463,8 @@ class TestYoloDetection(unittest.TestCase):
             x = layers.data(name='x', shape=[30, 7, 7], dtype='float32')
             gtbox = layers.data(name='gtbox', shape=[10, 4], dtype='float32')
             gtlabel = layers.data(name='gtlabel', shape=[10], dtype='int32')
-            loss = layers.yolov3_loss(x, gtbox, gtlabel, [10, 13, 30, 13], 10, 0.7, 416)
+            loss = layers.yolov3_loss(x, gtbox, gtlabel, [10, 13, 30, 13], [0, 1], 10, 0.7, 32)
             self.assertIsNotNone(loss)
```
python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py
```
@@ -22,32 +22,42 @@ from op_test import OpTest
from paddle.fluid import core


def l1loss(x, y, weight):
    n = x.shape[0]
    x = x.reshape((n, -1))
    y = y.reshape((n, -1))
    weight = weight.reshape((n, -1))
    return (np.abs(y - x) * weight).sum(axis=1)


# def l1loss(x, y, weight):
#     n = x.shape[0]
#     x = x.reshape((n, -1))
#     y = y.reshape((n, -1))
#     weight = weight.reshape((n, -1))
#     return (np.abs(y - x) * weight).sum(axis=1)
#
#
# def mse(x, y, weight):
#     n = x.shape[0]
#     x = x.reshape((n, -1))
#     y = y.reshape((n, -1))
#     weight = weight.reshape((n, -1))
#     return ((y - x)**2 * weight).sum(axis=1)
#
#
# def sce(x, label, weight):
#     n = x.shape[0]
#     x = x.reshape((n, -1))
#     label = label.reshape((n, -1))
#     weight = weight.reshape((n, -1))
#     sigmoid_x = expit(x)
#     term1 = label * np.log(sigmoid_x)
#     term2 = (1.0 - label) * np.log(1.0 - sigmoid_x)
#     return ((-term1 - term2) * weight).sum(axis=1)


def mse(x, y, weight):
    n = x.shape[0]
    x = x.reshape((n, -1))
    y = y.reshape((n, -1))
    weight = weight.reshape((n, -1))
    return ((y - x)**2 * weight).sum(axis=1)


def l1loss(x, y):
    return abs(x - y)


def sce(x, label, weight):
    n = x.shape[0]
    x = x.reshape((n, -1))
    label = label.reshape((n, -1))
    weight = weight.reshape((n, -1))


def sce(x, label):
    sigmoid_x = expit(x)
    term1 = label * np.log(sigmoid_x)
    term2 = (1.0 - label) * np.log(1.0 - sigmoid_x)
    return ((-term1 - term2) * weight).sum(axis=1)
    return -term1 - term2


def box_iou(box1, box2):
```
```
@@ -160,6 +170,121 @@ def YoloV3Loss(x, gtbox, gtlabel, attrs):
    return loss_x + loss_y + loss_w + loss_h + loss_obj + loss_class


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-1.0 * x))


def batch_xywh_box_iou(box1, box2):
    b1_left = box1[:, :, 0] - box1[:, :, 2] / 2
    b1_right = box1[:, :, 0] + box1[:, :, 2] / 2
    b1_top = box1[:, :, 1] - box1[:, :, 3] / 2
    b1_bottom = box1[:, :, 1] + box1[:, :, 3] / 2

    b2_left = box2[:, :, 0] - box2[:, :, 2] / 2
    b2_right = box2[:, :, 0] + box2[:, :, 2] / 2
    b2_top = box2[:, :, 1] - box2[:, :, 3] / 2
    b2_bottom = box2[:, :, 1] + box2[:, :, 3] / 2

    left = np.maximum(b1_left[:, :, np.newaxis], b2_left[:, np.newaxis, :])
    right = np.minimum(b1_right[:, :, np.newaxis], b2_right[:, np.newaxis, :])
    top = np.maximum(b1_top[:, :, np.newaxis], b2_top[:, np.newaxis, :])
    bottom = np.minimum(b1_bottom[:, :, np.newaxis], b2_bottom[:, np.newaxis, :])

    inter_w = np.clip(right - left, 0., 1.)
    inter_h = np.clip(bottom - top, 0., 1.)
    inter_area = inter_w * inter_h

    b1_area = (b1_right - b1_left) * (b1_bottom - b1_top)
    b2_area = (b2_right - b2_left) * (b2_bottom - b2_top)
    union = b1_area[:, :, np.newaxis] + b2_area[:, np.newaxis, :] - inter_area

    return inter_area / union


def YOLOv3Loss(x, gtbox, gtlabel, attrs):
    n, c, h, w = x.shape
    b = gtbox.shape[1]
    anchors = attrs['anchors']
    an_num = len(anchors) // 2
    anchor_mask = attrs['anchor_mask']
    mask_num = len(anchor_mask)
    class_num = attrs["class_num"]
    ignore_thresh = attrs['ignore_thresh']
    downsample = attrs['downsample']
    input_size = downsample * h
    x = x.reshape((n, mask_num, 5 + class_num, h, w)).transpose((0, 1, 3, 4, 2))
    loss = np.zeros((n)).astype('float32')

    pred_box = x[:, :, :, :, :4].copy()
    grid_x = np.tile(np.arange(w).reshape((1, w)), (h, 1))
    grid_y = np.tile(np.arange(h).reshape((h, 1)), (1, w))
    pred_box[:, :, :, :, 0] = (grid_x + sigmoid(pred_box[:, :, :, :, 0])) / w
    pred_box[:, :, :, :, 1] = (grid_y + sigmoid(pred_box[:, :, :, :, 1])) / h

    mask_anchors = []
    for m in anchor_mask:
        mask_anchors.append((anchors[2 * m], anchors[2 * m + 1]))
    anchors_s = np.array(
        [(an_w / input_size, an_h / input_size) for an_w, an_h in mask_anchors])
    anchor_w = anchors_s[:, 0:1].reshape((1, mask_num, 1, 1))
    anchor_h = anchors_s[:, 1:2].reshape((1, mask_num, 1, 1))
    pred_box[:, :, :, :, 2] = np.exp(pred_box[:, :, :, :, 2]) * anchor_w
    pred_box[:, :, :, :, 3] = np.exp(pred_box[:, :, :, :, 3]) * anchor_h

    pred_box = pred_box.reshape((n, -1, 4))
    pred_obj = x[:, :, :, :, 4].reshape((n, -1))
    objness = np.zeros(pred_box.shape[:2])
    ious = batch_xywh_box_iou(pred_box, gtbox)
    ious_max = np.max(ious, axis=-1)
    objness = np.where(ious_max > ignore_thresh, -np.ones_like(objness), objness)

    gtbox_shift = gtbox.copy()
    gtbox_shift[:, :, 0] = 0
    gtbox_shift[:, :, 1] = 0

    anchors = [(anchors[2 * i], anchors[2 * i + 1]) for i in range(0, an_num)]
    anchors_s = np.array(
        [(an_w / input_size, an_h / input_size) for an_w, an_h in anchors])
    anchor_boxes = np.concatenate([np.zeros_like(anchors_s), anchors_s], axis=-1)
    anchor_boxes = np.tile(anchor_boxes[np.newaxis, :, :], (n, 1, 1))
    ious = batch_xywh_box_iou(gtbox_shift, anchor_boxes)
    iou_matches = np.argmax(ious, axis=-1)
    for i in range(n):
        for j in range(b):
            if gtbox[i, j, 2:].sum() == 0:
                continue

            if iou_matches[i, j] not in anchor_mask:
                continue
            an_idx = anchor_mask.index(iou_matches[i, j])

            gi = int(gtbox[i, j, 0] * w)
            gj = int(gtbox[i, j, 1] * h)

            tx = gtbox[i, j, 0] * w - gi
            ty = gtbox[i, j, 1] * w - gj
            tw = np.log(gtbox[i, j, 2] * input_size / mask_anchors[an_idx][0])
            th = np.log(gtbox[i, j, 3] * input_size / mask_anchors[an_idx][1])
            scale = 2.0 - gtbox[i, j, 2] * gtbox[i, j, 3]
            loss[i] += sce(x[i, an_idx, gj, gi, 0], tx) * scale
            loss[i] += sce(x[i, an_idx, gj, gi, 1], ty) * scale
            loss[i] += l1loss(x[i, an_idx, gj, gi, 2], tw) * scale
            loss[i] += l1loss(x[i, an_idx, gj, gi, 3], th) * scale

            objness[i, an_idx * h * w + gj * w + gi] = 1

            for label_idx in range(class_num):
                loss[i] += sce(x[i, an_idx, gj, gi, 5 + label_idx],
                               int(label_idx == gtlabel[i, j]))

        for j in range(mask_num * h * w):
            if objness[i, j] >= 0:
                loss[i] += sce(pred_obj[i, j], objness[i, j])

    return loss


class TestYolov3LossOp(OpTest):
    def setUp(self):
        self.initTestCase()
```
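For reference, the numpy implementation above can be driven directly with random data shaped like the updated test case below; this driver is illustrative only, and the gtlabel shape is assumed since it is not spelled out in this hunk.

```python
import numpy as np

# Shapes mirror initTestCase: batch 3, anchor_mask [0, 1, 2], class_num 5,
# a 5x5 grid and up to 10 ground-truth boxes per image.
attrs = {
    "anchors": [10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
                59, 119, 116, 90, 156, 198, 373, 326],
    "anchor_mask": [0, 1, 2],
    "class_num": 5,
    "ignore_thresh": 0.7,
    "downsample": 32,
}
x = np.random.random((3, 3 * (5 + 5), 5, 5)).astype('float32')
gtbox = np.random.random((3, 10, 4)).astype('float32')
gtlabel = np.random.randint(0, 5, (3, 10)).astype('int32')
loss = YOLOv3Loss(x, gtbox, gtlabel, attrs)  # one scalar loss per image, shape (3,)
```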
```
@@ -171,13 +296,14 @@ class TestYolov3LossOp(OpTest):
         self.attrs = {
             "anchors": self.anchors,
+            "anchor_mask": self.anchor_mask,
             "class_num": self.class_num,
             "ignore_thresh": self.ignore_thresh,
-            "input_size": self.input_size,
+            "downsample": self.downsample,
         }

         self.inputs = {'X': x, 'GTBox': gtbox, 'GTLabel': gtlabel}
-        self.outputs = {'Loss': YoloV3Loss(x, gtbox, gtlabel, self.attrs)}
+        self.outputs = {'Loss': YOLOv3Loss(x, gtbox, gtlabel, self.attrs)}

     def test_check_output(self):
         place = core.CPUPlace()
```
```
@@ -189,15 +315,19 @@ class TestYolov3LossOp(OpTest):
             place, ['X'],
             'Loss',
             no_grad_set=set(["GTBox", "GTLabel"]),
-            max_relative_error=0.31)
+            max_relative_error=0.15)

     def initTestCase(self):
-        self.anchors = [12, 12]
+        self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
+        self.anchor_mask = [0, 1, 2]
         self.class_num = 5
-        self.ignore_thresh = 0.5
-        self.input_size = 416
-        self.x_shape = (1, len(self.anchors) // 2 * (5 + self.class_num), 3, 3)
-        self.gtbox_shape = (1, 5, 4)
+        self.ignore_thresh = 0.7
+        self.downsample = 32
+        self.x_shape = (3, len(self.anchor_mask) * (5 + self.class_num), 5, 5)
+        self.gtbox_shape = (3, 10, 4)


 if __name__ == "__main__":
```