Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
b305629c
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2305
Star
20932
Fork
5423
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
b305629c
编写于
4月 26, 2023
作者:
陈
陈沧夜
提交者:
GitHub
4月 26, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove *npu.cc (#53342)
上级
cf6ed7cb
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
0 addition
and
1154 deletion
+0
-1154
paddle/fluid/operators/detection/box_coder_op_npu.cc
paddle/fluid/operators/detection/box_coder_op_npu.cc
+0
-448
paddle/fluid/operators/detection/density_prior_box_op_npu.cc
paddle/fluid/operators/detection/density_prior_box_op_npu.cc
+0
-396
paddle/fluid/operators/detection/iou_similarity_op_npu.cc
paddle/fluid/operators/detection/iou_similarity_op_npu.cc
+0
-204
paddle/fluid/operators/detection/prior_box_op_npu.cc
paddle/fluid/operators/detection/prior_box_op_npu.cc
+0
-106
未找到文件。
paddle/fluid/operators/detection/box_coder_op_npu.cc
已删除
100644 → 0
浏览文件 @
cf6ed7cb
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/kernels/impl/box_coder.h"
namespace
paddle
{
namespace
operators
{
// Helper that wraps the CANN (Ascend NPU) elementwise / matmul / slice
// operators used by the box_coder NPU kernel. Value-returning methods
// allocate their result tensor on `place`; the *Void variants write into a
// caller-provided tensor instead. Every call enqueues exactly one operator
// on the kernel's ACL stream; execution is asynchronous with respect to the
// host — the framework synchronizes the stream before results are consumed.
template <typename T>
struct BoxCoderFunction {
 public:
  explicit BoxCoderFunction(const framework::ExecutionContext& ctx) : ctx(ctx) {
    place = ctx.GetPlace();
    stream = ctx.template device_context<paddle::platform::NPUDeviceContext>()
                 .stream();
  }
  // Elementwise y = x + scalar.
  phi::DenseTensor Adds(const phi::DenseTensor& x, float scalar) {
    phi::DenseTensor y;
    y.mutable_data<T>(x.dims(), place);
    const auto& runner = NpuOpRunner("Adds", {x}, {y}, {{"value", scalar}});
    runner.Run(stream);
    return y;
  }
  // Elementwise y = x * scalar.
  phi::DenseTensor Muls(const phi::DenseTensor& x, float scalar) {
    phi::DenseTensor y;
    y.mutable_data<T>(x.dims(), place);
    const auto& runner = NpuOpRunner("Muls", {x}, {y}, {{"value", scalar}});
    runner.Run(stream);
    return y;
  }
  // Elementwise z = x * y; the result takes x's shape (no broadcasting).
  phi::DenseTensor Mul(const phi::DenseTensor& x, const phi::DenseTensor& y) {
    phi::DenseTensor z;
    z.mutable_data<T>(x.dims(), place);
    const auto& runner = NpuOpRunner("Mul", {x, y}, {z}, {});
    runner.Run(stream);
    return z;
  }
  // z = x - y with NPU broadcasting; `shape` is the broadcasted result shape.
  phi::DenseTensor SubWithBroadCast(const phi::DenseTensor& x,
                                    const phi::DenseTensor& y,
                                    const framework::DDim& shape) {
    phi::DenseTensor z;
    z.mutable_data<T>(shape, place);
    const auto& runner = NpuOpRunner("Sub", {x, y}, {z}, {});
    runner.Run(stream);
    return z;
  }
  // *z = x / y with broadcasting, written into the caller-provided tensor.
  void DivWithBroadCastVoid(const phi::DenseTensor& x,
                            const phi::DenseTensor& y,
                            const framework::DDim& shape,
                            phi::DenseTensor* z) {
    z->mutable_data<T>(shape, place);
    const auto& runner = NpuOpRunner("Div", {x, y}, {*z}, {});
    runner.Run(stream);
  }
  phi::DenseTensor DivWithBroadCast(const phi::DenseTensor& x,
                                    const phi::DenseTensor& y,
                                    const framework::DDim& shape) {
    phi::DenseTensor z;
    DivWithBroadCastVoid(x, y, shape, &z);
    return z;
  }
  // *z = x * y with broadcasting.
  void MulWithBroadCastVoid(const phi::DenseTensor& x,
                            const phi::DenseTensor& y,
                            const framework::DDim& shape,
                            phi::DenseTensor* z) {
    z->mutable_data<T>(shape, place);
    const auto& runner = NpuOpRunner("Mul", {x, y}, {*z}, {});
    runner.Run(stream);
  }
  phi::DenseTensor MulWithBroadCast(const phi::DenseTensor& x,
                                    const phi::DenseTensor& y,
                                    const framework::DDim& shape) {
    phi::DenseTensor z;
    MulWithBroadCastVoid(x, y, shape, &z);
    return z;
  }
  // *z = x + y with broadcasting (CANN "AddV2").
  void AddWithBroadCastVoid(const phi::DenseTensor& x,
                            const phi::DenseTensor& y,
                            const framework::DDim& shape,
                            phi::DenseTensor* z) {
    z->mutable_data<T>(shape, place);
    const auto& runner = NpuOpRunner("AddV2", {x, y}, {*z}, {});
    runner.Run(stream);
  }
  phi::DenseTensor AddWithBroadCast(const phi::DenseTensor& x,
                                    const phi::DenseTensor& y,
                                    const framework::DDim& shape) {
    phi::DenseTensor z;
    AddWithBroadCastVoid(x, y, shape, &z);
    return z;
  }
  // Elementwise absolute value.
  phi::DenseTensor Abs(const phi::DenseTensor& x) {
    phi::DenseTensor y;
    y.mutable_data<T>(x.dims(), place);
    const auto& runner = NpuOpRunner("Abs", {x}, {y}, {});
    runner.Run(stream);
    return y;
  }
  // Natural log, computed as Log1p(x - 1) == log(1 + (x - 1)) == log(x).
  // NOTE(review): presumably chosen for better NPU accuracy near x == 1.
  phi::DenseTensor Log(const phi::DenseTensor& x) {
    phi::DenseTensor t_x_m1 = Adds(x, -1);
    phi::DenseTensor y;
    y.mutable_data<T>(x.dims(), place);
    const auto& runner = NpuOpRunner("Log1p", {t_x_m1}, {y}, {});
    runner.Run(stream);
    return y;
  }
  // Elementwise exponential.
  phi::DenseTensor Exp(const phi::DenseTensor& x) {
    phi::DenseTensor y;
    y.mutable_data<T>(x.dims(), place);
    const auto& runner = NpuOpRunner("Exp", {x}, {y}, {});
    runner.Run(stream);
    return y;
  }
  // 2-D matrix multiply z[M, K] = x[M, N] @ y[N, K]; both inputs must be
  // rank-2 with matching inner dimension.
  phi::DenseTensor Dot(const phi::DenseTensor& x, const phi::DenseTensor& y) {
    auto dim_x = x.dims();
    auto dim_y = y.dims();
    PADDLE_ENFORCE_EQ(
        dim_x.size(),
        2,
        platform::errors::InvalidArgument(
            "x should be a 2-dim tensor, but got %d-dim.", dim_x.size()));
    PADDLE_ENFORCE_EQ(
        dim_y.size(),
        2,
        platform::errors::InvalidArgument(
            "y should be a 2-dim tensor, but got %d-dim.", dim_y.size()));
    PADDLE_ENFORCE_EQ(
        dim_x[1],
        dim_y[0],
        platform::errors::InvalidArgument("Expect dim_x[1] == dim_y[0], but "
                                          "got dim_x[1] = %d, dim_y[0] = %d.",
                                          dim_x[1],
                                          dim_y[0]));
    phi::DenseTensor z;
    z.mutable_data<T>({dim_x[0], dim_y[1]}, place);
    const auto& runner =
        NpuOpRunner("MatMul",
                    {x, y},
                    {z},
                    {{"transpose_x1", false}, {"transpose_x2", false}});
    runner.Run(stream);
    return z;
  }
  // Concatenates `inputs` along `axis` into *output (shape `shape_out`).
  // ConcatD needs explicit per-input names x0, x1, ... plus an "N" attribute.
  void ConcatVoid(const std::vector<phi::DenseTensor>& inputs,
                  const framework::DDim& shape_out,
                  int axis,
                  phi::DenseTensor* output) {
    output->mutable_data<T>(shape_out, place);
    std::vector<std::string> names;
    for (size_t i = 0; i < inputs.size(); i++) {
      names.push_back("x" + std::to_string(i));
    }
    NpuOpRunner runner{
        "ConcatD",
        {inputs},
        {*output},
        {{"concat_dim", axis}, {"N", static_cast<int>(inputs.size())}}};
    runner.AddInputNames(names);
    runner.Run(stream);
  }
  phi::DenseTensor Concat(const std::vector<phi::DenseTensor>& inputs,
                          const framework::DDim& shape_out,
                          int axis) {
    phi::DenseTensor output;
    ConcatVoid(inputs, shape_out, axis, &output);
    return output;
  }
  // Extracts the sub-tensor of `x` starting at `offsets` with extent `size`;
  // `shape` must equal `size` expressed as a DDim.
  phi::DenseTensor Slice(const phi::DenseTensor& x,
                         const std::vector<int>& offsets,
                         const std::vector<int>& size,
                         const framework::DDim& shape) {
    phi::DenseTensor y;
    y.mutable_data<T>(shape, place);
    const auto& runner =
        NpuOpRunner("SliceD", {x}, {y}, {{"offsets", offsets}, {"size", size}});
    runner.Run(stream);
    return y;
  }

 private:
  platform::Place place;
  aclrtStream stream;
  const framework::ExecutionContext& ctx;
};
// Copies a host vector into *tsr (device memory) and reshapes it to `ddim`.
// Blocks until the host-to-device copy has completed, so the tensor is safe
// to use as soon as this returns.
template <typename T>
void Vector2Tensor(const framework::ExecutionContext& ctx,
                   const std::vector<T>& vec,
                   const framework::DDim& ddim,
                   phi::DenseTensor* tsr) {
  framework::TensorFromVector<T>(vec, ctx.device_context(), tsr);
  // TensorFromVector enqueues an async copy; wait before the host buffer may
  // go out of scope in the caller.
  ctx.template device_context<paddle::platform::NPUDeviceContext>().Wait();
  // TensorFromVector leaves the tensor 1-D; apply the requested shape.
  tsr->Resize(ddim);
}
// Encode path of box_coder on NPU.
// For each of the N target boxes (tb) against each of the M prior boxes (pb),
// computes out = ((tb_center - pb_center) / pb_size,
//                 log|tb_size / pb_size|) / variance,
// producing an output of shape [N, M, 4]. Boxes are stored as
// (xmin, ymin, xmax, ymax); centers/sizes are derived via small constant
// matmul kernels (m_aver averages the corners, m_diff subtracts them).
// `norm` == false adds the legacy +1 pixel to sizes and +0.5 to centers.
// Variance comes either from the optional `pbv` tensor or from the 4-element
// `variance` attribute.
template <typename T>
void BoxCoderEnc(const framework::ExecutionContext& ctx,
                 const phi::DenseTensor* tb,
                 const phi::DenseTensor* pb,
                 const phi::DenseTensor* pbv,
                 const bool norm,
                 const std::vector<float>& variance,
                 phi::DenseTensor* out) {
  auto M = pb->dims()[0];
  auto N = tb->dims()[0];
  auto shape_0 = phi::make_ddim({4, 2});
  phi::DenseTensor m_diff;
  phi::DenseTensor m_aver;
  // [4 x 2] matrices: box[1x4] @ m_aver -> (cx', cy'); box @ m_diff -> (w', h')
  // (up to the `norm` offsets applied below).
  std::vector<T> vec_diff = {static_cast<T>(-1),
                             static_cast<T>(0),
                             static_cast<T>(0),
                             static_cast<T>(-1),
                             static_cast<T>(1),
                             static_cast<T>(0),
                             static_cast<T>(0),
                             static_cast<T>(1)};
  std::vector<T> vec_aver = {static_cast<T>(0.5),
                             static_cast<T>(0),
                             static_cast<T>(0),
                             static_cast<T>(0.5),
                             static_cast<T>(0.5),
                             static_cast<T>(0),
                             static_cast<T>(0),
                             static_cast<T>(0.5)};
  Vector2Tensor<T>(ctx, vec_diff, shape_0, &m_diff);
  Vector2Tensor<T>(ctx, vec_aver, shape_0, &m_aver);
  BoxCoderFunction<T> F(ctx);
  // Prior-box centers and sizes.
  phi::DenseTensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5));
  phi::DenseTensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1));
  // Target-box centers and sizes.
  phi::DenseTensor tb_xy = F.Dot(*tb, m_aver);
  phi::DenseTensor tb_wh = F.Adds(F.Dot(*tb, m_diff), (norm ? 0 : 1));
  // Reshape so that tb broadcasts over priors and pb over targets.
  pb_xy.Resize({1, M, 2});
  pb_wh.Resize({1, M, 2});
  tb_xy.Resize({N, 1, 2});
  tb_wh.Resize({N, 1, 2});
  auto shape_half = phi::make_ddim({N, M, 2});
  auto shape_full = phi::make_ddim({N, M, 4});
  // (tb_center - pb_center) / pb_size
  phi::DenseTensor out_xy_0 = F.DivWithBroadCast(
      F.SubWithBroadCast(tb_xy, pb_xy, shape_half), pb_wh, shape_half);
  // log|tb_size / pb_size|
  phi::DenseTensor out_wh_0 =
      F.Log(F.Abs(F.DivWithBroadCast(tb_wh, pb_wh, shape_half)));
  phi::DenseTensor out_0 = F.Concat({out_xy_0, out_wh_0}, shape_full, 2);
  if (pbv) {
    // Per-prior variance tensor.
    F.DivWithBroadCastVoid(out_0, *pbv, shape_full, out);
  } else {
    // Scalar 4-element variance attribute, broadcast over all boxes.
    phi::DenseTensor t_var;
    std::vector<T> vec_var(4);
    for (auto i = 0; i < 4; i++) {
      vec_var[i] = static_cast<T>(variance[i]);
    }
    Vector2Tensor(ctx, vec_var, phi::make_ddim({1, 1, 4}), &t_var);
    F.DivWithBroadCastVoid(out_0, t_var, shape_full, out);
  }
}
// Decode path of box_coder on NPU: inverts the encoding performed by
// BoxCoderEnc. Given encoded target boxes `tb` (rank 3: [N, *, 4]) and prior
// boxes `pb`, recovers corner-format boxes:
//   center = tb[0:2] * var01 * pb_size + pb_center
//   size   = exp(tb[2:4] * var23) * pb_size
//   out    = (center - size/2, center + size/2 - (norm ? 0 : 1))
// Variance comes from `pbv` (per-prior tensor), the 4-element `variance`
// attribute, or defaults to 1 when both are absent. `axis` selects whether
// priors broadcast across rows (0) or columns (1) of the target boxes.
template <typename T>
void BoxCoderDec(const framework::ExecutionContext& ctx,
                 const phi::DenseTensor* tb,
                 const phi::DenseTensor* pb,
                 const phi::DenseTensor* pbv,
                 const bool norm,
                 const std::vector<float>& variance,
                 int axis,
                 phi::DenseTensor* out) {
  auto shape_0 = phi::make_ddim({4, 2});
  phi::DenseTensor m_diff;
  phi::DenseTensor m_aver;
  // Same corner->(center, size) projection matrices as in BoxCoderEnc.
  std::vector<T> vec_diff = {static_cast<T>(-1),
                             static_cast<T>(0),
                             static_cast<T>(0),
                             static_cast<T>(-1),
                             static_cast<T>(1),
                             static_cast<T>(0),
                             static_cast<T>(0),
                             static_cast<T>(1)};
  std::vector<T> vec_aver = {static_cast<T>(0.5),
                             static_cast<T>(0),
                             static_cast<T>(0),
                             static_cast<T>(0.5),
                             static_cast<T>(0.5),
                             static_cast<T>(0),
                             static_cast<T>(0),
                             static_cast<T>(0.5)};
  Vector2Tensor<T>(ctx, vec_diff, shape_0, &m_diff);
  Vector2Tensor<T>(ctx, vec_aver, shape_0, &m_aver);
  BoxCoderFunction<T> F(ctx);
  // Prior-box centers and sizes (legacy +0.5 / +1 offsets when not normalized).
  phi::DenseTensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5));
  phi::DenseTensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1));
  // axis == 0: priors broadcast along dim 0; axis == 1: along dim 1.
  auto pb_resize_shape = axis == 0 ? phi::make_ddim({1, pb->dims()[0], 2})
                                   : phi::make_ddim({pb->dims()[0], 1, 2});
  pb_xy.Resize(pb_resize_shape);
  pb_wh.Resize(pb_resize_shape);
  auto tbox_slice_shape = phi::make_ddim({tb->dims()[0], tb->dims()[1], 2});
  std::vector<int> tbox_slice_size = {
      static_cast<int>(tb->dims()[0]), static_cast<int>(tb->dims()[1]), 2};
  // Split encoded boxes into the center part (channels 0:2) and size part
  // (channels 2:4).
  phi::DenseTensor tbox01 =
      F.Slice(*tb, {0, 0, 0}, tbox_slice_size, tbox_slice_shape);
  phi::DenseTensor tbox23 =
      F.Slice(*tb, {0, 0, 2}, tbox_slice_size, tbox_slice_shape);
  phi::DenseTensor tb_xy;
  phi::DenseTensor tb_wh;
  if (pbv) {
    // Per-prior variance tensor: slice into (var01, var23) halves and
    // reshape so they broadcast the same way as the priors.
    auto pbvt_slice_shape = phi::make_ddim({pbv->dims()[0], 2});
    auto pbvt_resize_shape = axis == 0
                                 ? phi::make_ddim({1, pbv->dims()[0], 2})
                                 : phi::make_ddim({pbv->dims()[0], 1, 2});
    std::vector<int> pbvt_slice_size = {static_cast<int>(pbv->dims()[0]), 2};
    phi::DenseTensor pbv_t01 =
        F.Slice(*pbv, {0, 0}, pbvt_slice_size, pbvt_slice_shape);
    phi::DenseTensor pbv_t23 =
        F.Slice(*pbv, {0, 2}, pbvt_slice_size, pbvt_slice_shape);
    pbv_t01.Resize(pbvt_resize_shape);
    pbv_t23.Resize(pbvt_resize_shape);
    // tb_xy = tbox01 * (pb_wh * var01) + pb_xy
    F.AddWithBroadCastVoid(
        F.MulWithBroadCast(tbox01, F.Mul(pb_wh, pbv_t01), tbox_slice_shape),
        pb_xy,
        tbox_slice_shape,
        &tb_xy);
    // tb_wh = exp(var23 * tbox23) * pb_wh
    F.MulWithBroadCastVoid(
        F.Exp(F.MulWithBroadCast(pbv_t23, tbox23, tbox_slice_shape)),
        pb_wh,
        tbox_slice_shape,
        &tb_wh);
  } else if (variance.empty()) {
    // No variance at all: treat it as 1.
    F.AddWithBroadCastVoid(F.MulWithBroadCast(tbox01, pb_wh, tbox_slice_shape),
                           pb_xy,
                           tbox_slice_shape,
                           &tb_xy);
    F.MulWithBroadCastVoid(F.Exp(tbox23), pb_wh, tbox_slice_shape, &tb_wh);
  } else {
    // Scalar 4-element variance attribute.
    phi::DenseTensor t_var01, t_var23;
    auto t_var_shape = phi::make_ddim({1, 1, 2});
    std::vector<T> vec_var01 = {static_cast<T>(variance[0]),
                                static_cast<T>(variance[1])};
    std::vector<T> vec_var23 = {static_cast<T>(variance[2]),
                                static_cast<T>(variance[3])};
    Vector2Tensor(ctx, vec_var01, t_var_shape, &t_var01);
    Vector2Tensor(ctx, vec_var23, t_var_shape, &t_var23);
    F.AddWithBroadCastVoid(
        F.MulWithBroadCast(tbox01,
                           F.MulWithBroadCast(pb_wh, t_var01, pb_resize_shape),
                           tbox_slice_shape),
        pb_xy,
        tbox_slice_shape,
        &tb_xy);
    F.MulWithBroadCastVoid(
        F.Exp(F.MulWithBroadCast(t_var23, tbox23, tbox_slice_shape)),
        pb_wh,
        tbox_slice_shape,
        &tb_wh);
  }
  // Convert (center, size) back to corner format.
  phi::DenseTensor obox01 =
      F.AddWithBroadCast(tb_xy, F.Muls(tb_wh, -0.5), tbox_slice_shape);
  phi::DenseTensor obox23 =
      F.Adds(F.AddWithBroadCast(tb_xy, F.Muls(tb_wh, 0.5), tbox_slice_shape),
             (norm ? 0 : -1));
  F.ConcatVoid({obox01, obox23}, out->dims(), 2, out);
}
// NPU kernel for the box_coder operator: reads inputs/attributes, validates
// the variance configuration, and dispatches to the encode or decode path.
template <typename T>
class BoxCoderNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // Inputs and output.
    auto* prior_box = ctx.Input<phi::DenseTensor>("PriorBox");
    auto* prior_box_var = ctx.Input<phi::DenseTensor>("PriorBoxVar");
    auto* target_box = ctx.Input<phi::DenseTensor>("TargetBox");
    auto* output_box = ctx.Output<phi::DenseTensor>("OutputBox");

    // Attributes.
    const auto variance = ctx.Attr<std::vector<float>>("variance");
    const int axis = ctx.Attr<int>("axis");

    // The variance may be supplied as a tensor OR as an attribute, never both.
    if (prior_box_var) {
      PADDLE_ENFORCE_EQ(
          variance.empty(),
          true,
          platform::errors::InvalidArgument(
              "Input 'PriorBoxVar' and attribute 'variance'"
              " of BoxCoder operator should not be used at the same time."));
    }
    // When given as an attribute it must hold exactly 4 values.
    if (!(variance.empty())) {
      PADDLE_ENFORCE_EQ(
          static_cast<int>(variance.size()),
          4,
          platform::errors::InvalidArgument(
              "Size of attribute 'variance' in BoxCoder operator"
              " should be 4. But received size is %d",
              variance.size()));
    }
    // Only single-level LoD is supported on the target boxes.
    if (target_box->lod().size()) {
      PADDLE_ENFORCE_EQ(target_box->lod().size(),
                        1,
                        platform::errors::InvalidArgument(
                            "Input 'TargetBox' of BoxCoder operator only"
                            " supports LoD with one level."));
    }

    const auto code_type =
        phi::funcs::GetBoxCodeType(ctx.Attr<std::string>("code_type"));
    const bool normalized = ctx.Attr<bool>("box_normalized");
    const bool do_encode =
        code_type == phi::funcs::BoxCodeType::kEncodeCenterSize;

    if (do_encode) {
      BoxCoderEnc<T>(ctx,
                     target_box,
                     prior_box,
                     prior_box_var,
                     normalized,
                     variance,
                     output_box);
    } else {
      BoxCoderDec<T>(ctx,
                     target_box,
                     prior_box,
                     prior_box_var,
                     normalized,
                     variance,
                     axis,
                     output_box);
    }
  }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

// Register the NPU box_coder kernel for float32 and float16.
REGISTER_OP_NPU_KERNEL(box_coder,
                       ops::BoxCoderNPUKernel<float>,
                       ops::BoxCoderNPUKernel<plat::float16>);
paddle/fluid/operators/detection/density_prior_box_op_npu.cc
已删除
100644 → 0
浏览文件 @
cf6ed7cb
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/detection/density_prior_box_op.h"
namespace
paddle
{
namespace
operators
{
using fp16 = paddle::platform::float16;

// Wraps the CANN (Ascend NPU) primitives needed by the density_prior_box
// kernel. Unlike BoxCoderFunction, every output tensor here must already be
// allocated by the caller ("init first"); each call enqueues one operator on
// the kernel's ACL stream.
template <typename T>
struct DensityPriorBoxFunction {
 public:
  explicit DensityPriorBoxFunction(const framework::ExecutionContext& ctx)
      : ctx(ctx) {
    place = ctx.GetPlace();
    stream = ctx.template device_context<platform::NPUDeviceContext>().stream();
    // t0/t1 hold the constant start (0) and delta (1) fed to every Range
    // call; tn holds the (varying) limit and is refilled per call.
    t0.mutable_data<float>({1}, place);
    t1.mutable_data<float>({1}, place);
    tn.mutable_data<float>({1}, place);
    FillNpuTensorWithConstant<float>(&t0, static_cast<float>(0));
    FillNpuTensorWithConstant<float>(&t1, static_cast<float>(1));
  }
  // *x = [0, 1, ..., n-1].
  void Arange(int n, phi::DenseTensor* x) {
    // x should be init first
    FillNpuTensorWithConstant<float>(&tn, static_cast<float>(n));
    const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {*x}, {});
    runner.Run(stream);
  }
  // *z = *x + *y (elementwise, NPU broadcasting).
  void Add(const phi::DenseTensor* x,
           const phi::DenseTensor* y,
           phi::DenseTensor* z) {
    // z should be init first
    const auto& runner = NpuOpRunner("AddV2", {*x, *y}, {*z}, {});
    runner.Run(stream);
  }
  // *y = *x cast to y's dtype.
  void Cast(const phi::DenseTensor* x, phi::DenseTensor* y) {
    auto dst_dtype =
        ConvertToNpuDtype(framework::TransToProtoVarType(y->type()));
    const auto& runner = NpuOpRunner(
        "Cast", {*x}, {*y}, {{"dst_type", static_cast<int>(dst_dtype)}});
    runner.Run(stream);
  }
  // *z = *x - *y.
  void Sub(const phi::DenseTensor* x,
           const phi::DenseTensor* y,
           phi::DenseTensor* z) {
    // z should be init first
    const auto& runner = NpuOpRunner("Sub", {*x, *y}, {*z}, {});
    runner.Run(stream);
  }
  // *z = *x * *y.
  void Mul(const phi::DenseTensor* x,
           const phi::DenseTensor* y,
           phi::DenseTensor* z) {
    // z should be init first
    const auto& runner = NpuOpRunner("Mul", {*x, *y}, {*z}, {});
    runner.Run(stream);
  }
  // *y = *x + scalar.
  void Adds(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) {
    // y should be init first
    const auto& runner = NpuOpRunner("Adds", {*x}, {*y}, {{"value", scalar}});
    runner.Run(stream);
  }
  // *y = *x * scalar.
  void Muls(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) {
    // y should be init first
    const auto& runner = NpuOpRunner("Muls", {*x}, {*y}, {{"value", scalar}});
    runner.Run(stream);
  }
  // *z = max(*x, *y) elementwise.
  void Maximum(const phi::DenseTensor* x,
               const phi::DenseTensor* y,
               phi::DenseTensor* z) {
    // z should be init first
    const auto& runner = NpuOpRunner("Maximum", {*x, *y}, {*z}, {});
    runner.Run(stream);
  }
  // *z = min(*x, *y) elementwise.
  void Minimum(const phi::DenseTensor* x,
               const phi::DenseTensor* y,
               phi::DenseTensor* z) {
    // z should be init first
    const auto& runner = NpuOpRunner("Minimum", {*x, *y}, {*z}, {});
    runner.Run(stream);
  }
  // Concatenates `inputs` along `axis` into *output; ConcatD requires
  // explicit per-input names x0, x1, ... and an "N" attribute.
  void Concat(const std::vector<phi::DenseTensor>& inputs,
              int axis,
              phi::DenseTensor* output) {
    // output should be init first
    std::vector<std::string> names;
    for (size_t i = 0; i < inputs.size(); i++) {
      names.push_back("x" + std::to_string(i));
    }
    NpuOpRunner runner{
        "ConcatD",
        {inputs},
        {*output},
        {{"concat_dim", axis}, {"N", static_cast<int>(inputs.size())}}};
    runner.AddInputNames(names);
    runner.Run(stream);
  }
  // Tiles *x into *y by `multiples`; degenerates to a plain device copy when
  // the shapes already match (TileD would reject an all-ones multiple? —
  // NOTE(review): the short-circuit's motivation is not visible here).
  void Tile(const phi::DenseTensor* x,
            phi::DenseTensor* y,
            const std::vector<int>& multiples) {
    // y should be init first
    if (x->dims() == y->dims()) {
      framework::TensorCopy(
          *x,
          place,
          ctx.template device_context<platform::NPUDeviceContext>(),
          y);
      return;
    }
    const auto& runner =
        NpuOpRunner("TileD", {*x}, {*y}, {{"multiples", multiples}});
    runner.Run(stream);
  }
  // Copies a host float vector into *tsr_dst (dtype T). This primary
  // template copies directly; an fp16 specialization casts via fp32.
  void FloatVec2Tsr(const std::vector<float>& vec, phi::DenseTensor* tsr_dst) {
    framework::TensorFromVector<T>(vec, ctx.device_context(), tsr_dst);
    ctx.template device_context<platform::NPUDeviceContext>().Wait();
  }

 private:
  platform::Place place;
  aclrtStream stream;
  const framework::ExecutionContext& ctx;
  phi::DenseTensor t0;  // constant 0 (Range start)
  phi::DenseTensor t1;  // constant 1 (Range delta)
  phi::DenseTensor tn;  // Range limit, refilled per Arange call
};
// fp16 specialization of Arange: the NPU Range op is driven with float32
// inputs (t0/tn/t1 are float), so generate in fp32 and cast down to fp16.
template <>
void DensityPriorBoxFunction<fp16>::Arange(int n, phi::DenseTensor* x) {
  phi::DenseTensor x_fp32(phi::DataType::FLOAT32);
  x_fp32.mutable_data<float>(x->dims(), place);
  FillNpuTensorWithConstant<float>(&tn, static_cast<float>(n));
  const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {x_fp32}, {});
  runner.Run(stream);
  Cast(&x_fp32, x);
}
// fp16 specialization of FloatVec2Tsr: the host data is float, so copy it to
// a temporary fp32 device tensor first, then cast to the fp16 destination.
template <>
void DensityPriorBoxFunction<fp16>::FloatVec2Tsr(
    const std::vector<float>& vec, phi::DenseTensor* tsr_dst) {
  phi::DenseTensor tsr_fp32(phi::DataType::FLOAT32);
  tsr_fp32.mutable_data<float>(tsr_dst->dims(), place);
  framework::TensorFromVector<float>(vec, ctx.device_context(), &tsr_fp32);
  // Wait for the async host-to-device copy before `vec`'s buffer may vanish.
  ctx.template device_context<paddle::platform::NPUDeviceContext>().Wait();
  Cast(&tsr_fp32, tsr_dst);
}
// NPU kernel for density_prior_box: generates, for every cell of a
// [layer_h x layer_w] feature map, a dense grid of prior boxes (one
// densities[i] x densities[i] sub-grid per fixed size, replicated for each
// fixed ratio), normalized to [0, 1] image coordinates. Outputs:
//   Boxes     [layer_h, layer_w, num_priors, 4] (xmin, ymin, xmax, ymax)
//   Variances same shape, the 4 `variances` attr values tiled everywhere.
template <typename T>
class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<phi::DenseTensor>("Input");
    auto* image = ctx.Input<phi::DenseTensor>("Image");
    auto* boxes = ctx.Output<phi::DenseTensor>("Boxes");
    auto* vars = ctx.Output<phi::DenseTensor>("Variances");
    auto variances = ctx.Attr<std::vector<float>>("variances");
    auto clip = ctx.Attr<bool>("clip");
    auto fixed_sizes = ctx.Attr<std::vector<float>>("fixed_sizes");
    auto fixed_ratios = ctx.Attr<std::vector<float>>("fixed_ratios");
    auto densities = ctx.Attr<std::vector<int>>("densities");
    float step_w = ctx.Attr<float>("step_w");
    float step_h = ctx.Attr<float>("step_h");
    float offset = ctx.Attr<float>("offset");
    // Input/Image are NCHW; take H and W.
    int image_w = image->dims()[3];
    int image_h = image->dims()[2];
    int layer_w = input->dims()[3];
    int layer_h = input->dims()[2];
    auto _type = input->dtype();
    auto place = ctx.GetPlace();
    DensityPriorBoxFunction<T> F(ctx);
    // h = [0..layer_h), w = [0..layer_w): per-cell row/column indices.
    phi::DenseTensor h(_type);
    h.mutable_data<T>({layer_h}, place);
    phi::DenseTensor w(_type);
    w.mutable_data<T>({layer_w}, place);
    F.Arange(layer_h, &h);
    F.Arange(layer_w, &w);
    h.Resize({layer_h, 1, 1, 1});
    w.Resize({1, layer_w, 1, 1});
    // Default step: one feature-map cell covers image_size / layer_size px.
    step_w = step_w > 0 ? step_w : static_cast<float>(image_w) / layer_w;
    step_h = step_h > 0 ? step_h : static_cast<float>(image_h) / layer_h;
    int step_average = static_cast<int>((step_w + step_h) * 0.5);
    int ratios_size = fixed_ratios.size();
    // Total sub-grid cells per ratio across all densities.
    int num_priors_per_ratio = 0;
    for (size_t i = 0; i < densities.size(); ++i) {
      num_priors_per_ratio += densities[i] * densities[i];
    }
    // Flat per-prior tables (length ratios_size * num_priors_per_ratio):
    //   di/dj: sub-grid row/col index, shifts: sub-grid cell pitch,
    //   box_w/h_ratio: half box extents per fixed ratio.
    phi::DenseTensor di(_type);
    phi::DenseTensor dj(_type);
    phi::DenseTensor shifts(_type);
    phi::DenseTensor box_w_ratio(_type);
    phi::DenseTensor box_h_ratio(_type);
    di.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
    dj.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
    shifts.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
    box_w_ratio.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
    box_h_ratio.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
    int64_t start = 0;
    std::vector<int> vec_tile = {0, 0, 0};
    for (size_t i = 0; i < densities.size(); ++i) {
      // Range = start:start+ratios_size*density_sqr, density = densities[i]
      int density_sqr = densities[i] * densities[i];
      // shifts[Range] = [step_average/density]*ratios_size*density_sqr
      phi::DenseTensor shifts_part =
          shifts.Slice(start, start + ratios_size * density_sqr);
      FillNpuTensorWithConstant<T>(&shifts_part,
                                   static_cast<T>(step_average / densities[i]));
      // di[Range] = [ i // density for i in range(density_sqr) ] * ratios_size
      // dj[Range] = [ i % density for i in range(density_sqr) ] * ratios_size
      phi::DenseTensor di_part =
          di.Slice(start, start + ratios_size * density_sqr);
      phi::DenseTensor dj_part =
          dj.Slice(start, start + ratios_size * density_sqr);
      if (densities[i] > 1) {
        di_part.Resize({ratios_size, densities[i], densities[i]});
        dj_part.Resize({ratios_size, densities[i], densities[i]});
        phi::DenseTensor range_n(_type);
        range_n.mutable_data<T>({densities[i]}, place);
        F.Arange(densities[i], &range_n);
        // Broadcast [0..density) along rows for di, along columns for dj.
        range_n.Resize({1, densities[i], 1});
        vec_tile[0] = ratios_size;
        vec_tile[1] = 1;
        vec_tile[2] = densities[i];
        F.Tile(&range_n, &di_part, vec_tile);
        range_n.Resize({1, 1, densities[i]});
        vec_tile[1] = densities[i];
        vec_tile[2] = 1;
        F.Tile(&range_n, &dj_part, vec_tile);
      } else {
        // density == 1: single centered cell, no offsets.
        FillNpuTensorWithConstant<T>(&di_part, static_cast<T>(0));
        FillNpuTensorWithConstant<T>(&dj_part, static_cast<T>(0));
      }
      int start_box_ratio = start;
      for (float ar : fixed_ratios) {
        // Range_mini = start_box_ratio:start_box_ratio+density_sqr
        // box_h_ratio[Range_mini] = [fixed_sizes[i] * sqrt(ar)] * density_sqr
        // box_w_ratio[Range_mini] = [fixed_sizes[i] / sqrt(ar)] * density_sqr
        phi::DenseTensor box_h_ratio_part =
            box_h_ratio.Slice(start_box_ratio, start_box_ratio + density_sqr);
        phi::DenseTensor box_w_ratio_part =
            box_w_ratio.Slice(start_box_ratio, start_box_ratio + density_sqr);
        FillNpuTensorWithConstant<T>(&box_w_ratio_part,
                                     static_cast<T>(fixed_sizes[i] * sqrt(ar)));
        FillNpuTensorWithConstant<T>(&box_h_ratio_part,
                                     static_cast<T>(fixed_sizes[i] / sqrt(ar)));
        start_box_ratio += density_sqr;
      }
      start = start_box_ratio;
    }
    di.Resize({1, 1, ratios_size * num_priors_per_ratio, 1});
    dj.Resize({1, 1, ratios_size * num_priors_per_ratio, 1});
    shifts.Resize({1, 1, ratios_size * num_priors_per_ratio, 1});
    box_w_ratio.Resize({1, 1, ratios_size * num_priors_per_ratio, 1});
    box_h_ratio.Resize({1, 1, ratios_size * num_priors_per_ratio, 1});
    // c_x = (w+offset)*step_w - 0.5*step_average + 0.5*shifts + dj*shifts
    // c_y = (h+offset)*step_h - 0.5*step_average + 0.5*shifts + di*shifts
    phi::DenseTensor c_x(_type);
    phi::DenseTensor c_y(_type);
    auto dim0 =
        phi::make_ddim({1, layer_w, ratios_size * num_priors_per_ratio, 1});
    auto dim1 =
        phi::make_ddim({layer_h, 1, ratios_size * num_priors_per_ratio, 1});
    c_x.mutable_data<T>(dim0, place);
    c_y.mutable_data<T>(dim1, place);
    F.Adds(&w, offset, &w);
    F.Muls(&w, step_w, &w);
    F.Adds(&w,
           static_cast<float>(-step_average) * static_cast<float>(0.5),
           &w);
    F.Adds(&h, offset, &h);
    F.Muls(&h, step_h, &h);
    F.Adds(&h,
           static_cast<float>(-step_average) * static_cast<float>(0.5),
           &h);
    F.Mul(&di, &shifts, &di);
    F.Mul(&dj, &shifts, &dj);
    F.Muls(&shifts, static_cast<float>(0.5), &shifts);
    F.Add(&di, &shifts, &di);
    F.Add(&dj, &shifts, &dj);
    F.Add(&dj, &w, &c_x);
    F.Add(&di, &h, &c_y);
    // box_w_ratio = box_w_ratio / 2
    // box_h_ratio = box_h_ratio / 2
    F.Muls(&box_w_ratio, static_cast<float>(0.5), &box_w_ratio);
    F.Muls(&box_h_ratio, static_cast<float>(0.5), &box_h_ratio);
    // Scalar 0/1 tensors used for clamping to the [0, 1] image range.
    phi::DenseTensor zero_t(_type);
    phi::DenseTensor one_t(_type);
    zero_t.mutable_data<T>({1}, place);
    one_t.mutable_data<T>({1}, place);
    FillNpuTensorWithConstant<T>(&zero_t, static_cast<T>(0));
    FillNpuTensorWithConstant<T>(&one_t, static_cast<T>(1));
    phi::DenseTensor outbox0(_type);
    phi::DenseTensor outbox1(_type);
    phi::DenseTensor outbox2(_type);
    phi::DenseTensor outbox3(_type);
    outbox0.mutable_data<T>(dim0, place);
    outbox1.mutable_data<T>(dim1, place);
    outbox2.mutable_data<T>(dim0, place);
    outbox3.mutable_data<T>(dim1, place);
    // outbox0 = max ( (c_x - box_w_ratio)/image_w, 0 )
    // outbox1 = max ( (c_y - box_h_ratio)/image_h, 0 )
    // outbox2 = min ( (c_x + box_w_ratio)/image_w, 1 )
    // outbox3 = min ( (c_y + box_h_ratio)/image_h, 1 )
    F.Sub(&c_x, &box_w_ratio, &outbox0);
    F.Sub(&c_y, &box_h_ratio, &outbox1);
    F.Add(&c_x, &box_w_ratio, &outbox2);
    F.Add(&c_y, &box_h_ratio, &outbox3);
    F.Muls(&outbox0, static_cast<float>(1.0 / image_w), &outbox0);
    F.Muls(&outbox1, static_cast<float>(1.0 / image_h), &outbox1);
    F.Muls(&outbox2, static_cast<float>(1.0 / image_w), &outbox2);
    F.Muls(&outbox3, static_cast<float>(1.0 / image_h), &outbox3);
    F.Maximum(&outbox0, &zero_t, &outbox0);
    F.Maximum(&outbox1, &zero_t, &outbox1);
    F.Minimum(&outbox2, &one_t, &outbox2);
    F.Minimum(&outbox3, &one_t, &outbox3);
    if (clip) {
      // outbox0 = min ( outbox0, 1 )
      // outbox1 = min ( outbox1, 1 )
      // outbox2 = max ( outbox2, 0 )
      // outbox3 = max ( outbox3, 0 )
      F.Minimum(&outbox0, &one_t, &outbox0);
      F.Minimum(&outbox1, &one_t, &outbox1);
      F.Maximum(&outbox2, &zero_t, &outbox2);
      F.Maximum(&outbox3, &zero_t, &outbox3);
    }
    auto out_dim = phi::make_ddim(
        {layer_h, layer_w, ratios_size * num_priors_per_ratio, 4});
    boxes->mutable_data<T>(place);
    vars->mutable_data<T>(place);
    // Views over the outputs reshaped to [H, W, priors, 4] for concat/tile.
    phi::DenseTensor boxes_share(_type);
    phi::DenseTensor vars_share(_type);
    boxes_share.ShareDataWith(*boxes);
    boxes_share.Resize(out_dim);
    vars_share.ShareDataWith(*vars);
    vars_share.Resize(out_dim);
    phi::DenseTensor box0(_type);
    phi::DenseTensor box1(_type);
    phi::DenseTensor box2(_type);
    phi::DenseTensor box3(_type);
    // out_dim = {layer_h, layer_w, ratios_size*num_priors_per_ratio, 1}
    out_dim[3] = 1;
    box0.mutable_data<T>(out_dim, place);
    box1.mutable_data<T>(out_dim, place);
    box2.mutable_data<T>(out_dim, place);
    box3.mutable_data<T>(out_dim, place);
    // Expand each coordinate plane to the full [H, W, priors, 1] grid:
    // x-planes repeat over rows, y-planes over columns.
    std::vector<int> vec_exp_out02 = {layer_h, 1, 1, 1};
    std::vector<int> vec_exp_out13 = {1, layer_w, 1, 1};
    F.Tile(&outbox0, &box0, vec_exp_out02);
    F.Tile(&outbox1, &box1, vec_exp_out13);
    F.Tile(&outbox2, &box2, vec_exp_out02);
    F.Tile(&outbox3, &box3, vec_exp_out13);
    F.Concat({box0, box1, box2, box3}, 3, &boxes_share);
    std::vector<int> multiples = {
        layer_h, layer_w, ratios_size * num_priors_per_ratio, 1};
    phi::DenseTensor variances_t(_type);
    // variances.size() == 4
    variances_t.mutable_data<T>({4}, place);
    F.FloatVec2Tsr(variances, &variances_t);
    F.Tile(&variances_t, &vars_share, multiples);
  }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

// Register the NPU kernel for the density_prior_box op for fp16 and fp32.
// Kernel template argument is the element type only; the device context is
// implied by the NPU registration macro.
REGISTER_OP_NPU_KERNEL(density_prior_box,
                       ops::DensityPriorBoxOpNPUKernel<plat::float16>,
                       ops::DensityPriorBoxOpNPUKernel<float>);
paddle/fluid/operators/detection/iou_similarity_op_npu.cc
已删除
100644 → 0
浏览文件 @
cf6ed7cb
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/detection/iou_similarity_op.h"
namespace paddle {
namespace operators {

// Thin convenience wrapper around NpuOpRunner: each member launches one
// Ascend op on the context's NPU stream. Every output tensor passed to
// these helpers must already be allocated (mutable_data called) by the
// caller before the call.
template <typename T>
struct IouFunction {
 public:
  explicit IouFunction(const framework::ExecutionContext& ctx) : ctx(ctx) {
    place = ctx.GetPlace();
    stream = ctx.template device_context<paddle::platform::NPUDeviceContext>()
                 .stream();
  }

  // y = transpose(x, axis); y must be pre-allocated.
  void Transpose(const phi::DenseTensor* x,
                 phi::DenseTensor* y,
                 const std::vector<int>& axis) {
    NpuOpRunner("TransposeD", {*x}, {*y}, {{"perm", axis}}).Run(stream);
  }

  // z = x + y (element-wise); z must be pre-allocated.
  void Add(const phi::DenseTensor* x,
           const phi::DenseTensor* y,
           phi::DenseTensor* z) {
    NpuOpRunner("AddV2", {*x, *y}, {*z}, {}).Run(stream);
  }

  // z = x - y (element-wise); z must be pre-allocated.
  void Sub(const phi::DenseTensor* x,
           const phi::DenseTensor* y,
           phi::DenseTensor* z) {
    NpuOpRunner("Sub", {*x, *y}, {*z}, {}).Run(stream);
  }

  // z = x * y (element-wise); z must be pre-allocated.
  void Mul(const phi::DenseTensor* x,
           const phi::DenseTensor* y,
           phi::DenseTensor* z) {
    NpuOpRunner("Mul", {*x, *y}, {*z}, {}).Run(stream);
  }

  // z = x / y, producing 0 where y == 0; z must be pre-allocated.
  void DivNoNan(const phi::DenseTensor* x,
                const phi::DenseTensor* y,
                phi::DenseTensor* z) {
    NpuOpRunner("DivNoNan", {*x, *y}, {*z}, {}).Run(stream);
  }

  // y = x + scalar (element-wise); y must be pre-allocated.
  void Adds(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) {
    NpuOpRunner("Adds", {*x}, {*y}, {{"value", scalar}}).Run(stream);
  }

  // z = max(x, y) (element-wise, broadcasting); z must be pre-allocated.
  void Maximum(const phi::DenseTensor* x,
               const phi::DenseTensor* y,
               phi::DenseTensor* z) {
    NpuOpRunner("Maximum", {*x, *y}, {*z}, {}).Run(stream);
  }

  // z = min(x, y) (element-wise, broadcasting); z must be pre-allocated.
  void Minimum(const phi::DenseTensor* x,
               const phi::DenseTensor* y,
               phi::DenseTensor* z) {
    NpuOpRunner("Minimum", {*x, *y}, {*z}, {}).Run(stream);
  }

 private:
  platform::Place place;
  aclrtStream stream;
  const framework::ExecutionContext& ctx;
};
// NPU kernel for the iou_similarity op: computes the pairwise IoU (Jaccard
// overlap) matrix Out[N, M] between N boxes in X and M boxes in Y, where each
// box row is (xmin, ymin, xmax, ymax). When box_normalized is false, widths
// and heights are computed as (max - min + 1), matching the CPU/GPU kernels.
template <typename T>
class IouSimilarityNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<phi::DenseTensor>("X");
    auto* y = ctx.Input<phi::DenseTensor>("Y");
    bool normalized = ctx.Attr<bool>("box_normalized");
    auto* out = ctx.Output<phi::DenseTensor>("Out");
    auto _type = x->dtype();
    auto place = ctx.GetPlace();
    IouFunction<T> F(ctx);
    // N = number of boxes in X, M = number of boxes in Y; Out is [N, M].
    auto N = x->dims()[0];
    auto M = y->dims()[0];
    out->mutable_data<T>({N, M}, place);
    // Transpose the [N,4]/[M,4] box tensors to [4,N]/[4,M] so each coordinate
    // becomes one contiguous row that can be sliced out below.
    phi::DenseTensor xt(_type);
    phi::DenseTensor yt(_type);
    xt.mutable_data<T>({4, N}, place);
    yt.mutable_data<T>({4, M}, place);
    std::vector<int> vec_trans = {1, 0};
    F.Transpose(x, &xt, vec_trans);
    F.Transpose(y, &yt, vec_trans);
    // Slice out the four coordinate rows of each box set.
    phi::DenseTensor xmin1 = xt.Slice(0, 1);
    phi::DenseTensor ymin1 = xt.Slice(1, 2);
    phi::DenseTensor xmax1 = xt.Slice(2, 3);
    phi::DenseTensor ymax1 = xt.Slice(3, 4);
    phi::DenseTensor xmin2 = yt.Slice(0, 1);
    phi::DenseTensor ymin2 = yt.Slice(1, 2);
    phi::DenseTensor xmax2 = yt.Slice(2, 3);
    phi::DenseTensor ymax2 = yt.Slice(3, 4);
    // Reshape X coords to column vectors [N,1] and Y coords to row vectors
    // [1,M] so element-wise ops below broadcast into [N,M].
    xmin1.Resize({N, 1});
    ymin1.Resize({N, 1});
    xmax1.Resize({N, 1});
    ymax1.Resize({N, 1});
    xmin2.Resize({1, M});
    ymin2.Resize({1, M});
    xmax2.Resize({1, M});
    ymax2.Resize({1, M});
    // Per-box widths, heights, and areas of each box set.
    phi::DenseTensor w1(_type);
    phi::DenseTensor h1(_type);
    phi::DenseTensor w2(_type);
    phi::DenseTensor h2(_type);
    phi::DenseTensor area1(_type);
    phi::DenseTensor area2(_type);
    w1.mutable_data<T>({N, 1}, place);
    h1.mutable_data<T>({N, 1}, place);
    w2.mutable_data<T>({1, M}, place);
    h2.mutable_data<T>({1, M}, place);
    area1.mutable_data<T>({N, 1}, place);
    area2.mutable_data<T>({1, M}, place);
    F.Sub(&xmax1, &xmin1, &w1);
    F.Sub(&ymax1, &ymin1, &h1);
    F.Sub(&xmax2, &xmin2, &w2);
    F.Sub(&ymax2, &ymin2, &h2);
    if (!normalized) {
      // Pixel-coordinate boxes: width/height are inclusive, so add 1.
      F.Adds(&w1, 1.0f, &w1);
      F.Adds(&h1, 1.0f, &h1);
      F.Adds(&w2, 1.0f, &w2);
      F.Adds(&h2, 1.0f, &h2);
    }
    F.Mul(&w1, &h1, &area1);
    F.Mul(&w2, &h2, &area2);
    // Intersection rectangle corners for every (X box, Y box) pair; these
    // broadcast [N,1] against [1,M] into [N,M].
    phi::DenseTensor inter_xmax(_type);
    phi::DenseTensor inter_ymax(_type);
    phi::DenseTensor inter_xmin(_type);
    phi::DenseTensor inter_ymin(_type);
    inter_xmax.mutable_data<T>({N, M}, place);
    inter_ymax.mutable_data<T>({N, M}, place);
    inter_xmin.mutable_data<T>({N, M}, place);
    inter_ymin.mutable_data<T>({N, M}, place);
    F.Minimum(&xmax1, &xmax2, &inter_xmax);
    F.Minimum(&ymax1, &ymax2, &inter_ymax);
    F.Maximum(&xmin1, &xmin2, &inter_xmin);
    F.Maximum(&ymin1, &ymin2, &inter_ymin);
    // Intersection width/height (may be negative when boxes do not overlap).
    phi::DenseTensor inter_w(_type);
    phi::DenseTensor inter_h(_type);
    inter_w.mutable_data<T>({N, M}, place);
    inter_h.mutable_data<T>({N, M}, place);
    F.Sub(&inter_xmax, &inter_xmin, &inter_w);
    F.Sub(&inter_ymax, &inter_ymin, &inter_h);
    if (!normalized) {
      F.Adds(&inter_w, 1.0f, &inter_w);
      F.Adds(&inter_h, 1.0f, &inter_h);
    }
    // Clamp negative extents (disjoint boxes) to zero before multiplying.
    phi::DenseTensor zeros(_type);
    zeros.mutable_data<T>({1}, place);
    FillNpuTensorWithConstant<T>(&zeros, static_cast<T>(0));
    F.Maximum(&inter_w, &zeros, &inter_w);
    F.Maximum(&inter_h, &zeros, &inter_h);
    // out temporarily holds the intersection area.
    F.Mul(&inter_w, &inter_h, out);
    // union = area1 + area2 - intersection; IoU = intersection / union,
    // with DivNoNan yielding 0 where the union area is 0.
    phi::DenseTensor union_area(_type);
    union_area.mutable_data<T>({N, M}, place);
    F.Add(&area1, &area2, &union_area);
    F.Sub(&union_area, out, &union_area);
    F.DivNoNan(out, &union_area, out);
  }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

// Register the NPU kernel for the iou_similarity op for fp32 and fp16.
REGISTER_OP_NPU_KERNEL(iou_similarity,
                       ops::IouSimilarityNPUKernel<float>,
                       ops::IouSimilarityNPUKernel<plat::float16>);
paddle/fluid/operators/detection/prior_box_op_npu.cc
已删除
100644 → 0
浏览文件 @
cf6ed7cb
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/detection/prior_box_op.h"
namespace
paddle
{
namespace
operators
{
// NPU kernel for the prior_box op. Delegates the whole computation to the
// Ascend "PriorBox" operator, which emits boxes and variances concatenated
// into one output tensor; this kernel then slices that buffer apart and
// copies the halves into the op's "Boxes" and "Variances" outputs.
template <typename DeviceContext, typename T>
class PriorBoxNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<phi::DenseTensor>("Input");
    auto* image = ctx.Input<phi::DenseTensor>("Image");
    auto* boxes = ctx.Output<phi::DenseTensor>("Boxes");
    auto* variances = ctx.Output<phi::DenseTensor>("Variances");

    // The slicing scheme below splits the fused output evenly in two, so the
    // two output tensors must have identical shapes.
    PADDLE_ENFORCE_EQ(
        boxes->dims(),
        variances->dims(),
        platform::errors::Unimplemented(
            "the shape of boxes and variances must be same in "
            "the npu kernel of prior_box, but got boxes->dims() "
            "= [%s], variances->dims() = [%s]",
            boxes->dims(),
            variances->dims()));

    auto min_sizes = ctx.Attr<std::vector<float>>("min_sizes");
    auto max_sizes = ctx.Attr<std::vector<float>>("max_sizes");
    auto aspect_ratios = ctx.Attr<std::vector<float>>("aspect_ratios");
    auto variances_attr = ctx.Attr<std::vector<float>>("variances");
    bool flip = ctx.Attr<bool>("flip");
    bool clip = ctx.Attr<bool>("clip");
    float step_w = ctx.Attr<float>("step_w");
    float step_h = ctx.Attr<float>("step_h");
    float offset = ctx.Attr<float>("offset");

    auto place = ctx.GetPlace();

    // Fused device output: shape [2, *boxes->dims()] — first half holds the
    // boxes, second half the variances.
    phi::DenseTensor out(input->type());
    auto out_dims = phi::vectorize(boxes->dims());
    out_dims.insert(out_dims.begin(), 2);
    out.Resize(phi::make_ddim(out_dims));
    out.mutable_data<T>(place);

    // Map framework attribute names to the Ascend op's attribute names
    // (e.g. "min_sizes" -> "min_size").
    framework::NPUAttributeMap attr_input = {{"min_size", min_sizes},
                                             {"max_size", max_sizes},
                                             {"aspect_ratio", aspect_ratios},
                                             {"step_h", step_h},
                                             {"step_w", step_w},
                                             {"flip", flip},
                                             {"clip", clip},
                                             {"offset", offset},
                                             {"variance", variances_attr}};

    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();

    const auto& runner =
        NpuOpRunner("PriorBox", {*input, *image}, {out}, attr_input);
    runner.Run(stream);

    // Flatten, then split the fused buffer: [0, numel(boxes)) are the boxes,
    // the remainder are the variances.
    out.Resize(phi::make_ddim({out.numel()}));
    phi::DenseTensor out_boxes = out.Slice(0, boxes->numel());
    phi::DenseTensor out_variances = out.Slice(boxes->numel(), out.numel());

    out_boxes.Resize(boxes->dims());
    out_variances.Resize(variances->dims());

    boxes->mutable_data<T>(place);
    variances->mutable_data<T>(place);

    // Async copies enqueued on the same device context as the runner, so
    // they are ordered after the PriorBox launch.
    framework::TensorCopy(
        out_boxes,
        place,
        ctx.template device_context<platform::NPUDeviceContext>(),
        boxes);
    framework::TensorCopy(
        out_variances,
        place,
        ctx.template device_context<platform::NPUDeviceContext>(),
        variances);
  }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

// Register the NPU kernel for the prior_box op for fp32 and fp16. Unlike the
// other detection NPU kernels in this directory, this kernel template also
// takes the device-context type as its first argument.
REGISTER_OP_NPU_KERNEL(
    prior_box,
    ops::PriorBoxNPUKernel<plat::NPUDeviceContext, float>,
    ops::PriorBoxNPUKernel<plat::NPUDeviceContext, plat::float16>);
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录