Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
975cd45d
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
332
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
975cd45d
编写于
5月 25, 2020
作者:
S
shipengchao
提交者:
MaxwellDing
5月 27, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add roi align x86 kernel
上级
d6791276
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
285 addition
and
2 deletion
+285
-2
lite/core/mir/fusion/fc_fuse_pass.cc
lite/core/mir/fusion/fc_fuse_pass.cc
+1
-1
lite/core/mir/mlu_postprocess_pass.cc
lite/core/mir/mlu_postprocess_pass.cc
+2
-0
lite/kernels/x86/CMakeLists.txt
lite/kernels/x86/CMakeLists.txt
+2
-0
lite/kernels/x86/roi_align_compute.cc
lite/kernels/x86/roi_align_compute.cc
+235
-0
lite/kernels/x86/roi_align_compute.h
lite/kernels/x86/roi_align_compute.h
+37
-0
lite/tests/kernels/CMakeLists.txt
lite/tests/kernels/CMakeLists.txt
+1
-1
lite/tests/kernels/roi_align_compute_test.cc
lite/tests/kernels/roi_align_compute_test.cc
+7
-0
未找到文件。
lite/core/mir/fusion/fc_fuse_pass.cc
浏览文件 @
975cd45d
...
...
@@ -27,7 +27,7 @@ void FcFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
#ifdef LITE_WITH_MLU
fusion
::
FcFuser
fuser
(
false
);
fuser
(
graph
.
get
());
#el
if
#el
se
fusion
::
FcFuser
fuser
(
true
);
fuser
(
graph
.
get
());
#endif
...
...
lite/core/mir/mlu_postprocess_pass.cc
浏览文件 @
975cd45d
...
...
@@ -854,9 +854,11 @@ void ModifyValidPlaces(SSAGraph* graph, bool use_mlu_cast) {
for
(
auto
&
place
:
v_places
)
{
prec_set
.
insert
(
place
.
precision
);
}
#ifdef LITE_WITH_MLU
if
(
lite
::
TargetWrapperMlu
::
UseFirstConv
())
{
prec_set
.
insert
(
PRECISION
(
kInt8
));
}
#endif
for
(
auto
&
prec
:
prec_set
)
{
v_places
.
emplace_back
(
TARGET
(
kX86
),
prec
,
DATALAYOUT
(
kNHWC
));
}
...
...
lite/kernels/x86/CMakeLists.txt
浏览文件 @
975cd45d
...
...
@@ -70,6 +70,7 @@ add_kernel(search_fc_compute_x86 X86 basic SRCS search_fc_compute.cc DEPS ${lite
add_kernel
(
matmul_compute_x86 X86 basic SRCS matmul_compute.cc DEPS
${
lite_kernel_deps
}
blas
)
add_kernel
(
yolo_box_compute_x86 X86 basic SRCS yolo_box_compute.cc DEPS
${
lite_kernel_deps
}
)
add_kernel
(
roi_align_compute_x86 X86 basic SRCS roi_align_compute.cc DEPS
${
lite_kernel_deps
}
)
add_kernel
(
interpolate_compute_x86 X86 basic SRCS interpolate_compute.cc DEPS
${
lite_kernel_deps
}
)
lite_cc_test
(
test_conv2d_compute_x86 SRCS conv_compute_test.cc DEPS conv_compute_x86
)
...
...
@@ -111,5 +112,6 @@ lite_cc_test(test_sequence_arithmetic_compute_x86 SRCS sequence_arithmetic_compu
lite_cc_test
(
test_leaky_relu_compute_x86 SRCS leaky_relu_compute_test.cc DEPS activation_compute_x86
)
lite_cc_test
(
test_yolo_box_compute_x86 SRCS yolo_box_compute_test.cc DEPS
yolo_box_compute_x86
)
# lite_cc_test(test_roi_align_compute_x86 SRCS roi_align_compute_test.cc DEPS roi_align_compute_x86)
lite_cc_test
(
test_nearest_interp_comute_x86 SRCS interpolate_compute_test.cc
DEPS interpolate_compute_x86
)
lite/kernels/x86/roi_align_compute.cc
0 → 100644
浏览文件 @
975cd45d
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/x86/roi_align_compute.h"
#include <algorithm>
#include <cmath>
#include <string>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/core/type_system.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
// Number of entries stored per sample point in the pre-computed tables:
// one per bilinear neighbour (low/low, low/high, high/low, high/high).
static constexpr int kROISize = 4;

/**
 * Pre-computes, for every sample point of one ROI, the four flat feature-map
 * offsets and the four bilinear weights needed to interpolate that point.
 *
 * Sample points are visited in the order (ph, pw, iy, ix); sample number `s`
 * occupies entries [s * kROISize, s * kROISize + kROISize) of both tables.
 * Points falling outside the map (coordinate < -1 or > height/width) get
 * offset 0 and weight 0 for all four entries.
 *
 * @param height          feature-map height, used for bounds and clamping
 * @param width           feature-map width, also the row stride of offsets
 * @param pooled_height   number of output bins along y
 * @param pooled_width    number of output bins along x
 * @param iy_upper        number of sample rows visited per bin
 * @param ix_upper        number of sample columns visited per bin
 * @param roi_ymin        y coordinate of the ROI origin
 * @param roi_xmin        x coordinate of the ROI origin
 * @param bin_size_h      height of one bin
 * @param bin_size_w      width of one bin
 * @param roi_bin_grid_h  divisor used to space sample rows inside a bin
 * @param roi_bin_grid_w  divisor used to space sample columns inside a bin
 * @param pre_pos         output table of int offsets; this function does not
 *                        resize it, so the caller must size it to hold
 *                        pooled_height * pooled_width * iy_upper * ix_upper *
 *                        kROISize entries beforehand
 * @param pre_w           output table of T weights, same layout as pre_pos
 */
template <class T>
void PreCalcForBilinearInterpolate(const int height,
                                   const int width,
                                   const int pooled_height,
                                   const int pooled_width,
                                   const int iy_upper,
                                   const int ix_upper,
                                   T roi_ymin,
                                   T roi_xmin,
                                   T bin_size_h,
                                   T bin_size_w,
                                   int roi_bin_grid_h,
                                   int roi_bin_grid_w,
                                   Tensor* pre_pos,
                                   Tensor* pre_w) {
  int* pos_table = pre_pos->mutable_data<int>();
  T* weight_table = pre_w->mutable_data<T>();

  // Running index of the current sample point; advanced once per ix step.
  int sample = 0;
  for (int ph = 0; ph < pooled_height; ++ph) {
    for (int pw = 0; pw < pooled_width; ++pw) {
      for (int iy = 0; iy < iy_upper; ++iy) {
        // y coordinate of this row of sample points
        T y = roi_ymin + ph * bin_size_h +
              static_cast<T>(iy + .5f) * bin_size_h /
                  static_cast<T>(roi_bin_grid_h);

        for (int ix = 0; ix < ix_upper; ++ix, ++sample) {
          int* pos = pos_table + sample * kROISize;
          T* weight = weight_table + sample * kROISize;

          // x coordinate of this sample point
          T x = roi_xmin + pw * bin_size_w +
                static_cast<T>(ix + .5f) * bin_size_w /
                    static_cast<T>(roi_bin_grid_w);

          // Sample points outside the feature map contribute nothing.
          const bool outside =
              y < -1.0 || y > height || x < -1.0 || x > width;
          if (outside) {
            for (int k = 0; k < kROISize; ++k) {
              pos[k] = 0;
              weight[k] = 0;
            }
            continue;
          }

          // Clamp slightly negative coordinates onto the map edge.
          if (y <= 0) {
            y = 0;
          }
          if (x <= 0) {
            x = 0;
          }

          int y_low = static_cast<int>(y);
          int x_low = static_cast<int>(x);

          // On the last row/column both neighbours collapse onto the edge.
          const bool y_on_edge = y_low >= height - 1;
          if (y_on_edge) {
            y_low = height - 1;
            y = static_cast<T>(y_low);
          }
          const int y_high = y_on_edge ? y_low : y_low + 1;

          const bool x_on_edge = x_low >= width - 1;
          if (x_on_edge) {
            x_low = width - 1;
            x = static_cast<T>(x_low);
          }
          const int x_high = x_on_edge ? x_low : x_low + 1;

          // Fractional distances to the low neighbours and their complements.
          const T ly = y - y_low;
          const T lx = x - x_low;
          const T hy = 1. - ly;
          const T hx = 1. - lx;

          pos[0] = y_low * width + x_low;
          pos[1] = y_low * width + x_high;
          pos[2] = y_high * width + x_low;
          pos[3] = y_high * width + x_high;

          weight[0] = hy * hx;
          weight[1] = hy * lx;
          weight[2] = ly * hx;
          weight[3] = ly * lx;
        }
      }
    }
  }
}
void
RoiAlignCompute
::
Run
()
{
auto
&
param
=
Param
<
operators
::
RoiAlignParam
>
();
auto
*
in
=
param
.
X
;
auto
*
rois
=
param
.
ROIs
;
auto
*
out
=
param
.
Out
;
float
spatial_scale
=
param
.
spatial_scale
;
int
pooled_height
=
param
.
pooled_height
;
int
pooled_width
=
param
.
pooled_width
;
int
sampling_ratio
=
param
.
sampling_ratio
;
auto
in_dims
=
in
->
dims
();
// int batch_size = in_dims[0];
int
channels
=
in_dims
[
1
];
int
height
=
in_dims
[
2
];
int
width
=
in_dims
[
3
];
auto
rois_dims
=
rois
->
dims
();
int
rois_num
=
rois_dims
[
0
];
auto
out_dims
=
out
->
dims
();
if
(
rois_num
==
0
)
{
return
;
}
DDim
in_stride
({
static_cast
<
int
>
(
in_dims
[
1
]
*
in_dims
[
2
]
*
in_dims
[
3
]),
static_cast
<
int
>
(
in_dims
[
2
]
*
in_dims
[
3
]),
static_cast
<
int
>
(
in_dims
[
3
]),
1
});
DDim
roi_stride
({
static_cast
<
int
>
(
rois_dims
[
1
]),
1
});
DDim
out_stride
({
static_cast
<
int
>
(
out_dims
[
1
]
*
out_dims
[
2
]
*
out_dims
[
3
]),
static_cast
<
int
>
(
out_dims
[
2
]
*
out_dims
[
3
]),
static_cast
<
int
>
(
out_dims
[
3
]),
1
});
auto
*
input_data
=
in
->
data
<
float
>
();
Tensor
roi_batch_id_list
;
roi_batch_id_list
.
Resize
({
rois_num
});
int
*
roi_batch_id_data
=
roi_batch_id_list
.
mutable_data
<
int
>
();
auto
rois_lod
=
rois
->
lod
().
back
();
int
rois_batch_size
=
rois_lod
.
size
()
-
1
;
// CHECK_OR_FALSE(rois_batch_size == batch_size);
// int rois_num_with_lod = rois_lod[rois_batch_size];
// CHECK_OR_FALSE(rois_num_with_lod == rois_num);
for
(
int
n
=
0
;
n
<
rois_batch_size
;
++
n
)
{
for
(
size_t
i
=
rois_lod
[
n
];
i
<
rois_lod
[
n
+
1
];
++
i
)
{
roi_batch_id_data
[
i
]
=
n
;
}
}
auto
*
output_data
=
out
->
mutable_data
<
float
>
();
auto
*
rois_data
=
rois
->
data
<
float
>
();
for
(
int
n
=
0
;
n
<
rois_num
;
++
n
)
{
int
roi_batch_id
=
roi_batch_id_data
[
n
];
float
roi_xmin
=
rois_data
[
0
]
*
spatial_scale
;
float
roi_ymin
=
rois_data
[
1
]
*
spatial_scale
;
float
roi_xmax
=
rois_data
[
2
]
*
spatial_scale
;
float
roi_ymax
=
rois_data
[
3
]
*
spatial_scale
;
float
roi_width
=
std
::
max
(
roi_xmax
-
roi_xmin
,
1.0
f
);
float
roi_height
=
std
::
max
(
roi_ymax
-
roi_ymin
,
1.0
f
);
float
bin_size_h
=
roi_height
/
pooled_height
;
float
bin_size_w
=
roi_width
/
pooled_width
;
const
float
*
batch_data
=
input_data
+
roi_batch_id
*
in_stride
[
0
];
int
roi_bin_grid_h
=
(
sampling_ratio
>
0
)
?
sampling_ratio
:
ceil
(
roi_height
/
pooled_height
);
int
roi_bin_grid_w
=
(
sampling_ratio
>
0
)
?
sampling_ratio
:
ceil
(
roi_width
/
pooled_width
);
const
float
count
=
roi_bin_grid_h
*
roi_bin_grid_w
;
Tensor
pre_pos
;
Tensor
pre_w
;
int
pre_size
=
count
*
out_stride
[
1
];
pre_pos
.
Resize
({
pre_size
,
kROISize
});
pre_w
.
Resize
({
pre_size
,
kROISize
});
PreCalcForBilinearInterpolate
<
float
>
(
height
,
width
,
pooled_height
,
pooled_width
,
roi_bin_grid_h
,
roi_bin_grid_w
,
roi_ymin
,
roi_xmin
,
bin_size_h
,
bin_size_w
,
roi_bin_grid_h
,
roi_bin_grid_w
,
&
pre_pos
,
&
pre_w
);
const
int
*
pre_pos_data
=
pre_pos
.
data
<
int
>
();
const
float
*
pre_w_data
=
pre_w
.
data
<
float
>
();
for
(
int
c
=
0
;
c
<
channels
;
c
++
)
{
int
pre_calc_index
=
0
;
for
(
int
ph
=
0
;
ph
<
pooled_height
;
ph
++
)
{
for
(
int
pw
=
0
;
pw
<
pooled_width
;
pw
++
)
{
const
int
pool_index
=
ph
*
pooled_width
+
pw
;
float
output_val
=
0
;
for
(
int
iy
=
0
;
iy
<
roi_bin_grid_h
;
iy
++
)
{
for
(
int
ix
=
0
;
ix
<
roi_bin_grid_w
;
ix
++
)
{
for
(
int
i
=
0
;
i
<
kROISize
;
i
++
)
{
int
pos
=
pre_pos_data
[
pre_calc_index
*
kROISize
+
i
];
float
w
=
pre_w_data
[
pre_calc_index
*
kROISize
+
i
];
output_val
+=
w
*
batch_data
[
pos
];
}
pre_calc_index
+=
1
;
}
}
output_val
/=
count
;
output_data
[
pool_index
]
=
output_val
;
}
}
batch_data
+=
in_stride
[
1
];
output_data
+=
out_stride
[
1
];
}
rois_data
+=
roi_stride
[
0
];
}
}
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
// Registers paddle::lite::kernels::x86::RoiAlignCompute for the roi_align op
// with the macro arguments kX86, kFloat, kNCHW and the name `def`.
// Inputs "X" and "ROIs" and output "Out" are each bound to
// LiteType::GetTensorTy(TARGET(kX86)).
REGISTER_LITE_KERNEL
(
roi_align
,
kX86
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
x86
::
RoiAlignCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
BindInput
(
"ROIs"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
Finalize
();
lite/kernels/x86/roi_align_compute.h
0 → 100644
浏览文件 @
975cd45d
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include "lite/core/kernel.h"
#include "lite/operators/roi_align_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
/// Kernel class for roi_align, declared for TARGET(kX86) / PRECISION(kFloat).
/// The computation itself lives in Run(), defined in roi_align_compute.cc.
class RoiAlignCompute
    : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  /// Parameter struct this kernel reads its inputs and attributes from.
  using param_t = operators::RoiAlignParam;

  virtual ~RoiAlignCompute() = default;

  /// Executes the kernel on the currently bound parameters.
  void Run() override;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
lite/tests/kernels/CMakeLists.txt
浏览文件 @
975cd45d
...
...
@@ -55,7 +55,7 @@ if(LITE_BUILD_EXTRA)
lite_cc_test
(
test_kernel_affine_channel_compute SRCS affine_channel_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_anchor_generator_compute SRCS anchor_generator_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
#lite_cc_test(test_kernel_generate_proposals_compute SRCS generate_proposals_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#
lite_cc_test(test_kernel_roi_align_compute SRCS roi_align_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test
(
test_kernel_roi_align_compute SRCS roi_align_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_search_aligned_mat_mul_compute SRCS search_aligned_mat_mul_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
bm_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_search_seq_fc_compute SRCS search_seq_fc_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_lookup_table_compute SRCS lookup_table_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
bm_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
...
...
lite/tests/kernels/roi_align_compute_test.cc
浏览文件 @
975cd45d
...
...
@@ -120,6 +120,13 @@ TEST(RoiAlign, precision) {
// The unit test for roi_align needs parameters
// that are obtained by running the model with Paddle.
LOG
(
INFO
)
<<
"test roi align op"
;
#ifdef LITE_WITH_X86
Place
place
(
TARGET
(
kX86
));
std
::
unique_ptr
<
arena
::
TestCase
>
tester
(
new
RoiAlignComputeTester
(
place
,
"def"
));
arena
::
Arena
arena
(
std
::
move
(
tester
),
place
,
2e-4
);
arena
.
TestPrecision
();
#endif
#ifdef LITE_WITH_ARM
Place
place
(
TARGET
(
kARM
));
std
::
unique_ptr
<
arena
::
TestCase
>
tester
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录