Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
d1b7aec5
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
d1b7aec5
编写于
12月 18, 2019
作者:
J
juncaipeng
提交者:
GitHub
12月 18, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Support Mask RCNN2 (#2588)
* Support Mask RCNN2 (#2588)
上级
b8992673
变更
25
显示空白变更内容
内联
并排
Showing
25 changed file
with
563 addition
and
37 deletion
+563
-37
lite/backends/arm/math/elementwise.cc
lite/backends/arm/math/elementwise.cc
+113
-0
lite/backends/arm/math/interpolate.cc
lite/backends/arm/math/interpolate.cc
+3
-3
lite/core/arena/framework.h
lite/core/arena/framework.h
+34
-3
lite/core/mir/elimination/identity_scale_eliminate_pass.cc
lite/core/mir/elimination/identity_scale_eliminate_pass.cc
+2
-1
lite/core/mir/pattern_matcher.cc
lite/core/mir/pattern_matcher.cc
+13
-0
lite/core/mir/pattern_matcher.h
lite/core/mir/pattern_matcher.h
+1
-0
lite/kernels/arm/CMakeLists.txt
lite/kernels/arm/CMakeLists.txt
+1
-0
lite/kernels/arm/cast_compute.cc
lite/kernels/arm/cast_compute.cc
+2
-2
lite/kernels/arm/collect_fpn_proposals_compute.cc
lite/kernels/arm/collect_fpn_proposals_compute.cc
+2
-2
lite/kernels/arm/compare_compute.cc
lite/kernels/arm/compare_compute.cc
+13
-0
lite/kernels/arm/conditional_block_compute.h
lite/kernels/arm/conditional_block_compute.h
+7
-2
lite/kernels/arm/distribute_fpn_proposals_compute.cc
lite/kernels/arm/distribute_fpn_proposals_compute.cc
+151
-0
lite/kernels/arm/distribute_fpn_proposals_compute.h
lite/kernels/arm/distribute_fpn_proposals_compute.h
+38
-0
lite/kernels/arm/elementwise_compute.cc
lite/kernels/arm/elementwise_compute.cc
+21
-13
lite/kernels/arm/elementwise_compute.h
lite/kernels/arm/elementwise_compute.h
+2
-2
lite/kernels/arm/elementwise_compute_test.cc
lite/kernels/arm/elementwise_compute_test.cc
+2
-2
lite/kernels/arm/gather_compute.cc
lite/kernels/arm/gather_compute.cc
+4
-2
lite/kernels/arm/interpolate_compute.cc
lite/kernels/arm/interpolate_compute.cc
+8
-4
lite/kernels/arm/slice_compute.cc
lite/kernels/arm/slice_compute.cc
+11
-0
lite/operators/CMakeLists.txt
lite/operators/CMakeLists.txt
+1
-0
lite/operators/collect_fpn_proposals_op.cc
lite/operators/collect_fpn_proposals_op.cc
+2
-1
lite/operators/distribute_fpn_proposals_op.cc
lite/operators/distribute_fpn_proposals_op.cc
+70
-0
lite/operators/distribute_fpn_proposals_op.h
lite/operators/distribute_fpn_proposals_op.h
+51
-0
lite/operators/op_params.h
lite/operators/op_params.h
+10
-0
lite/tests/kernels/cast_compute_test.cc
lite/tests/kernels/cast_compute_test.cc
+1
-0
未找到文件。
lite/backends/arm/math/elementwise.cc
浏览文件 @
d1b7aec5
...
@@ -557,6 +557,52 @@ void elementwise_mul<float>(const float* dinx,
...
@@ -557,6 +557,52 @@ void elementwise_mul<float>(const float* dinx,
}
}
}
}
template
<
>
void
elementwise_mul
<
int
>
(
const
int
*
dinx
,
const
int
*
diny
,
int
*
dout
,
int
num
)
{
int
cnt
=
num
>>
4
;
int
remain
=
num
%
16
;
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
cnt
;
++
i
)
{
const
int
*
dinx_ptr
=
dinx
+
(
i
<<
4
);
const
int
*
diny_ptr
=
diny
+
(
i
<<
4
);
int
*
dout_ptr
=
dout
+
(
i
<<
4
);
int32x4_t
dinx0
=
vld1q_s32
(
dinx_ptr
);
int32x4_t
dinx1
=
vld1q_s32
(
dinx_ptr
+
4
);
int32x4_t
dinx2
=
vld1q_s32
(
dinx_ptr
+
8
);
int32x4_t
dinx3
=
vld1q_s32
(
dinx_ptr
+
12
);
int32x4_t
diny0
=
vld1q_s32
(
diny_ptr
);
int32x4_t
diny1
=
vld1q_s32
(
diny_ptr
+
4
);
int32x4_t
diny2
=
vld1q_s32
(
diny_ptr
+
8
);
int32x4_t
diny3
=
vld1q_s32
(
diny_ptr
+
12
);
dinx0
=
vmulq_s32
(
dinx0
,
diny0
);
dinx1
=
vmulq_s32
(
dinx1
,
diny1
);
dinx2
=
vmulq_s32
(
dinx2
,
diny2
);
dinx3
=
vmulq_s32
(
dinx3
,
diny3
);
vst1q_s32
(
dout_ptr
,
dinx0
);
vst1q_s32
(
dout_ptr
+
4
,
dinx1
);
vst1q_s32
(
dout_ptr
+
8
,
dinx2
);
vst1q_s32
(
dout_ptr
+
12
,
dinx3
);
}
if
(
remain
>
0
)
{
const
int
*
dinx_ptr
=
dinx
+
(
cnt
<<
4
);
const
int
*
diny_ptr
=
diny
+
(
cnt
<<
4
);
int
*
dout_ptr
=
dout
+
(
cnt
<<
4
);
for
(
int
i
=
0
;
i
<
remain
;
i
++
)
{
*
dout_ptr
=
*
dinx_ptr
*
*
diny_ptr
;
dout_ptr
++
;
dinx_ptr
++
;
diny_ptr
++
;
}
}
}
template
<
>
template
<
>
void
elementwise_mul_relu
<
float
>
(
const
float
*
dinx
,
void
elementwise_mul_relu
<
float
>
(
const
float
*
dinx
,
const
float
*
diny
,
const
float
*
diny
,
...
@@ -678,6 +724,73 @@ void elementwise_mul_broadcast<float>(const float* dinx,
...
@@ -678,6 +724,73 @@ void elementwise_mul_broadcast<float>(const float* dinx,
}
}
}
}
template
<
>
void
elementwise_mul_broadcast
<
int
>
(
const
int
*
dinx
,
const
int
*
diny
,
int
*
dout
,
int
batch
,
int
channels
,
int
num
)
{
#pragma omp parallel for collapse(2)
for
(
int
i
=
0
;
i
<
batch
;
++
i
)
{
for
(
int
j
=
0
;
j
<
channels
;
++
j
)
{
int
offset
=
(
i
*
channels
+
j
)
*
num
;
const
int
*
din_ptr
=
dinx
+
offset
;
const
int
diny_data
=
diny
[
j
];
int
*
dout_ptr
=
dout
+
offset
;
int
cnt
=
num
>>
4
;
int
remain
=
num
%
16
;
int32x4_t
rb
=
vdupq_n_s32
(
diny_data
);
for
(
int
k
=
0
;
k
<
cnt
;
++
k
)
{
int32x4_t
din0
=
vld1q_s32
(
din_ptr
);
int32x4_t
din1
=
vld1q_s32
(
din_ptr
+
4
);
int32x4_t
din2
=
vld1q_s32
(
din_ptr
+
8
);
int32x4_t
din3
=
vld1q_s32
(
din_ptr
+
12
);
din0
=
vmulq_s32
(
din0
,
rb
);
din1
=
vmulq_s32
(
din1
,
rb
);
din2
=
vmulq_s32
(
din2
,
rb
);
din3
=
vmulq_s32
(
din3
,
rb
);
vst1q_s32
(
dout_ptr
,
din0
);
vst1q_s32
(
dout_ptr
+
4
,
din1
);
vst1q_s32
(
dout_ptr
+
8
,
din2
);
vst1q_s32
(
dout_ptr
+
12
,
din3
);
din_ptr
+=
16
;
dout_ptr
+=
16
;
}
if
(
remain
>=
8
)
{
int32x4_t
din0
=
vld1q_s32
(
din_ptr
);
int32x4_t
din1
=
vld1q_s32
(
din_ptr
+
4
);
din0
=
vmulq_s32
(
din0
,
rb
);
din1
=
vmulq_s32
(
din1
,
rb
);
vst1q_s32
(
dout_ptr
,
din0
);
vst1q_s32
(
dout_ptr
+
4
,
din1
);
din_ptr
+=
8
;
dout_ptr
+=
8
;
remain
-=
8
;
}
if
(
remain
>=
4
)
{
int32x4_t
din0
=
vld1q_s32
(
din_ptr
);
din0
=
vmulq_s32
(
din0
,
rb
);
vst1q_s32
(
dout_ptr
,
din0
);
din_ptr
+=
4
;
dout_ptr
+=
4
;
remain
-=
4
;
}
if
(
remain
>
0
)
{
for
(
int
p
=
0
;
p
<
remain
;
++
p
)
{
*
dout_ptr
=
*
din_ptr
*
diny_data
;
dout_ptr
++
;
din_ptr
++
;
}
}
}
}
}
template
<
>
template
<
>
void
elementwise_mul_relu_broadcast
<
float
>
(
const
float
*
dinx
,
void
elementwise_mul_relu_broadcast
<
float
>
(
const
float
*
dinx
,
const
float
*
diny
,
const
float
*
diny
,
...
...
lite/backends/arm/math/interpolate.cc
浏览文件 @
d1b7aec5
...
@@ -526,9 +526,9 @@ void interpolate(lite::Tensor* X,
...
@@ -526,9 +526,9 @@ void interpolate(lite::Tensor* X,
}
}
auto
out_size
=
OutSize
;
auto
out_size
=
OutSize
;
if
(
out_size
!=
nullptr
)
{
if
(
out_size
!=
nullptr
)
{
auto
out_size_data
=
get_new_data_from_tensor
<
floa
t
>
(
out_size
);
auto
out_size_data
=
get_new_data_from_tensor
<
in
t
>
(
out_size
);
out_height
=
static_cast
<
int
>
(
out_size_data
[
0
])
;
out_height
=
out_size_data
[
0
]
;
out_width
=
static_cast
<
int
>
(
out_size_data
[
1
])
;
out_width
=
out_size_data
[
1
]
;
}
}
}
}
float
height_scale
=
scale
;
float
height_scale
=
scale
;
...
...
lite/core/arena/framework.h
浏览文件 @
d1b7aec5
...
@@ -21,6 +21,7 @@
...
@@ -21,6 +21,7 @@
#include <iomanip>
#include <iomanip>
#include <memory>
#include <memory>
#include <string>
#include <string>
#include <unordered_map>
#include <utility>
#include <utility>
#include <vector>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/op_registry.h"
...
@@ -77,6 +78,20 @@ class TestCase {
...
@@ -77,6 +78,20 @@ class TestCase {
// kernel registry.
// kernel registry.
void
CheckKernelConsistWithDefinition
()
{}
void
CheckKernelConsistWithDefinition
()
{}
// Get the real precision of the output for check precision. When the declare
// precision obtained from the kernel is any, we should set the precision of
// the output in test case.
bool
GetPrecisonType
(
const
std
::
string
&
var_name
,
PrecisionType
*
precision_type
)
{
auto
res
=
precision_type_map_
.
find
(
var_name
);
if
(
res
==
precision_type_map_
.
end
())
{
return
false
;
}
else
{
*
precision_type
=
precision_type_map_
.
at
(
var_name
);
return
true
;
}
}
Scope
&
scope
()
{
return
*
scope_
;
}
Scope
&
scope
()
{
return
*
scope_
;
}
Scope
*
baseline_scope
()
{
return
base_scope_
;
}
Scope
*
baseline_scope
()
{
return
base_scope_
;
}
...
@@ -105,6 +120,19 @@ class TestCase {
...
@@ -105,6 +120,19 @@ class TestCase {
// Prepare for the operator.
// Prepare for the operator.
virtual
void
PrepareOpDesc
(
cpp
::
OpDesc
*
op_desc
)
=
0
;
virtual
void
PrepareOpDesc
(
cpp
::
OpDesc
*
op_desc
)
=
0
;
// Set the real precision of the output for check precision. When the declare
// precision obtained from the kernel is any, we should set the precision of
// the output in test case.
void
SetPrecisionType
(
const
std
::
string
&
var_name
,
const
PrecisionType
&
precision_type
)
{
auto
res
=
precision_type_map_
.
find
(
var_name
);
if
(
res
==
precision_type_map_
.
end
())
{
precision_type_map_
.
insert
({
var_name
,
precision_type
});
}
else
{
precision_type_map_
.
at
(
var_name
)
=
precision_type
;
}
}
public:
public:
const
Instruction
&
instruction
()
{
return
*
instruction_
;
}
const
Instruction
&
instruction
()
{
return
*
instruction_
;
}
...
@@ -148,6 +176,7 @@ class TestCase {
...
@@ -148,6 +176,7 @@ class TestCase {
Scope
*
base_scope_
{};
Scope
*
base_scope_
{};
std
::
unique_ptr
<
cpp
::
OpDesc
>
op_desc_
;
std
::
unique_ptr
<
cpp
::
OpDesc
>
op_desc_
;
std
::
unique_ptr
<
Instruction
>
instruction_
;
std
::
unique_ptr
<
Instruction
>
instruction_
;
std
::
unordered_map
<
std
::
string
,
PrecisionType
>
precision_type_map_
;
};
};
class
Arena
{
class
Arena
{
...
@@ -189,8 +218,11 @@ class Arena {
...
@@ -189,8 +218,11 @@ class Arena {
// get tensor type.
// get tensor type.
const
Type
*
type
=
const
Type
*
type
=
tester_
->
instruction
().
kernel
()
->
GetOutputDeclType
(
arg_name
);
tester_
->
instruction
().
kernel
()
->
GetOutputDeclType
(
arg_name
);
auto
precision_type
=
type
->
precision
();
switch
(
type
->
precision
())
{
if
(
precision_type
==
PRECISION
(
kAny
))
{
CHECK
(
tester_
->
GetPrecisonType
(
var_name
,
&
precision_type
));
}
switch
(
precision_type
)
{
case
PRECISION
(
kFloat
):
case
PRECISION
(
kFloat
):
return
tester_
->
CheckPrecision
<
float
>
(
var_name
,
abs_error_
);
return
tester_
->
CheckPrecision
<
float
>
(
var_name
,
abs_error_
);
case
PRECISION
(
kInt8
):
case
PRECISION
(
kInt8
):
...
@@ -199,7 +231,6 @@ class Arena {
...
@@ -199,7 +231,6 @@ class Arena {
return
tester_
->
CheckPrecision
<
int32_t
>
(
var_name
,
abs_error_
);
return
tester_
->
CheckPrecision
<
int32_t
>
(
var_name
,
abs_error_
);
case
PRECISION
(
kBool
):
case
PRECISION
(
kBool
):
return
tester_
->
CheckPrecision
<
bool
>
(
var_name
,
abs_error_
);
return
tester_
->
CheckPrecision
<
bool
>
(
var_name
,
abs_error_
);
default:
default:
LOG
(
FATAL
)
<<
"not support type "
<<
PrecisionToStr
(
type
->
precision
());
LOG
(
FATAL
)
<<
"not support type "
<<
PrecisionToStr
(
type
->
precision
());
return
false
;
return
false
;
...
...
lite/core/mir/elimination/identity_scale_eliminate_pass.cc
浏览文件 @
d1b7aec5
...
@@ -25,7 +25,8 @@ namespace {
...
@@ -25,7 +25,8 @@ namespace {
class
Eliminator
:
public
FuseBase
{
class
Eliminator
:
public
FuseBase
{
public:
public:
void
BuildPattern
()
override
{
void
BuildPattern
()
override
{
auto
*
pre_op
=
OpNode
(
"preop"
);
// the previous op's output need update
// the previous op's output need updat
auto
*
pre_op
=
OpNode
(
"preop"
)
->
assert_is_not_op_type
(
"conditional_block"
);
// TODO(Superjomn) check has only one output
// TODO(Superjomn) check has only one output
auto
*
x
=
VarNode
(
"x"
)
->
assert_is_op_input
(
"scale"
,
"X"
);
auto
*
x
=
VarNode
(
"x"
)
->
assert_is_op_input
(
"scale"
,
"X"
);
auto
*
scale_op
=
OpNode
(
"scale"
,
"scale"
)
auto
*
scale_op
=
OpNode
(
"scale"
,
"scale"
)
...
...
lite/core/mir/pattern_matcher.cc
浏览文件 @
d1b7aec5
...
@@ -377,6 +377,19 @@ PMNode *PMNode::assert_is_op(const std::string &op_type) {
...
@@ -377,6 +377,19 @@ PMNode *PMNode::assert_is_op(const std::string &op_type) {
return
this
;
return
this
;
}
}
PMNode
*
PMNode
::
assert_is_not_op_type
(
const
std
::
string
&
op_type
)
{
asserts_
.
emplace_back
([
op_type
](
const
Node
*
x
)
{
if
(
x
&&
x
->
IsStmt
())
{
auto
*
op_info
=
x
->
stmt
()
->
op_info
();
if
(
op_info
->
Type
()
==
op_type
)
{
return
false
;
}
}
return
true
;
});
return
this
;
}
PMNode
*
PMNode
::
assert_is_var
()
{
PMNode
*
PMNode
::
assert_is_var
()
{
asserts_
.
emplace_back
([](
const
Node
*
x
)
{
return
x
&&
x
->
IsArg
();
});
asserts_
.
emplace_back
([](
const
Node
*
x
)
{
return
x
&&
x
->
IsArg
();
});
return
this
;
return
this
;
...
...
lite/core/mir/pattern_matcher.h
浏览文件 @
d1b7aec5
...
@@ -123,6 +123,7 @@ struct PMNode {
...
@@ -123,6 +123,7 @@ struct PMNode {
// Assertions, helper functions to simplify the pattern definition.
// Assertions, helper functions to simplify the pattern definition.
PMNode
*
assert_is_op
();
PMNode
*
assert_is_op
();
PMNode
*
assert_is_op
(
const
std
::
string
&
op_type
);
PMNode
*
assert_is_op
(
const
std
::
string
&
op_type
);
PMNode
*
assert_is_not_op_type
(
const
std
::
string
&
op_type
);
PMNode
*
assert_is_var
();
PMNode
*
assert_is_var
();
PMNode
*
assert_var_not_persistable
();
PMNode
*
assert_var_not_persistable
();
PMNode
*
assert_is_persistable_var
();
PMNode
*
assert_is_persistable_var
();
...
...
lite/kernels/arm/CMakeLists.txt
浏览文件 @
d1b7aec5
...
@@ -79,6 +79,7 @@ add_kernel(box_clip_compute_arm ARM extra SRCS box_clip_compute.cc DEPS ${lite_k
...
@@ -79,6 +79,7 @@ add_kernel(box_clip_compute_arm ARM extra SRCS box_clip_compute.cc DEPS ${lite_k
add_kernel
(
assign_value_compute_arm ARM extra SRCS assign_value_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
add_kernel
(
assign_value_compute_arm ARM extra SRCS assign_value_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
add_kernel
(
conditional_block_compute_arm ARM extra SRCS conditional_block_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
add_kernel
(
conditional_block_compute_arm ARM extra SRCS conditional_block_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
add_kernel
(
collect_fpn_proposals_compute_arm ARM extra SRCS collect_fpn_proposals_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
add_kernel
(
collect_fpn_proposals_compute_arm ARM extra SRCS collect_fpn_proposals_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
add_kernel
(
distribute_fpn_proposals_compute_arm ARM extra SRCS distribute_fpn_proposals_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
# for OCR specific
# for OCR specific
...
...
lite/kernels/arm/cast_compute.cc
浏览文件 @
d1b7aec5
...
@@ -74,6 +74,6 @@ void CastCompute::Run() {
...
@@ -74,6 +74,6 @@ void CastCompute::Run() {
REGISTER_LITE_KERNEL
(
REGISTER_LITE_KERNEL
(
cast
,
kARM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
CastCompute
,
def
)
cast
,
kARM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
CastCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
)
,
PRECISION
(
kAny
)
)})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
)
,
PRECISION
(
kAny
)
)})
.
Finalize
();
.
Finalize
();
lite/kernels/arm/collect_fpn_proposals_compute.cc
浏览文件 @
d1b7aec5
...
@@ -141,7 +141,7 @@ REGISTER_LITE_KERNEL(collect_fpn_proposals,
...
@@ -141,7 +141,7 @@ REGISTER_LITE_KERNEL(collect_fpn_proposals,
kNCHW
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
CollectFpnProposalsCompute
,
paddle
::
lite
::
kernels
::
arm
::
CollectFpnProposalsCompute
,
def
)
def
)
.
BindInput
(
"MultiLevelRois"
,
{
LiteType
::
GetTensor
List
Ty
(
TARGET
(
kARM
))})
.
BindInput
(
"MultiLevelRois"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"MultiLevelScores"
,
{
LiteType
::
GetTensor
List
Ty
(
TARGET
(
kARM
))})
.
BindInput
(
"MultiLevelScores"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"FpnRois"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"FpnRois"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
.
Finalize
();
lite/kernels/arm/compare_compute.cc
浏览文件 @
d1b7aec5
...
@@ -219,6 +219,19 @@ REGISTER_LITE_KERNEL(greater_equal,
...
@@ -219,6 +219,19 @@ REGISTER_LITE_KERNEL(greater_equal,
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kBool
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kBool
))})
.
Finalize
();
.
Finalize
();
REGISTER_LITE_KERNEL
(
less_than
,
kARM
,
kInt32
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
CompareCompute_int32
<
paddle
::
lite
::
kernels
::
arm
::
_LessThanFunctor
>
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kBool
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
equal
,
REGISTER_LITE_KERNEL
(
equal
,
kARM
,
kARM
,
kInt32
,
kInt32
,
...
...
lite/kernels/arm/conditional_block_compute.h
浏览文件 @
d1b7aec5
...
@@ -23,9 +23,8 @@
...
@@ -23,9 +23,8 @@
#include "lite/operators/conditional_block_op.h"
#include "lite/operators/conditional_block_op.h"
#ifdef LITE_WITH_PROFILE
#ifdef LITE_WITH_PROFILE
#include "lite/core/profile/basic_profiler.h"
#include "lite/core/profile/basic_profiler.h"
#endif // LITE_WITH_PROFILE
#ifdef LITE_WITH_PROFILE
#include "lite/core/profile/precision_profiler.h"
#include "lite/core/profile/precision_profiler.h"
#include "lite/core/profile/profiler.h"
#endif
#endif
namespace
paddle
{
namespace
paddle
{
...
@@ -57,6 +56,11 @@ class CondExecutor {
...
@@ -57,6 +56,11 @@ class CondExecutor {
}
}
void
Run
()
{
void
Run
()
{
#ifdef LITE_WITH_PROFILE
#ifdef LITE_WITH_PRECISION_PROFILE
lite
::
profile
::
Profiler
profiler
;
#endif // LITE_WITH_PRECISION_PROFILE
#endif // LITE_WITH_PROFILE
for
(
auto
&
op_handler
:
ops_of_block_
)
{
for
(
auto
&
op_handler
:
ops_of_block_
)
{
op_handler
->
CheckShape
();
op_handler
->
CheckShape
();
op_handler
->
InferShape
();
op_handler
->
InferShape
();
...
@@ -64,6 +68,7 @@ class CondExecutor {
...
@@ -64,6 +68,7 @@ class CondExecutor {
#ifdef LITE_WITH_PRECISION_PROFILE
#ifdef LITE_WITH_PRECISION_PROFILE
std
::
unique_ptr
<
KernelBase
>
kernel
(
op_handler
->
GetKernel
());
std
::
unique_ptr
<
KernelBase
>
kernel
(
op_handler
->
GetKernel
());
Instruction
inst
(
op_handler
,
std
::
move
(
kernel
));
Instruction
inst
(
op_handler
,
std
::
move
(
kernel
));
inst
.
set_profiler
(
&
profiler
);
#endif // LITE_WITH_PRECISION_PROFILE
#endif // LITE_WITH_PRECISION_PROFILE
#endif // LITE_WITH_PROFILE
#endif // LITE_WITH_PROFILE
op_handler
->
Run
();
op_handler
->
Run
();
...
...
lite/kernels/arm/distribute_fpn_proposals_compute.cc
0 → 100644
浏览文件 @
d1b7aec5
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/distribute_fpn_proposals_compute.h"
#include <string>
#include <vector>
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/core/type_system.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
arm
{
const
int
kBoxDim
=
4
;
template
<
typename
T
>
static
inline
T
BBoxArea
(
const
T
*
box
,
bool
normalized
)
{
if
(
box
[
2
]
<
box
[
0
]
||
box
[
3
]
<
box
[
1
])
{
// If coordinate values are is invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
return
static_cast
<
T
>
(
0.
);
}
else
{
const
T
w
=
box
[
2
]
-
box
[
0
];
const
T
h
=
box
[
3
]
-
box
[
1
];
if
(
normalized
)
{
return
w
*
h
;
}
else
{
// If coordinate values are not within range [0, 1].
return
(
w
+
1
)
*
(
h
+
1
);
}
}
}
void
DistributeFpnProposalsCompute
::
Run
()
{
auto
&
param
=
Param
<
operators
::
DistributeFpnProposalsParam
>
();
const
lite
::
Tensor
*
fpn_rois
=
param
.
fpn_rois
;
std
::
vector
<
lite
::
Tensor
*>
multi_fpn_rois
=
param
.
multi_fpn_rois
;
lite
::
Tensor
*
restore_index
=
param
.
restore_index
;
int
min_level
=
param
.
min_level
;
int
max_level
=
param
.
max_level
;
int
refer_level
=
param
.
refer_level
;
int
refer_scale
=
param
.
refer_scale
;
int
num_level
=
max_level
-
min_level
+
1
;
CHECK_EQ
(
fpn_rois
->
lod
().
size
(),
1
);
auto
fpn_rois_lod
=
fpn_rois
->
lod
().
back
();
int
fpn_rois_num
=
fpn_rois_lod
[
fpn_rois_lod
.
size
()
-
1
];
std
::
vector
<
int
>
target_level
;
// record the number of rois in each level
std
::
vector
<
int
>
num_rois_level
(
num_level
,
0
);
std
::
vector
<
int
>
num_rois_level_integral
(
num_level
+
1
,
0
);
for
(
size_t
i
=
0
;
i
<
fpn_rois_lod
.
size
()
-
1
;
++
i
)
{
auto
fpn_rois_slice
=
fpn_rois
->
Slice
<
float
>
(
static_cast
<
int64_t
>
(
fpn_rois_lod
[
i
]),
static_cast
<
int64_t
>
(
fpn_rois_lod
[
i
+
1
]));
const
float
*
rois_data
=
fpn_rois_slice
.
data
<
float
>
();
for
(
int
j
=
0
;
j
<
fpn_rois_slice
.
dims
()[
0
];
++
j
)
{
// get the target level of current rois
float
roi_scale
=
std
::
sqrt
(
BBoxArea
(
rois_data
,
false
));
int
tgt_lvl
=
std
::
floor
(
std
::
log2
(
roi_scale
/
refer_scale
+
static_cast
<
float
>
(
1e-6
))
+
refer_level
);
tgt_lvl
=
std
::
min
(
max_level
,
std
::
max
(
tgt_lvl
,
min_level
));
target_level
.
push_back
(
tgt_lvl
);
num_rois_level
[
tgt_lvl
-
min_level
]
++
;
rois_data
+=
kBoxDim
;
}
}
// define the output rois
// pointer which point to each level fpn rois
std
::
vector
<
float
*>
multi_fpn_rois_data
(
num_level
);
// lod0 which will record the offset information of each level rois
std
::
vector
<
std
::
vector
<
uint64_t
>>
multi_fpn_rois_lod0
;
for
(
int
i
=
0
;
i
<
num_level
;
++
i
)
{
// allocate memory for each level rois
multi_fpn_rois
[
i
]
->
Resize
({
num_rois_level
[
i
],
kBoxDim
});
multi_fpn_rois_data
[
i
]
=
multi_fpn_rois
[
i
]
->
mutable_data
<
float
>
();
std
::
vector
<
uint64_t
>
lod0
(
1
,
0
);
multi_fpn_rois_lod0
.
push_back
(
lod0
);
// statistic start point for each level rois
num_rois_level_integral
[
i
+
1
]
=
num_rois_level_integral
[
i
]
+
num_rois_level
[
i
];
}
restore_index
->
Resize
({
fpn_rois_num
,
1
});
int
*
restore_index_data
=
restore_index
->
mutable_data
<
int
>
();
std
::
vector
<
int
>
restore_index_inter
(
fpn_rois_num
,
-
1
);
// distribute the rois into different fpn level by target level
for
(
size_t
i
=
0
;
i
<
fpn_rois_lod
.
size
()
-
1
;
++
i
)
{
Tensor
fpn_rois_slice
=
fpn_rois
->
Slice
<
float
>
(
static_cast
<
int64_t
>
(
fpn_rois_lod
[
i
]),
static_cast
<
int64_t
>
(
fpn_rois_lod
[
i
+
1
]));
const
float
*
rois_data
=
fpn_rois_slice
.
data
<
float
>
();
size_t
cur_offset
=
fpn_rois_lod
[
i
];
// std::vector<size_t > lod_offset[num_level];
for
(
int
j
=
0
;
j
<
num_level
;
j
++
)
{
multi_fpn_rois_lod0
[
j
].
push_back
(
multi_fpn_rois_lod0
[
j
][
i
]);
}
for
(
int
j
=
0
;
j
<
fpn_rois_slice
.
dims
()[
0
];
++
j
)
{
int
lvl
=
target_level
[
cur_offset
+
j
];
memcpy
(
multi_fpn_rois_data
[
lvl
-
min_level
],
rois_data
,
kBoxDim
*
sizeof
(
float
));
multi_fpn_rois_data
[
lvl
-
min_level
]
+=
kBoxDim
;
int
index_in_shuffle
=
num_rois_level_integral
[
lvl
-
min_level
]
+
multi_fpn_rois_lod0
[
lvl
-
min_level
][
i
+
1
];
restore_index_inter
[
index_in_shuffle
]
=
cur_offset
+
j
;
multi_fpn_rois_lod0
[
lvl
-
min_level
][
i
+
1
]
++
;
rois_data
+=
kBoxDim
;
}
}
for
(
int
i
=
0
;
i
<
fpn_rois_num
;
++
i
)
{
restore_index_data
[
restore_index_inter
[
i
]]
=
i
;
}
// merge lod information into LoDTensor
for
(
int
i
=
0
;
i
<
num_level
;
++
i
)
{
lite
::
LoD
lod
;
lod
.
emplace_back
(
multi_fpn_rois_lod0
[
i
]);
multi_fpn_rois
[
i
]
->
set_lod
(
lod
);
}
return
;
}
}
// namespace arm
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
distribute_fpn_proposals
,
kARM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
DistributeFpnProposalsCompute
,
def
)
.
BindInput
(
"FpnRois"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"MultiFpnRois"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"RestoreIndex"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
lite/kernels/arm/distribute_fpn_proposals_compute.h
0 → 100644
浏览文件 @
d1b7aec5
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include "lite/core/kernel.h"
#include "lite/operators/distribute_fpn_proposals_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
arm
{
class
DistributeFpnProposalsCompute
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
DistributeFpnProposalsParam
;
void
Run
()
override
;
virtual
~
DistributeFpnProposalsCompute
()
=
default
;
};
}
// namespace arm
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
lite/kernels/arm/elementwise_compute.cc
浏览文件 @
d1b7aec5
...
@@ -161,20 +161,21 @@ void ElementwiseSubActivationCompute::Run() {
...
@@ -161,20 +161,21 @@ void ElementwiseSubActivationCompute::Run() {
}
}
}
}
void
ElementwiseMulCompute
::
Run
()
{
template
<
typename
T
,
PrecisionType
PType
>
auto
&
param
=
Param
<
operators
::
ElementwiseParam
>
();
void
ElementwiseMulCompute
<
T
,
PType
>::
Run
()
{
const
float
*
x_data
=
param
.
X
->
data
<
float
>
();
auto
&
param
=
this
->
template
Param
<
operators
::
ElementwiseParam
>();
const
float
*
y_data
=
param
.
Y
->
data
<
float
>
();
auto
*
x_data
=
param
.
X
->
template
data
<
T
>();
float
*
out_data
=
param
.
Out
->
mutable_data
<
float
>
();
auto
*
y_data
=
param
.
Y
->
template
data
<
T
>();
auto
*
out_data
=
param
.
Out
->
template
mutable_data
<
T
>();
int
axis
=
param
.
axis
;
int
axis
=
param
.
axis
;
auto
x_dims
=
param
.
X
->
dims
();
auto
x_dims
=
param
.
X
->
dims
();
auto
y_dims
=
param
.
Y
->
dims
();
auto
y_dims
=
param
.
Y
->
dims
();
int
pre
,
n
,
post
;
int
pre
,
n
,
post
;
if
(
is_broadcast
(
x_dims
,
y_dims
,
axis
,
&
pre
,
&
n
,
&
post
))
{
if
(
is_broadcast
(
x_dims
,
y_dims
,
axis
,
&
pre
,
&
n
,
&
post
))
{
lite
::
arm
::
math
::
elementwise_mul_broadcast
(
lite
::
arm
::
math
::
elementwise_mul_broadcast
<
T
>
(
x_data
,
y_data
,
out_data
,
pre
,
n
,
post
);
x_data
,
y_data
,
out_data
,
pre
,
n
,
post
);
}
else
{
}
else
{
lite
::
arm
::
math
::
elementwise_mul
(
lite
::
arm
::
math
::
elementwise_mul
<
T
>
(
x_data
,
y_data
,
out_data
,
x_dims
.
production
());
x_data
,
y_data
,
out_data
,
x_dims
.
production
());
}
}
}
}
...
@@ -347,17 +348,24 @@ REGISTER_LITE_KERNEL(
...
@@ -347,17 +348,24 @@ REGISTER_LITE_KERNEL(
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
.
Finalize
();
REGISTER_LITE_KERNEL
(
elementwise_mul
,
using
elementwise_mul_float
=
kARM
,
paddle
::
lite
::
kernels
::
arm
::
ElementwiseMulCompute
<
float
,
PRECISION
(
kFloat
)
>
;
kFloat
,
REGISTER_LITE_KERNEL
(
kNCHW
,
elementwise_mul
,
kARM
,
kFloat
,
kNCHW
,
elementwise_mul_float
,
def
)
paddle
::
lite
::
kernels
::
arm
::
ElementwiseMulCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
.
Finalize
();
using
elementwise_mul_int32
=
paddle
::
lite
::
kernels
::
arm
::
ElementwiseMulCompute
<
int
,
PRECISION
(
kInt32
)
>
;
REGISTER_LITE_KERNEL
(
elementwise_mul
,
kARM
,
kInt32
,
kNCHW
,
elementwise_mul_int32
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
REGISTER_LITE_KERNEL
(
fusion_elementwise_mul_activation
,
fusion_elementwise_mul_activation
,
kARM
,
kARM
,
...
...
lite/kernels/arm/elementwise_compute.h
浏览文件 @
d1b7aec5
...
@@ -54,8 +54,8 @@ class ElementwiseSubActivationCompute
...
@@ -54,8 +54,8 @@ class ElementwiseSubActivationCompute
virtual
~
ElementwiseSubActivationCompute
()
=
default
;
virtual
~
ElementwiseSubActivationCompute
()
=
default
;
};
};
class
ElementwiseMulCompute
template
<
typename
T
,
PrecisionType
PType
>
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
{
class
ElementwiseMulCompute
:
public
KernelLite
<
TARGET
(
kARM
),
PType
>
{
public:
public:
void
Run
()
override
;
void
Run
()
override
;
...
...
lite/kernels/arm/elementwise_compute_test.cc
浏览文件 @
d1b7aec5
...
@@ -329,13 +329,13 @@ TEST(elementwise_mul_arm, retrive_op) {
...
@@ -329,13 +329,13 @@ TEST(elementwise_mul_arm, retrive_op) {
}
}
TEST
(
elementwise_mul_arm
,
init
)
{
TEST
(
elementwise_mul_arm
,
init
)
{
ElementwiseMulCompute
elementwise_mul
;
ElementwiseMulCompute
<
float
,
PRECISION
(
kFloat
)
>
elementwise_mul
;
ASSERT_EQ
(
elementwise_mul
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
elementwise_mul
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
elementwise_mul
.
target
(),
TARGET
(
kARM
));
ASSERT_EQ
(
elementwise_mul
.
target
(),
TARGET
(
kARM
));
}
}
TEST
(
elementwise_mul
,
compute
)
{
TEST
(
elementwise_mul
,
compute
)
{
ElementwiseMulCompute
elementwise_mul
;
ElementwiseMulCompute
<
float
,
PRECISION
(
kFloat
)
>
elementwise_mul
;
operators
::
ElementwiseParam
param
;
operators
::
ElementwiseParam
param
;
lite
::
Tensor
x
,
y
,
output
,
output_ref
;
lite
::
Tensor
x
,
y
,
output
,
output_ref
;
...
...
lite/kernels/arm/gather_compute.cc
浏览文件 @
d1b7aec5
...
@@ -29,7 +29,7 @@ void GatherCompute::Run() {
...
@@ -29,7 +29,7 @@ void GatherCompute::Run() {
auto
index_size
=
param
.
Index
->
dims
()[
0
];
auto
index_size
=
param
.
Index
->
dims
()[
0
];
auto
src_dims
=
param
.
X
->
dims
();
auto
src_dims
=
param
.
X
->
dims
();
const
float
*
p_src
=
param
.
X
->
data
<
float
>
();
const
float
*
p_src
=
param
.
X
->
data
<
float
>
();
const
float
*
p_index
=
param
.
Index
->
data
<
floa
t
>
();
const
int
*
p_index
=
param
.
Index
->
data
<
in
t
>
();
int
slice_size
=
1
;
int
slice_size
=
1
;
for
(
int
i
=
1
;
i
<
src_dims
.
size
();
++
i
)
{
for
(
int
i
=
1
;
i
<
src_dims
.
size
();
++
i
)
{
...
@@ -50,6 +50,8 @@ void GatherCompute::Run() {
...
@@ -50,6 +50,8 @@ void GatherCompute::Run() {
REGISTER_LITE_KERNEL
(
REGISTER_LITE_KERNEL
(
gather
,
kARM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
GatherCompute
,
def
)
gather
,
kARM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
GatherCompute
,
def
)
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"Index"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
.
Finalize
();
lite/kernels/arm/interpolate_compute.cc
浏览文件 @
d1b7aec5
...
@@ -84,8 +84,10 @@ REGISTER_LITE_KERNEL(bilinear_interp,
...
@@ -84,8 +84,10 @@ REGISTER_LITE_KERNEL(bilinear_interp,
paddle
::
lite
::
kernels
::
arm
::
BilinearInterpCompute
,
paddle
::
lite
::
kernels
::
arm
::
BilinearInterpCompute
,
def
)
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"OutSize"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"OutSize"
,
.
BindInput
(
"SizeTensor"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"SizeTensor"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Scale"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"Scale"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
.
Finalize
();
...
@@ -97,8 +99,10 @@ REGISTER_LITE_KERNEL(nearest_interp,
...
@@ -97,8 +99,10 @@ REGISTER_LITE_KERNEL(nearest_interp,
paddle
::
lite
::
kernels
::
arm
::
NearestInterpCompute
,
paddle
::
lite
::
kernels
::
arm
::
NearestInterpCompute
,
def
)
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"OutSize"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"OutSize"
,
.
BindInput
(
"SizeTensor"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"SizeTensor"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Scale"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"Scale"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
.
Finalize
();
lite/kernels/arm/slice_compute.cc
浏览文件 @
d1b7aec5
...
@@ -176,3 +176,14 @@ REGISTER_LITE_KERNEL(slice, kARM, kFloat, kNCHW, slice_float, def)
...
@@ -176,3 +176,14 @@ REGISTER_LITE_KERNEL(slice, kARM, kFloat, kNCHW, slice_float, def)
.
BindInput
(
"EndsTensorList"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"EndsTensorList"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
.
Finalize
();
using
slice_int32
=
paddle
::
lite
::
kernels
::
arm
::
SliceCompute
<
int
,
PRECISION
(
kInt32
)
>
;
REGISTER_LITE_KERNEL
(
slice
,
kARM
,
kInt32
,
kNCHW
,
slice_int32
,
def
)
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"StartsTensor"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"EndsTensor"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"StartsTensorList"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"EndsTensorList"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
Finalize
();
lite/operators/CMakeLists.txt
浏览文件 @
d1b7aec5
...
@@ -100,6 +100,7 @@ add_operator(attention_padding_mask_op_lite extra SRCS attention_padding_mask_op
...
@@ -100,6 +100,7 @@ add_operator(attention_padding_mask_op_lite extra SRCS attention_padding_mask_op
add_operator
(
sequence_arithmetic_op_lite extra SRCS sequence_arithmetic_op.cc DEPS
${
op_DEPS
}
)
add_operator
(
sequence_arithmetic_op_lite extra SRCS sequence_arithmetic_op.cc DEPS
${
op_DEPS
}
)
add_operator
(
conditional_block_op_lite extra SRCS conditional_block_op.cc DEPS
${
op_DEPS
}
)
add_operator
(
conditional_block_op_lite extra SRCS conditional_block_op.cc DEPS
${
op_DEPS
}
)
add_operator
(
collect_fpn_proposals_op_lite extra SRCS collect_fpn_proposals_op.cc DEPS
${
op_DEPS
}
)
add_operator
(
collect_fpn_proposals_op_lite extra SRCS collect_fpn_proposals_op.cc DEPS
${
op_DEPS
}
)
add_operator
(
distribute_fpn_proposals_op_lite extra SRCS distribute_fpn_proposals_op.cc DEPS
${
op_DEPS
}
)
# for OCR specific
# for OCR specific
add_operator
(
while_op extra SRCS while_op.cc DEPS
${
op_DEPS
}
)
add_operator
(
while_op extra SRCS while_op.cc DEPS
${
op_DEPS
}
)
...
...
lite/operators/collect_fpn_proposals_op.cc
浏览文件 @
d1b7aec5
...
@@ -31,7 +31,7 @@ bool CollectFpnProposalsOpLite::CheckShape() const {
...
@@ -31,7 +31,7 @@ bool CollectFpnProposalsOpLite::CheckShape() const {
}
}
for
(
auto
item
:
param_
.
multi_level_scores
)
{
for
(
auto
item
:
param_
.
multi_level_scores
)
{
auto
dims
=
item
->
dims
();
auto
dims
=
item
->
dims
();
CHECK_OR_FALSE
(
dims
[
1
]
==
2
);
CHECK_OR_FALSE
(
dims
[
1
]
==
1
);
}
}
for
(
int
i
=
0
;
i
<
param_
.
multi_level_rois
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
param_
.
multi_level_rois
.
size
();
i
++
)
{
auto
roi
=
param_
.
multi_level_rois
[
i
];
auto
roi
=
param_
.
multi_level_rois
[
i
];
...
@@ -45,6 +45,7 @@ bool CollectFpnProposalsOpLite::CheckShape() const {
...
@@ -45,6 +45,7 @@ bool CollectFpnProposalsOpLite::CheckShape() const {
bool
CollectFpnProposalsOpLite
::
InferShape
()
const
{
bool
CollectFpnProposalsOpLite
::
InferShape
()
const
{
param_
.
fpn_rois
->
Resize
({
param_
.
post_nms_topN
,
4
});
param_
.
fpn_rois
->
Resize
({
param_
.
post_nms_topN
,
4
});
return
true
;
return
true
;
}
}
...
...
lite/operators/distribute_fpn_proposals_op.cc
0 → 100644
浏览文件 @
d1b7aec5
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/distribute_fpn_proposals_op.h"
#include <vector>
#include "lite/core/op_lite.h"
#include "lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
operators
{
bool
DistributeFpnProposalsOpLite
::
CheckShape
()
const
{
CHECK_OR_FALSE
(
param_
.
fpn_rois
);
CHECK_OR_FALSE
(
param_
.
restore_index
);
CHECK_OR_FALSE
(
param_
.
multi_fpn_rois
.
size
()
>
1
);
CHECK_OR_FALSE
(
param_
.
max_level
>=
param_
.
min_level
);
size_t
num_out_rois
=
static_cast
<
size_t
>
(
param_
.
max_level
-
param_
.
min_level
+
1
);
CHECK_OR_FALSE
(
num_out_rois
==
param_
.
multi_fpn_rois
.
size
());
return
true
;
}
bool
DistributeFpnProposalsOpLite
::
InferShape
()
const
{
int
num_out_rois
=
param_
.
max_level
-
param_
.
min_level
+
1
;
for
(
int
i
=
0
;
i
<
num_out_rois
;
i
++
)
{
param_
.
multi_fpn_rois
[
i
]
->
Resize
({
-
1
,
4
});
}
param_
.
restore_index
->
Resize
({
-
1
,
1
});
return
true
;
}
bool
DistributeFpnProposalsOpLite
::
AttachImpl
(
const
cpp
::
OpDesc
&
op_desc
,
lite
::
Scope
*
scope
)
{
auto
fpn_rois
=
op_desc
.
Input
(
"FpnRois"
).
front
();
param_
.
fpn_rois
=
scope
->
FindVar
(
fpn_rois
)
->
GetMutable
<
lite
::
Tensor
>
();
auto
multi_fpn_rois
=
op_desc
.
Output
(
"MultiFpnRois"
);
for
(
const
auto
&
name
:
multi_fpn_rois
)
{
param_
.
multi_fpn_rois
.
push_back
(
scope
->
FindVar
(
name
)
->
GetMutable
<
lite
::
Tensor
>
());
}
auto
restore_index
=
op_desc
.
Output
(
"RestoreIndex"
).
front
();
param_
.
restore_index
=
scope
->
FindVar
(
restore_index
)
->
GetMutable
<
lite
::
Tensor
>
();
param_
.
min_level
=
op_desc
.
GetAttr
<
int
>
(
"min_level"
);
param_
.
max_level
=
op_desc
.
GetAttr
<
int
>
(
"max_level"
);
param_
.
refer_level
=
op_desc
.
GetAttr
<
int
>
(
"refer_level"
);
param_
.
refer_scale
=
op_desc
.
GetAttr
<
int
>
(
"refer_scale"
);
return
true
;
}
}
// namespace operators
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_OP
(
distribute_fpn_proposals
,
paddle
::
lite
::
operators
::
DistributeFpnProposalsOpLite
);
lite/operators/distribute_fpn_proposals_op.h
0 → 100644
浏览文件 @
d1b7aec5
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "lite/core/op_lite.h"
#include "lite/core/scope.h"
#include "lite/operators/op_params.h"
#include "lite/utils/all.h"
namespace
paddle
{
namespace
lite
{
namespace
operators
{
class
DistributeFpnProposalsOpLite
:
public
OpLite
{
public:
DistributeFpnProposalsOpLite
()
{}
explicit
DistributeFpnProposalsOpLite
(
const
std
::
string
&
op_type
)
:
OpLite
(
op_type
)
{}
bool
CheckShape
()
const
override
;
bool
InferShape
()
const
override
;
bool
AttachImpl
(
const
cpp
::
OpDesc
&
opdesc
,
lite
::
Scope
*
scope
)
override
;
void
AttachKernel
(
KernelBase
*
kernel
)
override
{
kernel
->
SetParam
(
param_
);
}
std
::
string
DebugString
()
const
override
{
return
"distribute_fpn_proposals"
;
}
private:
mutable
DistributeFpnProposalsParam
param_
;
};
}
// namespace operators
}
// namespace lite
}
// namespace paddle
lite/operators/op_params.h
浏览文件 @
d1b7aec5
...
@@ -1094,6 +1094,16 @@ struct CollectFpnProposalsParam {
...
@@ -1094,6 +1094,16 @@ struct CollectFpnProposalsParam {
int
post_nms_topN
{};
int
post_nms_topN
{};
};
};
struct
DistributeFpnProposalsParam
{
const
lite
::
Tensor
*
fpn_rois
{};
std
::
vector
<
lite
::
Tensor
*>
multi_fpn_rois
{};
lite
::
Tensor
*
restore_index
{};
int
min_level
{};
int
max_level
{};
int
refer_level
{};
int
refer_scale
{};
};
/// --------------------- instance_norm operators --------------------
/// --------------------- instance_norm operators --------------------
struct
InstanceNormParam
{
struct
InstanceNormParam
{
lite
::
Tensor
*
x
{};
lite
::
Tensor
*
x
{};
...
...
lite/tests/kernels/cast_compute_test.cc
浏览文件 @
d1b7aec5
...
@@ -80,6 +80,7 @@ class CastComputeTester : public arena::TestCase {
...
@@ -80,6 +80,7 @@ class CastComputeTester : public arena::TestCase {
}
}
void
PrepareData
()
override
{
void
PrepareData
()
override
{
SetPrecisionType
(
output_
,
PRECISION
(
kFloat
));
if
(
in_dtype_
==
20
)
{
if
(
in_dtype_
==
20
)
{
std
::
vector
<
unsigned
char
>
x_data
(
x_dims_
.
production
());
std
::
vector
<
unsigned
char
>
x_data
(
x_dims_
.
production
());
for
(
int
i
=
0
;
i
<
x_dims_
.
production
();
i
++
)
{
for
(
int
i
=
0
;
i
<
x_dims_
.
production
();
i
++
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录