Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
5e5d4ae5
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5e5d4ae5
编写于
6月 25, 2019
作者:
S
shixiaowei02
提交者:
xingzhaolong
6月 25, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add gemm-like conv
上级
e28b5a3c
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
253 addition
and
70 deletion
+253
-70
paddle/fluid/lite/kernels/CMakeLists.txt
paddle/fluid/lite/kernels/CMakeLists.txt
+3
-1
paddle/fluid/lite/kernels/arm/conv_compute.cc
paddle/fluid/lite/kernels/arm/conv_compute.cc
+25
-7
paddle/fluid/lite/kernels/arm/conv_compute.h
paddle/fluid/lite/kernels/arm/conv_compute.h
+1
-0
paddle/fluid/lite/kernels/arm/conv_compute_test.cc
paddle/fluid/lite/kernels/arm/conv_compute_test.cc
+218
-62
paddle/fluid/lite/operators/op_params.h
paddle/fluid/lite/operators/op_params.h
+6
-0
未找到文件。
paddle/fluid/lite/kernels/CMakeLists.txt
浏览文件 @
5e5d4ae5
message
(
STATUS
"add lite kernels"
)
message
(
STATUS
"add lite kernels"
)
set
(
lite_kernel_deps type_system kernel_lite op_lite op_registry_lite context_lite
${
tensor_lite
}
)
set
(
lite_kernel_deps type_system kernel_lite op_lite op_registry_lite context_lite
${
tensor_lite
}
CACHE INTERNAL
""
FORCE
)
add_subdirectory
(
host
)
add_subdirectory
(
host
)
add_subdirectory
(
arm
)
add_subdirectory
(
arm
)
add_subdirectory
(
cuda
)
add_subdirectory
(
cuda
)
...
...
paddle/fluid/lite/kernels/arm/conv_compute.cc
浏览文件 @
5e5d4ae5
...
@@ -92,8 +92,24 @@ void ConvCompute::Run() {
...
@@ -92,8 +92,24 @@ void ConvCompute::Run() {
// }
// }
}
}
void
ConvComputeInt8
::
PrepareForRun
()
{}
template
<
PrecisionType
Ptype_out
>
void
ConvComputeInt8
::
Run
()
{}
void
ConvComputeInt8
<
Ptype_out
>::
PrepareForRun
()
{
auto
&
param
=
this
->
Param
<
param_t
>
();
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
impl_
=
new
lite
::
arm
::
math
::
GemmLikeConvInt8
<
Ptype_out
>
;
CHECK
(
this
->
impl_
->
create
(
param
,
&
ctx
));
}
/// Executes the int8 convolution through the implementation object stored in
/// `impl_`. Aborts via CHECK when `impl_` is null.
template <PrecisionType Ptype_out>
void ConvComputeInt8<Ptype_out>::Run() {
  auto& conv_param = this->Param<param_t>();
  CHECK(impl_);
  impl_->run(conv_param);
}
template
class
ConvComputeInt8
<
PRECISION
(
kInt8
)>;
template
class
ConvComputeInt8
<
PRECISION
(
kFloat
)>;
template
class
ConvComputeInt8
<
PRECISION
(
kInt32
)>;
}
// namespace arm
}
// namespace arm
}
// namespace kernels
}
// namespace kernels
...
@@ -116,8 +132,9 @@ REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW,
...
@@ -116,8 +132,9 @@ REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW,
.
BindOutput
(
"Output"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Output"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
.
Finalize
();
REGISTER_LITE_KERNEL
(
conv2d
,
kARM
,
kInt8
,
kNCHW
,
REGISTER_LITE_KERNEL
(
paddle
::
lite
::
kernels
::
arm
::
ConvComputeInt8
,
def
)
conv2d
,
kARM
,
kInt8
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
ConvComputeInt8
<
PRECISION
(
kInt8
)
>
,
int8_out
)
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
BindInput
(
"Bias"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Bias"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Filter"
,
.
BindInput
(
"Filter"
,
...
@@ -126,12 +143,13 @@ REGISTER_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW,
...
@@ -126,12 +143,13 @@ REGISTER_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
Finalize
();
.
Finalize
();
REGISTER_LITE_KERNEL
(
depthwise_conv2d
,
kARM
,
kInt8
,
kNCHW
,
REGISTER_LITE_KERNEL
(
paddle
::
lite
::
kernels
::
arm
::
ConvComputeInt8
,
def
)
conv2d
,
kARM
,
kInt8
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
ConvComputeInt8
<
PRECISION
(
kFloat
)
>
,
fp32_out
)
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
BindInput
(
"Bias"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Bias"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Filter"
,
.
BindInput
(
"Filter"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
BindOutput
(
"Output"
,
.
BindOutput
(
"Output"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
k
Int8
))})
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
k
Float
))})
.
Finalize
();
.
Finalize
();
paddle/fluid/lite/kernels/arm/conv_compute.h
浏览文件 @
5e5d4ae5
...
@@ -41,6 +41,7 @@ class ConvCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
...
@@ -41,6 +41,7 @@ class ConvCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
nullptr
};
nullptr
};
};
};
template
<
PrecisionType
Ptype_out
>
class
ConvComputeInt8
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kInt8
)
>
{
class
ConvComputeInt8
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kInt8
)
>
{
public:
public:
using
param_t
=
operators
::
ConvParam
;
using
param_t
=
operators
::
ConvParam
;
...
...
paddle/fluid/lite/kernels/arm/conv_compute_test.cc
浏览文件 @
5e5d4ae5
...
@@ -14,9 +14,11 @@
...
@@ -14,9 +14,11 @@
#include "paddle/fluid/lite/kernels/arm/conv_compute.h"
#include "paddle/fluid/lite/kernels/arm/conv_compute.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <limits>
#include <memory>
#include <memory>
#include <utility>
#include <utility>
#include <vector>
#include <vector>
#include "paddle/fluid/lite/arm/math/type_trans.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -24,83 +26,89 @@ namespace lite {
...
@@ -24,83 +26,89 @@ namespace lite {
namespace
kernels
{
namespace
kernels
{
namespace
arm
{
namespace
arm
{
template
<
typename
dtype
>
static
float
compute_max_kernel
(
const
float
*
din
,
int64_t
size
)
{
void
conv_compute_ref
(
const
operators
::
ConvParam
&
param
)
{
float
max_value
=
-
std
::
numeric_limits
<
float
>::
max
();
auto
input
=
param
.
x
;
for
(
int64_t
i
=
0
;
i
<
size
;
i
++
)
{
auto
filter
=
param
.
filter
;
max_value
=
max_value
>
din
[
0
]
?
max_value
:
din
[
0
];
auto
output
=
param
.
output
;
DDim
input_dims
=
param
.
x
->
dims
();
DDim
filter_dims
=
param
.
filter
->
dims
();
DDim
output_dims
=
param
.
output
->
dims
();
std
::
vector
<
int
>
paddings
=
param
.
paddings
;
std
::
vector
<
int
>
strides
=
param
.
strides
;
std
::
vector
<
int
>
dilations
=
param
.
dilations
;
int
groups
=
param
.
groups
;
auto
input_data
=
param
.
x
->
data
<
float
>
();
auto
output_data
=
param
.
output
->
mutable_data
<
float
>
();
auto
filter_data
=
param
.
filter
->
mutable_data
<
float
>
();
const
float
*
bias_data
=
nullptr
;
if
(
param
.
bias
!=
nullptr
)
{
bias_data
=
param
.
bias
->
mutable_data
<
float
>
();
}
}
bool
flag_bias
=
bias_data
!=
nullptr
;
LOG
(
INFO
)
<<
"[max_value]: "
<<
max_value
;
bool
flag_relu
=
param
.
fuse_relu
;
return
max_value
;
}
static
std
::
vector
<
float
>
get_tensor_scale_n
(
const
float
*
in_data
,
int
axis_size
,
int64_t
inner_size
,
float
scale_factor
)
{
std
::
vector
<
float
>
scale_out
(
axis_size
);
for
(
int
c
=
0
;
c
<
axis_size
;
++
c
)
{
// num
const
float
*
ptr_in
=
in_data
+
c
*
inner_size
;
// channel*width*height
scale_out
[
c
]
=
compute_max_kernel
(
ptr_in
,
inner_size
)
/
scale_factor
;
}
for
(
auto
s
:
scale_out
)
{
LOG
(
INFO
)
<<
"[Scale out]: "
<<
s
;
}
return
scale_out
;
}
template
<
typename
Dtype1
,
typename
Dtype2
>
static
void
conv_basic
(
const
Dtype1
*
din
,
Dtype2
*
dout
,
int
num
,
int
chout
,
int
hout
,
int
wout
,
int
chin
,
int
hin
,
int
win
,
const
Dtype1
*
weights
,
const
Dtype2
*
bias
,
int
group
,
int
kernel_w
,
int
kernel_h
,
int
stride_w
,
int
stride_h
,
int
dila_w
,
int
dila_h
,
int
pad_w
,
int
pad_h
,
bool
flag_bias
,
bool
flag_relu
)
{
Dtype2
beta
=
0
;
auto
src_data
=
din
;
auto
dst_data_ref
=
dout
;
auto
weights_data
=
weights
;
auto
with_bias
=
flag_bias
;
auto
bias_data
=
bias
;
int
in_num
=
num
;
int
out_channels
=
chout
;
int
out_h
=
hout
;
int
out_w
=
wout
;
int
num
=
input_dims
[
0
];
int
in_channel
=
chin
;
int
chout
=
output_dims
[
1
];
int
in_h
=
hin
;
int
hout
=
output_dims
[
2
];
int
in_w
=
win
;
int
wout
=
output_dims
[
3
];
int
out_c_group
=
out_channels
/
group
;
int
in_c_group
=
in_channel
/
group
;
int
chin
=
input_dims
[
1
];
int
hin
=
input_dims
[
2
];
for
(
int
n
=
0
;
n
<
in_num
;
++
n
)
{
int
win
=
input_dims
[
3
];
for
(
int
g
=
0
;
g
<
group
;
++
g
)
{
int
out_c_group
=
chout
/
groups
;
int
in_c_group
=
chin
/
groups
;
int
stride_h
=
strides
[
0
];
int
stride_w
=
strides
[
1
];
int
dilation_h
=
dilations
[
0
];
int
dilation_w
=
dilations
[
1
];
int
padding_h
=
paddings
[
0
];
int
padding_w
=
paddings
[
1
];
int
kernel_h
=
filter_dims
[
2
];
int
kernel_w
=
filter_dims
[
3
];
for
(
int
n
=
0
;
n
<
num
;
++
n
)
{
for
(
int
g
=
0
;
g
<
groups
;
++
g
)
{
for
(
int
oc
=
0
;
oc
<
out_c_group
;
++
oc
)
{
for
(
int
oc
=
0
;
oc
<
out_c_group
;
++
oc
)
{
for
(
int
oh
=
0
;
oh
<
hout
;
++
oh
)
{
for
(
int
oh
=
0
;
oh
<
out_h
;
++
oh
)
{
for
(
int
ow
=
0
;
ow
<
wout
;
++
ow
)
{
for
(
int
ow
=
0
;
ow
<
out_w
;
++
ow
)
{
int
out_idx
=
n
*
group
s
*
out_c_group
*
hout
*
wout
+
int
out_idx
=
n
*
group
*
out_c_group
*
out_h
*
out_w
+
g
*
out_c_group
*
hout
*
wout
+
oc
*
hout
*
wout
+
g
*
out_c_group
*
out_h
*
out_w
+
oc
*
out_h
*
out_w
+
oh
*
wout
+
ow
;
oh
*
out_w
+
ow
;
output_data
[
out_idx
]
=
Dtype2
bias_d
=
flag_bias
?
static_cast
<
float
>
(
bias_data
[
g
*
out_c_group
+
oc
])
with_bias
?
(
bias_data
[
g
*
out_c_group
+
oc
])
:
(
Dtype2
)
0
;
:
0.
f
;
dst_data_ref
[
out_idx
]
=
bias_d
;
// + dst_data_ref[out_idx] * beta
;
for
(
int
ic
=
0
;
ic
<
in_c_group
;
++
ic
)
{
for
(
int
ic
=
0
;
ic
<
in_c_group
;
++
ic
)
{
for
(
int
kh
=
0
;
kh
<
kernel_h
;
++
kh
)
{
for
(
int
kh
=
0
;
kh
<
kernel_h
;
++
kh
)
{
for
(
int
kw
=
0
;
kw
<
kernel_w
;
++
kw
)
{
for
(
int
kw
=
0
;
kw
<
kernel_w
;
++
kw
)
{
int
iw
=
ow
*
stride_w
-
pad
ding_w
+
kw
*
(
dilation
_w
);
int
iw
=
ow
*
stride_w
-
pad
_w
+
kw
*
(
dila
_w
);
int
ih
=
oh
*
stride_h
-
pad
ding_h
+
kh
*
(
dilation
_h
);
int
ih
=
oh
*
stride_h
-
pad
_h
+
kh
*
(
dila
_h
);
if
(
iw
<
0
||
iw
>=
win
)
continue
;
if
(
iw
<
0
||
iw
>=
in_w
)
continue
;
if
(
ih
<
0
||
ih
>=
hin
)
continue
;
if
(
ih
<
0
||
ih
>=
in_h
)
continue
;
int
iidx
=
n
*
chin
*
hin
*
win
+
g
*
in_c_group
*
hin
*
win
+
int
iidx
=
n
*
in_channel
*
in_h
*
in_w
+
ic
*
hin
*
win
+
ih
*
win
+
iw
;
g
*
in_c_group
*
in_h
*
in_w
+
ic
*
in_h
*
in_w
+
ih
*
in_w
+
iw
;
int
widx
=
int
widx
=
g
*
out_c_group
*
in_c_group
*
kernel_h
*
kernel_w
+
g
*
out_c_group
*
in_c_group
*
kernel_h
*
kernel_w
+
oc
*
in_c_group
*
kernel_h
*
kernel_w
+
oc
*
in_c_group
*
kernel_h
*
kernel_w
+
ic
*
kernel_h
*
kernel_w
+
kh
*
kernel_w
+
kw
;
ic
*
kernel_h
*
kernel_w
+
kh
*
kernel_w
+
kw
;
output_data
[
out_idx
]
+=
dst_data_ref
[
out_idx
]
+=
src_data
[
iidx
]
*
weights_data
[
widx
];
(
dtype
)
input_data
[
iidx
]
*
(
dtype
)
filter_data
[
widx
];
}
}
}
}
}
}
if
(
flag_relu
)
{
if
(
flag_relu
)
{
output_data
[
out_idx
]
=
dst_data_ref
[
out_idx
]
=
dst_data_ref
[
out_idx
]
>
(
Dtype2
)
0
output_data
[
out_idx
]
>
0.
f
?
output_data
[
out_idx
]
:
0.
f
;
?
dst_data_ref
[
out_idx
]
:
(
Dtype2
)
0
;
}
}
}
}
}
}
...
@@ -109,6 +117,44 @@ void conv_compute_ref(const operators::ConvParam& param) {
...
@@ -109,6 +117,44 @@ void conv_compute_ref(const operators::ConvParam& param) {
}
}
}
}
/// Reference convolution driver used to validate the optimized kernels.
///
/// Unpacks the tensors and hyper-parameters held by `param` and forwards them
/// to conv_basic(), which writes the result into `param.output`.
///
/// @tparam Dtype1 element type read from the input (`x`) and `filter` tensors.
/// @tparam Dtype2 element type of the `bias` and `output` tensors.
/// @param param convolution parameters; `x`, `filter` and `output` are
///        dereferenced unconditionally, `bias` may be null.
template <typename Dtype1, typename Dtype2>
void conv_compute_ref(const operators::ConvParam& param) {
  const Dtype1* din = param.x->data<Dtype1>();
  Dtype2* dout = param.output->mutable_data<Dtype2>();

  // Tensor dims are indexed as [0]=batch, [1]=channels, [2]=height, [3]=width.
  int num = param.x->dims()[0];
  int chout = param.output->dims()[1];
  int hout = param.output->dims()[2];
  int wout = param.output->dims()[3];

  int chin = param.x->dims()[1];
  int hin = param.x->dims()[2];
  int win = param.x->dims()[3];

  const Dtype1* weights = param.filter->mutable_data<Dtype1>();
  const Dtype2* bias = nullptr;
  if (param.bias != nullptr) {
    bias = param.bias->mutable_data<Dtype2>();
  }

  int group = param.groups;

  // Height comes first everywhere: filter dims are [.., .., kh, kw] and the
  // strides / dilations / paddings vectors are ordered {h, w}, matching the
  // previous float-only reference implementation. Reading them the other way
  // round only happens to work for square configurations.
  int kernel_h = param.filter->dims()[2];
  int kernel_w = param.filter->dims()[3];
  int stride_h = param.strides[0];
  int stride_w = param.strides[1];
  int dila_h = param.dilations[0];
  int dila_w = param.dilations[1];
  int pad_h = param.paddings[0];
  int pad_w = param.paddings[1];

  bool flag_bias = (param.bias != nullptr);
  bool flag_relu = param.fuse_relu;

  // conv_basic takes each (w, h) pair in width-first order.
  conv_basic(din, dout, num, chout, hout, wout, chin, hin, win, weights, bias,
             group, kernel_w, kernel_h, stride_w, stride_h, dila_w, dila_h,
             pad_w, pad_h, flag_bias, flag_relu);
}
TEST
(
conv_arm
,
retrive_op
)
{
TEST
(
conv_arm
,
retrive_op
)
{
auto
conv
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
auto
conv
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
"conv2d"
);
"conv2d"
);
...
@@ -116,12 +162,122 @@ TEST(conv_arm, retrive_op) {
...
@@ -116,12 +162,122 @@ TEST(conv_arm, retrive_op) {
ASSERT_TRUE
(
conv
.
front
());
ASSERT_TRUE
(
conv
.
front
());
}
}
// The kernel registry must be able to create at least one non-null ARM int8
// kernel for the "conv2d" op.
TEST(conv_arm_int8, retrive_op) {
  auto int8_kernels =
      KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kInt8)>(
          "conv2d");
  ASSERT_FALSE(int8_kernels.empty());
  ASSERT_TRUE(int8_kernels.front());
}
TEST
(
conv_arm
,
init
)
{
TEST
(
conv_arm
,
init
)
{
ConvCompute
conv
;
ConvCompute
conv
;
ASSERT_EQ
(
conv
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
conv
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
conv
.
target
(),
TARGET
(
kARM
));
ASSERT_EQ
(
conv
.
target
(),
TARGET
(
kARM
));
}
}
// Every output-precision variant of the int8 conv kernel must report int8 as
// its kernel precision and ARM as its target.
TEST(conv_arm_int8, init) {
  ConvComputeInt8<PRECISION(kFloat)> float_out;
  ASSERT_EQ(float_out.precision(), PRECISION(kInt8));
  ASSERT_EQ(float_out.target(), TARGET(kARM));

  ConvComputeInt8<PRECISION(kInt8)> int8_out;
  // Assert on int8_out (not float_out) so that this instantiation is the one
  // actually being verified.
  ASSERT_EQ(int8_out.precision(), PRECISION(kInt8));
  ASSERT_EQ(int8_out.target(), TARGET(kARM));
}
// Runs GemmLikeConvInt8 with int32 output over a small sweep of shapes and
// compares every output element against the conv_compute_ref<int8_t, int>
// reference. Inputs and filters are filled with ones.
TEST(conv_arm_int8, compute) {
  DeviceInfo::Init();
  for (auto n : {2}) {
    for (auto ic : {6}) {
      for (auto oc : {6}) {
        for (auto ih : {9}) {
          for (auto iw : {9}) {
            // TODO: flag_bias is swept but not wired in yet; param.bias stays
            // null below.
            for (auto flag_bias : {false, /*true*/}) {
              for (auto flag_relu : {false, /*true*/}) {
                for (auto depthwise : {false, /*true*/}) {
                  for (auto dilation : {1}) {
                    for (auto stride : {1}) {
                      for (auto padding : {0}) {
                        for (auto ks : {1}) {
                          int group = 1;
                          if (depthwise) {  // depthwise convolution ?
                            group = oc = ic;
                          }

                          // Effective kernel extent once dilation is applied.
                          const int dks = dilation * (ks - 1) + 1;
                          int oh = (ih + 2 * padding - dks) / stride + 1;
                          int ow = (iw + 2 * padding - dks) / stride + 1;
                          std::vector<int64_t> input_shape = {n, ic, ih, iw};
                          std::vector<int64_t> filter_shape = {oc, ic / group,
                                                               ks, ks};
                          std::vector<int64_t> output_shape({n, oc, oh, ow});

                          Tensor input_int8;
                          Tensor filter_int8;
                          Tensor output_int32, output_int32_ref;

                          input_int8.Resize(input_shape);
                          filter_int8.Resize(filter_shape);
                          output_int32.Resize(output_shape);
                          output_int32_ref.Resize(output_shape);

                          int8_t* input_int8_data =
                              input_int8.mutable_data<int8_t>();
                          int8_t* filter_int8_data =
                              filter_int8.mutable_data<int8_t>();

                          // Fill with integer ones; the buffers are int8_t,
                          // so no floating-point literal is involved.
                          const int64_t input_count =
                              input_int8.dims().production();
                          for (int64_t i = 0; i < input_count; i++) {
                            input_int8_data[i] = 1;
                          }
                          const int64_t filter_count =
                              filter_int8.dims().production();
                          for (int64_t i = 0; i < filter_count; i++) {
                            filter_int8_data[i] = 1;
                          }

                          operators::ConvParam param;
                          param.x = &input_int8;
                          param.filter = &filter_int8;
                          param.bias = nullptr;
                          // Follow the sweep variable so that enabling the
                          // `true` case above really exercises fused ReLU.
                          param.fuse_relu = flag_relu;
                          param.paddings = std::vector<int>({padding, padding});
                          param.strides = std::vector<int>({stride, stride});
                          param.dilations =
                              std::vector<int>({dilation, dilation});
                          param.groups = group;

                          // Reference result first, then the kernel under test
                          // writing into a separate output tensor.
                          param.output = &output_int32_ref;
                          conv_compute_ref<int8_t, int>(param);

                          param.output = &output_int32;
                          std::unique_ptr<KernelContext> ctx(new KernelContext);
                          lite::arm::math::GemmLikeConvInt8<PRECISION(kInt32)>
                              int8gemm_int32;
                          int8gemm_int32.init(param, &ctx->As<ARMContext>());
                          int8gemm_int32.create(param, &ctx->As<ARMContext>());
                          int8gemm_int32.run(param);

                          int32_t* output_int32_data =
                              output_int32.mutable_data<int32_t>();
                          int32_t* output_int32_ref_data =
                              output_int32_ref.mutable_data<int32_t>();

                          // Both sides are int32, so compare exactly instead
                          // of with a floating-point tolerance.
                          const int64_t output_count =
                              output_int32.dims().production();
                          for (int64_t i = 0; i < output_count; i++) {
                            EXPECT_EQ(output_int32_data[i],
                                      output_int32_ref_data[i]);
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
TEST
(
conv_arm
,
compute
)
{
TEST
(
conv_arm
,
compute
)
{
DeviceInfo
::
Init
();
DeviceInfo
::
Init
();
#if 1
#if 1
...
@@ -219,7 +375,7 @@ TEST(conv_arm, compute) {
...
@@ -219,7 +375,7 @@ TEST(conv_arm, compute) {
conv
.
Launch
();
conv
.
Launch
();
// invoking ref implementation and compare results
// invoking ref implementation and compare results
param
.
output
=
&
output_ref
;
param
.
output
=
&
output_ref
;
conv_compute_ref
<
float
>
(
param
);
conv_compute_ref
<
float
,
float
>
(
param
);
auto
*
output_ref_data
=
auto
*
output_ref_data
=
output_ref
.
mutable_data
<
float
>
();
output_ref
.
mutable_data
<
float
>
();
for
(
int
i
=
0
;
i
<
output
.
dims
().
production
();
i
++
)
{
for
(
int
i
=
0
;
i
<
output
.
dims
().
production
();
i
++
)
{
...
...
paddle/fluid/lite/operators/op_params.h
浏览文件 @
5e5d4ae5
...
@@ -19,6 +19,11 @@
...
@@ -19,6 +19,11 @@
#include "paddle/fluid/lite/core/framework.pb.h"
#include "paddle/fluid/lite/core/framework.pb.h"
#include "paddle/fluid/lite/utils/all.h"
#include "paddle/fluid/lite/utils/all.h"
// Injects the int8 quantization fields into an operator parameter struct.
// Every field carries a default member initializer so that a
// default-constructed param never holds indeterminate values: int8 is off,
// scales are 1.0f (same default as ConvParam::scale_weights), and the
// weight-scale list is empty.
#define WITH_INT8_CONFIG             \
  bool enable_int8{false};           \
  float input_scale{1.0f};           \
  std::vector<float> weight_scale{}; \
  float output_scale{1.0f};
/*
/*
* This file contains all the argument parameter data structure for operators.
* This file contains all the argument parameter data structure for operators.
*/
*/
...
@@ -147,6 +152,7 @@ struct ConvParam {
...
@@ -147,6 +152,7 @@ struct ConvParam {
float
scale_weights
{
1.0
f
};
// only used with mkl-dnn int8
float
scale_weights
{
1.0
f
};
// only used with mkl-dnn int8
bool
force_fp32_output
{
false
};
// only used in mkl-dnn int8
bool
force_fp32_output
{
false
};
// only used in mkl-dnn int8
std
::
string
data_format
{
"Anylayout"
};
std
::
string
data_format
{
"Anylayout"
};
WITH_INT8_CONFIG
};
};
// For BatchNorm op
// For BatchNorm op
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录