Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
5e5d4ae5
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5e5d4ae5
编写于
6月 25, 2019
作者:
S
shixiaowei02
提交者:
xingzhaolong
6月 25, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add gemm-like conv
上级
e28b5a3c
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
253 additions
and
70 deletions
+253
-70
paddle/fluid/lite/kernels/CMakeLists.txt
paddle/fluid/lite/kernels/CMakeLists.txt
+3
-1
paddle/fluid/lite/kernels/arm/conv_compute.cc
paddle/fluid/lite/kernels/arm/conv_compute.cc
+25
-7
paddle/fluid/lite/kernels/arm/conv_compute.h
paddle/fluid/lite/kernels/arm/conv_compute.h
+1
-0
paddle/fluid/lite/kernels/arm/conv_compute_test.cc
paddle/fluid/lite/kernels/arm/conv_compute_test.cc
+218
-62
paddle/fluid/lite/operators/op_params.h
paddle/fluid/lite/operators/op_params.h
+6
-0
未找到文件。
paddle/fluid/lite/kernels/CMakeLists.txt
浏览文件 @
5e5d4ae5
message
(
STATUS
"add lite kernels"
)
message
(
STATUS
"add lite kernels"
)
set
(
lite_kernel_deps type_system kernel_lite op_lite op_registry_lite context_lite
${
tensor_lite
}
)
set
(
lite_kernel_deps type_system kernel_lite op_lite op_registry_lite context_lite
${
tensor_lite
}
CACHE INTERNAL
""
FORCE
)
add_subdirectory
(
host
)
add_subdirectory
(
host
)
add_subdirectory
(
arm
)
add_subdirectory
(
arm
)
add_subdirectory
(
cuda
)
add_subdirectory
(
cuda
)
...
...
paddle/fluid/lite/kernels/arm/conv_compute.cc
浏览文件 @
5e5d4ae5
...
@@ -92,8 +92,24 @@ void ConvCompute::Run() {
...
@@ -92,8 +92,24 @@ void ConvCompute::Run() {
// }
// }
}
}
void
ConvComputeInt8
::
PrepareForRun
()
{}
template
<
PrecisionType
Ptype_out
>
void
ConvComputeInt8
::
Run
()
{}
void
ConvComputeInt8
<
Ptype_out
>::
PrepareForRun
()
{
auto
&
param
=
this
->
Param
<
param_t
>
();
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
impl_
=
new
lite
::
arm
::
math
::
GemmLikeConvInt8
<
Ptype_out
>
;
CHECK
(
this
->
impl_
->
create
(
param
,
&
ctx
));
}
// Runs the convolution through the implementation object held in impl_.
// The kernel parameters are fetched first, then impl_ is verified to be
// non-null before it is asked to run with those parameters.
template <PrecisionType Ptype_out>
void ConvComputeInt8<Ptype_out>::Run() {
  auto& conv_param = this->Param<param_t>();
  CHECK(impl_);
  impl_->run(conv_param);
}
// Explicit instantiations of ConvComputeInt8, one per value of the Ptype_out
// template parameter used here (int8, float and int32), so that the member
// definitions above are emitted in this translation unit.
template class ConvComputeInt8<PRECISION(kInt8)>;
template class ConvComputeInt8<PRECISION(kFloat)>;
template class ConvComputeInt8<PRECISION(kInt32)>;
}
// namespace arm
}
// namespace arm
}
// namespace kernels
}
// namespace kernels
...
@@ -116,8 +132,9 @@ REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW,
...
@@ -116,8 +132,9 @@ REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW,
.
BindOutput
(
"Output"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Output"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
.
Finalize
();
REGISTER_LITE_KERNEL
(
conv2d
,
kARM
,
kInt8
,
kNCHW
,
REGISTER_LITE_KERNEL
(
paddle
::
lite
::
kernels
::
arm
::
ConvComputeInt8
,
def
)
conv2d
,
kARM
,
kInt8
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
ConvComputeInt8
<
PRECISION
(
kInt8
)
>
,
int8_out
)
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
BindInput
(
"Bias"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Bias"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Filter"
,
.
BindInput
(
"Filter"
,
...
@@ -126,12 +143,13 @@ REGISTER_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW,
...
@@ -126,12 +143,13 @@ REGISTER_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
Finalize
();
.
Finalize
();
REGISTER_LITE_KERNEL
(
depthwise_conv2d
,
kARM
,
kInt8
,
kNCHW
,
REGISTER_LITE_KERNEL
(
paddle
::
lite
::
kernels
::
arm
::
ConvComputeInt8
,
def
)
conv2d
,
kARM
,
kInt8
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
ConvComputeInt8
<
PRECISION
(
kFloat
)
>
,
fp32_out
)
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
BindInput
(
"Bias"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Bias"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Filter"
,
.
BindInput
(
"Filter"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
BindOutput
(
"Output"
,
.
BindOutput
(
"Output"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
k
Int8
))})
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
k
Float
))})
.
Finalize
();
.
Finalize
();
paddle/fluid/lite/kernels/arm/conv_compute.h
浏览文件 @
5e5d4ae5
...
@@ -41,6 +41,7 @@ class ConvCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
...
@@ -41,6 +41,7 @@ class ConvCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
nullptr
};
nullptr
};
};
};
template
<
PrecisionType
Ptype_out
>
class
ConvComputeInt8
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kInt8
)
>
{
class
ConvComputeInt8
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kInt8
)
>
{
public:
public:
using
param_t
=
operators
::
ConvParam
;
using
param_t
=
operators
::
ConvParam
;
...
...
paddle/fluid/lite/kernels/arm/conv_compute_test.cc
浏览文件 @
5e5d4ae5
...
@@ -14,9 +14,11 @@
...
@@ -14,9 +14,11 @@
#include "paddle/fluid/lite/kernels/arm/conv_compute.h"
#include "paddle/fluid/lite/kernels/arm/conv_compute.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <limits>
#include <memory>
#include <memory>
#include <utility>
#include <utility>
#include <vector>
#include <vector>
#include "paddle/fluid/lite/arm/math/type_trans.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -24,83 +26,89 @@ namespace lite {
...
@@ -24,83 +26,89 @@ namespace lite {
namespace
kernels
{
namespace
kernels
{
namespace
arm
{
namespace
arm
{
template
<
typename
dtype
>
static
float
compute_max_kernel
(
const
float
*
din
,
int64_t
size
)
{
void
conv_compute_ref
(
const
operators
::
ConvParam
&
param
)
{
float
max_value
=
-
std
::
numeric_limits
<
float
>::
max
();
auto
input
=
param
.
x
;
for
(
int64_t
i
=
0
;
i
<
size
;
i
++
)
{
auto
filter
=
param
.
filter
;
max_value
=
max_value
>
din
[
0
]
?
max_value
:
din
[
0
];
auto
output
=
param
.
output
;
DDim
input_dims
=
param
.
x
->
dims
();
DDim
filter_dims
=
param
.
filter
->
dims
();
DDim
output_dims
=
param
.
output
->
dims
();
std
::
vector
<
int
>
paddings
=
param
.
paddings
;
std
::
vector
<
int
>
strides
=
param
.
strides
;
std
::
vector
<
int
>
dilations
=
param
.
dilations
;
int
groups
=
param
.
groups
;
auto
input_data
=
param
.
x
->
data
<
float
>
();
auto
output_data
=
param
.
output
->
mutable_data
<
float
>
();
auto
filter_data
=
param
.
filter
->
mutable_data
<
float
>
();
const
float
*
bias_data
=
nullptr
;
if
(
param
.
bias
!=
nullptr
)
{
bias_data
=
param
.
bias
->
mutable_data
<
float
>
();
}
}
bool
flag_bias
=
bias_data
!=
nullptr
;
LOG
(
INFO
)
<<
"[max_value]: "
<<
max_value
;
bool
flag_relu
=
param
.
fuse_relu
;
return
max_value
;
}
// Computes one scale value per slice of `in_data`.
//
// `in_data` is treated as `axis_size` consecutive slices of `inner_size`
// floats each. For every slice the value returned by compute_max_kernel() is
// divided by `scale_factor`. Each resulting scale is logged before the vector
// is returned.
static std::vector<float> get_tensor_scale_n(const float* in_data,
                                             int axis_size,
                                             int64_t inner_size,
                                             float scale_factor) {
  std::vector<float> scale_out(axis_size);

  // Walk the buffer slice by slice instead of recomputing the offset.
  const float* slice_begin = in_data;
  for (float& slice_scale : scale_out) {
    slice_scale = compute_max_kernel(slice_begin, inner_size) / scale_factor;
    slice_begin += inner_size;
  }

  for (size_t idx = 0; idx < scale_out.size(); ++idx) {
    LOG(INFO) << "[Scale out]: " << scale_out[idx];
  }
  return scale_out;
}
template
<
typename
Dtype1
,
typename
Dtype2
>
static
void
conv_basic
(
const
Dtype1
*
din
,
Dtype2
*
dout
,
int
num
,
int
chout
,
int
hout
,
int
wout
,
int
chin
,
int
hin
,
int
win
,
const
Dtype1
*
weights
,
const
Dtype2
*
bias
,
int
group
,
int
kernel_w
,
int
kernel_h
,
int
stride_w
,
int
stride_h
,
int
dila_w
,
int
dila_h
,
int
pad_w
,
int
pad_h
,
bool
flag_bias
,
bool
flag_relu
)
{
Dtype2
beta
=
0
;
auto
src_data
=
din
;
auto
dst_data_ref
=
dout
;
auto
weights_data
=
weights
;
auto
with_bias
=
flag_bias
;
auto
bias_data
=
bias
;
int
in_num
=
num
;
int
out_channels
=
chout
;
int
out_h
=
hout
;
int
out_w
=
wout
;
int
num
=
input_dims
[
0
];
int
in_channel
=
chin
;
int
chout
=
output_dims
[
1
];
int
in_h
=
hin
;
int
hout
=
output_dims
[
2
];
int
in_w
=
win
;
int
wout
=
output_dims
[
3
];
int
out_c_group
=
out_channels
/
group
;
int
in_c_group
=
in_channel
/
group
;
int
chin
=
input_dims
[
1
];
int
hin
=
input_dims
[
2
];
for
(
int
n
=
0
;
n
<
in_num
;
++
n
)
{
int
win
=
input_dims
[
3
];
for
(
int
g
=
0
;
g
<
group
;
++
g
)
{
int
out_c_group
=
chout
/
groups
;
int
in_c_group
=
chin
/
groups
;
int
stride_h
=
strides
[
0
];
int
stride_w
=
strides
[
1
];
int
dilation_h
=
dilations
[
0
];
int
dilation_w
=
dilations
[
1
];
int
padding_h
=
paddings
[
0
];
int
padding_w
=
paddings
[
1
];
int
kernel_h
=
filter_dims
[
2
];
int
kernel_w
=
filter_dims
[
3
];
for
(
int
n
=
0
;
n
<
num
;
++
n
)
{
for
(
int
g
=
0
;
g
<
groups
;
++
g
)
{
for
(
int
oc
=
0
;
oc
<
out_c_group
;
++
oc
)
{
for
(
int
oc
=
0
;
oc
<
out_c_group
;
++
oc
)
{
for
(
int
oh
=
0
;
oh
<
hout
;
++
oh
)
{
for
(
int
oh
=
0
;
oh
<
out_h
;
++
oh
)
{
for
(
int
ow
=
0
;
ow
<
wout
;
++
ow
)
{
for
(
int
ow
=
0
;
ow
<
out_w
;
++
ow
)
{
int
out_idx
=
n
*
group
s
*
out_c_group
*
hout
*
wout
+
int
out_idx
=
n
*
group
*
out_c_group
*
out_h
*
out_w
+
g
*
out_c_group
*
hout
*
wout
+
oc
*
hout
*
wout
+
g
*
out_c_group
*
out_h
*
out_w
+
oc
*
out_h
*
out_w
+
oh
*
wout
+
ow
;
oh
*
out_w
+
ow
;
output_data
[
out_idx
]
=
Dtype2
bias_d
=
flag_bias
?
static_cast
<
float
>
(
bias_data
[
g
*
out_c_group
+
oc
])
with_bias
?
(
bias_data
[
g
*
out_c_group
+
oc
])
:
(
Dtype2
)
0
;
:
0.
f
;
dst_data_ref
[
out_idx
]
=
bias_d
;
// + dst_data_ref[out_idx] * beta
;
for
(
int
ic
=
0
;
ic
<
in_c_group
;
++
ic
)
{
for
(
int
ic
=
0
;
ic
<
in_c_group
;
++
ic
)
{
for
(
int
kh
=
0
;
kh
<
kernel_h
;
++
kh
)
{
for
(
int
kh
=
0
;
kh
<
kernel_h
;
++
kh
)
{
for
(
int
kw
=
0
;
kw
<
kernel_w
;
++
kw
)
{
for
(
int
kw
=
0
;
kw
<
kernel_w
;
++
kw
)
{
int
iw
=
ow
*
stride_w
-
pad
ding_w
+
kw
*
(
dilation
_w
);
int
iw
=
ow
*
stride_w
-
pad
_w
+
kw
*
(
dila
_w
);
int
ih
=
oh
*
stride_h
-
pad
ding_h
+
kh
*
(
dilation
_h
);
int
ih
=
oh
*
stride_h
-
pad
_h
+
kh
*
(
dila
_h
);
if
(
iw
<
0
||
iw
>=
win
)
continue
;
if
(
iw
<
0
||
iw
>=
in_w
)
continue
;
if
(
ih
<
0
||
ih
>=
hin
)
continue
;
if
(
ih
<
0
||
ih
>=
in_h
)
continue
;
int
iidx
=
n
*
chin
*
hin
*
win
+
g
*
in_c_group
*
hin
*
win
+
int
iidx
=
n
*
in_channel
*
in_h
*
in_w
+
ic
*
hin
*
win
+
ih
*
win
+
iw
;
g
*
in_c_group
*
in_h
*
in_w
+
ic
*
in_h
*
in_w
+
ih
*
in_w
+
iw
;
int
widx
=
int
widx
=
g
*
out_c_group
*
in_c_group
*
kernel_h
*
kernel_w
+
g
*
out_c_group
*
in_c_group
*
kernel_h
*
kernel_w
+
oc
*
in_c_group
*
kernel_h
*
kernel_w
+
oc
*
in_c_group
*
kernel_h
*
kernel_w
+
ic
*
kernel_h
*
kernel_w
+
kh
*
kernel_w
+
kw
;
ic
*
kernel_h
*
kernel_w
+
kh
*
kernel_w
+
kw
;
output_data
[
out_idx
]
+=
dst_data_ref
[
out_idx
]
+=
src_data
[
iidx
]
*
weights_data
[
widx
];
(
dtype
)
input_data
[
iidx
]
*
(
dtype
)
filter_data
[
widx
];
}
}
}
}
}
}
if
(
flag_relu
)
{
if
(
flag_relu
)
{
output_data
[
out_idx
]
=
dst_data_ref
[
out_idx
]
=
dst_data_ref
[
out_idx
]
>
(
Dtype2
)
0
output_data
[
out_idx
]
>
0.
f
?
output_data
[
out_idx
]
:
0.
f
;
?
dst_data_ref
[
out_idx
]
:
(
Dtype2
)
0
;
}
}
}
}
}
}
...
@@ -109,6 +117,44 @@ void conv_compute_ref(const operators::ConvParam& param) {
...
@@ -109,6 +117,44 @@ void conv_compute_ref(const operators::ConvParam& param) {
}
}
}
}
// Reference convolution used to validate the ARM kernels.
//
// Unpacks `param` into the raw pointers and scalar geometry expected by
// conv_basic() and runs it, writing the result into `param.output`.
//
// Dtype1 is the element type of the input and filter tensors, Dtype2 the
// element type of the bias and output tensors. The bias is optional: when
// `param.bias` is null, conv_basic() is called with a null bias pointer and
// flag_bias == false.
template <typename Dtype1, typename Dtype2>
void conv_compute_ref(const operators::ConvParam& param) {
  const Dtype1* din = param.x->data<Dtype1>();
  Dtype2* dout = param.output->mutable_data<Dtype2>();

  // Tensor dims are read as [num, channel, height, width].
  int num = param.x->dims()[0];
  int chout = param.output->dims()[1];
  int hout = param.output->dims()[2];
  int wout = param.output->dims()[3];

  int chin = param.x->dims()[1];
  int hin = param.x->dims()[2];
  int win = param.x->dims()[3];

  const Dtype1* weights = param.filter->mutable_data<Dtype1>();
  Dtype2* bias = nullptr;
  if (param.bias != nullptr) {
    bias = param.bias->mutable_data<Dtype2>();
  }

  int group = param.groups;

  // The filter follows the same [.., height, width] order as the tensors
  // above, so dim 2 is the kernel height and dim 3 the kernel width.
  int kernel_h = param.filter->dims()[2];
  int kernel_w = param.filter->dims()[3];

  // strides / dilations / paddings are read as {h, w}: index 0 is the
  // vertical value and index 1 the horizontal one. Reading them the other way
  // round only goes unnoticed while both entries are equal.
  int stride_h = param.strides[0];
  int stride_w = param.strides[1];
  int dila_h = param.dilations[0];
  int dila_w = param.dilations[1];
  int pad_h = param.paddings[0];
  int pad_w = param.paddings[1];

  bool flag_bias = (param.bias != nullptr);
  bool flag_relu = param.fuse_relu;

  // conv_basic() takes every w/h pair in (w, h) order.
  conv_basic(din, dout, num, chout, hout, wout, chin, hin, win, weights, bias,
             group, kernel_w, kernel_h, stride_w, stride_h, dila_w, dila_h,
             pad_w, pad_h, flag_bias, flag_relu);
}
TEST
(
conv_arm
,
retrive_op
)
{
TEST
(
conv_arm
,
retrive_op
)
{
auto
conv
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
auto
conv
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
"conv2d"
);
"conv2d"
);
...
@@ -116,12 +162,122 @@ TEST(conv_arm, retrive_op) {
...
@@ -116,12 +162,122 @@ TEST(conv_arm, retrive_op) {
ASSERT_TRUE
(
conv
.
front
());
ASSERT_TRUE
(
conv
.
front
());
}
}
// Checks that the kernel registry yields at least one ARM int8 kernel for
// "conv2d" and that the first entry is non-null.
TEST(conv_arm_int8, retrive_op) {
  auto int8_kernels =
      KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kInt8)>(
          "conv2d");
  ASSERT_FALSE(int8_kernels.empty());
  ASSERT_TRUE(int8_kernels.front());
}
TEST
(
conv_arm
,
init
)
{
TEST
(
conv_arm
,
init
)
{
ConvCompute
conv
;
ConvCompute
conv
;
ASSERT_EQ
(
conv
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
conv
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
conv
.
target
(),
TARGET
(
kARM
));
ASSERT_EQ
(
conv
.
target
(),
TARGET
(
kARM
));
}
}
// Checks that both output-precision variants of the int8 convolution kernel
// report int8 as their kernel precision and ARM as their target.
TEST(conv_arm_int8, init) {
  ConvComputeInt8<PRECISION(kFloat)> float_out;
  ASSERT_EQ(float_out.precision(), PRECISION(kInt8));
  ASSERT_EQ(float_out.target(), TARGET(kARM));

  // Assert on int8_out itself; re-checking float_out here would leave the
  // int8-output variant untested.
  ConvComputeInt8<PRECISION(kInt8)> int8_out;
  ASSERT_EQ(int8_out.precision(), PRECISION(kInt8));
  ASSERT_EQ(int8_out.target(), TARGET(kARM));
}
// Compares GemmLikeConvInt8 with int32 output against the reference
// convolution conv_compute_ref<int8_t, int> over a grid of shapes and
// convolution settings. Input and filter are filled with ones.
//
// NOTE(review): flag_bias and flag_relu are iterated but not applied below
// (param.bias stays nullptr and param.fuse_relu stays false), and every
// `true` case is commented out of the loop lists - confirm whether these are
// meant to be enabled later.
TEST(conv_arm_int8, compute) {
  DeviceInfo::Init();
  for (auto n : {2}) {
    for (auto ic : {6}) {
      for (auto oc : {6}) {
        for (auto ih : {9}) {
          for (auto iw : {9}) {
            for (auto flag_bias : {false, /*true*/}) {
              for (auto flag_relu : {false, /*true*/}) {
                for (auto depthwise : {false, /*true*/}) {
                  for (auto dilation : {1}) {
                    for (auto stride : {1}) {
                      for (auto padding : {0}) {
                        for (auto ks : {1}) {
                          int group = 1;
                          if (depthwise) {  // depthwise convolution ?
                            group = oc = ic;
                          }

                          // Effective kernel extent once dilation is applied.
                          const int dks = dilation * (ks - 1) + 1;
                          int oh = (ih + 2 * padding - dks) / stride + 1;
                          int ow = (iw + 2 * padding - dks) / stride + 1;
                          std::vector<int64_t> input_shape = {n, ic, ih, iw};
                          std::vector<int64_t> filter_shape = {oc, ic / group,
                                                               ks, ks};
                          std::vector<int64_t> output_shape({n, oc, oh, ow});

                          Tensor input_int8;
                          Tensor filter_int8;
                          Tensor output_int32, output_int32_ref;

                          input_int8.Resize(input_shape);
                          filter_int8.Resize(filter_shape);
                          output_int32.Resize(output_shape);
                          output_int32_ref.Resize(output_shape);

                          int8_t* input_int8_data =
                              input_int8.mutable_data<int8_t>();
                          int8_t* filter_int8_data =
                              filter_int8.mutable_data<int8_t>();

                          // Fill with integer ones; the buffers are int8_t,
                          // so a float literal would only add a conversion.
                          const int64_t input_count =
                              input_int8.dims().production();
                          for (int64_t i = 0; i < input_count; i++) {
                            input_int8_data[i] = 1;
                          }
                          const int64_t filter_count =
                              filter_int8.dims().production();
                          for (int64_t i = 0; i < filter_count; i++) {
                            filter_int8_data[i] = 1;
                          }

                          operators::ConvParam param;
                          param.x = &input_int8;
                          param.filter = &filter_int8;
                          param.bias = nullptr;
                          param.fuse_relu = false;
                          param.paddings = std::vector<int>({padding, padding});
                          param.strides = std::vector<int>({stride, stride});
                          param.dilations =
                              std::vector<int>({dilation, dilation});
                          param.groups = group;

                          // Reference result first, written to the *_ref
                          // tensor.
                          param.output = &output_int32_ref;
                          conv_compute_ref<int8_t, int>(param);

                          // Then the implementation under test, written to
                          // its own output tensor.
                          param.output = &output_int32;
                          std::unique_ptr<KernelContext> ctx(new KernelContext);
                          lite::arm::math::GemmLikeConvInt8<PRECISION(kInt32)>
                              int8gemm_int32;
                          int8gemm_int32.init(param, &ctx->As<ARMContext>());
                          int8gemm_int32.create(param, &ctx->As<ARMContext>());
                          int8gemm_int32.run(param);

                          int32_t* output_int32_data =
                              output_int32.mutable_data<int32_t>();
                          int32_t* output_int32_ref_data =
                              output_int32_ref.mutable_data<int32_t>();

                          // Both sides are int32, so compare exactly; a 1e-3
                          // tolerance on integers is equality anyway.
                          const int64_t output_count =
                              output_int32.dims().production();
                          for (int64_t i = 0; i < output_count; i++) {
                            EXPECT_EQ(output_int32_data[i],
                                      output_int32_ref_data[i]);
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
TEST
(
conv_arm
,
compute
)
{
TEST
(
conv_arm
,
compute
)
{
DeviceInfo
::
Init
();
DeviceInfo
::
Init
();
#if 1
#if 1
...
@@ -219,7 +375,7 @@ TEST(conv_arm, compute) {
...
@@ -219,7 +375,7 @@ TEST(conv_arm, compute) {
conv
.
Launch
();
conv
.
Launch
();
// invoking ref implementation and compare results
// invoking ref implementation and compare results
param
.
output
=
&
output_ref
;
param
.
output
=
&
output_ref
;
conv_compute_ref
<
float
>
(
param
);
conv_compute_ref
<
float
,
float
>
(
param
);
auto
*
output_ref_data
=
auto
*
output_ref_data
=
output_ref
.
mutable_data
<
float
>
();
output_ref
.
mutable_data
<
float
>
();
for
(
int
i
=
0
;
i
<
output
.
dims
().
production
();
i
++
)
{
for
(
int
i
=
0
;
i
<
output
.
dims
().
production
();
i
++
)
{
...
...
paddle/fluid/lite/operators/op_params.h
浏览文件 @
5e5d4ae5
...
@@ -19,6 +19,11 @@
...
@@ -19,6 +19,11 @@
#include "paddle/fluid/lite/core/framework.pb.h"
#include "paddle/fluid/lite/core/framework.pb.h"
#include "paddle/fluid/lite/utils/all.h"
#include "paddle/fluid/lite/utils/all.h"
/*
 * Injects the int8 quantization fields into an operator parameter struct.
 * Every field has a default so a default-constructed param struct never
 * carries indeterminate values: int8 is disabled, the input and output scales
 * are 1.0, and the weight scale list is empty.
 */
#define WITH_INT8_CONFIG             \
  bool enable_int8{false};           \
  float input_scale{1.0f};           \
  std::vector<float> weight_scale{}; \
  float output_scale{1.0f};
/*
/*
* This file contains all the argument parameter data structure for operators.
* This file contains all the argument parameter data structure for operators.
*/
*/
...
@@ -147,6 +152,7 @@ struct ConvParam {
...
@@ -147,6 +152,7 @@ struct ConvParam {
float
scale_weights
{
1.0
f
};
// only used with mkl-dnn int8
float
scale_weights
{
1.0
f
};
// only used with mkl-dnn int8
bool
force_fp32_output
{
false
};
// only used in mkl-dnn int8
bool
force_fp32_output
{
false
};
// only used in mkl-dnn int8
std
::
string
data_format
{
"Anylayout"
};
std
::
string
data_format
{
"Anylayout"
};
WITH_INT8_CONFIG
};
};
// For BatchNorm op
// For BatchNorm op
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录