Commit f165d3e9

Merge branch 'develop' into fusion_conv_add_relu_int8_op

Authored Nov 28, 2018 by Ray Liu; committed via GitHub on Nov 28, 2018.
Parents: 262b72cd, 067eaef4

Showing 20 changed files with 466 additions and 60 deletions (+466, -60).
src/common/types.cpp                                        +3    -0
src/common/types.h                                          +1    -0
src/fpga/V2/api.cpp                                         +2    -2
src/fpga/V2/api.h                                           +1    -1
src/framework/load_ops.h                                    +4    -0
src/operators/fusion_dequant_add_bn_relu_op.cpp             +40   -0
src/operators/fusion_dequant_add_bn_relu_op.h               +76   -0
src/operators/kernel/arm/dequant_add_bn_relu_kernel.cpp     +116  -0
src/operators/kernel/arm/quantize_kernel.cpp                +13   -13
src/operators/kernel/dequant_add_bn_relu_kernel.h           +37   -0
src/operators/kernel/fpga/V2/conv_add_bn_kernel.cpp         +1    -1
src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp    +1    -1
src/operators/kernel/fpga/V2/conv_add_kernel.cpp            +1    -1
src/operators/kernel/fpga/V2/conv_add_relu_kernel.cpp       +1    -1
src/operators/kernel/fpga/V2/conv_bn_kernel.cpp             +1    -1
src/operators/kernel/fpga/V2/conv_bn_relu_kernel.cpp        +2    -1
src/operators/op_param.h                                    +46   -2
src/operators/quantize_op.cpp                               +4    -1
test/operators/test_quantize_op.cpp                         +111  -34
tools/op.cmake                                              +5    -1
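In short, this merge pulls the develop-branch work into fusion_conv_add_relu_int8_op: a new fused operator, fusion_dequant_add_bn_relu (matcher, op definition, ARM CPU kernel, and parameters), which collapses the int8 pipeline tail dequantize -> elementwise_add -> batch_norm -> relu into a single pass; padding support in the quantize op; and an FPGA V2 fix that threads format_conv_data's bias/scale buffer through as float**. Per int32 input element x in channel c, the fused ARM kernel below computes (restating its own code in batch-norm notation):

$$y = \max\!\Big(0,\ \frac{\gamma_c}{\sqrt{\sigma_c^2 + \varepsilon}}\Big(\frac{s_{\mathrm{act}}}{s_w}\,x + b_c - \mu_c\Big) + \beta_c\Big)$$

where gamma, beta, mu, sigma^2 are the batch-norm scale, bias, mean, and variance, b is the elementwise-add bias, s_act is the online activation scale (the "Scale" input), and s_w is the static weight_scale attribute.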
src/common/types.cpp
@@ -72,6 +72,8 @@ const char *G_OP_TYPE_SUM = "sum";
 const char *G_OP_TYPE_QUANTIZE = "quantize";
 const char *G_OP_TYPE_DEQUANTIZE = "dequantize";
+const char *G_OP_TYPE_FUSION_DEQUANT_ADD_BN_RELU =
+    "fusion_dequant_add_bn_relu";
 const char *G_OP_TYPE_TANH = "tanh";
 const char *G_OP_TYPE_FUSION_DECONV_RELU = "fusion_deconv_relu";
 const char *G_OP_TYPE_FUSION_DECONV_ADD = "fusion_deconv_add";
@@ -136,6 +138,7 @@ std::unordered_map<
     {G_OP_TYPE_ELEMENTWISE_MUL, {{"X", "Y"}, {"Out"}}},
     {G_OP_TYPE_QUANTIZE, {{"X"}, {"Out", "OutScale"}}},
     {G_OP_TYPE_DEQUANTIZE, {{"X", "Scale"}, {"Out"}}},
+    {G_OP_TYPE_FUSION_DEQUANT_ADD_BN_RELU, {{"X", "Scale"}, {"Out"}}},
     {G_OP_TYPE_TANH, {{"X"}, {"Out"}}},
     {G_OP_TYPE_FUSION_DECONV_RELU, {{"Input"}, {"Out"}}},
     {G_OP_TYPE_FUSION_DECONV_ADD, {{"Input"}, {"Out"}}},
src/common/types.h
@@ -139,6 +139,7 @@ extern const char *G_OP_TYPE_ELEMENTWISE_MUL;
 extern const char *G_OP_TYPE_QUANTIZE;
 extern const char *G_OP_TYPE_DEQUANTIZE;
+extern const char *G_OP_TYPE_FUSION_DEQUANT_ADD_BN_RELU;
 extern const char *G_OP_TYPE_TANH;
 extern const char *G_OP_TYPE_FUSION_DECONV_RELU;
src/fpga/V2/api.cpp
@@ -132,11 +132,11 @@ void format_concat_output(framework::Tensor *out, int height, int width,
 }

 int format_conv_data(framework::Tensor *filter_tensor,
-                     framework::Tensor *ofm_tensor, float *bs_ptr, int group) {
+                     framework::Tensor *ofm_tensor, float **bs_ptr, int group) {
   float max_value = fpga::filter_find_max(filter_tensor);
   fpga::format_filter(filter_tensor, max_value, group);
   int aligned_num = get_aligned_filter_num(filter_tensor);
-  fpga::format_bias_scale_array(&bs_ptr,
+  fpga::format_bias_scale_array(bs_ptr,
                                 (int)filter_tensor->dims()[0],  // NOLINT
                                 aligned_num);
   int aligned_channel = fpga::get_conv_output_channel(filter_tensor);
src/fpga/V2/api.h
@@ -39,7 +39,7 @@ void format_bias_scale_array(float** bias_scale_array, int filter_num,
 void format_concat_output(framework::Tensor *out, int height, int width,
                           uint32_t out_channel);
 int format_conv_data(framework::Tensor *filter_tensor,
-                     framework::Tensor *ofm_tensor, float *bs_ptr, int group);
+                     framework::Tensor *ofm_tensor, float **bs_ptr, int group);
 int format_fc_data(framework::Tensor *filter_tensor,
                    framework::Tensor *ofm_tensor, float *bs_ptr);
 void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
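The signature change fixes a pointer-aliasing bug. Judging from the api.h context line, format_bias_scale_array takes float** so it can replace the buffer it is handed; previously format_conv_data received bs_ptr by value and passed &bs_ptr onward, so only its local copy was updated and every caller kept a stale pointer. A minimal sketch of the failure mode, with hypothetical simplified signatures (not Paddle-Lite code):

#include <cstdlib>

// May free and reallocate the array it is given, updating *arr.
void format_bias_scale_array(float **arr, int n) {
  float *aligned = static_cast<float *>(std::malloc(n * sizeof(float)));
  // ... copy/align the old contents of *arr into aligned ...
  std::free(*arr);
  *arr = aligned;  // the caller must observe this new pointer
}

// Before: the pointer is taken by value, so the update stops here.
void format_conv_data_old(float *bs_ptr) {
  format_bias_scale_array(&bs_ptr, 8);  // updates the local copy only
}                                       // caller's bs_ptr now dangles

// After: the out-parameter is threaded through to the call site.
void format_conv_data_new(float **bs_ptr) {
  format_bias_scale_array(bs_ptr, 8);   // caller's pointer is updated
}

This is why each FPGA V2 kernel below now passes &bs_ptr.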
src/framework/load_ops.h
@@ -233,3 +233,7 @@ LOAD_OP1(quantize, CPU);
 #ifdef DEQUANT_OP
 LOAD_OP1(dequantize, CPU);
 #endif
+#ifdef FUSION_DEQUANT_ADD_BN_RELU_OP
+LOAD_OP1(fusion_dequant_add_bn_relu, CPU);
+LOAD_FUSION_MATCHER(fusion_dequant_add_bn_relu);
+#endif
src/operators/fusion_dequant_add_bn_relu_op.cpp (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef FUSION_DEQUANT_ADD_BN_RELU_OP

#include "operators/fusion_dequant_add_bn_relu_op.h"

namespace paddle_mobile {
namespace operators {

template <typename Dtype, typename T>
void FusionDequantAddBNReluOp<Dtype, T>::InferShape() const {
  const auto& input_dims = this->param_.input_->dims();
  this->param_.output_->Resize(input_dims);
}

}  // namespace operators
}  // namespace paddle_mobile

namespace ops = paddle_mobile::operators;
REGISTER_FUSION_MATCHER(fusion_dequant_add_bn_relu,
                        ops::FusionDequantAddBNReluMatcher);
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_dequant_add_bn_relu,
                      ops::FusionDequantAddBNReluOp);
#endif

#endif  // FUSION_DEQUANT_ADD_BN_RELU_OP
src/operators/fusion_dequant_add_bn_relu_op.h (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef FUSION_DEQUANT_ADD_BN_RELU_OP

#pragma once

#include <string>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/dequant_add_bn_relu_kernel.h"
#include "operators/op_param.h"

namespace paddle_mobile {
namespace operators {

class FusionDequantAddBNReluMatcher : public framework::FusionOpMatcher {
 public:
  FusionDequantAddBNReluMatcher() {
    node_ = framework::Node(G_OP_TYPE_DEQUANTIZE);
    node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
        std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM) >
        std::make_shared<framework::Node>(G_OP_TYPE_RELU);
  }

  void FolderNodes(
      framework::Node *node,
      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
    node->Folder(node_.Depth(), Type(),
                 {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}},
                  {G_OP_TYPE_BATCHNORM,
                   {{"Scale", "BNScale"},
                    {"Mean", "BNMean"},
                    {"Bias", "BNBias"},
                    {"Variance", "BNVariance"}}}},
                 removed_nodes);
  }

  std::string Type() { return G_OP_TYPE_FUSION_DEQUANT_ADD_BN_RELU; }
};

template <typename DeviceType, typename T>
class FusionDequantAddBNReluOp
    : public framework::OperatorWithKernel<
          DeviceType, FusionDequantAddBNReluParam<DeviceType>,
          operators::FusionDequantAddBNReluKernel<DeviceType, T>> {
 public:
  FusionDequantAddBNReluOp(const std::string &type,
                           const VariableNameMap &inputs,
                           const VariableNameMap &outputs,
                           const framework::AttributeMap &attrs,
                           std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<
            DeviceType, FusionDequantAddBNReluParam<DeviceType>,
            operators::FusionDequantAddBNReluKernel<DeviceType, T>>(
            type, inputs, outputs, attrs, scope) {}
  // inference output shape
  void InferShape() const override;
};

}  // namespace operators
}  // namespace paddle_mobile

#endif
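A note on the matcher: the overloaded > chain declares the subgraph shape the fuser looks for (a dequantize node followed by elementwise_add, batch_norm, and relu), and FolderNodes collapses a matched chain into one fusion_dequant_add_bn_relu node, remapping the absorbed inputs (elementwise_add's Y stays Y; batch_norm's Scale/Mean/Bias/Variance become BNScale/BNMean/BNBias/BNVariance) so the fused op can read them under the names used by FusionDequantAddBNReluParam in op_param.h below.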
src/operators/kernel/arm/dequant_add_bn_relu_kernel.cpp (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef FUSION_DEQUANT_ADD_BN_RELU_OP

#include "operators/kernel/dequant_add_bn_relu_kernel.h"
#include <cmath>
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#include <arm_neon.h>
#endif

namespace paddle_mobile {
namespace operators {

template <>
bool FusionDequantAddBNReluKernel<CPU, float>::Init(
    FusionDequantAddBNReluParam<CPU> *param) {
  // elementwise add params
  const Tensor *bias = param->bias_;
  // batch norm params
  const Tensor *bn_mean = param->bn_mean_;
  const Tensor *bn_variance = param->bn_variance_;
  Tensor *bn_scale = param->bn_scale_;
  Tensor *bn_bias = param->bn_bias_;
  const float epsilon = param->epsilon_;

  const float *bias_ptr = bias->data<float>();
  const float *mean_ptr = bn_mean->data<float>();
  const float *var_ptr = bn_variance->data<float>();
  float *bn_scale_ptr = bn_scale->mutable_data<float>();
  float *bn_bias_ptr = bn_bias->mutable_data<float>();
  for (int c = 0; c < bn_scale->numel(); ++c) {
    float inv_scale = bn_scale_ptr[c] / (std::sqrt(var_ptr[c] + epsilon));
    bn_scale_ptr[c] = inv_scale;
    bn_bias_ptr[c] = inv_scale * (bias_ptr[c] - mean_ptr[c]) + bn_bias_ptr[c];
  }
  return true;
}

template <>
void FusionDequantAddBNReluKernel<CPU, float>::Compute(
    const FusionDequantAddBNReluParam<CPU> &param) {
  const int32_t *input = param.input_->data<int32_t>();
  const float *bn_scale = param.bn_scale_->data<float>();
  const float *bn_bias = param.bn_bias_->data<float>();
  // dequantize params
  const float activation_scale = param.activation_scale_->data<float>()[0];
  const float weight_scale = param.weight_scale_;
  const float dequant_scale = activation_scale / weight_scale;

  float *output = param.output_->mutable_data<float>();
  int batch_size = param.input_->dims()[0];
  int channels = param.input_->dims()[1];
  size_t spatial_size = param.input_->dims()[2] * param.input_->dims()[3];

  #pragma omp parallel for collapse(2)
  for (int batch = 0; batch < batch_size; ++batch) {
    for (int c = 0; c < channels; ++c) {
      float scale = bn_scale[c] * dequant_scale;
      float bias = bn_bias[c];
      size_t offset = (batch * channels + c) * spatial_size;
      const int32_t *x = input + offset;
      float *y = output + offset;
      size_t remain = spatial_size;
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
      int loop = spatial_size >> 4;
      remain = spatial_size & 0xF;
      float32x4_t __scale = vdupq_n_f32(scale);
      float32x4_t __bias = vdupq_n_f32(bias);
      float32x4_t __zero = vdupq_n_f32(0.f);

      for (int k = 0; k < loop; ++k, x += 16, y += 16) {
        int32x4_t r0 = vld1q_s32(x);
        int32x4_t r1 = vld1q_s32(x + 4);
        int32x4_t r2 = vld1q_s32(x + 8);
        int32x4_t r3 = vld1q_s32(x + 12);
        float32x4_t f0 = vcvtq_f32_s32(r0);
        float32x4_t f1 = vcvtq_f32_s32(r1);
        float32x4_t f2 = vcvtq_f32_s32(r2);
        float32x4_t f3 = vcvtq_f32_s32(r3);
        f0 = vmlaq_f32(__bias, __scale, f0);
        f1 = vmlaq_f32(__bias, __scale, f1);
        f2 = vmlaq_f32(__bias, __scale, f2);
        f3 = vmlaq_f32(__bias, __scale, f3);
        f0 = vmaxq_f32(__zero, f0);
        f1 = vmaxq_f32(__zero, f1);
        f2 = vmaxq_f32(__zero, f2);
        f3 = vmaxq_f32(__zero, f3);
        vst1q_f32(y, f0);
        vst1q_f32(y + 4, f1);
        vst1q_f32(y + 8, f2);
        vst1q_f32(y + 12, f3);
      }
#endif  // __ARM_NEON__
      for (int k = 0; k < remain; ++k) {
        y[k] = std::max(scale * x[k] + bias, 0.f);
      }
    }
  }
}

}  // namespace operators
}  // namespace paddle_mobile

#endif  // FUSION_DEQUANT_ADD_BN_RELU_OP
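What this kernel computes, as a scalar sketch (the same math as the NEON path above, which processes 16 elements per iteration and leaves the tail to the scalar loop): Init() folds the elementwise-add bias and the batch-norm statistics into one per-channel scale/bias pair, so Compute() reduces to a single fused multiply-add plus ReLU per int32 input element. Names here are illustrative, not part of the diff:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Per-channel fold done once in Init().
struct ChannelParams {
  float scale, bias;
};

ChannelParams fold(float bn_scale, float bn_bias, float bn_mean, float bn_var,
                   float add_bias, float epsilon, float activation_scale,
                   float weight_scale) {
  float inv_scale = bn_scale / std::sqrt(bn_var + epsilon);
  float dequant_scale = activation_scale / weight_scale;
  return {inv_scale * dequant_scale,
          inv_scale * (add_bias - bn_mean) + bn_bias};
}

// Per-element work done in Compute(): dequantize + add + BN + ReLU in one step.
float fused_element(int32_t x, ChannelParams p) {
  return std::max(p.scale * static_cast<float>(x) + p.bias, 0.f);
}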
src/operators/kernel/arm/quantize_kernel.cpp
@@ -379,8 +379,8 @@ static void quantize_round_to_zero(const Tensor *input, const float scale,
       const float *x3 = input3 + h * input_w;
       int loop = input_w >> 4;
       int remain = input_w & 0xF;
-      int pad_loop = paddings[1] >> 1;
-      int pad_remain = paddings[1] & 0x1;
+      int pad_loop = paddings[1] >> 1;  // (paddings[1] << 1) >> 2
+      int pad_remain = (paddings[1] << 1) & 0x3;
       int remain_steps = remain;
       asm volatile(
           "vdup.f32 q0, %[scale]        \n"
@@ -596,7 +596,7 @@ static void quantize_round_to_zero(const Tensor *input, const float scale,
           "store_pad_2w_%=:             \n"
           "cmp %[pad_remain], #2        \n"
-          "ble store_pad_1w_%=          \n"
+          "blt store_pad_1w_%=          \n"
           "vst1.16 {d0[0]}, [%[y0]]!    \n"
           "vst1.16 {d0[0]}, [%[y1]]!    \n"
           "vst1.16 {d0[0]}, [%[y2]]!    \n"
@@ -605,7 +605,7 @@ static void quantize_round_to_zero(const Tensor *input, const float scale,
           "store_pad_1w_%=:             \n"
           "cmp %[pad_remain], #1        \n"
-          "ble end_%=                   \n"
+          "blt end_%=                   \n"
           "vst1.8 {d0[0]}, [%[y0]]!     \n"
           "vst1.8 {d0[0]}, [%[y1]]!     \n"
           "vst1.8 {d0[0]}, [%[y2]]!     \n"
@@ -669,8 +669,8 @@ static void quantize_round_to_zero(const Tensor *input, const float scale,
       const float *x0 = input0 + h * input_w;
       int loop = input_w >> 4;
       int remain = input_w & 0xF;
-      int pad_loop = paddings[1] >> 1;
-      int pad_remain = paddings[1] & 0x1;
+      int pad_loop = paddings[1] >> 1;  // (paddings[1] << 1) >> 2
+      int pad_remain = (paddings[1] << 1) & 0x3;
       asm volatile(
           "vdup.f32 q0, %[scale]        \n"
           "cmp %[loop], #0              \n"
@@ -754,14 +754,14 @@ static void quantize_round_to_zero(const Tensor *input, const float scale,
           "pad_remain_%=:               \n"
           "cmp %[pad_remain], #2        \n"
-          "ble store_pad_1w_%=          \n"
+          "blt store_pad_1w_%=          \n"
           "vst1.16 {d0[0]}, [%[y0]]!    \n"
           "sub %[pad_remain], #2        \n"
           "store_pad_1w_%=:             \n"
           "cmp %[pad_remain], #1        \n"
-          "ble end_%=                   \n"
-          "vst1.8 {d0[0]}, [%[y0]]!     \n"
+          "blt end_%=                   \n"
+          "vst1.8  {d0[0]}, [%[y0]]!    \n"
           "end_%=:                      \n"
           : [x0] "+r"(x0), [y0] "+r"(y0), [loop] "+r"(loop),
             [remain] "+r"(remain), [pad_loop] "+r"(pad_loop),
@@ -795,10 +795,10 @@ void QuantizeKernel<CPU, float>::Compute(const QuantizeParam<CPU> &param) {
   // only support int8 currently
   float scale = 127 / max_abs;
   param.online_scale_->mutable_data<float>()[0] = max_abs;
-  // const auto &paddings = param.paddings_;
-  std::vector<int> paddings = {0, 0};
-  // const auto padding_val = param.padding_val_;
-  int8_t padding_val = 127;
+  const auto &paddings = param.paddings_;
+  // std::vector<int> paddings = {0, 0};
+  // const auto padding_val = param.padding_val_;
+  int8_t padding_val = 0;
   switch (param.round_type_) {
     case ROUND_NEAREST_TO_EVEN:
       quantize_round_to_even(input, scale, paddings, padding_val, output);
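The assembly change swaps ble for blt after cmp %[pad_remain], #2 (and #1). ble branches when the remaining count is less than or equal to the compared value, so the store for exactly two (or one) leftover padding bytes was being skipped. In C-like terms, a sketch of the intended control flow (not the real labels):

// Corrected tail of the padding writer: "blt" falls through when
// pad_remain >= 2, so the two-byte store runs in the == 2 case that
// the old "ble" incorrectly branched past.
if (pad_remain >= 2) {  // cmp #2; blt -> branch only when < 2
  // vst1.16 {d0[0]}: store two int8 padding values
  pad_remain -= 2;
}
if (pad_remain >= 1) {  // cmp #1; blt -> branch only when < 1
  // vst1.8 {d0[0]}: store the final padding value
}

The Compute() hunk also switches the padding behavior on: paddings now come from param.paddings_ instead of a hard-coded {0, 0}, and the pad value becomes 0.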
src/operators/kernel/dequant_add_bn_relu_kernel.h (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#ifdef FUSION_DEQUANT_ADD_BN_RELU_OP

#include "framework/operator.h"
#include "operators/op_param.h"

namespace paddle_mobile {
namespace operators {

template <typename DeviceType, typename T>
class FusionDequantAddBNReluKernel
    : public framework::OpKernelBase<DeviceType,
                                     FusionDequantAddBNReluParam<DeviceType>> {
 public:
  void Compute(const FusionDequantAddBNReluParam<DeviceType> &param);
  bool Init(FusionDequantAddBNReluParam<DeviceType> *param);
};

}  // namespace operators
}  // namespace paddle_mobile

#endif
src/operators/kernel/fpga/V2/conv_add_bn_kernel.cpp
@@ -58,7 +58,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);

-  fpga::format_conv_data(filter, out, bs_ptr, param->Groups());
+  fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());

   fpga::SplitConvArgs conv_arg = {0};
   fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
@@ -56,7 +56,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);

-  fpga::format_conv_data(filter, out, bs_ptr, param->Groups());
+  fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());

   fpga::SplitConvArgs conv_arg = {0};
   fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
src/operators/kernel/fpga/V2/conv_add_kernel.cpp
@@ -38,7 +38,7 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
     bs_ptr[i] = bias_ptr[i];
   }

-  fpga::format_conv_data(filter, out, bs_ptr, param->Groups());
+  fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());

   fpga::SplitConvArgs conv_arg = {0};
   fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
src/operators/kernel/fpga/V2/conv_add_relu_kernel.cpp
@@ -38,7 +38,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
     bs_ptr[i] = bias_ptr[i];
   }

-  fpga::format_conv_data(filter, out, bs_ptr, param->Groups());
+  fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());

   fpga::SplitConvArgs conv_arg = {0};
   fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
src/operators/kernel/fpga/V2/conv_bn_kernel.cpp
@@ -50,7 +50,7 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);

-  fpga::format_conv_data(filter, out, bs_ptr, param->Groups());
+  fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());

   fpga::SplitConvArgs conv_arg = {0};
   fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
src/operators/kernel/fpga/V2/conv_bn_relu_kernel.cpp
@@ -15,6 +15,7 @@ limitations under the License. */
 #ifdef FUSION_CONVBNRELU_OP

 #include "operators/kernel/conv_bn_relu_kernel.h"
+#include "fpga/V2/filter.h"

 namespace paddle_mobile {
 namespace operators {
@@ -50,7 +51,7 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);

-  fpga::format_conv_data(filter, out, bs_ptr, param->Groups());
+  fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());

   fpga::SplitConvArgs conv_arg = {0};
   fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
src/operators/op_param.h
@@ -2555,7 +2555,7 @@ class QuantizeParam : public OpParam {
     output_ = OutFrom<GType>(outputs, scope);
     // online
     // scale = max(abs(x))
-    online_scale_ = GetVarValue<GType>("OutScale", outputs, scope);
+    online_scale_ = OpParam::GetVarValue<GType>("OutScale", outputs, scope);
     // offline
     if (HasAttr("static_scale", attrs)) {
       is_static_ = true;
@@ -2565,6 +2565,11 @@ class QuantizeParam : public OpParam {
     if (HasAttr("round_type", attrs)) {
       round_type_ = GetAttr<RoundType>("round_type", attrs);
     }
+    // get paddings
+    paddings_ = std::vector<int>({0, 0});
+    if (HasAttr("paddings", attrs)) {
+      paddings_ = GetAttr<vector<int>>("paddings", attrs);
+    }
   }

 public:
@@ -2598,7 +2603,7 @@ class DequantizeParam : public OpParam {
                   const AttributeMap &attrs, const Scope &scope) {
     input_ = InputXFrom<GType>(inputs, scope);
     output_ = OutFrom<GType>(outputs, scope);
-    activation_scale_ = GetVarValue<GType>("Scale", inputs, scope);
+    activation_scale_ = OpParam::GetVarValue<GType>("Scale", inputs, scope);
     // dequantization is performed as x = x / static_scale / online_scale
     if (HasAttr("weight_scale", attrs)) {
       weight_scale_ = GetAttr<float>("weight_scale", attrs);
@@ -2617,5 +2622,44 @@ class DequantizeParam : public OpParam {
 };
 #endif

+#ifdef FUSION_DEQUANT_ADD_BN_RELU_OP
+template <typename Dtype>
+class FusionDequantAddBNReluParam : public DequantizeParam<Dtype> {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+
+ public:
+  FusionDequantAddBNReluParam(const VariableNameMap &inputs,
+                              const VariableNameMap &outputs,
+                              const AttributeMap &attrs, const Scope &scope)
+      : DequantizeParam<Dtype>(inputs, outputs, attrs, scope) {
+    // element wise add params
+    axis_ = OpParam::GetAttr<int>("axis", attrs);
+    bias_ = OpParam::InputYFrom<GType>(inputs, scope);
+    // batch norm params
+    bn_mean_ = OpParam::GetVarValue<GType>("BNMean", inputs, scope);
+    bn_variance_ = OpParam::GetVarValue<GType>("BNVariance", inputs, scope);
+    bn_scale_ = OpParam::GetVarValue<GType>("BNScale", inputs, scope);
+    bn_bias_ = OpParam::GetVarValue<GType>("BNBias", inputs, scope);
+    epsilon_ = OpParam::GetAttr<float>("epsilon", attrs);
+    // output
+    output_ = OpParam::OutFrom<GType>(outputs, scope);
+  }
+
+ public:
+  // elementwise add
+  int axis_;
+  RType *bias_;
+  // batch norm
+  RType *bn_mean_;
+  RType *bn_variance_;
+  RType *bn_scale_;
+  RType *bn_bias_;
+  float epsilon_;
+  // output
+  RType *output_;
+};
+#endif
+
 }  // namespace operators
 }  // namespace paddle_mobile
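Note how the string keys here mirror the matcher: FolderNodes in fusion_dequant_add_bn_relu_op.h renames the batch-norm inputs to BNMean/BNVariance/BNScale/BNBias before folding, which is exactly what FusionDequantAddBNReluParam looks up via OpParam::GetVarValue. The OpParam:: qualifications added elsewhere in this file make those static helper lookups explicit.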
src/operators/quantize_op.cpp
@@ -22,7 +22,10 @@ namespace operators {
 template <typename DeviceType, typename T>
 void QuantizeOp<DeviceType, T>::InferShape() const {
-  const auto &input_dims = this->param_.input_->dims();
+  auto input_dims = this->param_.input_->dims();
+  const std::vector<int> &paddings = this->param_.paddings_;
+  input_dims[2] += 2 * paddings[0];
+  input_dims[3] += 2 * paddings[1];
   this->param_.output_->Resize(input_dims);
   auto scale_dims = framework::make_ddim(std::vector<int>{1});
   this->param_.online_scale_->Resize(scale_dims);
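Worked example: with paddings = {1, 1}, a 1x3x224x224 input now infers a 1x3x226x226 int8 output (each spatial dimension grows by 2 x padding), matching the padded rows and columns that quantize_kernel.cpp writes.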
test/operators/test_quantize_op.cpp
@@ -12,58 +12,131 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

 #include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"
 #include "operators/quantize_op.h"

 namespace paddle_mobile {

-static float find_abs_max(const Tensor *input) {
-  float max_abs = 0.f;
-  const float *x = input->data<const float>();
-  size_t size = input->numel();
-  for (size_t i = 0; i < size; ++i) {
-    float value = std::abs(x[i]);
-    if (value > max_abs) {
-      max_abs = value;
-    }
-  }
-  return max_abs;
-}
+namespace round {
+enum RoundType {
+  RoundToEven = 0,
+  RoundAwayZero = 1,
+  RoundTowardsZero = 2,
+};
+}

-static void quantize_round_to_even(const Tensor *input, const float scale,
-                                   Tensor *output) {
-  const float *x = input->data<const float>();
-  int8_t *y = output->mutable_data<int8_t>();
-  size_t size = input->numel();
-  for (size_t i = 0; i < size; ++i) {
-    float value = x[i] * scale;
-    float v = round(value);
-    int32_t q = (int32_t)v;
-    if (abs(abs(q - value) - 0.5) > 0) {
-      y[i] = q;
-    } else {
-      if (abs(q) % 2 == 0) {
-        y[i] = q;
-      } else {
-        y[i] = q + ((q > 0) ? -1 : 1);
-      }
-    }
-  }
-}
+template <round::RoundType T>
+struct Round {
+  int8_t operator()(float x);
+};
+
+template <>
+struct Round<round::RoundAwayZero> {
+  int8_t operator()(float x) { return std::round(x); }
+};
+
+template <>
+struct Round<round::RoundTowardsZero> {
+  int8_t operator()(float x) { return int8_t(x); }
+};
+
+template <>
+struct Round<round::RoundToEven> {
+  int8_t operator()(float x) {
+    int8_t ret = 0;
+    float v = std::round(x);
+    int32_t q = (int32_t)v;
+    if (abs(abs(q - x) - 0.5) > 0) {
+      ret = q;
+    } else {
+      if (abs(q) % 2 == 0) {
+        ret = q;
+      } else {
+        ret = q + ((q > 0) ? -1 : 1);
+      }
+    }
+    return ret;
+  }
+};
+
+template <round::RoundType T>
+static void quantize(const Tensor *input, const float scale, const int pad,
+                     const int8_t pad_val, Tensor *output) {
+  int batch_size = input->dims()[0];
+  int channels = input->dims()[1];
+  int input_h = input->dims()[2];
+  int input_w = input->dims()[3];
+  int output_h = output->dims()[2];
+  int output_w = output->dims()[3];
+  size_t input_spatial = input_h * input_w;
+  size_t output_spatial = output_h * output_w;
+  const float *x = input->data<const float>();
+  int8_t *y = output->mutable_data<int8_t>();
+
+  for (int nc = 0; nc < batch_size * channels; ++nc) {
+    const float *xh = x + nc * input_spatial;
+    int8_t *yh = y + nc * output_spatial;
+    // pad top
+    for (int h = 0; h < pad; ++h, yh += output_w) {
+      for (int w = 0; w < output_w; ++w) {
+        yh[w] = pad_val;
+      }
+    }
+    for (int h = 0; h < input_h; ++h, yh += output_w, xh += input_w) {
+      // pad left
+      for (int w = 0; w < pad; ++w) {
+        yh[w] = pad_val;
+      }
+      for (int w = 0; w < input_w; ++w) {
+        yh[w + pad] = Round<T>()(xh[w] * scale);
+      }
+      // pad right
+      for (int w = 0; w < pad; ++w) {
+        yh[pad + input_w + w] = pad_val;
+      }
+    }
+    // pad bottom
+    for (int h = 0; h < pad; ++h, yh += output_w) {
+      for (int w = 0; w < output_w; ++w) {
+        yh[w] = pad_val;
+      }
+    }
+  }
+}

-static void quantize_round_to_nearest(const Tensor *input, const float scale,
-                                      Tensor *output) {
-  const float *x = input->data<const float>();
-  int8_t *y = output->mutable_data<int8_t>();
-  size_t size = input->numel();
-  for (size_t i = 0; i < size; ++i) {
-    y[i] = round(x[i] * scale);
-  }
-}
+static float find_abs_max(const Tensor *input) {
+  float max_abs = 0.f;
+  const float *x = input->data<const float>();
+  size_t size = input->numel();
+  for (size_t i = 0; i < size; ++i) {
+    float value = std::abs(x[i]);
+    if (value > max_abs) {
+      max_abs = value;
+    }
+  }
+  return max_abs;
+}

-int TestQuqntizeOp() {
-  framework::DDim dim = framework::make_ddim({1, 3, 224, 224});
+int TestQuqntizeOp(int argc, char *argv[]) {
+  if (argc < 5) {
+    std::cout
+        << "Usage: ./test-quantize-op batch_size channel height width [pad]"
+        << std::endl;
+    return 1;
+  }
+  int pad = 0;
+  int batch_size = atoi(argv[1]);
+  int channel = atoi(argv[2]);
+  int height = atoi(argv[3]);
+  int width = atoi(argv[4]);
+  if (argc == 6) {
+    pad = atoi(argv[5]);
+  }
+  std::cout << "batch_size: " << batch_size << ", channel: " << channel
+            << ", height: " << height << ", width: " << width << std::endl;
+  framework::DDim dim =
+      framework::make_ddim({batch_size, channel, height, width});
   VariableNameMap inputs;
   VariableNameMap outputs;
@@ -80,6 +153,7 @@ int TestQuqntizeOp() {
   auto output_scale_var = scope.get()->Var("output_scale");

   framework::AttributeMap attrs;
+  attrs["paddings"].Set<vector<int>>(std::vector<int>({pad, pad}));
   auto *op = new operators::QuantizeOp<CPU, float>("quantize", inputs, outputs,
                                                    attrs, scope);
   op->InferShape();
@@ -96,10 +170,11 @@ int TestQuqntizeOp() {
                         output_scale_cmp, output_scale_data[0]);

   framework::Tensor output_cmp;
-  output_cmp.Resize(dim);
+  output_cmp.Resize(output->dims());
   float scale = 127 / output_scale_cmp;
-  // quantize_round_to_even(input, scale, &output_cmp);
-  quantize_round_to_nearest(input, scale, &output_cmp);
+  // quantize<round::RoundToEven>(input, scale, pad, 0, &output_cmp);
+  // quantize<round::RoundAwayZero>(input, scale, pad, 0, &output_cmp);
+  quantize<round::RoundTowardsZero>(input, scale, pad, 0, &output_cmp);
   int8_t *output_cmp_data = output_cmp.data<int8_t>();
   for (int i = 0; i < output->numel(); ++i) {
     PADDLE_MOBILE_ENFORCE(output_data[i] == output_cmp_data[i],
@@ -113,4 +188,6 @@ int TestQuqntizeOp() {
 }  // namespace paddle_mobile

-int main() { return paddle_mobile::TestQuqntizeOp(); }
+int main(int argc, char *argv[]) {
+  return paddle_mobile::TestQuqntizeOp(argc, argv);
+}
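The test binary now takes its shape from the command line, per its own usage string, e.g. `./test-quantize-op 1 3 224 224 1` for a 1x3x224x224 input with one pixel of padding (the fifth argument is optional and defaults to 0). As a sanity check on Round<round::RoundToEven>: exact halves round to the nearest even integer, so 2.5 maps to 2 while 3.5 maps to 4; every other value rounds normally.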
tools/op.cmake
@@ -250,6 +250,7 @@ if(NOT FOUND_MATCH)
     set(SUM_OP ON)
     set(QUANT_OP ON)
     set(DEQUANT_OP ON)
+    set(FUSION_DEQUANT_ADD_BN_RELU ON)
 endif()

 # option(BATCHNORM_OP "" ON)
@@ -454,6 +455,9 @@ endif()
 if (DEQUANT_OP)
     add_definitions(-DDEQUANT_OP)
 endif()
+if (FUSION_DEQUANT_ADD_BN_RELU)
+    add_definitions(-DFUSION_DEQUANT_ADD_BN_RELU_OP)
+endif()
 if (TANH_OP)
     add_definitions(-DTANH_OP)
@@ -466,4 +470,4 @@ if (FUSION_DECONVADD_OP)
 endif()
 if (FUSION_DECONVADDRELU_OP)
     add_definitions(-DFUSION_DECONVADDRELU_OP)
-endif()
\ No newline at end of file
+endif()
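The new CMake switch FUSION_DEQUANT_ADD_BN_RELU becomes the -DFUSION_DEQUANT_ADD_BN_RELU_OP definition that every new source file above guards with #ifdef, so the fused operator is compiled only when enabled; the last hunk also restores the trailing newline the file was missing.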