Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
a80b04b9
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a80b04b9
编写于
11月 28, 2018
作者:
Z
Zhen Wang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add fusion_conv_add_relu_int8_op and unit test.
上级
ff8141ee
变更
17
显示空白变更内容
内联
并排
Showing
17 changed file
with
743 addition
and
11 deletion
+743
-11
src/common/types.cpp
src/common/types.cpp
+2
-0
src/common/types.h
src/common/types.h
+1
-0
src/framework/op_registry.h
src/framework/op_registry.h
+18
-0
src/operators/fusion_conv_add_relu_int8_op.cpp
src/operators/fusion_conv_add_relu_int8_op.cpp
+56
-0
src/operators/fusion_conv_add_relu_int8_op.h
src/operators/fusion_conv_add_relu_int8_op.h
+44
-0
src/operators/kernel/arm/conv_add_relu_int8_kernel.cpp
src/operators/kernel/arm/conv_add_relu_int8_kernel.cpp
+39
-0
src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
...perators/kernel/central-arm-func/conv_add_relu_arm_func.h
+1
-0
src/operators/kernel/central-arm-func/conv_add_relu_int8_arm_func.h
...ors/kernel/central-arm-func/conv_add_relu_int8_arm_func.h
+125
-0
src/operators/kernel/conv_add_relu_int8_kernel.h
src/operators/kernel/conv_add_relu_int8_kernel.h
+45
-0
src/operators/math/gemm_int8.cpp
src/operators/math/gemm_int8.cpp
+3
-0
src/operators/op_param.h
src/operators/op_param.h
+30
-1
test/CMakeLists.txt
test/CMakeLists.txt
+4
-0
test/common/test_gemm_int8_accuracy.cpp
test/common/test_gemm_int8_accuracy.cpp
+12
-5
test/common/test_gemm_perf.cpp
test/common/test_gemm_perf.cpp
+2
-2
test/operators/test_fusion_conv_add_relu_int8_op.cpp
test/operators/test_fusion_conv_add_relu_int8_op.cpp
+354
-0
test/operators/test_mul_op.cpp
test/operators/test_mul_op.cpp
+3
-3
tools/op.cmake
tools/op.cmake
+4
-0
未找到文件。
src/common/types.cpp
浏览文件 @
a80b04b9
...
@@ -24,6 +24,7 @@ const char *G_OP_TYPE_CONCAT = "concat";
...
@@ -24,6 +24,7 @@ const char *G_OP_TYPE_CONCAT = "concat";
const
char
*
G_OP_TYPE_ELEMENTWISE_ADD
=
"elementwise_add"
;
const
char
*
G_OP_TYPE_ELEMENTWISE_ADD
=
"elementwise_add"
;
const
char
*
G_OP_TYPE_FILL_CONSTANT
=
"fill_constant"
;
const
char
*
G_OP_TYPE_FILL_CONSTANT
=
"fill_constant"
;
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_RELU
=
"fusion_conv_add_relu"
;
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_RELU
=
"fusion_conv_add_relu"
;
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_RELU_INT8
=
"fusion_conv_add_relu_int8"
;
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_PRELU
=
"fusion_conv_add_prelu"
;
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_PRELU
=
"fusion_conv_add_prelu"
;
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU
=
"fusion_conv_add_add_prelu"
;
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU
=
"fusion_conv_add_add_prelu"
;
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_BN_RELU
=
"fusion_conv_add_bn_relu"
;
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_BN_RELU
=
"fusion_conv_add_bn_relu"
;
...
@@ -111,6 +112,7 @@ std::unordered_map<
...
@@ -111,6 +112,7 @@ std::unordered_map<
{
G_OP_TYPE_DEPTHWISE_CONV
,
{{
"Input"
},
{
"Output"
}}},
{
G_OP_TYPE_DEPTHWISE_CONV
,
{{
"Input"
},
{
"Output"
}}},
{
G_OP_TYPE_FILL_CONSTANT
,
{{},
{
"Out"
}}},
{
G_OP_TYPE_FILL_CONSTANT
,
{{},
{
"Out"
}}},
{
G_OP_TYPE_FUSION_CONV_ADD_RELU
,
{{
"Input"
},
{
"Out"
}}},
{
G_OP_TYPE_FUSION_CONV_ADD_RELU
,
{{
"Input"
},
{
"Out"
}}},
{
G_OP_TYPE_FUSION_CONV_ADD_RELU_INT8
,
{{
"Input"
},
{
"Output"
}}},
{
G_OP_TYPE_FUSION_CONV_ADD_PRELU
,
{{
"Input"
},
{
"Out"
}}},
{
G_OP_TYPE_FUSION_CONV_ADD_PRELU
,
{{
"Input"
},
{
"Out"
}}},
{
G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU
,
{{
"Input"
},
{
"Out"
}}},
{
G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU
,
{{
"Input"
},
{
"Out"
}}},
{
G_OP_TYPE_IM2SEQUENCE
,
{{
"X"
},
{
"Out"
}}},
{
G_OP_TYPE_IM2SEQUENCE
,
{{
"X"
},
{
"Out"
}}},
...
...
src/common/types.h
浏览文件 @
a80b04b9
...
@@ -99,6 +99,7 @@ extern const char *G_OP_TYPE_BOX_CODER;
...
@@ -99,6 +99,7 @@ extern const char *G_OP_TYPE_BOX_CODER;
extern
const
char
*
G_OP_TYPE_CONCAT
;
extern
const
char
*
G_OP_TYPE_CONCAT
;
extern
const
char
*
G_OP_TYPE_ELEMENTWISE_ADD
;
extern
const
char
*
G_OP_TYPE_ELEMENTWISE_ADD
;
extern
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_RELU
;
extern
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_RELU
;
extern
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_RELU_INT8
;
extern
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_PRELU
;
extern
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_PRELU
;
extern
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU
;
extern
const
char
*
G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU
;
extern
const
char
*
G_OP_TYPE_FC
;
extern
const
char
*
G_OP_TYPE_FC
;
...
...
src/framework/op_registry.h
浏览文件 @
a80b04b9
...
@@ -98,6 +98,24 @@ class OpRegistry {
...
@@ -98,6 +98,24 @@ class OpRegistry {
}
}
};
};
// Registers an int8 operator for one device.
// The expansion contains, in order:
//   1. an explicit instantiation of op_class<device_type, int8_t>;
//   2. a wrapper class template _OpClass_<op_type>_<device_name> deriving from
//      op_class, whose constructor comes from DEFINE_OP_CONSTRUCTOR;
//   3. a static OperatorRegistrar object constructed with the stringified
//      op_type (#op_type);
//   4. a function TouchOpRegistrar_<op_type>_<device_name>() that calls
//      Touch() on that registrar object and returns 0.
#define REGISTER_OPERATOR_INT8(op_type, op_class, device_name, device_type) \
template class op_class<device_type, int8_t>; \
template <typename Dtype, typename T> \
class _OpClass_##op_type##_##device_name : public op_class<Dtype, T> { \
public: \
DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_##device_name, op_class); \
}; \
static paddle_mobile::framework::OperatorRegistrar< \
device_type, _OpClass_##op_type##_##device_name<device_type, int8_t>> \
__op_registrar_##op_type##_##device_name(#op_type); \
int TouchOpRegistrar_##op_type##_##device_name() { \
__op_registrar_##op_type##_##device_name.Touch(); \
return 0; \
}
// Shorthand for REGISTER_OPERATOR_INT8 with device_name `cpu` and device_type
// `paddle_mobile::CPU`. Note the expansion already ends with a semicolon.
#define REGISTER_OPERATOR_CPU_INT8(op_type, op_class) \
REGISTER_OPERATOR_INT8(op_type, op_class, cpu, paddle_mobile::CPU);
#define REGISTER_OPERATOR(op_type, op_class, device_name, device_type) \
#define REGISTER_OPERATOR(op_type, op_class, device_name, device_type) \
template class op_class<device_type, float>; \
template class op_class<device_type, float>; \
template <typename Dtype, typename T> \
template <typename Dtype, typename T> \
...
...
src/operators/fusion_conv_add_relu_int8_op.cpp
0 → 100644
浏览文件 @
a80b04b9
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDRELU_INT8_OP
#include "operators/fusion_conv_add_relu_int8_op.h"
#include <vector>
#include "operators/math/conv_func.h"
namespace
paddle_mobile
{
namespace
operators
{
// Computes the output tensor shape from the input/filter dims and the
// convolution attributes, then resizes the output tensor to it.
//
// The resulting shape is {in_dims[0], filter_dims[0], s_0, s_1, ...} where each
// s_i is math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
// paddings[i], strides[i]).
//
// Enforces that input and filter have the same rank and that dilations,
// paddings and strides all have the same length.
template <typename Dtype, typename T>
void FusionConvAddReluInt8Op<Dtype, T>::InferShape() const {
  auto in_dims = this->param_.Input()->dims();
  auto filter_dims = this->param_.Filter()->dims();

  // Bind by const reference: the attribute vectors are only read here, so
  // copying them (as was done for paddings/dilations) is unnecessary.
  const std::vector<int> &strides = this->param_.Strides();
  const std::vector<int> &paddings = this->param_.Paddings();
  const std::vector<int> &dilations = this->param_.Dilations();

  PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
                         dilations.size() == paddings.size() &&
                         paddings.size() == strides.size()),
                        "ConvParam is not suitable");

  // Leading two dims: batch size and number of filters (output channels).
  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
  for (size_t i = 0; i < strides.size(); ++i) {
    output_shape.push_back(
        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
                             paddings[i], strides[i]));
  }

  framework::DDim ddim = framework::make_ddim(output_shape);
  this->param_.Output()->Resize(ddim);
}
}
// namespace operators
}
// namespace paddle_mobile
namespace
ops
=
paddle_mobile
::
operators
;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU_INT8
(
fusion_conv_add_relu_int8
,
ops
::
FusionConvAddReluInt8Op
);
#endif
#endif // FUSION_CONVADDRELU_INT8_OP
src/operators/fusion_conv_add_relu_int8_op.h
0 → 100644
浏览文件 @
a80b04b9
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDRELU_INT8_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/conv_add_relu_int8_kernel.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
using
std
::
string
;
// Operator class for the fusion_conv_add_relu_int8 op.
// It is an OperatorWithKernel parameterised with FusionConvAddReluInt8Param
// and ConvAddReluInt8Kernel; the constructor only forwards its arguments to
// that base class. InferShape() is defined out of line.
template <typename DeviceType, typename T>
class FusionConvAddReluInt8Op
    : public framework::OperatorWithKernel<
          DeviceType, FusionConvAddReluInt8Param<DeviceType>,
          operators::ConvAddReluInt8Kernel<DeviceType, T>> {
 public:
  FusionConvAddReluInt8Op(const std::string &type,
                          const VariableNameMap &inputs,
                          const VariableNameMap &outputs,
                          const framework::AttributeMap &attrs,
                          std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<
            DeviceType, FusionConvAddReluInt8Param<DeviceType>,
            operators::ConvAddReluInt8Kernel<DeviceType, T>>(
            type, inputs, outputs, attrs, scope) {}

  // Derives and applies the output tensor shape.
  void InferShape() const override;
};
}
// namespace operators
}
// namespace paddle_mobile
#endif // FUSION_CONVADDRELU_INT8_OP
src/operators/kernel/arm/conv_add_relu_int8_kernel.cpp
0 → 100644
浏览文件 @
a80b04b9
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDRELU_INT8_OP
#include "operators/kernel/conv_add_relu_int8_kernel.h"
#include "operators/kernel/central-arm-func/conv_add_relu_int8_arm_func.h"
namespace
paddle_mobile
{
namespace
operators
{
// CPU/int8 specialisation of Init: there is nothing to prepare, so the
// parameter is ignored and success is always reported.
template <>
bool ConvAddReluInt8Kernel<CPU, int8_t>::Init(
    FusionConvAddReluInt8Param<CPU> * /*param*/) {
  return true;
}
// CPU/int8 specialisation of Compute: delegates the whole computation to
// ConvAddReluInt8Compute instantiated for int8_t.
template <>
void ConvAddReluInt8Kernel<CPU, int8_t>::Compute(
    const FusionConvAddReluInt8Param<CPU> &param) {
  ConvAddReluInt8Compute<int8_t>(param);
}
// Explicit instantiation definition of the kernel for the CPU device with
// int8_t data.
template
class
ConvAddReluInt8Kernel
<
CPU
,
int8_t
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif // FUSION_CONVADDRELU_INT8_OP
src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
浏览文件 @
a80b04b9
...
@@ -33,6 +33,7 @@ void ConvAddReluCompute(const FusionConvAddReluParam<CPU> ¶m) {
...
@@ -33,6 +33,7 @@ void ConvAddReluCompute(const FusionConvAddReluParam<CPU> ¶m) {
int
axis
=
param
.
Axis
();
int
axis
=
param
.
Axis
();
Tensor
*
output
=
param
.
Output
();
Tensor
*
output
=
param
.
Output
();
float
*
biase_data
=
bias
.
data
<
float
>
();
float
*
biase_data
=
bias
.
data
<
float
>
();
output
->
mutable_data
<
P
>
();
int
groups
=
param
.
Groups
();
int
groups
=
param
.
Groups
();
std
::
vector
<
int
>
strides
=
param
.
Strides
();
std
::
vector
<
int
>
strides
=
param
.
Strides
();
...
...
src/operators/kernel/central-arm-func/conv_add_relu_int8_arm_func.h
0 → 100644
浏览文件 @
a80b04b9
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDRELU_INT8_OP
#pragma once
#include <vector>
#include "operators/math/conv_func.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
// Int8 convolution with bias and ReLU, implemented as im2col (or vol2col)
// followed by an int8 GEMM, executed per batch item and per group.
//
// Template parameter P is the element type used for the output tensor and for
// the column buffer. The bias tensor is read as int32_t and the first element
// of the scale tensor (read as float) is forwarded to matmul_int8.
//
// Fix over the previous revision: every group used to receive the same bias
// base pointer. Each group now receives the bias slice that starts at its
// first output channel (g * out_step). For groups == 1 nothing changes.
template <typename P>
void ConvAddReluInt8Compute(const FusionConvAddReluInt8Param<CPU> &param) {
  const Tensor *input = param.Input();
  // Local Tensor copy: it is Resize()d below, so the param's own filter
  // object is left untouched.
  Tensor filter = *param.Filter();
  Tensor bias = *param.Bias();
  Tensor scale = *param.InputScale();
  Tensor *output = param.Output();
  output->mutable_data<P>();
  int32_t *bias_data = bias.data<int32_t>();
  float scale_v = scale.data<float>()[0];

  int32_t groups = param.Groups();
  std::vector<int32_t> strides = param.Strides();
  std::vector<int32_t> paddings = param.Paddings();
  std::vector<int32_t> dilations = param.Dilations();

  const int32_t batch_size = static_cast<int32_t>(input->dims()[0]);

  std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
  std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));

  // Number of spatial dimensions (filter rank minus the two channel dims).
  size_t data_dim = filter_shape_vec.size() - 2;

  // Column buffer shape: {in_channels / groups, filter spatial dims...,
  // output spatial dims...}.
  std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
  col_shape_vec[0] = input->dims()[1] / groups;
  for (size_t j = 0; j < data_dim; ++j) {
    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
  }
  framework::DDim col_shape(framework::make_ddim(col_shape_vec));
  framework::DDim col_matrix_shape =
      framework::flatten_to_2d(col_shape, data_dim + 1);

  bool is_expand =
      math::IsExpand(filter_shape_vec, strides, paddings, dilations);
  Tensor col;
  Tensor col_matrix;
  if (is_expand) {
    // A real column buffer is needed; col_matrix is a 2-D view of it.
    col.mutable_data<P>(col_shape);
    col_matrix.ShareDataWith(col);
    col_matrix.Resize(col_matrix_shape);
  }

  // Per-batch-item input shape (input dims without the leading batch dim).
  framework::DDim input_shape = framework::slice_ddim(
      input->dims(), 1, static_cast<int32_t>(input->dims().size()));

  framework::DDim filter_matrix_shape = {filter.dims()[0],
                                         filter.numel() / filter.dims()[0]};
  filter.Resize(filter_matrix_shape);
  framework::DDim output_matrix_shape = {
      output->dims()[1],
      output->numel() / (output->dims()[0] * output->dims()[1])};

  // convolution operator: im2col(or vol2col) + gemm
  int32_t in_step = static_cast<int32_t>(input->dims()[1]) / groups;
  int32_t out_step = static_cast<int32_t>(output->dims()[1]) / groups;

  math::Vol2ColFunctor<CPU, P> vol2col;
  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, P> im2col;

  for (int32_t i = 0; i < batch_size; i++) {
    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
    Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);

    for (int32_t g = 0; g < groups; g++) {
      Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);

      if (!is_expand) {
        // No expansion needed: view the input slice directly as the column
        // matrix.
        col.ShareDataWith(in_slice);
        col_matrix.ShareDataWith(col);
        col_matrix.Resize(col_matrix_shape);
      } else if (data_dim == 2U) {
        // im2col
        im2col(in_slice, dilations, strides,
               std::vector<int32_t>{paddings[0], paddings[1], paddings[0],
                                    paddings[1]},
               &col);
      } else if (data_dim == 3U) {
        // vol2col
        vol2col(in_slice, dilations, strides, paddings, &col);
      }

      // gemm
      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
      // out_slice holds only this group's output channels, so pass the bias
      // entries for those same channels.
      // NOTE(review): assumes matmul_int8 indexes the bias by output row and
      // that bias holds one int32 per output channel -- confirm against
      // matmul_int8.
      math::matmul_int8(filter_slice, false, col_matrix, false, scale_v,
                        &out_slice, static_cast<float>(0), true,
                        bias_data + g * out_step);
    }
  }
}
}
// namespace operators
}
// namespace paddle_mobile
#endif // FUSION_CONVADDRELU_INT8_OP
src/operators/kernel/conv_add_relu_int8_kernel.h
0 → 100644
浏览文件 @
a80b04b9
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDRELU_INT8_OP
#pragma once
#include <vector>
#include "framework/ddim.h"
#include "framework/operator.h"
#include "operators/math/conv_func.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
operators
{
using
framework
::
DDim
;
using
framework
::
OpKernelBase
;
// Kernel interface for the fused int8 conv + add + relu operator.
// Both members are only declared here; definitions are supplied per
// device/data-type combination elsewhere.
template <typename DeviceType, typename T>
class ConvAddReluInt8Kernel
    : public OpKernelBase<DeviceType, FusionConvAddReluInt8Param<DeviceType>> {
 public:
  // Prepares the kernel for the given parameters; returns a success flag.
  bool Init(FusionConvAddReluInt8Param<DeviceType> *param);

  // Runs the kernel on the given parameters.
  void Compute(const FusionConvAddReluInt8Param<DeviceType> &param);
};
}
// namespace operators
}
// namespace paddle_mobile
#endif // FUSION_CONVADDRELU_INT8_OP
src/operators/math/gemm_int8.cpp
浏览文件 @
a80b04b9
...
@@ -243,6 +243,9 @@ void Gemm::AddDot4x8(int32_t k, const int8_t *a, const int8_t *b, int32_t *c,
...
@@ -243,6 +243,9 @@ void Gemm::AddDot4x8(int32_t k, const int8_t *a, const int8_t *b, int32_t *c,
#endif // __ARM_NEON
#endif // __ARM_NEON
}
}
// The core idea of the AddDot4x2 function is borrowed from Google's gemmlowp
// open source library, which is available at
// https://github.com/google/gemmlowp.
void
Gemm
::
AddDot4x2
(
int32_t
k
,
const
int8_t
*
a
,
const
int8_t
*
b
,
int32_t
*
c
,
void
Gemm
::
AddDot4x2
(
int32_t
k
,
const
int8_t
*
a
,
const
int8_t
*
b
,
int32_t
*
c
,
int32_t
ldc
)
{
int32_t
ldc
)
{
#if __ARM_NEON
#if __ARM_NEON
...
...
src/operators/op_param.h
浏览文件 @
a80b04b9
...
@@ -437,7 +437,7 @@ class ConvParam : public OpParam {
...
@@ -437,7 +437,7 @@ class ConvParam : public OpParam {
#endif
#endif
pr
ivate
:
pr
otected
:
RType
*
input_
;
RType
*
input_
;
mutable
RType
*
output_
;
mutable
RType
*
output_
;
mutable
RType
*
filter_
;
mutable
RType
*
filter_
;
...
@@ -1709,6 +1709,35 @@ class FusionConvAddReluParam : public FusionConvAddParam<DeviceType> {
...
@@ -1709,6 +1709,35 @@ class FusionConvAddReluParam : public FusionConvAddParam<DeviceType> {
};
};
#endif
#endif
#ifdef FUSION_CONVADDRELU_INT8_OP
// Parameter bundle for the fused int8 conv + add + relu operator.
// Extends ConvParam with three extra values read at construction time:
//   - the input-scale tensor (OpParam::InputScaleFrom),
//   - the bias tensor, taken from the "Y"-style input (OpParam::InputYFrom),
//   - the integer attribute "axis".
template <typename Dtype>
class FusionConvAddReluInt8Param : public ConvParam<Dtype> {
  using GType = typename DtypeTensorTrait<Dtype>::gtype;
  using RType = typename DtypeTensorTrait<Dtype>::rtype;

 public:
  FusionConvAddReluInt8Param(const VariableNameMap &inputs,
                             const VariableNameMap &outputs,
                             const AttributeMap &attrs, const Scope &scope)
      : ConvParam<Dtype>(inputs, outputs, attrs, scope),
        scale_(OpParam::InputScaleFrom<GType>(inputs, scope)),
        bias_(OpParam::InputYFrom<GType>(inputs, scope)),
        axis_(OpParam::GetAttr<int>("axis", attrs)) {}

  // Tensor holding the input scale.
  const RType *InputScale() const { return scale_; }

  // Bias tensor added to the convolution result.
  RType *Bias() const { return bias_; }

  // Value of the "axis" attribute.
  const int &Axis() const { return axis_; }

 protected:
  RType *scale_;
  RType *bias_;
  int axis_;
};
#endif
#ifdef FUSION_CONVADDPRELU_OP
#ifdef FUSION_CONVADDPRELU_OP
template
<
typename
Dtype
>
template
<
typename
Dtype
>
class
FusionConvAddPReluParam
:
public
ConvParam
<
Dtype
>
{
class
FusionConvAddPReluParam
:
public
ConvParam
<
Dtype
>
{
...
...
test/CMakeLists.txt
浏览文件 @
a80b04b9
...
@@ -324,6 +324,10 @@ if (NOT FOUND_MATCH)
...
@@ -324,6 +324,10 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE
(
test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h test_include.h executor_for_test.h
)
ADD_EXECUTABLE
(
test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h test_include.h executor_for_test.h
)
target_link_libraries
(
test-conv-add-relu-op paddle-mobile
)
target_link_libraries
(
test-conv-add-relu-op paddle-mobile
)
# gen test
ADD_EXECUTABLE
(
test-conv-add-relu-int8-op operators/test_fusion_conv_add_relu_int8_op.cpp test_helper.h test_include.h
)
target_link_libraries
(
test-conv-add-relu-int8-op paddle-mobile
)
# gen test
# gen test
ADD_EXECUTABLE
(
test-conv-add-bn-relu-op operators/test_fusion_conv_add_bn_relu_op.cpp test_helper.h test_include.h executor_for_test.h
)
ADD_EXECUTABLE
(
test-conv-add-bn-relu-op operators/test_fusion_conv_add_bn_relu_op.cpp test_helper.h test_include.h executor_for_test.h
)
target_link_libraries
(
test-conv-add-bn-relu-op paddle-mobile
)
target_link_libraries
(
test-conv-add-bn-relu-op paddle-mobile
)
...
...
test/common/test_gemm_int8_accuracy.cpp
浏览文件 @
a80b04b9
...
@@ -65,12 +65,19 @@ int32_t qadd_int32(int32_t l, int32_t r) {
...
@@ -65,12 +65,19 @@ int32_t qadd_int32(int32_t l, int32_t r) {
return
static_cast
<
int32_t
>
(
res
);
return
static_cast
<
int32_t
>
(
res
);
}
}
// Rounds v toward zero: positive values are floored, negative values are
// ceiled. Zero (and NaN) are returned unchanged.
float round2zero(float v) {
  // Start from v so that inputs matching neither branch (0 and NaN) yield a
  // defined result; previously `res` was returned uninitialized for them.
  float res = v;
  if (v > 0) {
    res = std::floor(v);
  } else if (v < 0) {
    res = std::ceil(v);
  }
  return res;
}
int8_t
qscale_int32
(
int32_t
v
,
float
scale
)
{
int8_t
qscale_int32
(
int32_t
v
,
float
scale
)
{
float
res
=
static_cast
<
float
>
(
v
)
*
scale
;
float
res
=
static_cast
<
float
>
(
v
)
*
scale
;
if
(
res
>
0
)
res
=
round2zero
(
res
);
res
=
std
::
floor
(
res
);
else
if
(
res
<
0
)
res
=
std
::
ceil
(
res
);
// round to zero
if
(
res
>
127
)
if
(
res
>
127
)
return
static_cast
<
int8_t
>
(
127
);
return
static_cast
<
int8_t
>
(
127
);
else
if
(
res
<
-
127
)
else
if
(
res
<
-
127
)
...
@@ -155,7 +162,7 @@ int do_sgemm_with_bias(int m, int n, int k, bool relu, int pr) {
...
@@ -155,7 +162,7 @@ int do_sgemm_with_bias(int m, int n, int k, bool relu, int pr) {
int
lda
=
k
;
int
lda
=
k
;
int
ldb
=
n
;
int
ldb
=
n
;
int
ldc
=
n
;
int
ldc
=
n
;
float
scale
=
0.00628
;
float
scale
=
0.00628
f
;
default_random_engine
e
;
default_random_engine
e
;
uniform_int_distribution
<
int8_t
>
pixel
(
-
127
,
127
);
uniform_int_distribution
<
int8_t
>
pixel
(
-
127
,
127
);
int8_t
*
a
=
static_cast
<
int8_t
*>
(
int8_t
*
a
=
static_cast
<
int8_t
*>
(
...
...
test/common/test_gemm_perf.cpp
浏览文件 @
a80b04b9
...
@@ -103,13 +103,13 @@ int main() {
...
@@ -103,13 +103,13 @@ int main() {
// warm-up 10 times
// warm-up 10 times
for
(
int
j
=
0
;
j
<
10
;
++
j
)
{
for
(
int
j
=
0
;
j
<
10
;
++
j
)
{
paddle_mobile
::
operators
::
math
::
matmul_int8
(
paddle_mobile
::
operators
::
math
::
matmul_int8
(
aa_int8
,
false
,
bb_int8
,
false
,
static_cast
<
float
>
(
1
),
&
cc_int8
,
aa_int8
,
false
,
bb_int8
,
false
,
static_cast
<
float
>
(
0.618
),
&
cc_int8
,
static_cast
<
float
>
(
0
),
true
,
&
bias_data
[
0
]);
static_cast
<
float
>
(
0
),
true
,
&
bias_data
[
0
]);
}
}
auto
time5
=
time
();
auto
time5
=
time
();
for
(
int
j
=
0
;
j
<
10
;
++
j
)
{
for
(
int
j
=
0
;
j
<
10
;
++
j
)
{
paddle_mobile
::
operators
::
math
::
matmul_int8
(
paddle_mobile
::
operators
::
math
::
matmul_int8
(
aa_int8
,
false
,
bb_int8
,
false
,
static_cast
<
float
>
(
1
),
&
cc_int8
,
aa_int8
,
false
,
bb_int8
,
false
,
static_cast
<
float
>
(
0.618
),
&
cc_int8
,
static_cast
<
float
>
(
0
),
true
,
&
bias_data
[
0
]);
static_cast
<
float
>
(
0
),
true
,
&
bias_data
[
0
]);
}
}
auto
time6
=
time
();
auto
time6
=
time
();
...
...
test/operators/test_fusion_conv_add_relu_int8_op.cpp
0 → 100644
浏览文件 @
a80b04b9
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/fusion_conv_add_relu_int8_op.h"
namespace
paddle_mobile
{
// Saturating 32-bit addition: the sum is formed in 64 bits and clamped to
// [INT_MIN, INT_MAX] before being narrowed back to int32_t.
int32_t qadd_int32(int32_t l, int32_t r) {
  const int64_t wide_sum = static_cast<int64_t>(l) + static_cast<int64_t>(r);
  if (wide_sum > INT_MAX) {
    return INT_MAX;
  }
  if (wide_sum < INT_MIN) {
    return INT_MIN;
  }
  return static_cast<int32_t>(wide_sum);
}
// Rounds v toward zero: positive values are floored, negative values are
// ceiled. Zero (and NaN) are returned unchanged.
float round2zero(float v) {
  // Start from v so that inputs matching neither branch (0 and NaN) yield a
  // defined result; previously `res` was returned uninitialized for them.
  float res = v;
  if (v > 0) {
    res = std::floor(v);
  } else if (v < 0) {
    res = std::ceil(v);
  }
  return res;
}
// Scales a 32-bit value by `scale`, rounds the product toward zero with
// round2zero, and saturates the result to [-127, 127] before narrowing it to
// int8_t.
int8_t qscale_int32(int32_t v, float scale) {
  const float product = static_cast<float>(v) * scale;
  const float truncated = round2zero(product);
  if (truncated > 127) {
    return static_cast<int8_t>(127);
  }
  if (truncated < -127) {
    return static_cast<int8_t>(-127);
  }
  return static_cast<int8_t>(truncated);
}
// Reference convolution (adapted from Caffe) used to check the kernel output.
// Phase 1 accumulates input * filter products into a zero-initialised int32
// buffer using explicit loops over batch, group, output channel, input
// channel, output position and filter tap.
// Phase 2 adds the per-output-channel int32 bias with qadd_int32, clamps the
// sum at zero, rescales it with qscale_int32 and stores it into `output`.
//
// `attrs` must provide "paddings", "strides", "dilations" and "groups".
// `output` must already carry the expected output dims.
template <typename T>
void conv2d(const framework::Tensor *input, const framework::Tensor *filter,
            const framework::Tensor *bias, const framework::AttributeMap &attrs,
            framework::Tensor *output, float scale) {
  framework::AttrReader reader(attrs);
  std::vector<int> paddings = reader.Get<std::vector<int>>("paddings");
  std::vector<int> strides = reader.Get<std::vector<int>>("strides");
  std::vector<int> dilations = reader.Get<std::vector<int>>("dilations");
  int groups = reader.Get<int>("groups");

  int kernel_h = filter->dims()[2];
  int kernel_w = filter->dims()[3];
  int pad_h = paddings[0];
  int pad_w = paddings[1];
  int stride_h = strides[0];
  int stride_w = strides[1];
  int dilation_h = dilations[0];
  int dilation_w = dilations[1];

  auto in_shape = input->dims();
  auto out_shape = output->dims();

  // The depth axis is disabled here, so it degenerates to extent 1 with
  // stride/dilation 1 and no padding.
  const bool has_depth = 0;
  const int kernel_d = has_depth ? kernel_h : 1;
  const int stride_d = has_depth ? stride_h : 1;
  const int pad_d = has_depth ? pad_h : 0;
  const int dilation_d = has_depth ? dilation_h : 1;

  // Groups
  const int out_per_group = out_shape[1] / groups;
  const int in_per_group = in_shape[1] / groups;

  // Convolution
  std::vector<int> weight_idx(4 + has_depth);
  std::vector<int> in_idx(4 + has_depth);
  std::vector<int> out_idx(4 + has_depth);

  // Converts a multi-dimensional index into a flat row-major position using
  // the dims of the given tensor.
  auto flat_index = [](const framework::Tensor *tensor,
                       const std::vector<int> &indices) {
    framework::DDim shape = tensor->dims();
    size_t pos = 0;
    for (int d = 0; d < indices.size(); ++d) {
      pos *= shape[d];
      pos += indices[d];
    }
    return pos;
  };

  const T *in_data = input->data<T>();
  const T *w_data = filter->data<T>();

  // 32-bit accumulator with the same dims as the output.
  framework::Tensor accum;
  int32_t *accum_data = accum.mutable_data<int32_t>(out_shape);
  memset(accum_data, 0, accum.numel() * sizeof(int32_t));

  // Loop bounds; the shapes are not modified while the loops run.
  const auto out_depth = has_depth ? out_shape[2] : 1;
  const auto out_rows = out_shape[2 + has_depth];
  const auto out_cols = out_shape[3 + has_depth];
  const auto in_depth = has_depth ? in_shape[2] : 1;
  const auto in_rows = in_shape[2 + has_depth];
  const auto in_cols = in_shape[3 + has_depth];

  for (int n = 0; n < out_shape[0]; n++) {
    for (int g = 0; g < groups; g++) {
      const int o_head = out_per_group * g;
      const int k_head = in_per_group * g;
      for (int o = 0; o < out_per_group; o++) {
        for (int k = 0; k < in_per_group; k++) {
          for (int z = 0; z < out_depth; z++) {
            for (int y = 0; y < out_rows; y++) {
              for (int x = 0; x < out_cols; x++) {
                for (int r = 0; r < kernel_d; r++) {
                  for (int p = 0; p < kernel_h; p++) {
                    for (int q = 0; q < kernel_w; q++) {
                      const int in_z = z * stride_d - pad_d + r * dilation_d;
                      const int in_y = y * stride_h - pad_h + p * dilation_h;
                      const int in_x = x * stride_w - pad_w + q * dilation_w;

                      // Taps that fall into the padding contribute nothing.
                      const bool inside = in_z >= 0 && in_z < in_depth &&
                                          in_y >= 0 && in_y < in_rows &&
                                          in_x >= 0 && in_x < in_cols;
                      if (!inside) {
                        continue;
                      }

                      weight_idx[0] = o + o_head;
                      weight_idx[1] = k;
                      if (has_depth) {
                        weight_idx[2] = r;
                      }
                      weight_idx[2 + has_depth] = p;
                      weight_idx[3 + has_depth] = q;

                      in_idx[0] = n;
                      in_idx[1] = k + k_head;
                      if (has_depth) {
                        in_idx[2] = in_z;
                      }
                      in_idx[2 + has_depth] = in_y;
                      in_idx[3 + has_depth] = in_x;

                      out_idx[0] = n;
                      out_idx[1] = o + o_head;
                      if (has_depth) {
                        out_idx[2] = z;
                      }
                      out_idx[2 + has_depth] = y;
                      out_idx[3 + has_depth] = x;

                      accum_data[flat_index(output, out_idx)] +=
                          in_data[flat_index(input, in_idx)] *
                          w_data[flat_index(filter, weight_idx)];
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }

  // Bias + ReLU + requantisation.
  T *out_data = output->mutable_data<T>();
  int32_t batch = out_shape[0];
  int32_t channels = out_shape[1];
  int32_t rows = out_shape[2];
  int32_t cols = out_shape[3];
  const int32_t *bias_data = bias->data<int32_t>();
  for (int b = 0; b < batch; ++b) {
    for (int ch = 0; ch < channels; ++ch) {
      int32_t bias_v = bias_data[ch];
      for (int row = 0; row < rows; ++row) {
        for (int col = 0; col < cols; ++col) {
          const int32_t pos = b * channels * rows * cols + ch * rows * cols +
                              row * cols + col;
          int32_t acc = accum_data[pos];
          acc = qadd_int32(acc, bias_v);
          acc = std::max(0, acc);
          out_data[pos] = qscale_int32(acc, scale);
        }
      }
    }
  }
}
template
<
typename
T
,
int
Kernel
,
int
Pad
,
int
Stride
>
int
TestConvOp
(
int
in_channels
,
int
in_height
,
int
in_width
,
int
out_channels
)
{
int
kernel_h
=
Kernel
;
int
kernel_w
=
Kernel
;
int
pad_h
=
Pad
;
int
pad_w
=
Pad
;
int
stride_h
=
Stride
;
int
stride_w
=
Stride
;
int
dilation_h
=
1
;
int
dilation_w
=
1
;
int
batch_size
=
1
;
int
input_c
=
in_channels
;
int
input_h
=
in_height
;
int
input_w
=
in_width
;
int
output_c
=
out_channels
;
framework
::
DDim
input_shape
=
framework
::
make_ddim
({
batch_size
,
input_c
,
input_h
,
input_w
});
framework
::
DDim
filter_shape
=
framework
::
make_ddim
({
output_c
,
input_c
,
kernel_h
,
kernel_w
});
int
kernel_extent_h
=
dilation_h
*
(
kernel_h
-
1
)
+
1
;
int
kernel_extent_w
=
dilation_w
*
(
kernel_w
-
1
)
+
1
;
int
output_h
=
(
input_h
+
2
*
pad_h
-
kernel_extent_h
)
/
stride_h
+
1
;
int
output_w
=
(
input_w
+
2
*
pad_w
-
kernel_extent_w
)
/
stride_w
+
1
;
framework
::
DDim
output_shape
=
framework
::
make_ddim
(
std
::
vector
<
int
>
({
batch_size
,
output_c
,
output_h
,
output_w
}));
framework
::
DDim
bias_shape
=
framework
::
make_ddim
({
output_c
});
VariableNameMap
inputs
;
VariableNameMap
outputs
;
auto
scope
=
std
::
make_shared
<
framework
::
Scope
>
();
inputs
[
"Input"
]
=
std
::
vector
<
std
::
string
>
({
"input"
});
inputs
[
"Filter"
]
=
std
::
vector
<
std
::
string
>
({
"filter"
});
inputs
[
"Scale"
]
=
std
::
vector
<
std
::
string
>
({
"scale"
});
inputs
[
"Y"
]
=
std
::
vector
<
std
::
string
>
({
"y"
});
outputs
[
"Output"
]
=
std
::
vector
<
std
::
string
>
({
"output"
});
auto
input_var
=
scope
.
get
()
->
Var
(
"input"
);
auto
input
=
input_var
->
template
GetMutable
<
framework
::
LoDTensor
>();
SetupTensor
<
T
>
(
input
,
input_shape
,
-
127
,
127
);
auto
filter_var
=
scope
.
get
()
->
Var
(
"filter"
);
auto
filter
=
filter_var
->
template
GetMutable
<
framework
::
LoDTensor
>();
SetupTensor
<
T
>
(
filter
,
filter_shape
,
-
127
,
127
);
auto
scale_var
=
scope
.
get
()
->
Var
(
"scale"
);
auto
scale
=
scale_var
->
template
GetMutable
<
framework
::
LoDTensor
>();
scale
->
Resize
(
framework
::
make_ddim
({
1
}));
float
scale_v
=
0.000828
f
;
scale
->
mutable_data
<
float
>
()[
0
]
=
scale_v
;
auto
bias_var
=
scope
.
get
()
->
Var
(
"y"
);
auto
bias
=
bias_var
->
template
GetMutable
<
framework
::
LoDTensor
>();
SetupTensor
<
int32_t
>
(
bias
,
bias_shape
,
-
127
,
127
);
auto
output_var
=
scope
.
get
()
->
Var
(
"output"
);
framework
::
AttributeMap
attrs
;
attrs
[
"strides"
].
Set
<
vector
<
int
>>
(
std
::
vector
<
int
>
({
stride_h
,
stride_w
}));
attrs
[
"paddings"
].
Set
<
vector
<
int
>>
(
std
::
vector
<
int
>
({
pad_h
,
pad_w
}));
attrs
[
"dilations"
].
Set
<
vector
<
int
>>
(
std
::
vector
<
int
>
({
dilation_h
,
dilation_w
}));
attrs
[
"groups"
].
Set
<
int
>
(
1
);
attrs
[
"axis"
].
Set
<
int
>
(
0
);
auto
*
op
=
new
operators
::
FusionConvAddReluInt8Op
<
CPU
,
int8_t
>
(
"fusion_conv_add_relu_int8"
,
inputs
,
outputs
,
attrs
,
scope
);
op
->
InferShape
();
op
->
Init
();
op
->
Run
();
framework
::
Tensor
output_cmp
;
output_cmp
.
mutable_data
<
T
>
(
output_shape
);
conv2d
<
T
>
(
input
,
filter
,
bias
,
attrs
,
&
output_cmp
,
scale_v
);
// compare results
int
eq
=
0
;
int
neq
=
0
;
auto
output
=
output_var
->
template
Get
<
framework
::
LoDTensor
>();
const
T
*
output_data
=
output
->
data
<
T
>
();
T
*
output_cmp_data
=
output_cmp
.
data
<
T
>
();
for
(
int
i
=
0
;
i
<
output
->
numel
();
++
i
)
{
PADDLE_MOBILE_ENFORCE
(
output_data
[
i
]
==
output_cmp_data
[
i
],
"The execution of test_fusion_conv_add_relu_int8_op is failed!"
);
if
(
output_data
[
i
]
==
output_cmp_data
[
i
])
{
++
eq
;
}
else
{
++
neq
;
}
}
std
::
cout
<<
"eq = "
<<
eq
<<
", neq = "
<<
neq
<<
std
::
endl
;
delete
op
;
return
0
;
}
}
// namespace paddle_mobile
/**
 * Command-line driver for the fusion_conv_add_relu_int8 operator test.
 *
 * Usage: ./test-conv-add-relu-int8-op in_channels in_height in_width
 *        out_channels
 *
 * Parses the four positional arguments and runs
 * paddle_mobile::TestConvOp<int8_t, Kernel, Pad, Stride> for a fixed list of
 * kernel / padding / stride combinations on the same input geometry.
 *
 * @param argc number of command-line arguments (program name included).
 * @param argv argument vector; argv[1..4] are in_channels, in_height,
 *             in_width and out_channels.
 * @return 0 after every configuration has been run, 1 when the arguments are
 *         missing or unusable.
 */
int main(int argc, char *argv[]) {
  if (argc < 5) {
    LOG(paddle_mobile::kLOG_INFO)
        << "Usage:\n"
        << " ./test-conv-add-relu-int8-op in_channels in_height in_width "
           "out_channels\n"
        << " params:\n"
        << " -in_channels: int, input image's channels\n"
        << " -in_height: int, input image's height\n"
        << " -in_width: int, input image's width\n"
        << " -out_channels: int, conv output channels\n";
    return 1;
  }

  int in_channels = atoi(argv[1]);
  int in_height = atoi(argv[2]);
  int in_width = atoi(argv[3]);
  int out_channels = atoi(argv[4]);

  // atoi() returns 0 for text that is not a number, so rejecting non-positive
  // values covers both malformed and out-of-range arguments.
  if (in_channels <= 0 || in_height <= 0 || in_width <= 0 ||
      out_channels <= 0) {
    LOG(paddle_mobile::kLOG_INFO)
        << "in_channels, in_height, in_width and out_channels must all be "
           "positive integers";
    return 1;
  }

  // The list below contains kernel=7 with pad=0. With the test's output size
  // formula (input + 2 * pad - kernel) / stride + 1, an input smaller than the
  // kernel would yield a non-positive (or bogus) output extent.
  const int min_spatial_size = 7;
  if (in_height < min_spatial_size || in_width < min_spatial_size) {
    LOG(paddle_mobile::kLOG_INFO)
        << "in_height and in_width must be at least " << min_spatial_size
        << " because a 7x7 kernel is tested without padding";
    return 1;
  }

  // kernel = 3, pad = 1, stride = 1
  LOG(paddle_mobile::kLOG_INFO) << "int8_t, kernel=3, pad=1, stride=1";
  paddle_mobile::TestConvOp<int8_t, 3, 1, 1>(in_channels, in_height, in_width,
                                             out_channels);

  // kernel = 7, pad = 0, stride = 2
  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=0, stride=2";
  paddle_mobile::TestConvOp<int8_t, 7, 0, 2>(in_channels, in_height, in_width,
                                             out_channels);
  // kernel = 7, pad = 1, stride = 2
  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=1, stride=2";
  paddle_mobile::TestConvOp<int8_t, 7, 1, 2>(in_channels, in_height, in_width,
                                             out_channels);
  // kernel = 7, pad = 3, stride = 2
  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=3, stride=2";
  paddle_mobile::TestConvOp<int8_t, 7, 3, 2>(in_channels, in_height, in_width,
                                             out_channels);
  // kernel = 7, pad = 0, stride = 1
  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=0, stride=1";
  paddle_mobile::TestConvOp<int8_t, 7, 0, 1>(in_channels, in_height, in_width,
                                             out_channels);
  // kernel = 7, pad = 1, stride = 1
  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=1, stride=1";
  paddle_mobile::TestConvOp<int8_t, 7, 1, 1>(in_channels, in_height, in_width,
                                             out_channels);
  // kernel = 7, pad = 3, stride = 1
  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=3, stride=1";
  paddle_mobile::TestConvOp<int8_t, 7, 3, 1>(in_channels, in_height, in_width,
                                             out_channels);
  // kernel = 7, pad = 5, stride = 3
  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=5, stride=3";
  paddle_mobile::TestConvOp<int8_t, 7, 5, 3>(in_channels, in_height, in_width,
                                             out_channels);
  // kernel = 7, pad = 3, stride = 4
  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=3, stride=4";
  paddle_mobile::TestConvOp<int8_t, 7, 3, 4>(in_channels, in_height, in_width,
                                             out_channels);
  // kernel = 3, pad = 0, stride = 1
  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=3, pad=0, stride=1";
  paddle_mobile::TestConvOp<int8_t, 3, 0, 1>(in_channels, in_height, in_width,
                                             out_channels);
  // kernel = 3, pad = 1, stride = 1
  // NOTE(review): same template arguments as the first case above; kept so the
  // set of executed runs is unchanged -- confirm whether it is intentional.
  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=3, pad=1, stride=1";
  paddle_mobile::TestConvOp<int8_t, 3, 1, 1>(in_channels, in_height, in_width,
                                             out_channels);
  // kernel = 5, pad = 0, stride = 1
  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=0, stride=1";
  paddle_mobile::TestConvOp<int8_t, 5, 0, 1>(in_channels, in_height, in_width,
                                             out_channels);
  // kernel = 5, pad = 2, stride = 1
  LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=2, stride=1";
  paddle_mobile::TestConvOp<int8_t, 5, 2, 1>(in_channels, in_height, in_width,
                                             out_channels);

  return 0;
}
test/operators/test_mul_op.cpp
浏览文件 @
a80b04b9
...
@@ -79,14 +79,14 @@ int TestMulOP() {
...
@@ -79,14 +79,14 @@ int TestMulOP() {
PADDLE_MOBILE_ENFORCE
(
PADDLE_MOBILE_ENFORCE
(
output_data
[
i
]
==
c
[
i
],
"output[%d] = %d, output_cmp[%d] = %d"
,
i
,
output_data
[
i
]
==
c
[
i
],
"output[%d] = %d, output_cmp[%d] = %d"
,
i
,
static_cast
<
int32_t
>
(
output_data
[
i
]),
i
,
static_cast
<
int32_t
>
(
c
[
i
]));
static_cast
<
int32_t
>
(
output_data
[
i
]),
i
,
static_cast
<
int32_t
>
(
c
[
i
]));
if
(
static_cast
<
int
>
(
output_data
[
i
]
==
c
[
i
])
)
{
if
(
output_data
[
i
]
==
c
[
i
]
)
{
++
eq
;
++
eq
;
}
else
{
}
else
{
++
neq
;
++
neq
;
}
}
}
}
DLOG
<<
"mnk="
<<
m
<<
" "
<<
n
<<
" "
<<
k
<<
" eq="
<<
eq
std
::
cout
<<
"mnk="
<<
m
<<
" "
<<
n
<<
" "
<<
k
<<
" eq="
<<
eq
<<
" neq="
<<
neq
;
<<
" neq="
<<
neq
<<
std
::
endl
;
delete
op
;
delete
op
;
return
0
;
return
0
;
}
}
...
...
tools/op.cmake
浏览文件 @
a80b04b9
...
@@ -213,6 +213,7 @@ if(NOT FOUND_MATCH)
...
@@ -213,6 +213,7 @@ if(NOT FOUND_MATCH)
set
(
FUSION_CONVADD_OP ON
)
set
(
FUSION_CONVADD_OP ON
)
set
(
FUSION_CONVADDPRELU_OP ON
)
set
(
FUSION_CONVADDPRELU_OP ON
)
set
(
FUSION_CONVADDRELU_OP ON
)
set
(
FUSION_CONVADDRELU_OP ON
)
set
(
FUSION_CONVADDRELU_INT8_OP ON
)
set
(
FUSION_FC_OP ON
)
set
(
FUSION_FC_OP ON
)
set
(
LRN_OP ON
)
set
(
LRN_OP ON
)
set
(
MUL_OP ON
)
set
(
MUL_OP ON
)
...
@@ -306,6 +307,9 @@ endif()
...
@@ -306,6 +307,9 @@ endif()
if
(
FUSION_CONVADDRELU_OP
)
if
(
FUSION_CONVADDRELU_OP
)
add_definitions
(
-DFUSION_CONVADDRELU_OP
)
add_definitions
(
-DFUSION_CONVADDRELU_OP
)
endif
()
endif
()
# When FUSION_CONVADDRELU_INT8_OP is enabled, pass -DFUSION_CONVADDRELU_INT8_OP
# to the compiler so sources can test for the macro.
if
(
FUSION_CONVADDRELU_INT8_OP
)
add_definitions
(
-DFUSION_CONVADDRELU_INT8_OP
)
endif
()
if
(
FUSION_CONVADDPRELU_OP
)
if
(
FUSION_CONVADDPRELU_OP
)
add_definitions
(
-DFUSION_CONVADDPRELU_OP
)
add_definitions
(
-DFUSION_CONVADDPRELU_OP
)
endif
()
endif
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录