Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
8e11ee09
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
338
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
8e11ee09
编写于
10月 23, 2018
作者:
R
Ray Liu
提交者:
GitHub
10月 23, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into fill_constant_op-dev
上级
2e0a06d6
ac7b0bc2
变更
37
展开全部
显示空白变更内容
内联
并排
Showing
37 changed file
with
2491 addition
and
602 deletion
+2491
-602
src/common/variant.h
src/common/variant.h
+4
-2
src/framework/attribute.h
src/framework/attribute.h
+1
-1
src/framework/selected_rows.h
src/framework/selected_rows.h
+1
-1
src/framework/tensor.h
src/framework/tensor.h
+3
-1
src/io/executor.cpp
src/io/executor.cpp
+6
-4
src/operators/dequantize_op.cpp
src/operators/dequantize_op.cpp
+4
-0
src/operators/dequantize_op.h
src/operators/dequantize_op.h
+4
-0
src/operators/elementwise_mul_op.cpp
src/operators/elementwise_mul_op.cpp
+1
-1
src/operators/kernel/arm/dequantize_kernel.cpp
src/operators/kernel/arm/dequantize_kernel.cpp
+3
-2
src/operators/kernel/arm/quantize_kernel.cpp
src/operators/kernel/arm/quantize_kernel.cpp
+8
-7
src/operators/kernel/central-arm-func/conv_arm_func.h
src/operators/kernel/central-arm-func/conv_arm_func.h
+81
-21
src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
...erators/kernel/central-arm-func/depthwise_conv_arm_func.h
+1
-1
src/operators/kernel/central-arm-func/elementwise_add_arm_func.h
...rators/kernel/central-arm-func/elementwise_add_arm_func.h
+57
-0
src/operators/kernel/central-arm-func/relu_arm_func.h
src/operators/kernel/central-arm-func/relu_arm_func.h
+97
-65
src/operators/kernel/central-arm-func/sum_arm_func.h
src/operators/kernel/central-arm-func/sum_arm_func.h
+7
-16
src/operators/kernel/dequantize_kernel.h
src/operators/kernel/dequantize_kernel.h
+4
-0
src/operators/kernel/elementwise_mul_kernel.h
src/operators/kernel/elementwise_mul_kernel.h
+0
-2
src/operators/kernel/quantize_kernel.h
src/operators/kernel/quantize_kernel.h
+4
-0
src/operators/kernel/sum_kernel.h
src/operators/kernel/sum_kernel.h
+0
-2
src/operators/math/conv3x3_arm_int8.cpp
src/operators/math/conv3x3_arm_int8.cpp
+761
-0
src/operators/math/conv5x5_arm_int8.cpp
src/operators/math/conv5x5_arm_int8.cpp
+551
-0
src/operators/math/conv_arm_int8.h
src/operators/math/conv_arm_int8.h
+37
-0
src/operators/math/im2col.cpp
src/operators/math/im2col.cpp
+437
-393
src/operators/math/math_function.h
src/operators/math/math_function.h
+1
-0
src/operators/math/pad.cpp
src/operators/math/pad.cpp
+52
-0
src/operators/math/pad.h
src/operators/math/pad.h
+31
-0
src/operators/math/vol2col.cpp
src/operators/math/vol2col.cpp
+2
-59
src/operators/op_param.h
src/operators/op_param.h
+6
-4
src/operators/quantize_op.cpp
src/operators/quantize_op.cpp
+4
-0
src/operators/quantize_op.h
src/operators/quantize_op.h
+4
-0
src/operators/sum_op.cpp
src/operators/sum_op.cpp
+1
-1
test/CMakeLists.txt
test/CMakeLists.txt
+4
-0
test/net/test_googlenet.cpp
test/net/test_googlenet.cpp
+11
-7
test/operators/test_dequantize_op.cpp
test/operators/test_dequantize_op.cpp
+1
-1
test/operators/test_int8_conv_op.cpp
test/operators/test_int8_conv_op.cpp
+279
-0
test/operators/test_quantize_op.cpp
test/operators/test_quantize_op.cpp
+14
-11
tools/op.cmake
tools/op.cmake
+9
-0
未找到文件。
src/common/variant.h
浏览文件 @
8e11ee09
...
@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#include <cstdlib>
#include <cstdlib>
#include <cstring>
#include <cstring>
#include <string>
#include "common/enforce.h"
#include "common/enforce.h"
#include "common/log.h"
#include "common/log.h"
#pragma once
namespace
paddle_mobile
{
namespace
paddle_mobile
{
template
<
int
ID
,
typename
Type
>
template
<
int
ID
,
typename
Type
>
struct
IDToType
{
struct
IDToType
{
typedef
Type
type_t
;
typedef
Type
type_t
;
...
...
src/framework/attribute.h
浏览文件 @
8e11ee09
...
@@ -156,7 +156,7 @@ class AttrReader {
...
@@ -156,7 +156,7 @@ class AttrReader {
template
<
typename
T
>
template
<
typename
T
>
inline
T
Get
(
const
string
&
name
)
const
{
inline
T
Get
(
const
string
&
name
)
const
{
PADDLE_MOBILE_ENFORCE
(
attrs_
.
count
(
name
)
!=
0
,
PADDLE_MOBILE_ENFORCE
(
attrs_
.
count
(
name
)
!=
0
,
"%s should be in AttributeMap"
,
name
);
"%s should be in AttributeMap"
,
name
.
c_str
()
);
return
((
Attribute
)
attrs_
.
at
(
name
)).
Get
<
T
>
();
return
((
Attribute
)
attrs_
.
at
(
name
)).
Get
<
T
>
();
}
}
...
...
src/framework/selected_rows.h
浏览文件 @
8e11ee09
...
@@ -18,9 +18,9 @@ limitations under the License. */
...
@@ -18,9 +18,9 @@ limitations under the License. */
#include <vector>
#include <vector>
#include "framework/lod_tensor.h"
#include "framework/lod_tensor.h"
#include "framework/mixed_vector.h"
#include "framework/tensor.h"
#include "framework/tensor.h"
#include "memory/t_malloc.h"
#include "memory/t_malloc.h"
#include "mixed_vector.h"
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
framework
{
namespace
framework
{
...
...
src/framework/tensor.h
浏览文件 @
8e11ee09
...
@@ -343,7 +343,9 @@ inline Print &operator<<(Print &printer, const Tensor &tensor) {
...
@@ -343,7 +343,9 @@ inline Print &operator<<(Print &printer, const Tensor &tensor) {
}
else
if
(
tensor
.
type
()
==
typeid
(
int64_t
))
{
}
else
if
(
tensor
.
type
()
==
typeid
(
int64_t
))
{
printer
<<
tensor
.
data
<
int64_t
>
()[
i
]
<<
" "
;
printer
<<
tensor
.
data
<
int64_t
>
()[
i
]
<<
" "
;
}
else
if
(
tensor
.
type
()
==
typeid
(
int8_t
))
{
}
else
if
(
tensor
.
type
()
==
typeid
(
int8_t
))
{
printer
<<
static_cast
<
int32_t
>
(
tensor
.
data
<
int8_t
>
()[
i
])
<<
" "
;
printer
<<
static_cast
<
int
>
(
tensor
.
data
<
int8_t
>
()[
i
])
<<
" "
;
}
else
if
(
tensor
.
type
()
==
typeid
(
int32_t
))
{
printer
<<
tensor
.
data
<
int32_t
>
()[
i
]
<<
" "
;
}
}
}
}
#endif
#endif
...
...
src/io/executor.cpp
浏览文件 @
8e11ee09
...
@@ -80,12 +80,13 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
...
@@ -80,12 +80,13 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
}
}
template
<
typename
Dtype
>
template
<
typename
Dtype
>
void
LoadMemInternal
(
void
**
data
,
framework
::
LoDTensor
*
tensor
)
{
static
void
LoadMemInternal
(
void
**
data
,
framework
::
LoDTensor
*
tensor
,
bool
quant_uint8
=
false
)
{
char
**
data_buf
=
reinterpret_cast
<
char
**>
(
data
);
char
**
data_buf
=
reinterpret_cast
<
char
**>
(
data
);
int64_t
size
=
tensor
->
numel
();
int64_t
size
=
tensor
->
numel
();
Dtype
*
tensor_data
=
tensor
->
mutable_data
<
Dtype
>
();
Dtype
*
tensor_data
=
tensor
->
mutable_data
<
Dtype
>
();
if
(
0
)
{
if
(
quant_uint8
)
{
//
TODO(hjchen2)
should be moved into operator init function
// should be moved into operator init function
float
min_value
;
float
min_value
;
float
max_value
;
float
max_value
;
memcpy
(
&
min_value
,
data_buf
,
sizeof
(
float
));
memcpy
(
&
min_value
,
data_buf
,
sizeof
(
float
));
...
@@ -141,7 +142,8 @@ void Executor<Dtype, P>::LoadMemory(
...
@@ -141,7 +142,8 @@ void Executor<Dtype, P>::LoadMemory(
// parse tensor from stream
// parse tensor from stream
switch
(
tensor_desc
.
DataType
())
{
switch
(
tensor_desc
.
DataType
())
{
case
framework
::
VARTYPE_TYPE_FP32
:
case
framework
::
VARTYPE_TYPE_FP32
:
LoadMemInternal
<
float
>
(
reinterpret_cast
<
void
**>
(
data_buf
),
tensor
);
LoadMemInternal
<
float
>
(
reinterpret_cast
<
void
**>
(
data_buf
),
tensor
,
program_
.
quantification
);
break
;
break
;
case
framework
::
VARTYPE_TYPE_INT8
:
case
framework
::
VARTYPE_TYPE_INT8
:
LoadMemInternal
<
int8_t
>
(
reinterpret_cast
<
void
**>
(
data_buf
),
tensor
);
LoadMemInternal
<
int8_t
>
(
reinterpret_cast
<
void
**>
(
data_buf
),
tensor
);
...
...
src/operators/dequantize_op.cpp
浏览文件 @
8e11ee09
...
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef DEQUANT_OP
#include "operators/dequantize_op.h"
#include "operators/dequantize_op.h"
namespace
paddle_mobile
{
namespace
paddle_mobile
{
...
@@ -30,3 +32,5 @@ namespace ops = paddle_mobile::operators;
...
@@ -30,3 +32,5 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
dequantize
,
ops
::
DequantizeOp
);
REGISTER_OPERATOR_CPU
(
dequantize
,
ops
::
DequantizeOp
);
#endif
#endif
#endif
src/operators/dequantize_op.h
浏览文件 @
8e11ee09
...
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef DEQUANT_OP
#pragma once
#pragma once
#include <string>
#include <string>
...
@@ -41,3 +43,5 @@ class DequantizeOp
...
@@ -41,3 +43,5 @@ class DequantizeOp
}
// namespace operators
}
// namespace operators
}
// namespace paddle_mobile
}
// namespace paddle_mobile
#endif
src/operators/elementwise_mul_op.cpp
浏览文件 @
8e11ee09
...
@@ -14,7 +14,7 @@ limitations under the License. */
...
@@ -14,7 +14,7 @@ limitations under the License. */
#ifdef ELEMENTWISEMUL_OP
#ifdef ELEMENTWISEMUL_OP
#include "elementwise_mul_op.h"
#include "
operators/
elementwise_mul_op.h"
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
operators
{
namespace
operators
{
...
...
src/operators/kernel/arm/dequantize_kernel.cpp
浏览文件 @
8e11ee09
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef
PADDLE_MOBILE_CPU
#ifdef
DEQUANT_OP
#include "operators/kernel/dequantize_kernel.h"
#include "operators/kernel/dequantize_kernel.h"
...
@@ -38,7 +38,8 @@ void DequantizeKernel<CPU, float>::Compute(
...
@@ -38,7 +38,8 @@ void DequantizeKernel<CPU, float>::Compute(
const
int32_t
*
x
=
input
->
data
<
const
int32_t
>
();
const
int32_t
*
x
=
input
->
data
<
const
int32_t
>
();
float
*
y
=
output
->
mutable_data
<
float
>
();
float
*
y
=
output
->
mutable_data
<
float
>
();
size_t
size
=
output
->
numel
();
size_t
size
=
output
->
numel
();
float
scale
=
1.
f
/
(
activation_scale
*
weight_scale
);
// float scale = 1.f / (activation_scale * weight_scale);
float
scale
=
activation_scale
/
weight_scale
;
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
size_t
loop
=
size
>>
4
;
size_t
loop
=
size
>>
4
;
size_t
remain
=
size
&
0xF
;
size_t
remain
=
size
&
0xF
;
...
...
src/operators/kernel/arm/quantize_kernel.cpp
浏览文件 @
8e11ee09
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef
PADDLE_MOBILE_CPU
#ifdef
QUANT_OP
#include "operators/kernel/quantize_kernel.h"
#include "operators/kernel/quantize_kernel.h"
#include <cmath>
#include <cmath>
...
@@ -225,7 +225,7 @@ static void quantize_round_to_nearest(const Tensor *input, const float scale,
...
@@ -225,7 +225,7 @@ static void quantize_round_to_nearest(const Tensor *input, const float scale,
const
float
*
x
=
input
->
data
<
const
float
>
();
const
float
*
x
=
input
->
data
<
const
float
>
();
int8_t
*
y
=
output
->
mutable_data
<
int8_t
>
();
int8_t
*
y
=
output
->
mutable_data
<
int8_t
>
();
size_t
size
=
input
->
numel
();
size_t
size
=
input
->
numel
();
#if
def
defined(__ARM_NEON__) || defined(__ARM_NEON)
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
size_t
loop
=
size
>>
4
;
size_t
loop
=
size
>>
4
;
size_t
remain
=
size
&
0xF
;
size_t
remain
=
size
&
0xF
;
for
(
size_t
i
=
0
;
i
<
loop
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
loop
;
++
i
)
{
...
@@ -280,17 +280,18 @@ void QuantizeKernel<CPU, float>::Compute(
...
@@ -280,17 +280,18 @@ void QuantizeKernel<CPU, float>::Compute(
}
}
max_abs
=
std
::
max
(
max_abs
,
1e-6
f
);
max_abs
=
std
::
max
(
max_abs
,
1e-6
f
);
// only support int8 currently
// only support int8 currently
float
online_
scale
=
127
/
max_abs
;
float
scale
=
127
/
max_abs
;
param
.
online_scale_
->
mutable_data
<
float
>
()[
0
]
=
online_scale
;
param
.
online_scale_
->
mutable_data
<
float
>
()[
0
]
=
max_abs
;
switch
(
param
.
round_type_
)
{
switch
(
param
.
round_type_
)
{
case
ROUND_NEAREST_TO_EVEN
:
case
ROUND_NEAREST_TO_EVEN
:
quantize_round_to_even
(
input
,
online_
scale
,
output
);
quantize_round_to_even
(
input
,
scale
,
output
);
break
;
break
;
case
ROUND_NEAREST_TOWARDS_ZERO
:
case
ROUND_NEAREST_TOWARDS_ZERO
:
quantize_round_to_zero
(
input
,
online_
scale
,
output
);
quantize_round_to_zero
(
input
,
scale
,
output
);
break
;
break
;
case
ROUND_NEAREST_AWAY_ZERO
:
case
ROUND_NEAREST_AWAY_ZERO
:
quantize_round_to_nearest
(
input
,
online_scale
,
output
);
quantize_round_to_nearest
(
input
,
scale
,
output
);
break
;
default:
default:
LOG
(
kLOG_ERROR
)
<<
"round type is not supported."
;
LOG
(
kLOG_ERROR
)
<<
"round type is not supported."
;
break
;
break
;
...
...
src/operators/kernel/central-arm-func/conv_arm_func.h
浏览文件 @
8e11ee09
...
@@ -16,24 +16,27 @@ limitations under the License. */
...
@@ -16,24 +16,27 @@ limitations under the License. */
#pragma once
#pragma once
#include <vector>
#include <vector>
#include "operators/math/conv_arm_int8.h"
#include "operators/math/conv_func.h"
#include "operators/math/conv_func.h"
#include "operators/math/depthwise_conv_3x3.h"
#include "operators/math/depthwise_conv_3x3.h"
#include "operators/math/im2col.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/math_function.h"
#include "operators/math/pad.h"
#include "operators/math/vol2col.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
#include "operators/op_param.h"
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
operators
{
namespace
operators
{
template
<
typename
Dtype
>
inline
void
ConvBasic
(
const
ConvParam
<
CPU
>
&
param
)
{
inline
void
ConvBasic
(
const
ConvParam
<
CPU
>
&
param
)
{
const
Tensor
*
input
=
param
.
Input
();
const
Tensor
*
input
=
param
.
Input
();
Tensor
filter
=
*
param
.
Filter
();
Tensor
filter
=
*
param
.
Filter
();
Tensor
*
output
=
param
.
Output
();
Tensor
*
output
=
param
.
Output
();
output
->
mutable_data
<
float
>
();
int
groups
=
param
.
Groups
();
int
groups
=
param
.
Groups
();
std
::
vector
<
int
>
strides
=
param
.
Strides
();
const
std
::
vector
<
int
>
strides
=
param
.
Strides
();
std
::
vector
<
int
>
paddings
=
param
.
Paddings
();
const
std
::
vector
<
int
>
paddings
=
param
.
Paddings
();
std
::
vector
<
int
>
dilations
=
param
.
Dilations
();
const
std
::
vector
<
int
>
dilations
=
param
.
Dilations
();
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
...
@@ -57,7 +60,7 @@ inline void ConvBasic(const ConvParam<CPU> ¶m) {
...
@@ -57,7 +60,7 @@ inline void ConvBasic(const ConvParam<CPU> ¶m) {
Tensor
col
;
Tensor
col
;
Tensor
col_matrix
;
Tensor
col_matrix
;
if
(
is_expand
)
{
if
(
is_expand
)
{
col
.
mutable_data
<
float
>
(
col_shape
);
col
.
mutable_data
<
Dtype
>
(
col_shape
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
}
...
@@ -76,8 +79,8 @@ inline void ConvBasic(const ConvParam<CPU> ¶m) {
...
@@ -76,8 +79,8 @@ inline void ConvBasic(const ConvParam<CPU> ¶m) {
int
in_step
=
static_cast
<
int
>
(
input
->
dims
()[
1
])
/
groups
;
int
in_step
=
static_cast
<
int
>
(
input
->
dims
()[
1
])
/
groups
;
int
out_step
=
static_cast
<
int
>
(
output
->
dims
()[
1
])
/
groups
;
int
out_step
=
static_cast
<
int
>
(
output
->
dims
()[
1
])
/
groups
;
math
::
Vol2ColFunctor
<
CPU
,
float
>
vol2col
;
math
::
Vol2ColFunctor
<
CPU
,
Dtype
>
vol2col
;
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
CPU
,
float
>
im2col
;
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
CPU
,
Dtype
>
im2col
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
).
Resize
(
input_shape
);
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
).
Resize
(
input_shape
);
...
@@ -96,6 +99,7 @@ inline void ConvBasic(const ConvParam<CPU> ¶m) {
...
@@ -96,6 +99,7 @@ inline void ConvBasic(const ConvParam<CPU> ¶m) {
std
::
vector
<
int
>
{
paddings
[
0
],
paddings
[
1
],
paddings
[
0
],
std
::
vector
<
int
>
{
paddings
[
0
],
paddings
[
1
],
paddings
[
0
],
paddings
[
1
]},
paddings
[
1
]},
&
col
);
&
col
);
}
else
if
(
data_dim
==
3U
)
{
}
else
if
(
data_dim
==
3U
)
{
// vol2col
// vol2col
vol2col
(
in_slice
,
dilations
,
strides
,
paddings
,
&
col
);
vol2col
(
in_slice
,
dilations
,
strides
,
paddings
,
&
col
);
...
@@ -104,15 +108,70 @@ inline void ConvBasic(const ConvParam<CPU> ¶m) {
...
@@ -104,15 +108,70 @@ inline void ConvBasic(const ConvParam<CPU> ¶m) {
// gemm
// gemm
Tensor
out_slice
=
out_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
out_slice
=
out_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
math
::
matmul
<
float
>
(
filter_slice
,
false
,
col_matrix
,
false
,
math
::
matmul
<
Dtype
>
(
filter_slice
,
false
,
col_matrix
,
false
,
static_cast
<
float
>
(
1
),
&
out_slice
,
static_cast
<
float
>
(
1
),
&
out_slice
,
static_cast
<
float
>
(
0
));
static_cast
<
float
>
(
0
));
}
}
}
}
}
}
inline
void
ConvCompute_int8
(
const
ConvParam
<
CPU
>
&
param
)
{
typedef
void
(
*
ConvFunc
)(
const
Tensor
&
input
,
const
Tensor
&
kernel
,
Tensor
*
output
);
static
ConvFunc
conv_funcs_table
[
7
][
5
]
=
{
{
0
,
0
,
0
,
0
,
0
},
// k = 1
{
0
,
0
,
0
,
0
,
0
},
{
conv3x3s1_int8
,
0
,
0
,
0
,
0
},
// k = 3
{
0
,
0
,
0
,
0
,
0
},
{
conv5x5s1_int8
,
0
,
0
,
0
,
0
},
// k = 5
{
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
},
// k = 7
};
const
Tensor
*
input
=
param
.
Input
();
Tensor
*
filter
=
param
.
Filter
();
Tensor
*
output
=
param
.
Output
();
int
groups
=
param
.
Groups
();
const
std
::
vector
<
int
>
&
strides
=
param
.
Strides
();
const
std
::
vector
<
int
>
&
paddings
=
param
.
Paddings
();
const
std
::
vector
<
int
>
&
dilations
=
param
.
Dilations
();
int
kernel_h
=
filter
->
dims
()[
2
];
int
kernel_w
=
filter
->
dims
()[
3
];
output
->
mutable_data
<
int32_t
>
();
ConvFunc
conv_func
=
0
;
if
(
strides
[
1
]
==
strides
[
0
]
&&
strides
[
1
]
<
6
&&
kernel_h
==
kernel_w
&&
kernel_h
<
8
&&
groups
==
1
&&
dilations
[
0
]
==
dilations
[
1
]
&&
dilations
[
1
]
==
1
)
{
conv_func
=
conv_funcs_table
[
kernel_h
-
1
][
strides
[
0
]
-
1
];
}
if
(
conv_func
)
{
int
batch_size
=
input
->
dims
()[
0
];
math
::
PadFunctor
<
CPU
,
int8_t
>
pad
;
Tensor
input_pad
;
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
);
Tensor
out_batch
=
output
->
Slice
(
i
,
i
+
1
);
if
(
paddings
[
0
]
==
0
&&
paddings
[
1
]
==
0
)
{
input_pad
=
in_batch
;
}
else
{
framework
::
DDim
pad_shape
=
in_batch
.
dims
();
pad_shape
[
2
]
+=
2
*
paddings
[
0
];
pad_shape
[
3
]
+=
2
*
paddings
[
1
];
input_pad
.
mutable_data
<
int8_t
>
(
pad_shape
);
pad
(
in_batch
,
paddings
[
0
],
paddings
[
1
],
&
input_pad
);
}
conv_func
(
input_pad
,
*
filter
,
&
out_batch
);
}
}
else
{
ConvBasic
<
int8_t
>
(
param
);
}
}
template
<
typename
P
>
template
<
typename
P
>
void
ConvCompute
(
const
ConvParam
<
CPU
>
&
param
)
{
void
ConvCompute
(
const
ConvParam
<
CPU
>
&
param
)
{
if
(
param
.
Input
()
->
type
()
==
typeid
(
int8_t
))
{
ConvCompute_int8
(
param
);
}
else
{
param
.
Output
()
->
mutable_data
<
float
>
();
if
(
param
.
Groups
()
==
param
.
Input
()
->
dims
()[
1
]
&&
if
(
param
.
Groups
()
==
param
.
Input
()
->
dims
()[
1
]
&&
param
.
Input
()
->
dims
()[
1
]
==
param
.
Output
()
->
dims
()[
1
]
&&
param
.
Input
()
->
dims
()[
1
]
==
param
.
Output
()
->
dims
()[
1
]
&&
param
.
Filter
()
->
dims
()[
2
]
==
param
.
Filter
()
->
dims
()[
3
]
&&
param
.
Filter
()
->
dims
()[
2
]
==
param
.
Filter
()
->
dims
()[
3
]
&&
...
@@ -126,7 +185,8 @@ void ConvCompute(const ConvParam<CPU> ¶m) {
...
@@ -126,7 +185,8 @@ void ConvCompute(const ConvParam<CPU> ¶m) {
math
::
DepthwiseConv3x3
(
param
.
Input
(),
param
.
Strides
(),
param
.
Paddings
(),
math
::
DepthwiseConv3x3
(
param
.
Input
(),
param
.
Strides
(),
param
.
Paddings
(),
param
.
Filter
(),
nullptr
,
param
.
Output
(),
false
);
param
.
Filter
(),
nullptr
,
param
.
Output
(),
false
);
}
else
{
}
else
{
ConvBasic
(
param
);
ConvBasic
<
float
>
(
param
);
}
}
}
}
}
...
...
src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
浏览文件 @
8e11ee09
...
@@ -44,7 +44,7 @@ void DepthwiseConvCompute(const ConvParam<CPU> ¶m) {
...
@@ -44,7 +44,7 @@ void DepthwiseConvCompute(const ConvParam<CPU> ¶m) {
Bias
,
false
);
Bias
,
false
);
}
else
{
}
else
{
ConvBasic
(
param
);
ConvBasic
<
float
>
(
param
);
}
}
}
}
...
...
src/operators/kernel/central-arm-func/elementwise_add_arm_func.h
浏览文件 @
8e11ee09
...
@@ -15,8 +15,12 @@ limitations under the License. */
...
@@ -15,8 +15,12 @@ limitations under the License. */
#ifdef ELEMENTWISEADD_OP
#ifdef ELEMENTWISEADD_OP
#pragma once
#pragma once
#include "operators/math/elementwise_op_function.h"
#include "operators/math/elementwise_op_function.h"
#include "operators/op_param.h"
#include "operators/op_param.h"
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#include <arm_neon.h>
#endif
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
operators
{
namespace
operators
{
...
@@ -33,8 +37,61 @@ void ElementwiseAddCompute(const ElementwiseAddParam<CPU> ¶m) {
...
@@ -33,8 +37,61 @@ void ElementwiseAddCompute(const ElementwiseAddParam<CPU> ¶m) {
Tensor
*
Out
=
param
.
Out
();
Tensor
*
Out
=
param
.
Out
();
Out
->
mutable_data
<
float
>
();
Out
->
mutable_data
<
float
>
();
int
axis
=
param
.
Axis
();
int
axis
=
param
.
Axis
();
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
const
auto
&
x_dims
=
input_x
->
dims
();
const
auto
&
y_dims
=
input_y
->
dims
();
/// axis = -1 represent the last dimensions.
axis
=
(
axis
==
-
1
?
x_dims
.
size
()
-
y_dims
.
size
()
:
axis
);
size_t
batch
=
1
;
size_t
channels
=
1
;
size_t
elementwise_num
=
1
;
for
(
int
i
=
0
;
i
<
axis
;
++
i
)
{
batch
*=
x_dims
[
i
];
}
for
(
int
i
=
0
;
i
<
y_dims
.
size
();
++
i
)
{
channels
*=
y_dims
[
i
];
}
for
(
int
i
=
y_dims
.
size
()
+
axis
;
i
<
x_dims
.
size
();
++
i
)
{
elementwise_num
*=
x_dims
[
i
];
}
const
float
*
bias_data
=
input_y
->
data
<
float
>
();
const
float
*
input_data
=
input_x
->
data
<
float
>
();
float
*
output_data
=
Out
->
mutable_data
<
float
>
();
for
(
int
i
=
0
;
i
<
batch
;
++
i
)
{
for
(
int
j
=
0
;
j
<
channels
;
++
j
)
{
size_t
offset
=
(
i
*
channels
+
j
)
*
elementwise_num
;
const
float
*
input
=
input_data
+
offset
;
const
float
*
bias
=
bias_data
+
j
;
float
*
output
=
output_data
+
offset
;
int
loop
=
elementwise_num
>>
0x4
;
int
remain
=
elementwise_num
&
0xF
;
for
(
int
k
=
0
;
k
<
loop
;
++
k
)
{
float32x4_t
rb
=
vdupq_n_f32
(
*
bias
);
float32x4_t
r0
=
vld1q_f32
(
input
);
float32x4_t
r1
=
vld1q_f32
(
input
+
4
);
float32x4_t
r2
=
vld1q_f32
(
input
+
8
);
float32x4_t
r3
=
vld1q_f32
(
input
+
12
);
r0
=
vaddq_f32
(
r0
,
rb
);
r1
=
vaddq_f32
(
r1
,
rb
);
r2
=
vaddq_f32
(
r2
,
rb
);
r3
=
vaddq_f32
(
r3
,
rb
);
vst1q_f32
(
output
,
r0
);
vst1q_f32
(
output
+
4
,
r1
);
vst1q_f32
(
output
+
8
,
r2
);
vst1q_f32
(
output
+
12
,
r3
);
input
+=
16
;
output
+=
16
;
}
for
(
int
k
=
0
;
k
<
remain
;
++
k
)
{
output
[
k
]
=
input
[
k
]
+
*
bias
;
}
}
}
#else
ElementwiseComputeEx
<
AddFunctor
<
float
>
,
float
>
(
input_x
,
input_y
,
axis
,
ElementwiseComputeEx
<
AddFunctor
<
float
>
,
float
>
(
input_x
,
input_y
,
axis
,
AddFunctor
<
float
>
(),
Out
);
AddFunctor
<
float
>
(),
Out
);
#endif
}
}
template
class
ElementwiseAddKernel
<
CPU
,
float
>;
template
class
ElementwiseAddKernel
<
CPU
,
float
>;
...
...
src/operators/kernel/central-arm-func/relu_arm_func.h
浏览文件 @
8e11ee09
...
@@ -17,6 +17,9 @@ limitations under the License. */
...
@@ -17,6 +17,9 @@ limitations under the License. */
#include <operators/math/transform.h>
#include <operators/math/transform.h>
#include "operators/op_param.h"
#include "operators/op_param.h"
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#include <arm_neon.h>
#endif
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
operators
{
namespace
operators
{
...
@@ -37,71 +40,100 @@ void ReluCompute(const ReluParam<CPU> ¶m) {
...
@@ -37,71 +40,100 @@ void ReluCompute(const ReluParam<CPU> ¶m) {
auto
*
out_ptr
=
out
->
mutable_data
<
float
>
();
auto
*
out_ptr
=
out
->
mutable_data
<
float
>
();
int
numel
=
input_x
->
numel
();
int
numel
=
input_x
->
numel
();
// if (numel > 64) {
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
// asm volatile(
#if __aarch64__
// "pld [%[input_x_ptr], #0] \n\t"
if
(
numel
>
0
)
{
// "vmov.f32 q8, #0.0 \n\t"
int
loop
=
numel
>>
0x4
;
// "subs %[num], %[num], #32 \n\t"
int
remain
=
numel
&
0xF
;
// "blt end_num_%= \n\t"
float32x4_t
zero
=
vdupq_n_f32
(
0.
f
);
// "loop_num_%=: \n\t"
for
(
int
i
=
0
;
i
<
loop
;
++
i
)
{
// "pld [%[input_x_ptr], #1024] \n\t"
float32x4_t
r0
=
vld1q_f32
(
input_x_ptr
);
//
float32x4_t
r1
=
vld1q_f32
(
input_x_ptr
+
4
);
// "vld1.32 {q0, q1}, [%[input_x_ptr]]! \n\t"
float32x4_t
r2
=
vld1q_f32
(
input_x_ptr
+
8
);
// "vld1.32 {q2, q3}, [%[input_x_ptr]]! \n\t"
float32x4_t
r3
=
vld1q_f32
(
input_x_ptr
+
12
);
// "vld1.32 {q4, q5}, [%[input_x_ptr]]! \n\t"
r0
=
vmaxq_f32
(
r0
,
zero
);
// "vld1.32 {q6, q7}, [%[input_x_ptr]]! \n\t"
r1
=
vmaxq_f32
(
r1
,
zero
);
//
r2
=
vmaxq_f32
(
r2
,
zero
);
// "vmax.f32 q0, q0, q8 \n\t"
r3
=
vmaxq_f32
(
r3
,
zero
);
// "vmax.f32 q1, q1, q8 \n\t"
vst1q_f32
(
out_ptr
,
r0
);
// "vmax.f32 q2, q2, q8 \n\t"
vst1q_f32
(
out_ptr
+
4
,
r1
);
// "vmax.f32 q3, q3, q8 \n\t"
vst1q_f32
(
out_ptr
+
8
,
r2
);
// "vmax.f32 q4, q4, q8 \n\t"
vst1q_f32
(
out_ptr
+
12
,
r3
);
// "vmax.f32 q5, q5, q8 \n\t"
input_x_ptr
+=
16
;
// "vmax.f32 q6, q6, q8 \n\t"
out_ptr
+=
16
;
// "vmax.f32 q7, q7, q8 \n\t"
}
//
for
(
int
i
=
0
;
i
<
remain
;
++
i
)
{
// "vst1.32 {q0, q1}, [%[out_ptr]]! \n\t"
out_ptr
[
i
]
=
(
input_x_ptr
[
i
]
>
0
)
*
input_x_ptr
[
i
];
// "vst1.32 {q2, q3}, [%[out_ptr]]! \n\t"
}
// "vst1.32 {q4, q5}, [%[out_ptr]]! \n\t"
#else
// "vst1.32 {q6, q7}, [%[out_ptr]]! \n\t"
if
(
numel
>
64
)
{
//
asm
volatile
(
// "subs %[num], %[num], #32 \n\t"
"pld [%[input_x_ptr], #0]
\n\t
"
// "bge loop_num_%= \n\t"
"vmov.f32 q8, #0.0
\n\t
"
// "end_num_%=: \n\t"
"subs %[num], %[num], #32
\n\t
"
// "cmp %[num], #0 \n\t"
"blt end_num_%=
\n\t
"
// "bge end_%= \n\t"
"loop_num_%=:
\n\t
"
// "mov r6, #4 \n\t"
"pld [%[input_x_ptr], #1024]
\n\t
"
// "mul r5, %[num], r6 \n\t"
// "add %[input_x_ptr], %[input_x_ptr], r5 \n\t"
"vld1.32 {q0, q1}, [%[input_x_ptr]]!
\n\t
"
// "vld1.32 {q0, q1}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q2, q3}, [%[input_x_ptr]]!
\n\t
"
// "vld1.32 {q2, q3}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q4, q5}, [%[input_x_ptr]]!
\n\t
"
// "vld1.32 {q4, q5}, [%[input_x_ptr]]! \n\t"
"vld1.32 {q6, q7}, [%[input_x_ptr]]!
\n\t
"
// "vld1.32 {q6, q7}, [%[input_x_ptr]]! \n\t"
// "vmax.f32 q0, q0, q8 \n\t"
"vmax.f32 q0, q0, q8
\n\t
"
// "vmax.f32 q1, q1, q8 \n\t"
"vmax.f32 q1, q1, q8
\n\t
"
// "vmax.f32 q2, q2, q8 \n\t"
"vmax.f32 q2, q2, q8
\n\t
"
// "vmax.f32 q3, q3, q8 \n\t"
"vmax.f32 q3, q3, q8
\n\t
"
// "vmax.f32 q4, q4, q8 \n\t"
"vmax.f32 q4, q4, q8
\n\t
"
// "vmax.f32 q5, q5, q8 \n\t"
"vmax.f32 q5, q5, q8
\n\t
"
// "vmax.f32 q6, q6, q8 \n\t"
"vmax.f32 q6, q6, q8
\n\t
"
// "vmax.f32 q7, q7, q8 \n\t"
"vmax.f32 q7, q7, q8
\n\t
"
// "add %[out_ptr], %[out_ptr], r5 \n\t"
// "vst1.32 {q0, q1}, [%[out_ptr]]! \n\t"
"vst1.32 {q0, q1}, [%[out_ptr]]!
\n\t
"
// "vst1.32 {q2, q3}, [%[out_ptr]]! \n\t"
"vst1.32 {q2, q3}, [%[out_ptr]]!
\n\t
"
// "vst1.32 {q4, q5}, [%[out_ptr]]! \n\t"
"vst1.32 {q4, q5}, [%[out_ptr]]!
\n\t
"
// "vst1.32 {q6, q7}, [%[out_ptr]]! \n\t"
"vst1.32 {q6, q7}, [%[out_ptr]]!
\n\t
"
// "end_%=: \n\t"
// :
"subs %[num], %[num], #32
\n\t
"
// :
"bge loop_num_%=
\n\t
"
// [out_ptr] "r"(out_ptr), [input_x_ptr] "r"(input_x_ptr), [num]
"end_num_%=:
\n\t
"
// "r"(numel) : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6",
"cmp %[num], #0
\n\t
"
// "q7", "q8", "r5",
"bge end_%=
\n\t
"
// "r6");
"mov r6, #4
\n\t
"
// } else {
"mul r5, %[num], r6
\n\t
"
"add %[input_x_ptr], %[input_x_ptr], r5
\n\t
"
"vld1.32 {q0, q1}, [%[input_x_ptr]]!
\n\t
"
"vld1.32 {q2, q3}, [%[input_x_ptr]]!
\n\t
"
"vld1.32 {q4, q5}, [%[input_x_ptr]]!
\n\t
"
"vld1.32 {q6, q7}, [%[input_x_ptr]]!
\n\t
"
"vmax.f32 q0, q0, q8
\n\t
"
"vmax.f32 q1, q1, q8
\n\t
"
"vmax.f32 q2, q2, q8
\n\t
"
"vmax.f32 q3, q3, q8
\n\t
"
"vmax.f32 q4, q4, q8
\n\t
"
"vmax.f32 q5, q5, q8
\n\t
"
"vmax.f32 q6, q6, q8
\n\t
"
"vmax.f32 q7, q7, q8
\n\t
"
"add %[out_ptr], %[out_ptr], r5
\n\t
"
"vst1.32 {q0, q1}, [%[out_ptr]]!
\n\t
"
"vst1.32 {q2, q3}, [%[out_ptr]]!
\n\t
"
"vst1.32 {q4, q5}, [%[out_ptr]]!
\n\t
"
"vst1.32 {q6, q7}, [%[out_ptr]]!
\n\t
"
"end_%=:
\n\t
"
:
:
[
out_ptr
]
"r"
(
out_ptr
),
[
input_x_ptr
]
"r"
(
input_x_ptr
),
[
num
]
"r"
(
numel
)
:
"memory"
,
"q0"
,
"q1"
,
"q2"
,
"q3"
,
"q4"
,
"q5"
,
"q6"
,
"q7"
,
"q8"
,
"r5"
,
"r6"
);
#endif
}
else
{
#endif
ReluFunctor
<
float
>
func_
;
ReluFunctor
<
float
>
func_
;
math
::
Transform
trans
;
math
::
Transform
trans
;
trans
(
input_x_ptr
,
input_x_ptr
+
numel
,
out_ptr
,
func_
);
trans
(
input_x_ptr
,
input_x_ptr
+
numel
,
out_ptr
,
func_
);
// }
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
}
#endif
}
}
}
// namespace operators
}
// namespace operators
}
// namespace paddle_mobile
}
// namespace paddle_mobile
...
...
src/operators/kernel/central-arm-func/sum_arm_func.h
浏览文件 @
8e11ee09
...
@@ -15,11 +15,14 @@ limitations under the License. */
...
@@ -15,11 +15,14 @@ limitations under the License. */
#ifdef SUM_OP
#ifdef SUM_OP
#pragma once
#pragma once
#include <vector>
#include "operators/math/selected_rows_functor.h"
#include "operators/math/selected_rows_functor.h"
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
operators
{
namespace
operators
{
using
LoDTensorArray
=
std
::
vector
<
LoDTensor
>
;
using
LoDTensorArray
=
std
::
vector
<
LoDTensor
>
;
template
<
typename
P
>
template
<
typename
P
>
void
SumCompute
(
const
SumParam
<
CPU
>
&
param
)
{
void
SumCompute
(
const
SumParam
<
CPU
>
&
param
)
{
auto
inputsvars
=
param
.
InputsVars
();
auto
inputsvars
=
param
.
InputsVars
();
...
@@ -63,31 +66,21 @@ void SumCompute(const SumParam<CPU> ¶m) {
...
@@ -63,31 +66,21 @@ void SumCompute(const SumParam<CPU> ¶m) {
std
::
unique_ptr
<
framework
::
SelectedRows
>
in0
;
std
::
unique_ptr
<
framework
::
SelectedRows
>
in0
;
if
(
in_place
)
{
if
(
in_place
)
{
// If is in_place, we store the input[0] to in0
// If is in_place, we store the input[0] to in0
auto
*
in_sel0
=
inputsvars
[
0
]
->
Get
<
SelectedRows
>
();
auto
*
in_sel0
=
inputsvars
[
0
]
->
Get
<
framework
::
SelectedRows
>
();
auto
&
rows
=
in_sel0
->
rows
();
auto
&
rows
=
in_sel0
->
rows
();
//#ifdef PADDLE_WITH_CUDA
// std::vector<int64_t> rows_in_cpu;
// rows_in_cpu.reserve(rows.size());
// for (auto item : rows) {
// rows_in_cpu.push_back(item);
// }
// in0.reset(new framework::SelectedRows(rows_in_cpu,
// in_sel0.height()));
//#else
in0
.
reset
(
new
framework
::
SelectedRows
(
rows
,
in_sel0
->
height
()));
in0
.
reset
(
new
framework
::
SelectedRows
(
rows
,
in_sel0
->
height
()));
//#endif
in0
->
mutable_value
()
->
ShareDataWith
(
in_sel0
->
value
());
in0
->
mutable_value
()
->
ShareDataWith
(
in_sel0
->
value
());
}
}
auto
get_selected_row
=
[
&
](
size_t
i
)
->
const
SelectedRows
&
{
auto
get_selected_row
=
[
&
](
size_t
i
)
->
const
framework
::
SelectedRows
&
{
if
(
i
==
0
&&
in0
)
{
if
(
i
==
0
&&
in0
)
{
return
*
in0
.
get
();
return
*
in0
.
get
();
}
else
{
}
else
{
return
*
(
inputsvars
[
i
]
->
Get
<
SelectedRows
>
());
return
*
(
inputsvars
[
i
]
->
Get
<
framework
::
SelectedRows
>
());
}
}
};
};
auto
*
out
=
outvar
->
GetMutable
<
SelectedRows
>
();
auto
*
out
=
outvar
->
GetMutable
<
framework
::
SelectedRows
>
();
out
->
mutable_rows
()
->
clear
();
out
->
mutable_rows
()
->
clear
();
auto
*
out_value
=
out
->
mutable_value
();
auto
*
out_value
=
out
->
mutable_value
();
...
@@ -150,8 +143,6 @@ void SumCompute(const SumParam<CPU> ¶m) {
...
@@ -150,8 +143,6 @@ void SumCompute(const SumParam<CPU> ¶m) {
}
}
}
}
}
else
{
}
else
{
if
(
outvar
->
IsType
<
framework
::
Tensor
>
())
{
}
PADDLE_MOBILE_THROW_EXCEPTION
(
PADDLE_MOBILE_THROW_EXCEPTION
(
"Unexpected branch, output variable type is %s"
,
outvar
->
Type
().
name
());
"Unexpected branch, output variable type is %s"
,
outvar
->
Type
().
name
());
}
}
...
...
src/operators/kernel/dequantize_kernel.h
浏览文件 @
8e11ee09
...
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef DEQUANT_OP
#pragma once
#pragma once
#include "framework/operator.h"
#include "framework/operator.h"
...
@@ -30,3 +32,5 @@ class DequantizeKernel
...
@@ -30,3 +32,5 @@ class DequantizeKernel
}
// namespace operators
}
// namespace operators
}
// namespace paddle_mobile
}
// namespace paddle_mobile
#endif
src/operators/kernel/elementwise_mul_kernel.h
浏览文件 @
8e11ee09
...
@@ -23,8 +23,6 @@ limitations under the License. */
...
@@ -23,8 +23,6 @@ limitations under the License. */
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
operators
{
namespace
operators
{
using
namespace
framework
;
template
<
typename
DeviceType
,
typename
T
>
template
<
typename
DeviceType
,
typename
T
>
class
ElementwiseMulKernel
class
ElementwiseMulKernel
:
public
framework
::
OpKernelBase
<
DeviceType
,
:
public
framework
::
OpKernelBase
<
DeviceType
,
...
...
src/operators/kernel/quantize_kernel.h
浏览文件 @
8e11ee09
...
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef QUANT_OP
#pragma once
#pragma once
#include "framework/operator.h"
#include "framework/operator.h"
...
@@ -30,3 +32,5 @@ class QuantizeKernel
...
@@ -30,3 +32,5 @@ class QuantizeKernel
}
// namespace operators
}
// namespace operators
}
// namespace paddle_mobile
}
// namespace paddle_mobile
#endif
src/operators/kernel/sum_kernel.h
浏览文件 @
8e11ee09
...
@@ -21,8 +21,6 @@ limitations under the License. */
...
@@ -21,8 +21,6 @@ limitations under the License. */
namespace
paddle_mobile
{
namespace
paddle_mobile
{
namespace
operators
{
namespace
operators
{
using
namespace
framework
;
template
<
typename
DeviceType
,
typename
T
>
template
<
typename
DeviceType
,
typename
T
>
class
SumKernel
class
SumKernel
:
public
framework
::
OpKernelBase
<
DeviceType
,
SumParam
<
DeviceType
>>
{
:
public
framework
::
OpKernelBase
<
DeviceType
,
SumParam
<
DeviceType
>>
{
...
...
src/operators/math/conv3x3_arm_int8.cpp
0 → 100644
浏览文件 @
8e11ee09
此差异已折叠。
点击以展开。
src/operators/math/conv5x5_arm_int8.cpp
0 → 100644
浏览文件 @
8e11ee09
此差异已折叠。
点击以展开。
src/operators/math/conv_arm_int8.h
0 → 100644
浏览文件 @
8e11ee09
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#pragma once
#include "framework/tensor.h"
namespace paddle_mobile {
namespace operators {

// Entry points for the int8 convolution kernels. Only the declarations live
// in this header. Each function takes `input` and `weight` by const
// reference and receives `output` as a mutable pointer.
//
// NOTE(review): judging by the names these are 3x3 and 5x5 stride-1 kernels
// whose bodies live in conv3x3_arm_int8.cpp / conv5x5_arm_int8.cpp; the
// expected tensor layouts and element types are not visible from here —
// confirm against the implementations.

// NOTE(review): presumably the 3x3, stride-1 int8 convolution — confirm.
void conv3x3s1_int8(const framework::Tensor &input,
                    const framework::Tensor &weight, framework::Tensor *output);

// NOTE(review): presumably a variant of conv3x3s1_int8; the meaning of the
// "_4c" suffix is not established by this header — confirm.
void conv3x3s1_int8_4c(const framework::Tensor &input,
                       const framework::Tensor &weight,
                       framework::Tensor *output);

// NOTE(review): presumably the 5x5, stride-1 int8 convolution — confirm.
void conv5x5s1_int8(const framework::Tensor &input,
                    const framework::Tensor &weight, framework::Tensor *output);

}  // namespace operators
}  // namespace paddle_mobile
#endif
src/operators/math/im2col.cpp
浏览文件 @
8e11ee09
...
@@ -28,15 +28,11 @@ namespace math {
...
@@ -28,15 +28,11 @@ namespace math {
* [input_channels, filter_height, filter_width, output_height,
* [input_channels, filter_height, filter_width, output_height,
* output_width]
* output_width]
*/
*/
template
<
class
T
>
template
<
>
class
Im2ColFunctor
<
ColFormat
::
kCFO
,
CPU
,
T
>
{
void
Im2ColFunctor
<
ColFormat
::
kCFO
,
CPU
,
float
>::
operator
()(
public:
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>
&
dilation
,
void
operator
()(
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>
&
dilation
,
const
std
::
vector
<
int
>
&
stride
,
const
std
::
vector
<
int
>
&
padding
,
const
std
::
vector
<
int
>
&
stride
,
framework
::
Tensor
*
col
)
{
const
std
::
vector
<
int
>
&
padding
,
framework
::
Tensor
*
col
)
{
// PADDLE_ENFORCE(im.dims().size() == 3);
// PADDLE_ENFORCE(col->dims().size() == 5);
int
im_channels
=
im
.
dims
()[
0
];
int
im_channels
=
im
.
dims
()[
0
];
int
im_height
=
im
.
dims
()[
1
];
int
im_height
=
im
.
dims
()[
1
];
int
im_width
=
im
.
dims
()[
2
];
int
im_width
=
im
.
dims
()[
2
];
...
@@ -45,30 +41,9 @@ class Im2ColFunctor<ColFormat::kCFO, CPU, T> {
...
@@ -45,30 +41,9 @@ class Im2ColFunctor<ColFormat::kCFO, CPU, T> {
int
col_height
=
col
->
dims
()[
3
];
int
col_height
=
col
->
dims
()[
3
];
int
col_width
=
col
->
dims
()[
4
];
int
col_width
=
col
->
dims
()[
4
];
// PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2]
// -
// ((dilation[0] * (filter_height - 1)
// + 1))) /
// stride[0] +
// 1,
// col_height,
// "Output_height and
// padding(padding_up, padding_down)
// are " "inconsistent.");
// PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3]
// -
// ((dilation[1] * (filter_width - 1)
// + 1))) /
// stride[1] +
// 1,
// col_width,
// "Output_height and
// padding(padding_up, padding_down)
// are " "inconsistent.");
int
channels_col
=
im_channels
*
filter_height
*
filter_width
;
int
channels_col
=
im_channels
*
filter_height
*
filter_width
;
const
T
*
im_data
=
im
.
data
<
T
>
();
const
float
*
im_data
=
im
.
data
<
float
>
();
T
*
col_data
=
col
->
data
<
T
>
();
float
*
col_data
=
col
->
data
<
float
>
();
#if __ARM_NEON
#if __ARM_NEON
const
int
osize
=
col_height
;
const
int
osize
=
col_height
;
const
int
isize
=
im_height
;
const
int
isize
=
im_height
;
...
@@ -249,8 +224,8 @@ class Im2ColFunctor<ColFormat::kCFO, CPU, T> {
...
@@ -249,8 +224,8 @@ class Im2ColFunctor<ColFormat::kCFO, CPU, T> {
col_data
+=
9
*
oosize
;
col_data
+=
9
*
oosize
;
im_data
+=
isize
*
isize
;
im_data
+=
isize
*
isize
;
}
}
}
else
if
(
stride
[
0
]
==
2
&&
filter_height
==
3
&&
pad
1
&&
}
else
if
(
stride
[
0
]
==
2
&&
filter_height
==
3
&&
pad1
&&
dilation
[
0
]
==
1
&&
dilation
[
0
]
==
1
&&
im_height
>
2
)
{
im_height
>
2
)
{
for
(
int
c
=
0
;
c
<
im_channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
im_channels
;
++
c
)
{
int
oosize
=
osize
*
osize
;
int
oosize
=
osize
*
osize
;
int
nk4
=
osize
/
4
;
int
nk4
=
osize
/
4
;
...
@@ -396,15 +371,13 @@ class Im2ColFunctor<ColFormat::kCFO, CPU, T> {
...
@@ -396,15 +371,13 @@ class Im2ColFunctor<ColFormat::kCFO, CPU, T> {
for
(
int
h
=
0
;
h
<
col_height
;
++
h
)
{
for
(
int
h
=
0
;
h
<
col_height
;
++
h
)
{
int
im_row_idx
=
h
*
stride
[
0
]
-
padding
[
0
]
+
h_offset
*
dilation
[
0
];
int
im_row_idx
=
h
*
stride
[
0
]
-
padding
[
0
]
+
h_offset
*
dilation
[
0
];
for
(
int
w
=
0
;
w
<
col_width
;
++
w
)
{
for
(
int
w
=
0
;
w
<
col_width
;
++
w
)
{
int
im_col_idx
=
int
im_col_idx
=
w
*
stride
[
1
]
-
padding
[
1
]
+
w_offset
*
dilation
[
1
];
w
*
stride
[
1
]
-
padding
[
1
]
+
w_offset
*
dilation
[
1
];
int
col_idx
=
(
c
*
col_height
+
h
)
*
col_width
+
w
;
int
col_idx
=
(
c
*
col_height
+
h
)
*
col_width
+
w
;
int
im_idx
=
int
im_idx
=
(
im_row_idx
+
c_im
*
im_height
)
*
im_width
+
im_col_idx
;
(
im_row_idx
+
c_im
*
im_height
)
*
im_width
+
im_col_idx
;
col_data
[
col_idx
]
=
(
im_row_idx
<
0
||
im_row_idx
>=
im_height
||
col_data
[
col_idx
]
=
(
im_row_idx
<
0
||
im_row_idx
>=
im_height
||
im_col_idx
<
0
||
im_col_idx
>=
im_width
)
im_col_idx
<
0
||
im_col_idx
>=
im_width
)
?
static_cast
<
T
>
(
0
)
?
static_cast
<
float
>
(
0
)
:
im_data
[
im_idx
];
:
im_data
[
im_idx
];
}
}
}
}
...
@@ -424,14 +397,138 @@ class Im2ColFunctor<ColFormat::kCFO, CPU, T> {
...
@@ -424,14 +397,138 @@ class Im2ColFunctor<ColFormat::kCFO, CPU, T> {
col_data
[
col_idx
]
=
(
im_row_idx
<
0
||
im_row_idx
>=
im_height
||
col_data
[
col_idx
]
=
(
im_row_idx
<
0
||
im_row_idx
>=
im_height
||
im_col_idx
<
0
||
im_col_idx
>=
im_width
)
im_col_idx
<
0
||
im_col_idx
>=
im_width
)
?
static_cast
<
T
>
(
0
)
?
static_cast
<
float
>
(
0
)
:
im_data
[
im_idx
];
:
im_data
[
im_idx
];
}
}
}
}
}
}
#endif
#endif
}
/*
 * Copies the samples selected by one kernel tap (kh, kw) from a single
 * im_height x im_width image plane into a single col_height x col_width
 * plane of the column buffer.
 *
 * Only output positions whose source sample lies inside the image are
 * written; positions that map to padding are left untouched, so the plane
 * must already hold the padding value (the int8 Im2Col caller zero-fills the
 * whole column buffer before calling this function).
 *
 * im_data               start of the input plane
 * col_data              start of the output plane
 * padding_h, padding_w  padding in front of the first row / column
 * stride_h, stride_w    vertical / horizontal stride; stride_w must be one
 *                       of 1, 2, 3 or 4, otherwise an exception is thrown
 *                       as soon as a row is processed
 * kh, kw                row / column index of the kernel tap
 */
void ExtractToImg(const int8_t *im_data, int8_t *col_data, const int im_height,
                  const int im_width, const int col_height,
                  const int col_width, const int padding_h,
                  const int padding_w, const int stride_h, const int stride_w,
                  const int kh, const int kw) {
  // Distance from the tap to the first in-image row / column.
  int h = padding_h - kh;
  int w = padding_w - kw;
  // First output row / column whose source sample is inside the image.
  int col_start_height = h > 0 ? (h + stride_h - 1) / stride_h : 0;
  int col_start_width = w > 0 ? (w + stride_w - 1) / stride_w : 0;
  // Matching first source row / column.
  int start_height = kh + col_start_height * stride_h - padding_h;
  int start_width = kw + col_start_width * stride_w - padding_w;

  // One-past-the-last source row / column, clipped to the image.
  int end_height = (col_height - col_start_height) * stride_h + start_height;
  end_height = end_height > im_height ? im_height : end_height;
  int end_width = (col_width - col_start_width) * stride_w + start_width;
  end_width = end_width > im_width ? im_width : end_width;

  // Number of samples to copy per row.
  int extract = (end_width - start_width + stride_w - 1) / stride_w;
  // A tap that starts to the right of the image (start_width > im_width)
  // makes the count negative. Clamp it so the stride-1 memcpy below is not
  // handed a negative length converted to a huge size_t.
  extract = extract < 0 ? 0 : extract;

  im_data += start_height * im_width + start_width;
  col_data += col_start_height * col_width + col_start_width;
  for (int i = start_height; i < end_height; i += stride_h) {
    if (stride_w == 1) {
      memcpy(col_data, im_data, extract * sizeof(int8_t));
    } else if (stride_w == 2) {
      int s = 0;
#if __ARM_NEON
      // De-interleaving load: val[0] holds every 2nd byte.
      // NOTE(review): the load reads 32 bytes although the last byte needed
      // is at offset 30, so the final block of a row may read one byte past
      // the last sample — confirm the buffer always has that slack.
      for (; s < extract - 15; s += 16) {
        int8x16x2_t img = vld2q_s8(im_data + s * 2);
        vst1q_s8(col_data + s, img.val[0]);
      }
#endif
      for (; s < extract; ++s) {
        col_data[s] = im_data[s * 2];
      }
    } else if (stride_w == 3) {
      int s = 0;
#if __ARM_NEON
      // De-interleaving load: val[0] holds every 3rd byte.
      // NOTE(review): same trailing over-read concern as above (2 bytes).
      for (; s < extract - 15; s += 16) {
        int8x16x3_t img = vld3q_s8(im_data + s * 3);
        vst1q_s8(col_data + s, img.val[0]);
      }
#endif
      for (; s < extract; ++s) {
        col_data[s] = im_data[s * 3];
      }
    } else if (stride_w == 4) {
      int s = 0;
#if __ARM_NEON
      // De-interleaving load: val[0] holds every 4th byte.
      // NOTE(review): same trailing over-read concern as above (3 bytes).
      for (; s < extract - 15; s += 16) {
        int8x16x4_t img = vld4q_s8(im_data + s * 4);
        vst1q_s8(col_data + s, img.val[0]);
      }
#endif
      for (; s < extract; ++s) {
        col_data[s] = im_data[s * 4];
      }
    } else {
      PADDLE_MOBILE_THROW_EXCEPTION("stride_w must be one of 1, 2, 3 and 4.");
    }
    // Advance to the next source row and the next output row.
    im_data += im_width * stride_h;
    col_data += col_width;
  }
}
/*
 * int8 specialization of the kCFO im2col transform.
 *
 * im = [input_channels, input_height, input_width]
 * col =
 *   [input_channels, filter_height, filter_width, output_height,
 * output_width]
 *
 * im        input image tensor (read as int8_t)
 * dilation  {dilation_h, dilation_w}
 * stride    {stride_h, stride_w}
 * padding   only padding[0] (rows) and padding[1] (columns) are read
 * col       output column tensor; its dims supply the filter and output
 *           sizes, and its int8_t storage is overwritten
 */
template <>
void Im2ColFunctor<ColFormat::kCFO, CPU, int8_t>::operator()(
    const framework::Tensor &im, const std::vector<int> &dilation,
    const std::vector<int> &stride, const std::vector<int> &padding,
    framework::Tensor *col) {
  int im_channels = im.dims()[0];
  int im_height = im.dims()[1];
  int im_width = im.dims()[2];
  int filter_height = col->dims()[1];
  int filter_width = col->dims()[2];
  int col_height = col->dims()[3];
  int col_width = col->dims()[4];

  int channels_col = im_channels * filter_height * filter_width;
  const int8_t *im_data = im.data<int8_t>();
  int8_t *col_data = col->data<int8_t>();
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
  // Fast path: no dilation and strides that ExtractToImg can handle.
  // ExtractToImg only supports a horizontal stride (stride[1]) of 1..4 and
  // throws otherwise, so both strides are checked here; anything else falls
  // through to the generic loop below.
  if (stride[0] <= 4 && stride[1] <= 4 && dilation[0] == 1 &&
      dilation[0] == dilation[1]) {
    // pad 0: ExtractToImg writes only in-image samples, so the padded
    // positions keep this zero fill.
    memset(col_data, 0, col->numel() * sizeof(int8_t));
    for (int ic = 0; ic < im_channels; ++ic) {
      for (int kh = 0; kh < filter_height; ++kh) {
        for (int kw = 0; kw < filter_width; ++kw) {
          ExtractToImg(im_data, col_data, im_height, im_width, col_height,
                       col_width, padding[0], padding[1], stride[0], stride[1],
                       kh, kw);
          // One output plane per (channel, kh, kw) triple.
          col_data += col_height * col_width;
        }
      }
      im_data += im_height * im_width;
    }
  } else {
#endif
    // Generic path: compute every output element individually.
    for (int c = 0; c < channels_col; ++c) {
      // Decompose the flat column-channel index into (c_im, h_offset,
      // w_offset).
      int w_offset = c % filter_width;
      int h_offset = (c / filter_width) % filter_height;
      int c_im = c / (filter_width * filter_height);
      for (int h = 0; h < col_height; ++h) {
        int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
        for (int w = 0; w < col_width; ++w) {
          int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
          int col_idx = (c * col_height + h) * col_width + w;
          int im_idx = (im_row_idx + c_im * im_height) * im_width + im_col_idx;

          // Samples that fall into the padding area read as zero.
          col_data[col_idx] = (im_row_idx < 0 || im_row_idx >= im_height ||
                               im_col_idx < 0 || im_col_idx >= im_width)
                                  ? static_cast<int8_t>(0)
                                  : im_data[im_idx];
        }
      }
    }
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
  }
#endif
}
/*
/*
* im = [input_channels, input_height, input_width]
* im = [input_channels, input_height, input_width]
...
@@ -456,27 +553,6 @@ class Col2ImFunctor<ColFormat::kCFO, CPU, T> {
...
@@ -456,27 +553,6 @@ class Col2ImFunctor<ColFormat::kCFO, CPU, T> {
int
col_height
=
col
.
dims
()[
3
];
int
col_height
=
col
.
dims
()[
3
];
int
col_width
=
col
.
dims
()[
4
];
int
col_width
=
col
.
dims
()[
4
];
// PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2]
// -
// ((dilation[0] * (filter_height - 1)
// + 1))) /
// stride[0] +
// 1,
// col_height,
// "Output_height and
// padding(padding_up, padding_down)
// are " "inconsistent.");
// PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3]
// -
// ((dilation[1] * (filter_width - 1)
// + 1))) /
// stride[1] +
// 1,
// col_width,
// "Output_height and
// padding(padding_up, padding_down)
// are " "inconsistent.");
int
channels_col
=
im_channels
*
filter_height
*
filter_width
;
int
channels_col
=
im_channels
*
filter_height
*
filter_width
;
T
*
im_data
=
im
->
data
<
T
>
();
T
*
im_data
=
im
->
data
<
T
>
();
...
@@ -503,9 +579,9 @@ class Col2ImFunctor<ColFormat::kCFO, CPU, T> {
...
@@ -503,9 +579,9 @@ class Col2ImFunctor<ColFormat::kCFO, CPU, T> {
};
};
template
class
Im2ColFunctor
<
ColFormat
::
kCFO
,
CPU
,
float
>;
template
class
Im2ColFunctor
<
ColFormat
::
kCFO
,
CPU
,
float
>;
// template class Im2ColFunctor<ColFormat::kCFO, CPU, double
>;
template
class
Im2ColFunctor
<
ColFormat
::
kCFO
,
CPU
,
int8_t
>;
template
class
Col2ImFunctor
<
ColFormat
::
kCFO
,
CPU
,
float
>;
template
class
Col2ImFunctor
<
ColFormat
::
kCFO
,
CPU
,
float
>;
template
class
Col2ImFunctor
<
ColFormat
::
kCFO
,
CPU
,
double
>;
template
class
Col2ImFunctor
<
ColFormat
::
kCFO
,
CPU
,
int8_t
>;
/*
/*
* im = [input_channels, input_height, input_width]
* im = [input_channels, input_height, input_width]
...
@@ -519,8 +595,6 @@ class Im2ColFunctor<ColFormat::kOCF, CPU, T> {
...
@@ -519,8 +595,6 @@ class Im2ColFunctor<ColFormat::kOCF, CPU, T> {
void
operator
()(
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>
&
dilation
,
void
operator
()(
const
framework
::
Tensor
&
im
,
const
std
::
vector
<
int
>
&
dilation
,
const
std
::
vector
<
int
>
&
stride
,
const
std
::
vector
<
int
>
&
stride
,
const
std
::
vector
<
int
>
&
padding
,
framework
::
Tensor
*
col
)
{
const
std
::
vector
<
int
>
&
padding
,
framework
::
Tensor
*
col
)
{
// PADDLE_ENFORCE(im.dims().size() == 3);
// PADDLE_ENFORCE(col->dims().size() == 5);
int
im_channels
=
im
.
dims
()[
0
];
int
im_channels
=
im
.
dims
()[
0
];
int
im_height
=
im
.
dims
()[
1
];
int
im_height
=
im
.
dims
()[
1
];
int
im_width
=
im
.
dims
()[
2
];
int
im_width
=
im
.
dims
()[
2
];
...
@@ -529,19 +603,6 @@ class Im2ColFunctor<ColFormat::kOCF, CPU, T> {
...
@@ -529,19 +603,6 @@ class Im2ColFunctor<ColFormat::kOCF, CPU, T> {
int
col_height
=
col
->
dims
()[
0
];
int
col_height
=
col
->
dims
()[
0
];
int
col_width
=
col
->
dims
()[
1
];
int
col_width
=
col
->
dims
()[
1
];
// PADDLE_ENFORCE_EQ(
// (im_height + padding[0] + padding[2] -
// filter_height) / stride[0]
// + 1, col_height, "Output_height and
// padding(padding_up,
// padding_down) are " "inconsistent.");
// PADDLE_ENFORCE_EQ(
// (im_width + padding[1] + padding[3] -
// filter_width) / stride[1] +
// 1, col_width, "col_width and padding(padding_left,
// padding_right)
// are " "inconsistent.");
const
T
*
im_data
=
im
.
data
<
T
>
();
const
T
*
im_data
=
im
.
data
<
T
>
();
T
*
col_data
=
col
->
data
<
T
>
();
T
*
col_data
=
col
->
data
<
T
>
();
...
@@ -593,8 +654,6 @@ class Col2ImFunctor<ColFormat::kOCF, CPU, T> {
...
@@ -593,8 +654,6 @@ class Col2ImFunctor<ColFormat::kOCF, CPU, T> {
const
std
::
vector
<
int
>
&
dilation
,
const
std
::
vector
<
int
>
&
dilation
,
const
std
::
vector
<
int
>
&
stride
,
const
std
::
vector
<
int
>
&
stride
,
const
std
::
vector
<
int
>
&
padding
,
framework
::
Tensor
*
im
)
{
const
std
::
vector
<
int
>
&
padding
,
framework
::
Tensor
*
im
)
{
// PADDLE_ENFORCE(im->dims().size() == 3);
// PADDLE_ENFORCE(col.dims().size() == 5);
int
im_channels
=
im
->
dims
()[
0
];
int
im_channels
=
im
->
dims
()[
0
];
int
im_height
=
im
->
dims
()[
1
];
int
im_height
=
im
->
dims
()[
1
];
int
im_width
=
im
->
dims
()[
2
];
int
im_width
=
im
->
dims
()[
2
];
...
@@ -603,19 +662,6 @@ class Col2ImFunctor<ColFormat::kOCF, CPU, T> {
...
@@ -603,19 +662,6 @@ class Col2ImFunctor<ColFormat::kOCF, CPU, T> {
int
col_height
=
col
.
dims
()[
0
];
int
col_height
=
col
.
dims
()[
0
];
int
col_width
=
col
.
dims
()[
1
];
int
col_width
=
col
.
dims
()[
1
];
// PADDLE_ENFORCE_EQ(
// (im_height + padding[0] + padding[2] -
// filter_height) / stride[0]
// + 1, col_height, "Output_height and
// padding(padding_up,
// padding_down) are " "inconsistent.");
// PADDLE_ENFORCE_EQ(
// (im_width + padding[1] + padding[3] -
// filter_width) / stride[1] +
// 1, col_width, "col_width and padding(padding_left,
// padding_right)
// are " "inconsistent.");
T
*
im_data
=
im
->
data
<
T
>
();
T
*
im_data
=
im
->
data
<
T
>
();
const
T
*
col_data
=
col
.
data
<
T
>
();
const
T
*
col_data
=
col
.
data
<
T
>
();
...
@@ -655,9 +701,7 @@ class Col2ImFunctor<ColFormat::kOCF, CPU, T> {
...
@@ -655,9 +701,7 @@ class Col2ImFunctor<ColFormat::kOCF, CPU, T> {
};
};
template
class
Im2ColFunctor
<
ColFormat
::
kOCF
,
CPU
,
float
>;
template
class
Im2ColFunctor
<
ColFormat
::
kOCF
,
CPU
,
float
>;
template
class
Im2ColFunctor
<
ColFormat
::
kOCF
,
CPU
,
double
>;
template
class
Col2ImFunctor
<
ColFormat
::
kOCF
,
CPU
,
float
>;
template
class
Col2ImFunctor
<
ColFormat
::
kOCF
,
CPU
,
float
>;
template
class
Col2ImFunctor
<
ColFormat
::
kOCF
,
CPU
,
double
>;
}
// namespace math
}
// namespace math
}
// namespace operators
}
// namespace operators
...
...
src/operators/math/math_function.h
浏览文件 @
8e11ee09
...
@@ -15,6 +15,7 @@ limitations under the License. */
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <cmath>
#include <cmath>
#include <string>
#include "framework/tensor.h"
#include "framework/tensor.h"
namespace
paddle_mobile
{
namespace
paddle_mobile
{
...
...
src/operators/math/pad.cpp
0 → 100644
浏览文件 @
8e11ee09
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/math/pad.h"
namespace
paddle_mobile
{
namespace
operators
{
namespace
math
{
// CPU implementation of PadFunctor: surrounds the two innermost dimensions
// of a 4-D tensor with a border of zeros.
template <typename T>
class PadFunctor<CPU, T> {
 public:
  /*
   * Writes a zero-padded copy of `input` into `output`.
   *
   * For every (dim 0, dim 1) plane, pad_h output rows are skipped before and
   * after the copied rows, and each input row is placed pad_w elements into
   * the corresponding output row. The row length of the output is taken from
   * output->dims()[3].
   *
   * The output shape is not checked against the input shape; the caller has
   * to size `output` consistently with `input`, `pad_h` and `pad_w`.
   */
  void operator()(const framework::Tensor &input, const int pad_h,
                  const int pad_w, framework::Tensor *output) {
    const T *src = input.data<T>();
    T *dst = output->mutable_data<T>();
    const framework::DDim &in_dims = input.dims();
    const framework::DDim &out_dims = output->dims();

    // Zero the whole output first so that only the interior needs copying.
    memset(dst, 0, sizeof(T) * output->numel());

    for (int n = 0; n < in_dims[0]; ++n) {
      for (int ch = 0; ch < in_dims[1]; ++ch) {
        // Step over the top border rows of this plane.
        dst += pad_h * out_dims[3];
        int row = 0;
        while (row < in_dims[2]) {
          // Copy one input row, shifted right by the left border.
          memcpy(dst + pad_w, src, sizeof(T) * in_dims[3]);
          dst += out_dims[3];
          src += in_dims[3];
          ++row;
        }
        // Step over the bottom border rows of this plane.
        dst += pad_h * out_dims[3];
      }
    }
  }
};

template class PadFunctor<CPU, float>;
template class PadFunctor<CPU, int8_t>;
}
// namespace math
}
// namespace operators
}
// namespace paddle_mobile
src/operators/math/pad.h
0 → 100644
浏览文件 @
8e11ee09
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "framework/tensor.h"
namespace
paddle_mobile
{
namespace
operators
{
namespace
math
{
// Functor interface for padding a tensor. This primary template only
// declares the call operator; the CPU partial specialization in
// operators/math/pad.cpp provides the body, which zero-fills `output` and
// copies each input row into it at an offset.
template <typename DeviceType, typename T>
class PadFunctor {
 public:
  // input   tensor to pad; taken by const reference
  // pad_h   number of rows added before and after each plane (applied to
  //         dim 2 by the CPU specialization)
  // pad_w   number of elements each row is shifted by (applied to dim 3 by
  //         the CPU specialization)
  // output  destination tensor, written through the pointer
  void operator()(const framework::Tensor &input, const int pad_h,
                  const int pad_w, framework::Tensor *output);
};
}
// namespace math
}
// namespace operators
}
// namespace paddle_mobile
src/operators/math/vol2col.cpp
浏览文件 @
8e11ee09
...
@@ -32,9 +32,6 @@ class Vol2ColFunctor<CPU, T> {
...
@@ -32,9 +32,6 @@ class Vol2ColFunctor<CPU, T> {
void
operator
()(
const
Tensor
&
vol
,
const
std
::
vector
<
int
>
&
dilations
,
void
operator
()(
const
Tensor
&
vol
,
const
std
::
vector
<
int
>
&
dilations
,
const
std
::
vector
<
int
>
&
strides
,
const
std
::
vector
<
int
>
&
strides
,
const
std
::
vector
<
int
>
&
paddings
,
Tensor
*
col
)
const
{
const
std
::
vector
<
int
>
&
paddings
,
Tensor
*
col
)
const
{
// PADDLE_ENFORCE(vol.dims().size() == 4);
// PADDLE_ENFORCE(col->dims().size() == 7);
int
input_channels
=
vol
.
dims
()[
0
];
int
input_channels
=
vol
.
dims
()[
0
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_height
=
vol
.
dims
()[
2
];
int
input_height
=
vol
.
dims
()[
2
];
...
@@ -48,32 +45,6 @@ class Vol2ColFunctor<CPU, T> {
...
@@ -48,32 +45,6 @@ class Vol2ColFunctor<CPU, T> {
int
channels_col
=
int
channels_col
=
input_channels
*
filter_depth
*
filter_height
*
filter_width
;
input_channels
*
filter_depth
*
filter_height
*
filter_width
;
// PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
// ((dilations[0] * (filter_depth - 1)
// + 1))) /
// strides[0] +
// 1,
// output_depth,
// "input_depth and output_depth are "
// "mismatching.");
// PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
// ((dilations[1] * (filter_height -
// 1) + 1))) /
// strides[1] +
// 1,
// output_height,
// "input_height and output_height are
// "
// "mismatching.");
// PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
// ((dilations[2] * (filter_width - 1)
// + 1))) /
// strides[2] +
// 1,
// output_width,
// "input_width and output_width are "
// "mismatching.");
const
T
*
vol_data
=
vol
.
data
<
T
>
();
const
T
*
vol_data
=
vol
.
data
<
T
>
();
T
*
col_data
=
col
->
data
<
T
>
();
T
*
col_data
=
col
->
data
<
T
>
();
...
@@ -119,9 +90,6 @@ class Col2VolFunctor<CPU, T> {
...
@@ -119,9 +90,6 @@ class Col2VolFunctor<CPU, T> {
void
operator
()(
const
Tensor
&
col
,
const
std
::
vector
<
int
>
&
dilations
,
void
operator
()(
const
Tensor
&
col
,
const
std
::
vector
<
int
>
&
dilations
,
const
std
::
vector
<
int
>
&
strides
,
const
std
::
vector
<
int
>
&
strides
,
const
std
::
vector
<
int
>
&
paddings
,
Tensor
*
vol
)
const
{
const
std
::
vector
<
int
>
&
paddings
,
Tensor
*
vol
)
const
{
// PADDLE_ENFORCE(vol->dims().size() == 4);
// PADDLE_ENFORCE(col.dims().size() == 7);
int
input_channels
=
vol
->
dims
()[
0
];
int
input_channels
=
vol
->
dims
()[
0
];
int
input_depth
=
vol
->
dims
()[
1
];
int
input_depth
=
vol
->
dims
()[
1
];
int
input_height
=
vol
->
dims
()[
2
];
int
input_height
=
vol
->
dims
()[
2
];
...
@@ -135,31 +103,6 @@ class Col2VolFunctor<CPU, T> {
...
@@ -135,31 +103,6 @@ class Col2VolFunctor<CPU, T> {
int
channels_col
=
int
channels_col
=
input_channels
*
filter_depth
*
filter_height
*
filter_width
;
input_channels
*
filter_depth
*
filter_height
*
filter_width
;
// PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
// ((dilations[0] * (filter_depth - 1)
// + 1))) /
// strides[0] +
// 1,
// output_depth,
// "input_depth and output_depth are "
// "mismatching.");
// PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
// ((dilations[1] * (filter_height -
// 1) + 1))) /
// strides[1] +
// 1,
// output_height,
// "input_height and output_height are
// "
// "mismatching.");
// PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
// ((dilations[2] * (filter_width - 1)
// + 1))) /
// strides[2] +
// 1,
// output_width,
// "input_width and output_width are "
// "mismatching.");
T
*
vol_data
=
vol
->
data
<
T
>
();
T
*
vol_data
=
vol
->
data
<
T
>
();
const
T
*
col_data
=
col
.
data
<
T
>
();
const
T
*
col_data
=
col
.
data
<
T
>
();
...
@@ -195,9 +138,9 @@ class Col2VolFunctor<CPU, T> {
...
@@ -195,9 +138,9 @@ class Col2VolFunctor<CPU, T> {
};
};
template
class
Vol2ColFunctor
<
CPU
,
float
>;
template
class
Vol2ColFunctor
<
CPU
,
float
>;
template
class
Vol2ColFunctor
<
CPU
,
double
>;
template
class
Vol2ColFunctor
<
CPU
,
int8_t
>;
template
class
Col2VolFunctor
<
CPU
,
float
>;
template
class
Col2VolFunctor
<
CPU
,
float
>;
template
class
Col2VolFunctor
<
CPU
,
double
>;
template
class
Col2VolFunctor
<
CPU
,
int8_t
>;
}
// namespace math
}
// namespace math
}
// namespace operators
}
// namespace operators
...
...
src/operators/op_param.h
浏览文件 @
8e11ee09
...
@@ -2330,6 +2330,7 @@ class ShapeParam : public OpParam {
...
@@ -2330,6 +2330,7 @@ class ShapeParam : public OpParam {
};
};
#endif
#endif
#ifdef QUANT_OP
template
<
typename
Dtype
>
template
<
typename
Dtype
>
class
QuantizeParam
:
public
OpParam
{
class
QuantizeParam
:
public
OpParam
{
typedef
typename
DtypeTensorTrait
<
Dtype
>::
gtype
GType
;
typedef
typename
DtypeTensorTrait
<
Dtype
>::
gtype
GType
;
...
@@ -2340,14 +2341,12 @@ class QuantizeParam : public OpParam {
...
@@ -2340,14 +2341,12 @@ class QuantizeParam : public OpParam {
const
AttributeMap
&
attrs
,
const
Scope
&
scope
)
{
const
AttributeMap
&
attrs
,
const
Scope
&
scope
)
{
input_
=
InputXFrom
<
GType
>
(
inputs
,
scope
);
input_
=
InputXFrom
<
GType
>
(
inputs
,
scope
);
out_
=
OutFrom
<
GType
>
(
outputs
,
scope
);
out_
=
OutFrom
<
GType
>
(
outputs
,
scope
);
if
(
HasAttr
(
"is_static"
,
attrs
))
{
is_static_
=
GetAttr
<
bool
>
(
"is_static"
,
attrs
);
}
// online
// online
// scale = max(abs(x))
// scale = max(abs(x))
online_scale_
=
GetVarValue
<
GType
>
(
"OutScale"
,
outputs
,
scope
);
online_scale_
=
GetVarValue
<
GType
>
(
"OutScale"
,
outputs
,
scope
);
// offline
// offline
if
(
HasAttr
(
"static_scale"
,
attrs
))
{
if
(
HasAttr
(
"static_scale"
,
attrs
))
{
is_static_
=
true
;
static_scale_
=
GetAttr
<
float
>
(
"static_scale"
,
attrs
);
static_scale_
=
GetAttr
<
float
>
(
"static_scale"
,
attrs
);
}
}
// x = round(scale * x)
// x = round(scale * x)
...
@@ -2369,9 +2368,11 @@ class QuantizeParam : public OpParam {
...
@@ -2369,9 +2368,11 @@ class QuantizeParam : public OpParam {
float
static_scale_
=
1.0
f
;
float
static_scale_
=
1.0
f
;
// round method type
// round method type
// nearest_zero and nearest_even is valid currently
// nearest_zero and nearest_even is valid currently
RoundType
round_type_
=
ROUND_NEAREST_
TO_EVEN
;
RoundType
round_type_
=
ROUND_NEAREST_
AWAY_ZERO
;
};
};
#endif
#ifdef DEQUANT_OP
template
<
typename
Dtype
>
template
<
typename
Dtype
>
class
DequantizeParam
:
public
OpParam
{
class
DequantizeParam
:
public
OpParam
{
typedef
typename
DtypeTensorTrait
<
Dtype
>::
gtype
GType
;
typedef
typename
DtypeTensorTrait
<
Dtype
>::
gtype
GType
;
...
@@ -2399,6 +2400,7 @@ class DequantizeParam : public OpParam {
...
@@ -2399,6 +2400,7 @@ class DequantizeParam : public OpParam {
RType
*
activation_scale_
;
RType
*
activation_scale_
;
float
weight_scale_
;
float
weight_scale_
;
};
};
#endif
}
// namespace operators
}
// namespace operators
}
// namespace paddle_mobile
}
// namespace paddle_mobile
src/operators/quantize_op.cpp
浏览文件 @
8e11ee09
...
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef QUANT_OP
#include "operators/quantize_op.h"
#include "operators/quantize_op.h"
#include <vector>
#include <vector>
...
@@ -33,3 +35,5 @@ namespace ops = paddle_mobile::operators;
...
@@ -33,3 +35,5 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU
(
quantize
,
ops
::
QuantizeOp
);
REGISTER_OPERATOR_CPU
(
quantize
,
ops
::
QuantizeOp
);
#endif
#endif
#endif
src/operators/quantize_op.h
浏览文件 @
8e11ee09
...
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#ifdef QUANT_OP
#pragma once
#pragma once
#include <string>
#include <string>
...
@@ -40,3 +42,5 @@ class QuantizeOp : public framework::OperatorWithKernel<
...
@@ -40,3 +42,5 @@ class QuantizeOp : public framework::OperatorWithKernel<
}
// namespace operators
}
// namespace operators
}
// namespace paddle_mobile
}
// namespace paddle_mobile
#endif
src/operators/sum_op.cpp
浏览文件 @
8e11ee09
...
@@ -26,7 +26,7 @@ void SumOp<Dtype, T>::InferShape() const {
...
@@ -26,7 +26,7 @@ void SumOp<Dtype, T>::InferShape() const {
auto
inputs
=
this
->
param_
.
Inputs
();
auto
inputs
=
this
->
param_
.
Inputs
();
const
size_t
n
=
inputs
.
size
();
const
size_t
n
=
inputs
.
size
();
std
::
vector
<
DDim
>
inputs_dims
;
std
::
vector
<
framework
::
DDim
>
inputs_dims
;
inputs_dims
.
reserve
(
n
);
inputs_dims
.
reserve
(
n
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
inputs_dims
.
push_back
(
inputs
[
i
]
->
dims
());
inputs_dims
.
push_back
(
inputs
[
i
]
->
dims
());
...
...
test/CMakeLists.txt
浏览文件 @
8e11ee09
...
@@ -213,6 +213,10 @@ if (NOT FOUND_MATCH)
...
@@ -213,6 +213,10 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE
(
test-dequantize-op operators/test_dequantize_op.cpp test_helper.h test_include.h
)
ADD_EXECUTABLE
(
test-dequantize-op operators/test_dequantize_op.cpp test_helper.h test_include.h
)
target_link_libraries
(
test-dequantize-op paddle-mobile
)
target_link_libraries
(
test-dequantize-op paddle-mobile
)
# test int8 conv op
ADD_EXECUTABLE
(
test-int8-conv-op operators/test_int8_conv_op.cpp test_helper.h test_include.h
)
target_link_libraries
(
test-int8-conv-op paddle-mobile
)
# gen test log
# gen test log
ADD_EXECUTABLE
(
test-log common/test_log.cpp
)
ADD_EXECUTABLE
(
test-log common/test_log.cpp
)
target_link_libraries
(
test-log paddle-mobile
)
target_link_libraries
(
test-log paddle-mobile
)
...
...
test/net/test_googlenet.cpp
浏览文件 @
8e11ee09
...
@@ -25,27 +25,31 @@ int main() {
...
@@ -25,27 +25,31 @@ int main() {
paddle_mobile
::
PaddleMobile
<
paddle_mobile
::
CPU
>
paddle_mobile
;
paddle_mobile
::
PaddleMobile
<
paddle_mobile
::
CPU
>
paddle_mobile
;
#endif
#endif
paddle_mobile
.
SetThreadNum
(
4
);
paddle_mobile
.
SetThreadNum
(
1
);
bool
optimize
=
tru
e
;
bool
optimize
=
fals
e
;
auto
time1
=
time
();
auto
time1
=
time
();
if
(
paddle_mobile
.
Load
(
g_googlenet
,
optimize
))
{
if
(
paddle_mobile
.
Load
(
g_googlenet
,
optimize
))
{
auto
time2
=
time
();
auto
time2
=
time
();
std
::
cout
<<
"load cost :"
<<
time_diff
(
time1
,
time2
)
<<
"ms"
<<
std
::
endl
;
std
::
cout
<<
"load cost :"
<<
time_diff
(
time1
,
time2
)
<<
"ms"
<<
std
::
endl
;
std
::
vector
<
float
>
input
;
std
::
vector
<
float
>
input
;
std
::
vector
<
float
>
output
;
std
::
vector
<
int64_t
>
dims
{
1
,
3
,
224
,
224
};
std
::
vector
<
int64_t
>
dims
{
1
,
3
,
224
,
224
};
GetInput
<
float
>
(
g_test_image_1x3x224x224
,
&
input
,
dims
);
GetInput
<
float
>
(
g_test_image_1x3x224x224
,
&
input
,
dims
);
// 预热十次
//
//
预热十次
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
//
for (int i = 0; i < 10; ++i) {
auto
vec_resul
t
=
paddle_mobile
.
Predict
(
input
,
dims
);
// outpu
t = paddle_mobile.Predict(input, dims);
}
//
}
auto
time3
=
time
();
auto
time3
=
time
();
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
auto
vec_resul
t
=
paddle_mobile
.
Predict
(
input
,
dims
);
outpu
t
=
paddle_mobile
.
Predict
(
input
,
dims
);
}
}
auto
time4
=
time
();
auto
time4
=
time
();
std
::
cout
<<
"predict cost :"
<<
time_diff
(
time3
,
time4
)
/
10
<<
"ms"
std
::
cout
<<
"predict cost :"
<<
time_diff
(
time3
,
time4
)
/
10
<<
"ms"
<<
std
::
endl
;
<<
std
::
endl
;
for
(
int
i
=
0
;
i
<
output
.
size
();
++
i
)
{
DLOG
<<
"result["
<<
i
<<
"] = "
<<
output
[
i
];
}
}
}
return
0
;
return
0
;
}
}
test/operators/test_dequantize_op.cpp
浏览文件 @
8e11ee09
...
@@ -59,7 +59,7 @@ int TestDequqntizeOp() {
...
@@ -59,7 +59,7 @@ int TestDequqntizeOp() {
framework
::
Tensor
output_cmp
;
framework
::
Tensor
output_cmp
;
output_cmp
.
Resize
(
dim
);
output_cmp
.
Resize
(
dim
);
float
dequant_scale
=
1.
f
/
(
1.27
*
1.74
)
;
float
dequant_scale
=
1.
27
/
1.74
;
dequantize
(
input
,
dequant_scale
,
&
output_cmp
);
dequantize
(
input
,
dequant_scale
,
&
output_cmp
);
const
float
*
output_cmp_data
=
output_cmp
.
data
<
float
>
();
const
float
*
output_cmp_data
=
output_cmp
.
data
<
float
>
();
for
(
int
i
=
0
;
i
<
output
->
numel
();
++
i
)
{
for
(
int
i
=
0
;
i
<
output
->
numel
();
++
i
)
{
...
...
test/operators/test_int8_conv_op.cpp
0 → 100644
浏览文件 @
8e11ee09
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/conv_op.h"
namespace
paddle_mobile
{
// Reference (naive) convolution used to cross-check the optimized kernels.
//
// Walks every (batch, group, output channel, input channel, output position,
// kernel tap) combination with explicit loops and accumulates
// in_data * w_data into the output. Taps that land outside the input (i.e. in
// the padding) are skipped.
//
// Template parameters:
//   Itype - element type read from `input` and `filter`.
//   Otype - element type accumulated into `output`.
// Parameters:
//   input  - input tensor, indexed here as (n, c, y, x).
//   filter - filter tensor, indexed here as (out_c, in_c within group, p, q).
//   attrs  - must provide "paddings", "strides" and "dilations" (each read as
//            {h, w}) and "groups".
//   output - destination tensor. Its dims must already be set by the caller
//            because they drive the loop bounds; its buffer is zeroed before
//            accumulation.
template <typename Itype, typename Otype>
void conv2d(const framework::Tensor *input, const framework::Tensor *filter,
            const framework::AttributeMap &attrs, framework::Tensor *output) {
  framework::AttrReader attr_reader(attrs);
  std::vector<int> paddings = attr_reader.Get<std::vector<int>>("paddings");
  std::vector<int> strides = attr_reader.Get<std::vector<int>>("strides");
  std::vector<int> dilations = attr_reader.Get<std::vector<int>>("dilations");
  int groups = attr_reader.Get<int>("groups");

  int kernel_h = filter->dims()[2];
  int kernel_w = filter->dims()[3];
  int pad_h = paddings[0];
  int pad_w = paddings[1];
  int stride_h = strides[0];
  int stride_w = strides[1];
  int dilation_h = dilations[0];
  int dilation_w = dilations[1];
  auto in_shape = input->dims();
  auto out_shape = output->dims();

  // Depth (3-D convolution) support is scaffolded but disabled: with
  // has_depth == false the depth loops below run exactly once.
  const bool has_depth = false;
  int kernel_d, pad_d, stride_d, dilation_d;
  if (has_depth) {
    kernel_d = kernel_h;
    stride_d = stride_h;
    pad_d = pad_h;
    dilation_d = dilation_h;
  } else {
    kernel_d = stride_d = dilation_d = 1;
    pad_d = 0;
  }

  // Groups: channels handled per group on the output and input side.
  int o_g = out_shape[1] / groups;
  int k_g = in_shape[1] / groups;

  // Convolution: per-tensor multi-dimensional indices, reused every iteration.
  std::vector<int> weight_offset(4 + has_depth);
  std::vector<int> in_offset(4 + has_depth);
  std::vector<int> out_offset(4 + has_depth);

  // Flattens a multi-dimensional index into a row-major linear offset using
  // the tensor's own dims.
  auto offset = [](const framework::Tensor *tensor,
                   const std::vector<int> &indices) {
    framework::DDim shape = tensor->dims();
    size_t count = 0;
    for (size_t i = 0; i < indices.size(); ++i) {
      count *= shape[static_cast<int>(i)];
      count += indices[i];
    }
    return count;
  };

  const Itype *in_data = input->data<Itype>();
  const Itype *w_data = filter->data<Itype>();
  Otype *out_data = output->mutable_data<Otype>();
  // The loops below only accumulate, so start from an all-zero output.
  memset(out_data, 0, output->numel() * sizeof(Otype));

  for (int n = 0; n < out_shape[0]; n++) {
    for (int g = 0; g < groups; g++) {
      // First output / input channel belonging to group g.
      const int o_head = o_g * g;
      const int k_head = k_g * g;
      for (int o = 0; o < o_g; o++) {
        for (int k = 0; k < k_g; k++) {
          for (int z = 0; z < (has_depth ? out_shape[2] : 1); z++) {
            for (int y = 0; y < out_shape[2 + has_depth]; y++) {
              for (int x = 0; x < out_shape[3 + has_depth]; x++) {
                for (int r = 0; r < kernel_d; r++) {
                  for (int p = 0; p < kernel_h; p++) {
                    for (int q = 0; q < kernel_w; q++) {
                      int in_z = z * stride_d - pad_d + r * dilation_d;
                      int in_y = y * stride_h - pad_h + p * dilation_h;
                      int in_x = x * stride_w - pad_w + q * dilation_w;
                      // Skip taps that fall into the padding region.
                      if (in_z >= 0 && in_z < (has_depth ? in_shape[2] : 1) &&
                          in_y >= 0 && in_y < in_shape[2 + has_depth] &&
                          in_x >= 0 && in_x < in_shape[3 + has_depth]) {
                        weight_offset[0] = o + o_head;
                        weight_offset[1] = k;
                        if (has_depth) {
                          weight_offset[2] = r;
                        }
                        weight_offset[2 + has_depth] = p;
                        weight_offset[3 + has_depth] = q;

                        in_offset[0] = n;
                        in_offset[1] = k + k_head;
                        if (has_depth) {
                          in_offset[2] = in_z;
                        }
                        in_offset[2 + has_depth] = in_y;
                        in_offset[3 + has_depth] = in_x;

                        out_offset[0] = n;
                        out_offset[1] = o + o_head;
                        if (has_depth) {
                          out_offset[2] = z;
                        }
                        out_offset[2 + has_depth] = y;
                        out_offset[3 + has_depth] = x;

                        out_data[offset(output, out_offset)] +=
                            in_data[offset(input, in_offset)] *
                            w_data[offset(filter, weight_offset)];
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
template
<
typename
Itype
,
typename
Otype
,
int
Kernel
,
int
Pad
,
int
Stride
>
int
TestConvOp
()
{
int
kernel_h
=
Kernel
;
int
kernel_w
=
Kernel
;
int
pad_h
=
Pad
;
int
pad_w
=
Pad
;
int
stride_h
=
Stride
;
int
stride_w
=
Stride
;
int
dilation_h
=
1
;
int
dilation_w
=
1
;
int
batch_size
=
1
;
int
input_c
=
3
;
int
input_h
=
100
;
int
input_w
=
100
;
int
output_c
=
10
;
framework
::
DDim
input_shape
=
framework
::
make_ddim
({
batch_size
,
input_c
,
input_h
,
input_w
});
framework
::
DDim
filter_shape
=
framework
::
make_ddim
({
output_c
,
input_c
,
kernel_h
,
kernel_w
});
VariableNameMap
inputs
;
VariableNameMap
outputs
;
auto
scope
=
std
::
make_shared
<
framework
::
Scope
>
();
inputs
[
"Input"
]
=
std
::
vector
<
std
::
string
>
({
"input"
});
inputs
[
"Filter"
]
=
std
::
vector
<
std
::
string
>
({
"filter"
});
outputs
[
"Output"
]
=
std
::
vector
<
std
::
string
>
({
"output"
});
auto
input_var
=
scope
.
get
()
->
Var
(
"input"
);
auto
input
=
input_var
->
template
GetMutable
<
framework
::
LoDTensor
>();
SetupTensor
<
Itype
>
(
input
,
input_shape
,
-
20
,
20
);
auto
filter_var
=
scope
.
get
()
->
Var
(
"filter"
);
auto
filter
=
filter_var
->
template
GetMutable
<
framework
::
LoDTensor
>();
SetupTensor
<
Itype
>
(
filter
,
filter_shape
,
-
20
,
20
);
auto
output_var
=
scope
.
get
()
->
Var
(
"output"
);
framework
::
AttributeMap
attrs
;
attrs
[
"strides"
].
Set
<
vector
<
int
>>
(
std
::
vector
<
int
>
({
stride_h
,
stride_w
}));
attrs
[
"paddings"
].
Set
<
vector
<
int
>>
(
std
::
vector
<
int
>
({
pad_h
,
pad_w
}));
attrs
[
"dilations"
].
Set
<
vector
<
int
>>
(
std
::
vector
<
int
>
({
dilation_h
,
dilation_w
}));
attrs
[
"groups"
].
Set
<
int
>
(
1
);
auto
*
op
=
new
operators
::
ConvOp
<
CPU
,
float
>
(
"conv2d"
,
inputs
,
outputs
,
attrs
,
scope
);
// struct timespec ts_begin, ts_end;
op
->
InferShape
();
// warmup
// op->Run();
// clock_gettime(CLOCK_MONOTONIC, &ts_begin);
// for (int i = 0; i < 10; ++i) {
op
->
Run
();
// }
// clock_gettime(CLOCK_MONOTONIC, &ts_end);
// uint64_t elapsed = (ts_end.tv_sec - ts_begin.tv_sec) * 1e3 +
// (ts_end.tv_nsec - ts_begin.tv_nsec) / 1e6;
// LOG(kLOG_INFO) << "elapsed: " << elapsed / 10.0 << " ms";
int
kernel_extent_h
=
dilation_h
*
(
kernel_h
-
1
)
+
1
;
int
kernel_extent_w
=
dilation_w
*
(
kernel_w
-
1
)
+
1
;
int
output_h
=
(
input_h
+
2
*
pad_h
-
kernel_extent_h
)
/
stride_h
+
1
;
int
output_w
=
(
input_w
+
2
*
pad_w
-
kernel_extent_w
)
/
stride_w
+
1
;
auto
output_shape
=
framework
::
make_ddim
(
std
::
vector
<
int
>
({
batch_size
,
output_c
,
output_h
,
output_w
}));
framework
::
Tensor
output_cmp
;
output_cmp
.
mutable_data
<
Otype
>
(
output_shape
);
conv2d
<
Itype
,
Otype
>
(
input
,
filter
,
attrs
,
&
output_cmp
);
// compare results
auto
output
=
output_var
->
template
Get
<
framework
::
LoDTensor
>();
const
Otype
*
output_data
=
output
->
data
<
Otype
>
();
Otype
*
output_cmp_data
=
output_cmp
.
data
<
Otype
>
();
for
(
int
i
=
0
;
i
<
output
->
numel
();
++
i
)
{
PADDLE_MOBILE_ENFORCE
(
output_data
[
i
]
==
output_cmp_data
[
i
],
"output[%d] = %d, output_cmp[%d] = %d"
,
i
,
output_data
[
i
],
i
,
output_cmp_data
[
i
]);
}
delete
op
;
return
0
;
}
}
// namespace paddle_mobile
int
main
()
{
// kernel = 7, pad = 0, stride = 2
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=7, pad=0, stride=2"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
7
,
0
,
2
>
();
// kernel = 7, pad = 1, stride = 2
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=7, pad=1, stride=2"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
7
,
1
,
2
>
();
// kernel = 7, pad = 3, stride = 2
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=7, pad=3, stride=2"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
7
,
3
,
2
>
();
// kernel = 7, pad = 0, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=7, pad=0, stride=1"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
7
,
0
,
1
>
();
// kernel = 7, pad = 1, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=7, pad=1, stride=1"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
7
,
1
,
1
>
();
// kernel = 7, pad = 3, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=7, pad=3, stride=1"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
7
,
3
,
1
>
();
// kernel = 7, pad = 5, stride = 3
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=7, pad=5, stride=3"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
7
,
5
,
3
>
();
// kernel = 7, pad = 3, stride = 4
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=7, pad=3, stride=4"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
7
,
3
,
4
>
();
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"
\n
"
;
// kernel = 3, pad = 0, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=3, pad=0, stride=1"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
3
,
0
,
1
>
();
// kernel = 3, pad = 0, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"float, kernel=3, pad=0, stride=1"
;
paddle_mobile
::
TestConvOp
<
float
,
float
,
3
,
0
,
1
>
();
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"
\n
"
;
// kernel = 3, pad = 1, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=3, pad=1, stride=1"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
3
,
1
,
1
>
();
// kernel = 3, pad = 1, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"float, kernel=3, pad=1, stride=1"
;
paddle_mobile
::
TestConvOp
<
float
,
float
,
3
,
1
,
1
>
();
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"
\n
"
;
// kernel = 5, pad = 0, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=5, pad=0, stride=1"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
5
,
0
,
1
>
();
// kernel = 5, pad = 0, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"float, kernel=5, pad=0, stride=1"
;
paddle_mobile
::
TestConvOp
<
float
,
float
,
5
,
0
,
1
>
();
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"
\n
"
;
// kernel = 5, pad = 2, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=5, pad=2, stride=1"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
5
,
2
,
1
>
();
// kernel = 5, pad = 2, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"float, kernel=5, pad=2, stride=1"
;
paddle_mobile
::
TestConvOp
<
float
,
float
,
5
,
2
,
1
>
();
}
test/operators/test_quantize_op.cpp
浏览文件 @
8e11ee09
...
@@ -18,14 +18,6 @@ limitations under the License. */
...
@@ -18,14 +18,6 @@ limitations under the License. */
namespace
paddle_mobile
{
namespace
paddle_mobile
{
// static float g_test_data[50] = {
// -5.55, -5.5, -5.45, -5.0, -4.55, -4.5, -4.45, -4.0, -3.55, -3.5,
// -3.45, -3.01, -2.75, -2.5, -2.501, -2.49, -2.01, -1.75, -1.5, -1.25,
// -1.0, -0.75, -0.5, -0.25, 0.0, 0.25, 0.5, 0.75, 1.0, 1.25,
// 1.5, 1.75, 2.01, 2.49, 2.501, 2.5, 2.75, 3.01, 3.45, 3.5,
// 3.55, 4.0, 4.45, 4.5, 4.55, 5.0, 5.45, 5.5, 5.55, 6.0,
// };
static
float
find_abs_max
(
const
Tensor
*
input
)
{
static
float
find_abs_max
(
const
Tensor
*
input
)
{
float
max_abs
=
0.
f
;
float
max_abs
=
0.
f
;
const
float
*
x
=
input
->
data
<
const
float
>
();
const
float
*
x
=
input
->
data
<
const
float
>
();
...
@@ -60,6 +52,16 @@ static void quantize_round_to_even(const Tensor *input, const float scale,
...
@@ -60,6 +52,16 @@ static void quantize_round_to_even(const Tensor *input, const float scale,
}
}
}
}
// Reference quantizer for the test: multiplies each float element of `input`
// by `scale`, passes the product through round(), and stores the result as
// int8_t in `output`. No clamping is applied here, so the caller is expected
// to pick `scale` such that the products fit into int8_t.
static void quantize_round_to_nearest(const Tensor *input, const float scale,
                                      Tensor *output) {
  const float *src = input->data<const float>();
  int8_t *dst = output->mutable_data<int8_t>();
  const size_t count = input->numel();
  for (size_t idx = 0; idx != count; ++idx) {
    const float scaled = src[idx] * scale;
    dst[idx] = round(scaled);
  }
}
int
TestQuqntizeOp
()
{
int
TestQuqntizeOp
()
{
framework
::
DDim
dim
=
framework
::
make_ddim
({
1
,
3
,
224
,
224
});
framework
::
DDim
dim
=
framework
::
make_ddim
({
1
,
3
,
224
,
224
});
...
@@ -88,15 +90,16 @@ int TestQuqntizeOp() {
...
@@ -88,15 +90,16 @@ int TestQuqntizeOp() {
auto
output_scale
=
output_scale_var
->
template
Get
<
framework
::
LoDTensor
>();
auto
output_scale
=
output_scale_var
->
template
Get
<
framework
::
LoDTensor
>();
const
float
*
output_scale_data
=
output_scale
->
data
<
float
>
();
const
float
*
output_scale_data
=
output_scale
->
data
<
float
>
();
float
max_abs
=
find_abs_max
(
input
);
float
output_scale_cmp
=
find_abs_max
(
input
);
float
output_scale_cmp
=
127
/
max_abs
;
PADDLE_MOBILE_ENFORCE
(
output_scale_cmp
==
output_scale_data
[
0
],
PADDLE_MOBILE_ENFORCE
(
output_scale_cmp
==
output_scale_data
[
0
],
"output_scale = %.6f, output_scale_cmp = %.6f"
,
"output_scale = %.6f, output_scale_cmp = %.6f"
,
output_scale_cmp
,
output_scale_data
[
0
]);
output_scale_cmp
,
output_scale_data
[
0
]);
framework
::
Tensor
output_cmp
;
framework
::
Tensor
output_cmp
;
output_cmp
.
Resize
(
dim
);
output_cmp
.
Resize
(
dim
);
quantize_round_to_even
(
input
,
output_scale_cmp
,
&
output_cmp
);
float
scale
=
127
/
output_scale_cmp
;
// quantize_round_to_even(input, scale, &output_cmp);
quantize_round_to_nearest
(
input
,
scale
,
&
output_cmp
);
int8_t
*
output_cmp_data
=
output_cmp
.
data
<
int8_t
>
();
int8_t
*
output_cmp_data
=
output_cmp
.
data
<
int8_t
>
();
for
(
int
i
=
0
;
i
<
output
->
numel
();
++
i
)
{
for
(
int
i
=
0
;
i
<
output
->
numel
();
++
i
)
{
PADDLE_MOBILE_ENFORCE
(
output_data
[
i
]
==
output_cmp_data
[
i
],
PADDLE_MOBILE_ENFORCE
(
output_data
[
i
]
==
output_cmp_data
[
i
],
...
...
tools/op.cmake
浏览文件 @
8e11ee09
...
@@ -224,6 +224,8 @@ if(NOT FOUND_MATCH)
...
@@ -224,6 +224,8 @@ if(NOT FOUND_MATCH)
set
(
SHAPE_OP ON
)
set
(
SHAPE_OP ON
)
set
(
ELEMENTWISEMUL_OP ON
)
set
(
ELEMENTWISEMUL_OP ON
)
set
(
SUM_OP ON
)
set
(
SUM_OP ON
)
set
(
QUANT_OP ON
)
set
(
DEQUANT_OP ON
)
endif
()
endif
()
# option(BATCHNORM_OP "" ON)
# option(BATCHNORM_OP "" ON)
...
@@ -411,3 +413,10 @@ if (SUM_OP)
...
@@ -411,3 +413,10 @@ if (SUM_OP)
add_definitions
(
-DSUM_OP
)
add_definitions
(
-DSUM_OP
)
endif
()
endif
()
if
(
QUANT_OP
)
add_definitions
(
-DQUANT_OP
)
endif
()
if
(
DEQUANT_OP
)
add_definitions
(
-DDEQUANT_OP
)
endif
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录