Commit 7b3df2f0
Merge branch 'develop' into develop
Authored July 23, 2018 by dolphin8; committed by GitHub on July 23, 2018
Parents: c0f0ec14, c1dc792d

Showing 84 changed files with 2,475 additions and 666 deletions.
CMakeLists.txt  +21  -7
README.md  +2  -2
src/common/types.cpp  +19  -1
src/common/types.h  +10  -0
src/common/variant.h  +1  -1
src/fpga/api/fpga_api.cpp  +64  -0
src/fpga/api/fpga_api.h  +57  -0
src/framework/operator.cpp  +15  -0
src/framework/operator.h  +6  -1
src/framework/program/program-optimize/fusion_op_register.h  +11  -2
src/framework/program/program-optimize/node.cpp  +0  -17
src/framework/program/program-optimize/node.h  +0  -3
src/framework/program/program-optimize/program_optimize.cpp  +2  -3
src/framework/program/program.h  +1  -0
src/io/executor.cpp  +19  -5
src/io/loader.cpp  +10  -6
src/io/loader.h  +4  -1
src/io/paddle_mobile.cpp  +5  -4
src/io/paddle_mobile.h  +6  -2
src/memory/t_malloc.cpp  +24  -0
src/operators/batchnorm_op.cpp  +1  -1
src/operators/box_coder_op.cpp  +1  -1
src/operators/concat_op.cpp  +0  -1
src/operators/conv_op.cpp  +0  -2
src/operators/depthwise_conv_op.cpp  +0  -2
src/operators/dropout_op.cpp  +1  -1
src/operators/elementwise_add_op.cpp  +1  -1
src/operators/feed_op.cpp  +1  -4
src/operators/fetch_op.cpp  +1  -4
src/operators/fusion_conv_add.cpp  +0  -1
src/operators/fusion_conv_add.h  +0  -2
src/operators/fusion_conv_add_bn_relu_op.cpp  +1  -1
src/operators/fusion_conv_add_bn_relu_op.h  +0  -2
src/operators/fusion_conv_bn_relu_op.cpp  +60  -0
src/operators/fusion_conv_bn_relu_op.h  +103  -0
src/operators/fusion_dwconv_bn_relu_op.cpp  +60  -0
src/operators/fusion_dwconv_bn_relu_op.h  +109  -0
src/operators/fusion_fc_op.cpp  +0  -1
src/operators/im2sequence_op.cpp  +0  -2
src/operators/kernel/arm/conv_add_bn_relu_kernel.cpp  +1  -1
src/operators/kernel/arm/conv_bn_relu_kernel.cpp  +68  -0
src/operators/kernel/arm/dwconv_bn_relu_kernel.cpp  +65  -0
src/operators/kernel/central-arm-func/batchnorm_arm_func.h  +35  -1
src/operators/kernel/central-arm-func/conv_add_bn_relu_arm_func.h  +4  -7
src/operators/kernel/central-arm-func/conv_bn_relu_arm_func.h  +139  -0
src/operators/kernel/central-arm-func/dwconv_bn_relu_arm_func.h  +137  -0
src/operators/kernel/central-arm-func/pool_arm_func.h  +7  -2
src/operators/kernel/central-arm-func/sigmoid_arm_func.h  +1  -0
src/operators/kernel/conv_bn_relu_kernel.h  +45  -0
src/operators/kernel/dwconv_bn_relu_kernel.h  +45  -0
src/operators/lrn_op.cpp  +1  -1
src/operators/math/depthwise_conv_3x3.cpp  +29  -27
src/operators/math/gemm.cpp  +784  -481
src/operators/math/gemm.h  +6  -1
src/operators/math/im2col.cpp  +2  -2
src/operators/math/math_function.cpp  +3  -2
src/operators/math/math_function.h  +2  -1
src/operators/math/pool_2x2.cpp  +11  -11
src/operators/math/pool_3x3.cpp  +19  -17
src/operators/math/softmax.cpp  +1  -0
src/operators/mul_op.cpp  +1  -1
src/operators/multiclass_nms_op.cpp  +1  -1
src/operators/op_param.h  +160  -1
src/operators/pool_op.cpp  +1  -1
src/operators/prelu_op.cpp  +1  -1
src/operators/prior_box_op.cpp  +1  -1
src/operators/relu_op.cpp  +1  -1
src/operators/reshape_op.cpp  +1  -1
src/operators/resize_op.cpp  +1  -1
src/operators/scale_op.cpp  +1  -1
src/operators/sigmoid_op.cpp  +1  -1
src/operators/slice_op.cpp  +1  -1
src/operators/softmax_op.cpp  +1  -1
src/operators/transpose_op.cpp  +1  -1
test/framework/test_load.cpp  +3  -1
test/net/test_googlenet.cpp  +1  -1
test/net/test_mobilenet+ssd.cpp  +9  -4
test/net/test_mobilenet.cpp  +4  -3
test/test_helper.h  +7  -3
tools/build.sh  +10  -4
tools/ios-cmake/ios.toolchain.cmake  +0  -1
tools/op.cmake  +40  -0
tools/quantification/CMakeLists.txt  +5  -0
tools/quantification/convert.cpp  +202  -0
CMakeLists.txt

-cmake_minimum_required(VERSION 3.0)
+cmake_minimum_required(VERSION 3.6)
 project(paddle-mobile)

 option(DEBUGING "enable debug mode" ON)
-option(USE_OPENMP "openmp support" ON)
+option(USE_OPENMP "openmp support" OFF)
 option(USE_EXCEPTION "use std exception" ON)
 option(LOG_PROFILE "log profile" ON)
 # select the platform to build
 option(CPU "armv7 with neon" ON)
 option(MALI_GPU "mali gpu" OFF)
 option(FPGA "fpga" OFF)
+option(QUANTI "quantification" OFF)

 file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
 file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
 include_directories(src/)

 if (IS_IOS)
-  set(CMAKE_CXX_FLAGS "-fobjc-abi-version=2 -fobjc-arc -std=gnu++11 -stdlib=libc++ -O3 -s -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}")
+  set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc -std=gnu++11 -stdlib=libc++ -O3 -s -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}")
 else()
   set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}")
 endif()
...
@@ -43,7 +44,7 @@ if (LOG_PROFILE)
   add_definitions(-DPADDLE_MOBILE_PROFILE)
 endif()

-if(USE_OPENMP)
+if(USE_OPENMP AND NOT IS_IOS)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
   add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
 endif()
...
@@ -104,12 +105,21 @@ else()
   foreach(f ${_tmp_list_h})
     list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
   endforeach()
 endif()

+file(GLOB_RECURSE _tmp_list src/fpga/*.cpp src/fpga/*.cc)
+foreach(f ${_tmp_list})
+  list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
+endforeach()
+
+file(GLOB_RECURSE _tmp_list_h src/fpga/*.h)
+foreach(f ${_tmp_list_h})
+  list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
+endforeach()
 endif()

 if (ANDROID_NDK_TOOLCHAIN_INCLUDED)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -llog")
   add_definitions(-DARMV7)
 else()
   list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/paddle_mobile_jni.h)
   list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/paddle_mobile_jni.cpp)
...
@@ -131,7 +141,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
 # NET default
 set(NET "defult" CACHE STRING "select net type")
-set_property(CACHE NET PROPERTY STRINGS "defult" "googlenet" "mobilenet" "yolo" "squeezenet")
+set_property(CACHE NET PROPERTY STRINGS "defult" "googlenet" "mobilenet" "yolo" "squeezenet" "FPGAnets")

 include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake")
...
@@ -153,3 +163,7 @@ if(DEBUGING)
   endif()
 endif()
+
+if(QUANTI)
+  add_subdirectory(tools/quantification)
+endif()
README.md

...
@@ -27,10 +27,10 @@ Paddle-Mobile is a project under the PaddlePaddle organization, dedicated to embedded platforms.
 - **ARM CPU**

-![](http://7xop3k.com1.z0.glb.clouddn.com/15312108766575.jpg)
+![](http://mms-graph.bj.bcebos.com/paddle-mobile%2F2018_07_18.png)

 The ARM CPU is paddle-mobile's primary target, and the CPU's generality has always been its advantage. Embedded deep learning needs a large amount of hand-written CPU assembly; we are coding at full speed to extract every bit of acceleration the hardware offers.

-ARM CPU optimization is still in progress; so far only conventional CPU optimizations are applied. On an ARM A73, paddle-mobile arm-v7 currently runs one single-core MobileNet 1.0 inference in 120+ ms. This is clearly not our final goal: we are rewriting hot paths in assembly, so there is still large headroom. Only armv7 is supported for now; armv8 support will follow.
+ARM CPU optimization is still in progress; so far only conventional CPU optimizations are applied. On an ARM A73, paddle-mobile arm-v7 currently runs one single-core MobileNet 1.0 inference in 110+ ms. This is clearly not our final goal: we are rewriting hot paths in assembly, so there is still large headroom. Only armv7 is supported for now; armv8 support will follow.

 - **Mali GPU**
...
src/common/types.cpp

...
@@ -24,6 +24,8 @@ const std::string G_OP_TYPE_CONCAT = "concat";
 const std::string G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
 const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU = "fusion_conv_add_relu";
 const std::string G_OP_TYPE_FUSION_CONV_ADD_BN_RELU = "fusion_conv_add_bn_relu";
+const std::string G_OP_TYPE_FUSION_DWCONV_BN_RELU = "fusion_dwconv_bn_relu";
+const std::string G_OP_TYPE_FUSION_CONV_BN_RELU = "fusion_conv_bn_relu";
 const std::string G_OP_TYPE_FC = "fusion_fc";
 const std::string G_OP_TYPE_FUSION_CONV_ADD = "fusion_conv_add";
 const std::string G_OP_TYPE_LRN = "lrn";
...
@@ -42,11 +44,21 @@ const std::string G_OP_TYPE_FETCH = "fetch";
 const std::string G_OP_TYPE_DEPTHWISE_CONV = "depthwise_conv2d";
 const std::string G_OP_TYPE_IM2SEQUENCE = "im2sequence";
 const std::string G_OP_TYPE_DROPOUT = "dropout";
+const std::string G_OP_TYPE_FUSION_CONV_RELU = "fusion_conv_relu";
+const std::string G_OP_TYPE_FUSION_CONV_BN_SCALE = "fusion_conv_bn_scale";
+const std::string G_OP_TYPE_FUSION_CONV_BN_SCALE_RELU = "fusion_conv_bn_scale_relu";
+const std::string G_OP_TYPE_FUSION_POOL_BN = "fusion_pool_bn";
+const std::string G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU = "fusion_elementwise_add_relu";
+const std::string G_OP_TYPE_REGION = "region";

 std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
     op_input_output_key = {
         {G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
+        {G_OP_TYPE_FUSION_DWCONV_BN_RELU, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_CONV_BN_RELU, {{"Input"}, {"Out"}}},
         {G_OP_TYPE_FUSION_CONV_ADD, {{"Input"}, {"Out"}}},
         {G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
         {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
...
@@ -70,6 +82,12 @@ std::unordered_map<
         {G_OP_TYPE_DEPTHWISE_CONV, {{"Input"}, {"Output"}}},
         {G_OP_TYPE_FUSION_CONV_ADD_RELU, {{"Input"}, {"Out"}}},
         {G_OP_TYPE_IM2SEQUENCE, {{"X"}, {"Out"}}},
-        {G_OP_TYPE_DROPOUT, {{"X"}, {"Out"}}}};
+        {G_OP_TYPE_DROPOUT, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_CONV_RELU, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_CONV_BN_SCALE, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_CONV_BN_SCALE_RELU, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_POOL_BN, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU, {{"X", "Y"}, {"Out"}}},
+        {G_OP_TYPE_REGION, {{"X"}, {"Out"}}}};

 }  // namespace paddle_mobile
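The registry above is what maps an op type string to the variable keys the framework reads from and writes to. As a minimal sketch of a lookup (the helper name PrintOpKeys and the include path are illustrative, not part of the commit):

// Sketch: look up the input/output variable keys registered for an op type.
#include <iostream>
#include <string>
#include "common/types.h"

void PrintOpKeys(const std::string &op_type) {
  auto it = paddle_mobile::op_input_output_key.find(op_type);
  if (it == paddle_mobile::op_input_output_key.end()) {
    std::cout << op_type << " is not registered\n";
    return;
  }
  for (const auto &in : it->second.first) std::cout << "input key: " << in << "\n";
  for (const auto &out : it->second.second) std::cout << "output key: " << out << "\n";
}
// PrintOpKeys("fusion_conv_bn_relu");  // -> input key: Input / output key: Out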
src/common/types.h

...
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <string>
 #include <unordered_map>
 #include <utility>
+#include <vector>

 namespace paddle_mobile {
...
@@ -81,6 +82,8 @@ extern const std::string G_OP_TYPE_FUSION_CONV_ADD_RELU;
 extern const std::string G_OP_TYPE_FC;
 extern const std::string G_OP_TYPE_FUSION_CONV_ADD;
 extern const std::string G_OP_TYPE_FUSION_CONV_ADD_BN_RELU;
+extern const std::string G_OP_TYPE_FUSION_DWCONV_BN_RELU;
+extern const std::string G_OP_TYPE_FUSION_CONV_BN_RELU;
 extern const std::string G_OP_TYPE_LRN;
 extern const std::string G_OP_TYPE_MUL;
...
@@ -99,6 +102,13 @@ extern const std::string G_OP_TYPE_DEPTHWISE_CONV;
 extern const std::string G_OP_TYPE_IM2SEQUENCE;
 extern const std::string G_OP_TYPE_DROPOUT;
+extern const std::string G_OP_TYPE_FUSION_CONV_RELU;
+extern const std::string G_OP_TYPE_FUSION_CONV_BN_SCALE;
+extern const std::string G_OP_TYPE_FUSION_CONV_BN_SCALE_RELU;
+extern const std::string G_OP_TYPE_FUSION_POOL_BN;
+extern const std::string G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU;
+extern const std::string G_OP_TYPE_REGION;

 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
     op_input_output_key;
...
src/common/variant.h

...
@@ -84,7 +84,7 @@ struct Variant {
     if (type_id == typeid(T).hash_code()) {
       return *const_cast<T *>(reinterpret_cast<const T *>(&data));
     } else {
-      PADDLE_MOBILE_THROW_EXCEPTION(" bad cast in variant ");
+      PADDLE_MOBILE_THROW_EXCEPTION(" bad cast in variant");
       exit(0);
     }
   }
...
src/fpga/api/fpga_api.cpp (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>

#include "fpga/api/fpga_api.h"

namespace paddle {
namespace mobile {
namespace fpga {
namespace api {

static int fd = -1;
static const char *device_path = "/dev/fpgadrv0";

static inline int do_ioctl(int req, void *arg) { return ioctl(fd, req, arg); }

int open_device() {
  if (fd == -1) {
    fd = open(device_path, O_RDWR);
  }
  return fd;
}

// memory management;
void *fpga_malloc(size_t size) {
  return reinterpret_cast<void *>(
      mmap64(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0));
}

void fpga_free(void *ptr) { munmap(ptr, 0); }

void fpga_copy(void *dest, const void *src, size_t num) {
  memcpy(dest, src, num);
}

}  // namespace api
}  // namespace fpga
}  // namespace mobile
}  // namespace paddle
src/fpga/api/fpga_api.h (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
   Licensed under the Apache License, Version 2.0. */

#pragma once

#include <cstddef>
#include <iostream>
#include <limits>

// memory management;

namespace paddle {
namespace mobile {
namespace fpga {
namespace api {

int open_device();
int close_device();

void *fpga_malloc(size_t size);
void fpga_free(void *ptr);
void fpga_copy(void *dst, const void *src, size_t num);

struct CnnVersionArgs {
  void *buf;
};

struct QuantArgs {
  float scale;
};

struct BatchNormalizationArgs {
  bool enable;
};

struct ScaleArgs {};

#define IOCTL_CNN_MAGIC 'CNN'
#define IOCTL_VERSION _IOW(IOCTL_CNN_MAGIC, 1, struct CnnVersionArgs)
#define IOCTL_GET_QUANT _IOW(IOCTL_CNN_MAGIC, 2, struct QuantArgs)
#define IOCTL_SET_QUANT _IOW(IOCTL_CNN_MAGIC, 3, struct QuantArgs)

}  // namespace api
}  // namespace fpga
}  // namespace mobile
}  // namespace paddle
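Allocation goes through an mmap of the driver file descriptor, so buffers are shared between the host and the FPGA rather than copied over a DMA API. A rough usage sketch of the interface above (the Demo function and buffer sizes are made up for illustration):

// Sketch: allocate a shared buffer on the FPGA device and copy data into it.
// Uses only the functions declared in fpga_api.h.
#include <cstring>
#include "fpga/api/fpga_api.h"

namespace api = paddle::mobile::fpga::api;

void Demo() {
  if (api::open_device() < 0) return;          // opens /dev/fpgadrv0 once
  float host[16] = {0};
  void *dev = api::fpga_malloc(sizeof(host));  // mmap-backed allocation
  api::fpga_copy(dev, host, sizeof(host));     // plain memcpy into the mapping
  api::fpga_free(dev);                         // munmap
}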
src/framework/operator.cpp

...
@@ -28,6 +28,16 @@ vector<string> OperatorBase<Dtype>::GetOutKeys() const {
   return it->second.second;
 }

+template <typename Dtype>
+vector<string> OperatorBase<Dtype>::GetInputKeys() const {
+  auto it = op_input_output_key.find(type_);
+  if (it == op_input_output_key.end()) {
+    DLOG << type_ << " has no inputs";
+    return {};
+  }
+  return it->second.first;
+}
+
 template <typename Dtype>
 OperatorBase<Dtype>::OperatorBase(const std::string &type,
                                   const VariableNameMap &inputs,
...
@@ -49,6 +59,11 @@ template <typename Dtype>
 void OperatorBase<Dtype>::Run() const {
   RunImpl();
 #ifdef PADDLE_MOBILE_DEBUG
+  vector<string> input_keys = GetInputKeys();
+  for (const auto key : input_keys) {
+    Tensor *input = GetVarValue<framework::LoDTensor>(key, inputs_, *scope_);
+    DLOG << type_ << " input- " << key << "=" << *input;
+  }
   vector<string> output_keys = GetOutKeys();
   for (const auto key : output_keys) {
     Tensor *out_ = GetVarValue<framework::LoDTensor>(key, outputs_, *scope_);
...
src/framework/operator.h

...
@@ -61,6 +61,7 @@ class OperatorBase {
   virtual ~OperatorBase() {}
   void Run() const;
   std::vector<string> GetOutKeys() const;
+  std::vector<string> GetInputKeys() const;
   virtual void RunImpl() const = 0;
   virtual void Init() = 0;
...
@@ -118,6 +119,10 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
   virtual void InferShape() const = 0;

   void Init() {
+    // for (auto i : this->inputs_) {
+    //   DLOG << i.first;
+    //   DLOG << i.second;
+    // }
     PADDLE_MOBILE_ENFORCE(kernel_.Init(&param_), " %s kernel init failed",
                           this->type_.c_str());
   }
...
@@ -146,7 +151,7 @@ class OpKernelBase {
   }
 #endif
   virtual void Compute(const P &para) const = 0;
-  virtual bool Init(P *para) { return true; };
+  virtual bool Init(P *para) { return true; }
   virtual ~OpKernelBase() = default;

  private:
...
src/framework/program/program-optimize/fusion_op_register.h

...
@@ -42,8 +42,17 @@ class FusionOpRegister {
     matchers_[matcher->Type()] = shared_matcher;
   }

-  const std::map<std::string, std::shared_ptr<FusionOpMatcher>> Matchers() {
-    return matchers_;
+  const std::vector<std::shared_ptr<FusionOpMatcher>> Matchers() {
+    std::vector<std::shared_ptr<FusionOpMatcher>> matchers;
+    for (const auto &match : matchers_) {
+      matchers.push_back(match.second);
+    }
+    std::sort(matchers.begin(), matchers.end(),
+              [](std::shared_ptr<FusionOpMatcher> first,
+                 std::shared_ptr<FusionOpMatcher> second) {
+                return first->BeginNode().Depth() > second->BeginNode().Depth();
+              });
+    return matchers;
   }

  private:
...
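Returning the matchers sorted by descending BeginNode().Depth() makes the optimizer try the longest fusion pattern first, so a conv+add+bn+relu chain is claimed by the four-op matcher before a shorter matcher such as conv+add can grab its prefix. A standalone illustration of the ordering predicate (stub Matcher type; the real code sorts std::shared_ptr<FusionOpMatcher> the same way):

// Sketch: deeper fusion patterns must be matched first.
#include <algorithm>
#include <iostream>
#include <vector>

struct Matcher { const char *name; int depth; };

int main() {
  std::vector<Matcher> matchers = {{"fusion_conv_add", 2},
                                   {"fusion_conv_add_bn_relu", 4},
                                   {"fusion_conv_bn_relu", 3}};
  std::sort(matchers.begin(), matchers.end(),
            [](const Matcher &a, const Matcher &b) { return a.depth > b.depth; });
  for (const auto &m : matchers) std::cout << m.name << "\n";
  // prints: fusion_conv_add_bn_relu, fusion_conv_bn_relu, fusion_conv_add
}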
src/framework/program/program-optimize/node.cpp

...
@@ -44,23 +44,6 @@ bool Node::operator==(const Node &in) {
   return true;
 }

-std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs(int size) {
-  std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
-  OpDescs(size - 1, &op_descs);
-  return op_descs;
-}
-
-void Node::OpDescs(int index,
-                   std::vector<std::shared_ptr<framework::OpDesc>> *op_desc) {
-  if (index == 0) {
-    return;
-  }
-  op_desc->push_back(this->op_desc_);
-  for (auto &output : outputs_) {
-    output->OpDescs(index, op_desc);
-  }
-}
-
 std::shared_ptr<Node> Node::To(int size) {
   std::shared_ptr<Node> node = std::make_shared<Node>();
   this->To(size - 1, node);
...
src/framework/program/program-optimize/node.h

...
@@ -47,13 +47,10 @@ class Node {
       std::map<std::string, std::vector<std::pair<std::string, std::string>>>
           change,
       std::vector<std::shared_ptr<Node>> *removed_nodes);
-  std::vector<std::shared_ptr<framework::OpDesc>> OpDescs(int size);
   std::shared_ptr<framework::OpDesc> OpDescOfNode() { return op_desc_; }
   std::string Type() { return type_; }

  private:
-  void OpDescs(int size,
-               std::vector<std::shared_ptr<framework::OpDesc>> *op_desc);
   void To(int index, std::shared_ptr<Node>);
   void Folder(std::shared_ptr<framework::OpDesc> op_desc,
...
src/framework/program/program-optimize/program_optimize.cpp

...
@@ -78,9 +78,8 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
   }

   for (auto &registed : FusionOpRegister::Instance()->Matchers()) {
-    std::string fusion_type = registed.first;
-    std::shared_ptr<FusionOpMatcher> matcher = registed.second;
-    // DLOG << " registed node \n " << matcher->BeginNode();
+    std::string fusion_type = registed->Type();
+    std::shared_ptr<FusionOpMatcher> matcher = registed;

     auto match_vector = type_map[matcher->BeginType()];
...
src/framework/program/program.h

...
@@ -30,6 +30,7 @@ class Program {
   std::string model_path;
   std::string para_path;
   bool combined = false;
+  bool quantification = false;

  private:
 };
...
src/io/executor.cpp

...
@@ -154,7 +154,7 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
   tensor->Resize(framework::make_ddim(desc.Dims()));

-  void *memory = tensor;
+  void *memory = nullptr;
   int type_size = 0;
   switch (desc.DataType()) {
     case framework::VARTYPE_TYPE_FP16:
...
@@ -179,11 +179,25 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
     default:
       break;
   }

-  for (int n = 0; n < memory_size * type_size; ++n) {
-    static_cast<char *>(memory)[n] = (*data)[n];
-  }
-  (*data) += (sizeof(char) * memory_size * type_size);
+  if (program_.quantification) {
+    float min_value;
+    float max_value;
+
+    memcpy(&min_value, *data, sizeof(float));
+    memcpy(&max_value, *data + sizeof(float), sizeof(float));
+    *data += 2 * sizeof(float);
+
+    const float factor = (max_value - min_value) / 255.0;
+    uint8_t *uint8_data = (uint8_t *)(*data);
+    for (int k = 0; k < memory_size; ++k) {
+      static_cast<float *>(memory)[k] = uint8_data[k] * factor + min_value;
+    }
+    *data += (memory_size * sizeof(uint8_t));
+  } else {
+    for (int n = 0; n < memory_size * type_size; ++n) {
+      static_cast<char *>(memory)[n] = (*data)[n];
+    }
+    (*data) += (sizeof(char) * memory_size * type_size);
+  }
 }

 template <typename Dtype, Precision P>
...
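The quantified branch above implies a simple storage format per parameter tensor: two float32 values (min, max) followed by memory_size uint8 codes, decoded as value = code * (max - min) / 255 + min. A standalone sketch of just the decode step, with the buffer layout assumed from the code above:

// Sketch: decode one quantified tensor the way LoadMemory does.
// `data` points at [min:float][max:float][codes:uint8 * n] and is advanced past it.
#include <cstdint>
#include <cstring>

void DequantizeTensor(uint8_t **data, float *out, int n) {
  float min_value, max_value;
  std::memcpy(&min_value, *data, sizeof(float));
  std::memcpy(&max_value, *data + sizeof(float), sizeof(float));
  *data += 2 * sizeof(float);
  const float factor = (max_value - min_value) / 255.0f;
  for (int k = 0; k < n; ++k) {
    out[k] = (*data)[k] * factor + min_value;  // uint8 code -> float
  }
  *data += n * sizeof(uint8_t);
}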
src/io/loader.cpp

...
@@ -44,26 +44,29 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
-    const std::string &dirname, bool optimize, bool can_add_split) {
-  auto program =
-      this->LoadProgram(dirname + "/__model__", optimize, can_add_split);
+    const std::string &dirname, bool optimize, bool quantification,
+    bool can_add_split) {
+  auto program = this->LoadProgram(dirname + "/__model__", optimize,
+                                   quantification, can_add_split);
   program.model_path = dirname;
   return program;
 }

 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
-    const std::string &model_path, const std::string &para_path,
-    bool optimize) {
+    const std::string &model_path, const std::string &para_path,
+    bool optimize, bool quantification) {
   auto program = this->LoadProgram(model_path, optimize);

   program.para_path = para_path;
   program.combined = true;
+  program.quantification = quantification;
   return program;
 }

 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
-    const std::string &model_path, bool optimize, bool can_add_split) {
+    const std::string &model_path, bool optimize, bool quantification,
+    bool can_add_split) {
   std::string model_filename = model_path;
   PaddleMobile__Framework__Proto__ProgramDesc *c_program;
   uint8_t *buf = NULL;
...
@@ -82,6 +85,7 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
   framework::Program<Dtype, P> program;
   program.originProgram = originProgramDesc;
+  program.quantification = quantification;

   auto scope = std::make_shared<framework::Scope>();
   program.scope = scope;
...
src/io/loader.h

...
@@ -30,6 +30,7 @@ class Loader {
   * */
  const framework::Program<Dtype, P> Load(const std::string &dirname,
                                          bool optimize = false,
+                                         bool quantification = false,
                                          bool can_add_split = false);

  /*
...
@@ -38,11 +39,13 @@ class Loader {
   * */
  const framework::Program<Dtype, P> Load(const std::string &model_path,
                                          const std::string &para_path,
-                                         bool optimize = false);
+                                         bool optimize = false,
+                                         bool quantification = false);

 private:
  const framework::Program<Dtype, P> LoadProgram(const std::string &model_path,
                                                 bool optimize = false,
+                                                bool quantification = false,
                                                 bool can_add_split = false);
 };
...
src/io/paddle_mobile.cpp

...
@@ -26,7 +26,7 @@ void PaddleMobile<Dtype, P>::SetThreadNum(int num) {
 template <typename Dtype, Precision P>
 bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
-                                  int batch_size) {
+                                  bool quantification, int batch_size) {
   if (loader_.get() == nullptr) {
     loader_ = std::make_shared<Loader<Dtype, P>>();
   } else {
...
@@ -35,7 +35,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
   if (executor_.get() == nullptr) {
     executor_ = std::make_shared<Executor<Dtype, P>>(
-        loader_->Load(dirname, optimize), batch_size, optimize);
+        loader_->Load(dirname, optimize, quantification), batch_size, optimize);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
...
@@ -46,7 +46,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
 template <typename Dtype, Precision P>
 bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
                                   const std::string &para_path, bool optimize,
-                                  int batch_size) {
+                                  bool quantification, int batch_size) {
   if (loader_.get() == nullptr) {
     loader_ = std::make_shared<Loader<Dtype, P>>();
   } else {
...
@@ -55,7 +55,8 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
   if (executor_.get() == nullptr) {
     executor_ = std::make_shared<Executor<Dtype, P>>(
-        loader_->Load(model_path, para_path, optimize), batch_size, optimize);
+        loader_->Load(model_path, para_path, optimize, quantification),
+        batch_size, optimize);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
...
src/io/paddle_mobile.h

...
@@ -39,14 +39,18 @@ class PaddleMobile {
   * @b load the separate-files fluid model
   * */
  bool Load(const std::string &dirname, bool optimize = false,
-           int batch_size = 1);
+           bool quantification = false, int batch_size = 1);

  /*
   * @b load the combined-format fluid model
   * */
  bool Load(const std::string &model_path, const std::string &para_path,
-           bool optimize = false, int batch_size = 1);
+           bool optimize = false, bool quantification = false,
+           int batch_size = 1);

  /*
   * @b set the thread count; takes effect when openmp is enabled in cmake
   * */
  void SetThreadNum(int num);
...
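With the new parameter, callers opt into quantified models at load time. A hedged usage sketch (the model paths are placeholders, and the template defaults for PaddleMobile are assumed rather than shown in this diff):

// Sketch: load a combined-format, quantified model through the updated API.
#include "io/paddle_mobile.h"

int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> pm;  // assumed default Precision
  pm.SetThreadNum(4);
  bool ok = pm.Load("model/__model__", "model/params",
                    /*optimize=*/true, /*quantification=*/true,
                    /*batch_size=*/1);
  return ok ? 0 : 1;
}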
src/memory/t_malloc.cpp

...
@@ -16,10 +16,32 @@ limitations under the License. */
 #include <cstdlib>
 #include <cstring>

+#ifdef PADDLE_MOBILE_FPGA
+#include "fpga/api/fpga_api.h"
+#endif
+
 namespace paddle_mobile {
 namespace memory {
 const int MALLOC_ALIGN = 64;

+#ifdef PADDLE_MOBILE_FPGA
+namespace api = paddle::mobile::fpga::api;
+
+void Copy(void *dst, const void *src, size_t num) {
+  std::memcpy(dst, src, num);
+}
+
+void *Alloc(size_t size) { return api::fpga_malloc(size); }
+
+void Free(void *ptr) {
+  if (ptr) {
+    api::fpga_free(ptr);
+  }
+}
+
+#else
 void Copy(void *dst, const void *src, size_t num) {
   std::memcpy(dst, src, num);
 }
...
@@ -42,5 +64,7 @@ void Free(void *ptr) {
   }
 }
+#endif

 }  // namespace memory
 }  // namespace paddle_mobile
src/operators/batchnorm_op.cpp

...
@@ -26,7 +26,7 @@ void BatchNormOp<Dtype, T>::InferShape() const {
   auto x_dims = this->param_.InputX()->dims();
   this->param_.OutputY()->Resize(x_dims);
 }
 template class BatchNormOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...

src/operators/box_coder_op.cpp

...
@@ -47,7 +47,7 @@ void BoxCoderOp<Dtype, T>::InferShape() const {
   this->param_.OutputBox()->Resize(framework::make_ddim(
       {input_targetbox_dims[0], input_priorbox_dims[0], 4}));
 }
 template class BoxCoderOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...

src/operators/concat_op.cpp

...
@@ -56,7 +56,6 @@ void ConcatOp<Dtype, T>::InferShape() const {
   this->param_.Out()->Resize(out_dims);
 }
-template class ConcatOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...

src/operators/conv_op.cpp

...
@@ -48,8 +48,6 @@ void ConvOp<Dtype, T>::InferShape() const {
   this->param_.Output()->Resize(ddim);
 }
-template class ConvOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...

src/operators/depthwise_conv_op.cpp

...
@@ -49,8 +49,6 @@ void DepthwiseConvOp<Dtype, T>::InferShape() const {
   this->param_.Output()->Resize(ddim);
 }
-template class DepthwiseConvOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...

src/operators/dropout_op.cpp

...
@@ -22,7 +22,7 @@ void DropoutOp<Dtype, T>::InferShape() const {
   auto input_dims = this->param_.InputX()->dims();
   this->param_.Out()->Resize(input_dims);
 }
 template class DropoutOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...

src/operators/elementwise_add_op.cpp

...
@@ -24,7 +24,7 @@ void ElementwiseAddOp<Dtype, T>::InferShape() const {
   auto x_dim = this->param_.InputX()->dims();
   this->param_.Out()->Resize(x_dim);
 }
 template class ElementwiseAddOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...

src/operators/feed_op.cpp

...
@@ -14,10 +14,7 @@ limitations under the License. */
 #include "feed_op.h"
 namespace paddle_mobile {
-namespace operators {
-template class FeedOp<CPU, float>;
-}
+namespace operators {}
 }  // namespace paddle_mobile
 namespace ops = paddle_mobile::operators;
...

src/operators/fetch_op.cpp

...
@@ -14,10 +14,7 @@ limitations under the License. */
 #include "fetch_op.h"
 namespace paddle_mobile {
-namespace operators {
-template class FetchOp<CPU, float>;
-}
+namespace operators {}
 }  // namespace paddle_mobile
 namespace ops = paddle_mobile::operators;
...

src/operators/fusion_conv_add.cpp

...
@@ -45,7 +45,6 @@ void FusionConvAddOp<Dtype, T>::InferShape() const {
   this->param_.Output()->Resize(ddim);
 }
-template class FusionConvAddOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...

src/operators/fusion_conv_add.h

...
@@ -36,8 +36,6 @@ class FusionConvAddMatcher : public framework::FusionOpMatcher {
   void FolderNodes(
       framework::Node *node,
       std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
-    vector<std::shared_ptr<framework::OpDesc>> origin_descs =
-        node->OpDescs(node_.Depth());
     node->Folder(node_.Depth(), Type(),
                  {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}}}, removed_nodes);
   }
...

src/operators/fusion_conv_add_bn_relu_op.cpp

...
@@ -44,7 +44,7 @@ void FusionConvAddBNReluOp<Dtype, T>::InferShape() const {
   framework::DDim ddim = framework::make_ddim(output_shape);
   this->param_.Output()->Resize(ddim);
 }
 template class FusionConvAddBNReluOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...

src/operators/fusion_conv_add_bn_relu_op.h

...
@@ -39,8 +39,6 @@ class FusionConvAddBNReluMatcher : public framework::FusionOpMatcher {
   void FolderNodes(
       framework::Node *node,
       std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
-    vector<std::shared_ptr<framework::OpDesc>> origin_descs =
-        node->OpDescs(node_.Depth());
     node->Folder(node_.Depth(), Type(),
                  {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}},
                   {G_OP_TYPE_BATCHNORM,
...
src/operators/fusion_conv_bn_relu_op.cpp (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
   Licensed under the Apache License, Version 2.0. */

#ifdef FUSION_CONVBNRELU_OP

#include "operators/fusion_conv_bn_relu_op.h"
#include "operators/math/conv_func.h"

namespace paddle_mobile {
namespace operators {

template <typename Dtype, typename T>
void FusionConvBNReluOp<Dtype, T>::InferShape() const {
  auto in_dims = this->param_.Input()->dims();
  auto filter_dims = this->param_.Filter()->dims();
  const std::vector<int> &strides = this->param_.Strides();
  std::vector<int> paddings = this->param_.Paddings();
  int groups = this->param_.Groups();
  std::vector<int> dilations = this->param_.Dilations();

  PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
                         dilations.size() == paddings.size() &&
                         paddings.size() == strides.size()),
                        "ConvParam is not suitable");

  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
  for (size_t i = 0; i < strides.size(); ++i) {
    output_shape.push_back(math::ConvOutputSize(in_dims[i + 2],
                                                filter_dims[i + 2],
                                                dilations[i], paddings[i],
                                                strides[i]));
  }

  framework::DDim ddim = framework::make_ddim(output_shape);
  this->param_.Output()->Resize(ddim);
}

}  // namespace operators
}  // namespace paddle_mobile

namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_conv_bn_relu, ops::FusionConvBNReluOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif

#endif
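InferShape leans on math::ConvOutputSize for each spatial axis. For the usual definition of a dilated convolution (stated here for reference, not quoted from the source), the output size is

\[ O_i = \left\lfloor \frac{I_i + 2p_i - d_i\,(k_i - 1) - 1}{s_i} \right\rfloor + 1 \]

with input size \(I_i\), padding \(p_i\), dilation \(d_i\), filter size \(k_i\), and stride \(s_i\) along axis \(i\); the integer division in C++ supplies the floor.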
src/operators/fusion_conv_bn_relu_op.h (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
   Licensed under the Apache License, Version 2.0. */

#ifdef FUSION_CONVBNRELU_OP

#pragma once

#include <string>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/conv_bn_relu_kernel.h"
#include "operators/op_param.h"

namespace paddle_mobile {
namespace operators {
using std::string;
using std::vector;

class FusionConvBNReluMatcher : public framework::FusionOpMatcher {
 public:
  FusionConvBNReluMatcher() {
    node_ = framework::Node(G_OP_TYPE_CONV);
    node_ > std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM) >
        std::make_shared<framework::Node>(G_OP_TYPE_RELU);
  }

  void FolderNodes(
      framework::Node *node,
      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
    node->Folder(node_.Depth(), Type(),
                 {{G_OP_TYPE_BATCHNORM,
                   {{"Scale", "Scale"},
                    {"Mean", "Mean"},
                    {"Bias", "Bias"},
                    {"Variance", "Variance"}}}},
                 removed_nodes);
  }

  std::string Type() { return G_OP_TYPE_FUSION_CONV_BN_RELU; }
};

template <typename DeviceType, typename T>
class FusionConvBNReluOp
    : public framework::OperatorWithKernel<
          DeviceType, FusionConvBNReluParam,
          operators::ConvBNReluKernel<DeviceType, T>> {
 public:
  FusionConvBNReluOp(const string &type, const VariableNameMap &inputs,
                     const VariableNameMap &outputs,
                     const framework::AttributeMap &attrs,
                     std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<
            DeviceType, FusionConvBNReluParam,
            operators::ConvBNReluKernel<DeviceType, T>>(type, inputs, outputs,
                                                        attrs, scope) {}

  using framework::OperatorWithKernel<
      DeviceType, FusionConvBNReluParam,
      operators::ConvBNReluKernel<DeviceType, T>>::OperatorWithKernel;

  void InferShape() const override;

 protected:
};

#ifdef PADDLE_MOBILE_CPU
#ifndef FUSION_CONV_BN_RELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_bn_relu_registrar(
    new FusionConvBNReluMatcher());
#define FUSION_CONV_BN_RELU_REGISTER
#endif
#endif

#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif

}  // namespace operators
}  // namespace paddle_mobile

#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(fusion_conv_bn_relu);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif

#endif
src/operators/fusion_dwconv_bn_relu_op.cpp (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
   Licensed under the Apache License, Version 2.0. */

#ifdef FUSION_DWCONVBNRELU_OP

#include "operators/fusion_dwconv_bn_relu_op.h"
#include "operators/math/conv_func.h"

namespace paddle_mobile {
namespace operators {

template <typename Dtype, typename T>
void FusionDWConvBNReluOp<Dtype, T>::InferShape() const {
  auto in_dims = this->param_.Input()->dims();
  auto filter_dims = this->param_.Filter()->dims();
  const std::vector<int> &strides = this->param_.Strides();
  std::vector<int> paddings = this->param_.Paddings();
  int groups = this->param_.Groups();
  std::vector<int> dilations = this->param_.Dilations();

  PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
                         dilations.size() == paddings.size() &&
                         paddings.size() == strides.size()),
                        "ConvParam is not suitable");

  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
  for (size_t i = 0; i < strides.size(); ++i) {
    output_shape.push_back(math::ConvOutputSize(in_dims[i + 2],
                                                filter_dims[i + 2],
                                                dilations[i], paddings[i],
                                                strides[i]));
  }

  framework::DDim ddim = framework::make_ddim(output_shape);
  this->param_.Output()->Resize(ddim);
}

}  // namespace operators
}  // namespace paddle_mobile

namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_dwconv_bn_relu, ops::FusionDWConvBNReluOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif

#endif
src/operators/fusion_dwconv_bn_relu_op.h (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
   Licensed under the Apache License, Version 2.0. */

#ifdef FUSION_DWCONVBNRELU_OP

#pragma once

#include <string>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "op_param.h"
#include "operators/kernel/dwconv_bn_relu_kernel.h"

namespace paddle_mobile {
namespace operators {
using std::string;
using std::vector;

class FusionDWConvBNReluMatcher : public framework::FusionOpMatcher {
 public:
  FusionDWConvBNReluMatcher() {
    node_ = framework::Node(G_OP_TYPE_DEPTHWISE_CONV);
    node_ > std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM) >
        std::make_shared<framework::Node>(G_OP_TYPE_RELU);
  }

  void FolderNodes(
      framework::Node *node,
      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
    node->Folder(node_.Depth(), Type(),
                 {{G_OP_TYPE_BATCHNORM,
                   {{"Scale", "Scale"},
                    {"Mean", "Mean"},
                    {"Bias", "Bias"},
                    {"Variance", "Variance"}}}},
                 removed_nodes);
  }

  std::string Type() { return G_OP_TYPE_FUSION_DWCONV_BN_RELU; }
};

template <typename DeviceType, typename T>
class FusionDWConvBNReluOp
    : public framework::OperatorWithKernel<
          DeviceType, FusionDWConvBNReluParam,
          operators::DWConvBNReluKernel<DeviceType, T>> {
 public:
  FusionDWConvBNReluOp(const string &type, const VariableNameMap &inputs,
                       const VariableNameMap &outputs,
                       const framework::AttributeMap &attrs,
                       std::shared_ptr<framework::Scope> scope)
      : framework::OperatorWithKernel<
            DeviceType, FusionDWConvBNReluParam,
            operators::DWConvBNReluKernel<DeviceType, T>>(type, inputs, outputs,
                                                          attrs, scope) {}

  using framework::OperatorWithKernel<
      DeviceType, FusionDWConvBNReluParam,
      operators::DWConvBNReluKernel<DeviceType, T>>::OperatorWithKernel;

  void InferShape() const override;

 protected:
};

#ifdef PADDLE_MOBILE_CPU
#ifndef FUSION_DWCONV_BN_RELU_REGISTER
static framework::FusionOpRegistrar fusion_dwconv_bn_relu_registrar(
    new FusionDWConvBNReluMatcher());
#define FUSION_DWCONV_BN_RELU_REGISTER
#endif
#endif

#ifdef PADDLE_MOBILE_MALI_GPU
#ifndef FUSION_DWCONV_BN_RELU_REGISTER
static framework::FusionOpRegistrar fusion_dwconv_bn_relu_registrar(
    new FusionDWConvBNReluMatcher());
#define FUSION_DWCONV_BN_RELU_REGISTER
#endif
#endif

#ifdef PADDLE_MOBILE_FPGA
#endif

}  // namespace operators
}  // namespace paddle_mobile

#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(fusion_dwconv_bn_relu);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif

#endif
src/operators/fusion_fc_op.cpp

...
@@ -50,7 +50,6 @@ void FusionFcOp<Dtype, T>::InferShape() const {
   this->param_.Out()->Resize(ddim);
 }
-template class FusionFcOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...

src/operators/im2sequence_op.cpp

...
@@ -47,8 +47,6 @@ void Im2SequenceOp<Dtype, T>::InferShape() const {
   this->param_.Output()->Resize(ddim);
 }
-template class Im2SequenceOp<CPU, float>;
 }  // namespace operators
 }  // namespace paddle_mobile
...

src/operators/kernel/arm/conv_add_bn_relu_kernel.cpp

...
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef FUSION_CONVADDBNRELU_OP

 #include "operators/kernel/conv_add_bn_relu_kernel.h"
-#include "operators/kernel/central-arm-func/conv_add_bn_relu_func.h"
+#include "operators/kernel/central-arm-func/conv_add_bn_relu_arm_func.h"

 namespace paddle_mobile {
 namespace operators {
...
src/operators/kernel/arm/conv_bn_relu_kernel.cpp (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
   Licensed under the Apache License, Version 2.0. */

#ifdef FUSION_CONVBNRELU_OP

#include "operators/kernel/conv_bn_relu_kernel.h"
#include "operators/kernel/central-arm-func/conv_bn_relu_arm_func.h"

namespace paddle_mobile {
namespace operators {

template <>
bool ConvBNReluKernel<CPU, float>::Init(FusionConvBNReluParam *param) {
  const Tensor *mean = param->InputMean();
  const Tensor *variance = param->InputVariance();
  const Tensor *scale = param->InputScale();
  const Tensor *bias = param->InputBias();
  const float epsilon = param->Epsilon();

  // DLOG << "variance: " << *variance;

  auto mean_ptr = mean->data<float>();
  auto variance_ptr = variance->data<float>();
  auto scale_ptr = scale->data<float>();
  auto bias_ptr = bias->data<float>();

  const int C = mean->numel();
  float inv_std_ptr[C];
  for (int i = 0; i < C; i++) {
    inv_std_ptr[i] =
        1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
  }
  Tensor *new_scale = new Tensor();
  Tensor *new_bias = new Tensor();
  auto new_scale_ptr = new_scale->mutable_data<float>({C});
  auto new_bias_ptr = new_bias->mutable_data<float>({C});
  for (int i = 0; i < C; i++) {
    new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
    new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
  }
  param->SetNewScale(new_scale);
  param->SetNewBias(new_bias);
  return true;
}

template <>
void ConvBNReluKernel<CPU, float>::Compute(
    const FusionConvBNReluParam &param) const {
  ConvBNReluCompute<float>(param);
}
template class ConvBNReluKernel<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile

#endif
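Init folds the batch-norm parameters into a single per-channel affine (new_scale, new_bias) once, so Compute only has to do a multiply-add after the convolution. A self-contained sketch checking that the folded form matches the textbook formula (epsilon and the sample values are made up):

// Sketch: folded BN (new_scale, new_bias) equals the textbook batch norm.
#include <cassert>
#include <cmath>

int main() {
  const float eps = 1e-5f;
  float x = 0.7f, mean = 0.2f, var = 0.9f, scale = 1.3f, bias = -0.1f;
  float inv_std = 1.0f / std::sqrt(var + eps);
  float new_scale = inv_std * scale;
  float new_bias = bias - mean * inv_std * scale;
  float bn = (x - mean) * inv_std * scale + bias;  // textbook form
  float folded = x * new_scale + new_bias;         // folded form
  assert(std::fabs(bn - folded) < 1e-6f);
  return 0;
}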
src/operators/kernel/arm/dwconv_bn_relu_kernel.cpp (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
   Licensed under the Apache License, Version 2.0. */

#ifdef FUSION_DWCONVBNRELU_OP

#include "operators/kernel/dwconv_bn_relu_kernel.h"
#include "operators/kernel/central-arm-func/dwconv_bn_relu_arm_func.h"

namespace paddle_mobile {
namespace operators {

template <>
bool DWConvBNReluKernel<CPU, float>::Init(FusionDWConvBNReluParam *param) {
  const Tensor *mean = param->InputMean();
  const Tensor *variance = param->InputVariance();
  const Tensor *scale = param->InputScale();
  const Tensor *bias = param->InputBias();
  const float epsilon = param->Epsilon();

  auto mean_ptr = mean->data<float>();
  auto variance_ptr = variance->data<float>();
  auto scale_ptr = scale->data<float>();
  auto bias_ptr = bias->data<float>();

  const int C = mean->numel();
  float inv_std_ptr[C];
  for (int i = 0; i < C; i++) {
    inv_std_ptr[i] =
        1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
  }
  Tensor *new_scale = new Tensor();
  Tensor *new_bias = new Tensor();
  auto new_scale_ptr = new_scale->mutable_data<float>({C});
  auto new_bias_ptr = new_bias->mutable_data<float>({C});
  for (int i = 0; i < C; i++) {
    new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
    new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
  }
  param->SetNewScale(new_scale);
  param->SetNewBias(new_bias);
  return true;
}

template <>
void DWConvBNReluKernel<CPU, float>::Compute(
    const FusionDWConvBNReluParam &param) const {
  DWConvBNReluCompute<float>(param);
}
template class DWConvBNReluKernel<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile

#endif
src/operators/kernel/central-arm-func/batchnorm_arm_func.h

...
@@ -54,7 +54,40 @@ void BatchnormCompute(const BatchNormParam &param) {
   int HXW = H * W;

-#ifdef ARMV7
+#if __ARM_NEON
+#if __aarch64__
+  float *inv_std_ptr = new float[C];
+  for (int i = 0; i < C; i++) {
+    inv_std_ptr[i] =
+        1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
+  }
+
+  Tensor new_scale;
+  auto new_scale_ptr = new_scale.mutable_data<float>(framework::make_ddim({C}));
+  Tensor new_bias;
+  auto new_bias_ptr = new_bias.mutable_data<float>(framework::make_ddim({C}));
+
+  /// ((x - est_mean) * (inv_var) * scale + bias equal to
+  /// (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
+  for (int i = 0; i < C; i++) {
+    new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
+    new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
+    {
+      for (int n = 0; n < N; n++) {
+        for (int h = 0; h < H; h++) {
+          int tmp_index = n * stride0 + i * stride1 + h * stride2;
+          for (int w = 0; w < W; w++) {
+            int index = tmp_index + w;
+            out_ptr[index] =
+                input_x_ptr[index] * new_scale_ptr[i] + new_bias_ptr[i];
+          }
+        }
+      }
+    }
+  }
+  delete[] inv_std_ptr;
+#else
   if (HXW > 32) {
     int NXC = N * C;
     float *inv_std_ptr = new float[NXC * 4];
...
@@ -229,6 +262,7 @@ void BatchnormCompute(const BatchNormParam &param) {
     delete[] inv_std_ptr;
   }
+#endif
 #else
   float *inv_std_ptr = new float[C];
   for (int i = 0; i < C; i++) {
...
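The two-line comment in the hunk above is the whole trick; written out with running mean \(\hat\mu\), variance \(\hat\sigma^2\), scale \(\gamma\), and bias \(\beta\):

\[ \frac{x - \hat\mu}{\sqrt{\hat\sigma^2 + \varepsilon}}\,\gamma + \beta \;=\; x \cdot \underbrace{\frac{\gamma}{\sqrt{\hat\sigma^2 + \varepsilon}}}_{\texttt{new\_scale}} \;+\; \underbrace{\beta - \frac{\hat\mu\,\gamma}{\sqrt{\hat\sigma^2 + \varepsilon}}}_{\texttt{new\_bias}} \]

so the aarch64 path can apply one multiply-add per element instead of recomputing the normalization inside the loop.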
src/operators/kernel/central-arm-func/conv_add_bn_relu_func.h → src/operators/kernel/central-arm-func/conv_add_bn_relu_arm_func.h (renamed)

...
@@ -15,6 +15,8 @@ limitations under the License. */
 #ifdef FUSION_CONVADDBNRELU_OP

 #pragma once
+
+#include <vector>
 #include "operators/math/depthwise_conv_3x3.h"
 #include "operators/op_param.h"
...
@@ -23,14 +25,9 @@ namespace operators {
 void ConvAddBNReluBasic(const FusionConvAddBNReluParam &param) {
   const Tensor *input = param.Input();
   Tensor filter = *param.Filter();
-  Tensor bias = *param.Bias();
   Tensor new_bias = *param.NewBias();
   Tensor new_scale = *param.NewScale();
-  int axis = param.Axis();
   Tensor *output = param.Output();
-  math::expand_bias(bias, axis, output->dims());
-  output->ShareDataWith(bias);
   int groups = param.Groups();
   std::vector<int> strides = param.Strides();
   std::vector<int> paddings = param.Paddings();
...
@@ -107,7 +104,7 @@ void ConvAddBNReluBasic(const FusionConvAddBNReluParam &param) {
       math::matmulWithBn<float>(filter_slice, false, col_matrix, false,
                                 static_cast<float>(1),
-                                &out_slice, static_cast<float>(0), true,
-                                &new_scale, &new_bias);
+                                &out_slice, static_cast<float>(0), true,
+                                &new_scale, &new_bias, g);
     }
   }
 }
...
@@ -121,7 +118,7 @@ void ConvAddBNReluCompute(const FusionConvAddBNReluParam &param) {
       param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) {
     math::DepthwiseConvAddBNRelu3x3s1p1(param.Input(), param.Filter(),
                                         param.Output(), param.NewScale(),
-                                        param.NewBias(), 1);
+                                        param.NewBias(), true);
   } else if (param.Groups() == param.Input()->dims()[1] &&
              param.Input()->dims()[1] == param.Output()->dims()[1] &&
              param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
...
src/operators/kernel/central-arm-func/conv_bn_relu_arm_func.h
0 → 100644
浏览文件 @
7b3df2f0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVBNRELU_OP
#pragma once
#include <vector>
#include "operators/math/depthwise_conv_3x3.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {

void ConvBNReluBasic(const FusionConvBNReluParam &param) {
  const Tensor *input = param.Input();
  Tensor filter = *param.Filter();
  Tensor new_bias = *param.NewBias();
  Tensor new_scale = *param.NewScale();
  Tensor *output = param.Output();
  int groups = param.Groups();
  std::vector<int> strides = param.Strides();
  std::vector<int> paddings = param.Paddings();
  std::vector<int> dilations = param.Dilations();

  const int batch_size = static_cast<int>(input->dims()[0]);

  std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
  std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
  size_t data_dim = filter_shape_vec.size() - 2;
  std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
  col_shape_vec[0] = input->dims()[1] / groups;
  for (size_t j = 0; j < data_dim; ++j) {
    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
  }
  framework::DDim col_shape(framework::make_ddim(col_shape_vec));
  framework::DDim col_matrix_shape =
      framework::flatten_to_2d(col_shape, data_dim + 1);

  bool is_expand =
      math::IsExpand(filter_shape_vec, strides, paddings, dilations);
  Tensor col;
  Tensor col_matrix;
  if (is_expand) {
    col.mutable_data<float>(col_shape);
    col_matrix.ShareDataWith(col);
    col_matrix.Resize(col_matrix_shape);
  }

  framework::DDim input_shape = framework::slice_ddim(
      input->dims(), 1, static_cast<int>(input->dims().size()));

  framework::DDim filter_matrix_shape = {filter.dims()[0],
                                         filter.numel() / filter.dims()[0]};
  filter.Resize(filter_matrix_shape);
  framework::DDim output_matrix_shape = {
      output->dims()[1],
      output->numel() / (output->dims()[0] * output->dims()[1])};

  // convolution operator: im2col(or vol2col) + gemm
  int in_step = static_cast<int>(input->dims()[1]) / groups;
  int out_step = static_cast<int>(output->dims()[1]) / groups;

  math::Vol2ColFunctor<CPU, float> vol2col;
  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;

  for (int i = 0; i < batch_size; i++) {
    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
    Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);

    for (int g = 0; g < groups; g++) {
      Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);

      if (!is_expand) {
        col.ShareDataWith(in_slice);
        col_matrix.ShareDataWith(col);
        col_matrix.Resize(col_matrix_shape);
      } else if (data_dim == 2U) {
        // im2col
        im2col(in_slice, dilations, strides,
               std::vector<int>{paddings[0], paddings[1], paddings[0],
                                paddings[1]},
               &col);
      } else if (data_dim == 3U) {
        // vol2col
        vol2col(in_slice, dilations, strides, paddings, &col);
      }
      // gemm
      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
      math::matmulWithBn<float>(filter_slice, false, col_matrix, false,
                                static_cast<float>(1), &out_slice,
                                static_cast<float>(0), true, &new_scale,
                                &new_bias, g);
    }
  }
}

template <typename P>
void ConvBNReluCompute(const FusionConvBNReluParam &param) {
  if (param.Groups() == param.Input()->dims()[1] &&
      param.Input()->dims()[1] == param.Output()->dims()[1] &&
      param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
      param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) {
    math::DepthwiseConvAddBNRelu3x3s1p1(param.Input(), param.Filter(),
                                        param.Output(), param.NewScale(),
                                        param.NewBias(), true);
  } else if (param.Groups() == param.Input()->dims()[1] &&
             param.Input()->dims()[1] == param.Output()->dims()[1] &&
             param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
             param.Filter()->dims()[2] == 3 && param.Strides()[0] == 2) {
    //    math::DepthwiseConvAddBNRelu3x3s2p1(param.Input(), param.Filter(),
    //                                        param.Output(), param.NewScale(),
    //                                        param.NewBias(), 1);
    math::DepthwiseConvAddBNRelu3x3s2p1v2(param.Input(), param.Filter(),
                                          param.Output(), param.NewScale(),
                                          param.NewBias(), true);
  } else {
    ConvBNReluBasic(param);
  }
}

}  // namespace operators
}  // namespace paddle_mobile

#endif
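The matmulWithBn epilogue above folds batch normalization into the GEMM via per-channel new_scale/new_bias tensors. A minimal standalone sketch of that folding, assuming the standard inference-time identity y = gamma * (x - mean) / sqrt(var + eps) + beta = new_scale * x + new_bias (the helper name FoldBatchNorm is illustrative, not from this repository):

#include <cmath>
#include <vector>

// Sketch: per-channel folding of batch-norm statistics into the
// (new_scale, new_bias) pair consumed by the fused kernels.
void FoldBatchNorm(const std::vector<float> &gamma,
                   const std::vector<float> &beta,
                   const std::vector<float> &mean,
                   const std::vector<float> &variance, float epsilon,
                   std::vector<float> *new_scale,
                   std::vector<float> *new_bias) {
  new_scale->resize(gamma.size());
  new_bias->resize(gamma.size());
  for (size_t c = 0; c < gamma.size(); ++c) {
    float inv_std = 1.0f / std::sqrt(variance[c] + epsilon);
    (*new_scale)[c] = gamma[c] * inv_std;                     // scales the conv output
    (*new_bias)[c] = beta[c] - mean[c] * gamma[c] * inv_std;  // shifts it
  }
}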
src/operators/kernel/central-arm-func/dwconv_bn_relu_arm_func.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_DWCONVBNRELU_OP
#pragma once
#include <vector>
#include "operators/math/depthwise_conv_3x3.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {

void DWConvBNReluBasic(const FusionDWConvBNReluParam &param) {
  const Tensor *input = param.Input();
  Tensor filter = *param.Filter();
  Tensor new_bias = *param.NewBias();
  Tensor new_scale = *param.NewScale();
  Tensor *output = param.Output();
  int groups = param.Groups();
  std::vector<int> strides = param.Strides();
  std::vector<int> paddings = param.Paddings();
  std::vector<int> dilations = param.Dilations();

  const int batch_size = static_cast<int>(input->dims()[0]);

  std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
  std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
  size_t data_dim = filter_shape_vec.size() - 2;
  std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
  col_shape_vec[0] = input->dims()[1] / groups;
  for (size_t j = 0; j < data_dim; ++j) {
    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
  }
  framework::DDim col_shape(framework::make_ddim(col_shape_vec));
  framework::DDim col_matrix_shape =
      framework::flatten_to_2d(col_shape, data_dim + 1);

  bool is_expand =
      math::IsExpand(filter_shape_vec, strides, paddings, dilations);
  Tensor col;
  Tensor col_matrix;
  if (is_expand) {
    col.mutable_data<float>(col_shape);
    col_matrix.ShareDataWith(col);
    col_matrix.Resize(col_matrix_shape);
  }

  framework::DDim input_shape = framework::slice_ddim(
      input->dims(), 1, static_cast<int>(input->dims().size()));

  framework::DDim filter_matrix_shape = {filter.dims()[0],
                                         filter.numel() / filter.dims()[0]};
  filter.Resize(filter_matrix_shape);
  framework::DDim output_matrix_shape = {
      output->dims()[1],
      output->numel() / (output->dims()[0] * output->dims()[1])};

  // convolution operator: im2col(or vol2col) + gemm
  int in_step = static_cast<int>(input->dims()[1]) / groups;
  int out_step = static_cast<int>(output->dims()[1]) / groups;

  math::Vol2ColFunctor<CPU, float> vol2col;
  math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;

  for (int i = 0; i < batch_size; i++) {
    Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
    Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);

    for (int g = 0; g < groups; g++) {
      Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);

      if (!is_expand) {
        col.ShareDataWith(in_slice);
        col_matrix.ShareDataWith(col);
        col_matrix.Resize(col_matrix_shape);
      } else if (data_dim == 2U) {
        // im2col
        im2col(in_slice, dilations, strides,
               std::vector<int>{paddings[0], paddings[1], paddings[0],
                                paddings[1]},
               &col);
      } else if (data_dim == 3U) {
        // vol2col
        vol2col(in_slice, dilations, strides, paddings, &col);
      }
      // gemm
      Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
      Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
      math::matmulWithBn<float>(filter_slice, false, col_matrix, false,
                                static_cast<float>(1), &out_slice,
                                static_cast<float>(0), true, &new_scale,
                                &new_bias, g);
    }
  }
}

template <typename P>
void DWConvBNReluCompute(const FusionDWConvBNReluParam &param) {
  if (param.Groups() == param.Input()->dims()[1] &&
      param.Input()->dims()[1] == param.Output()->dims()[1] &&
      param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
      param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) {
    math::DepthwiseConvAddBNRelu3x3s1p1(param.Input(), param.Filter(),
                                        param.Output(), param.NewScale(),
                                        param.NewBias(), true);
  } else if (param.Groups() == param.Input()->dims()[1] &&
             param.Input()->dims()[1] == param.Output()->dims()[1] &&
             param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
             param.Filter()->dims()[2] == 3 && param.Strides()[0] == 2) {
    //    math::DepthwiseConvAddBNRelu3x3s2p1(param.Input(), param.Filter(),
    //                                        param.Output(), param.NewScale(),
    //                                        param.NewBias(), 1);
    math::DepthwiseConvAddBNRelu3x3s2p1v2(param.Input(), param.Filter(),
                                          param.Output(), param.NewScale(),
                                          param.NewBias(), true);
  } else {
    DWConvBNReluBasic(param);
  }
}

}  // namespace operators
}  // namespace paddle_mobile

#endif
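For orientation, here is a plain scalar reference of the fused depthwise 3x3, stride-1, pad-1, BN + ReLU computation that DWConvBNReluCompute dispatches to NEON code for. This is a sketch under an assumed NCHW layout with one 3x3 filter per channel; the name and signature are illustrative, not the library's:

#include <algorithm>

// Sketch: depthwise 3x3 convolution, stride 1, zero-padding 1, with fused
// batch-norm scale/bias and ReLU; NCHW layout, one filter per channel.
void DepthwiseConv3x3S1P1Ref(const float *in, const float *filter,
                             const float *new_scale, const float *new_bias,
                             float *out, int channels, int height, int width) {
  for (int c = 0; c < channels; ++c) {
    const float *src = in + c * height * width;
    const float *f = filter + c * 9;
    float *dst = out + c * height * width;
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        float acc = 0.0f;
        for (int kh = -1; kh <= 1; ++kh) {
          for (int kw = -1; kw <= 1; ++kw) {
            int ih = h + kh, iw = w + kw;
            if (ih < 0 || ih >= height || iw < 0 || iw >= width) continue;
            acc += src[ih * width + iw] * f[(kh + 1) * 3 + (kw + 1)];
          }
        }
        // batch-norm folded into one multiply-add, then ReLU
        dst[h * width + w] = std::max(acc * new_scale[c] + new_bias[c], 0.0f);
      }
    }
  }
}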
src/operators/kernel/central-arm-func/pool_arm_func.h
...
...
@@ -76,15 +76,20 @@ void PoolCompute(const PoolParam &param) {
      }
    } else if (ksize[0] == 2 && ksize[0] == ksize[1]) {
#ifndef IOS
#if __ARM_NEON
#if __aarch64__
      PoolBasic(pooling_type, ksize, strides, paddings, in_x, out);
#else
      if (pooling_type == "max") {
        math::Pool2x2Max(strides, paddings, in_x, out);
      } else if (pooling_type == "avg") {
        math::Pool2x2Avg(strides, paddings, in_x, out);
      }
#endif
#else
      PoolBasic(pooling_type, ksize, strides, paddings, in_x, out);
#endif
#endif  // __ARM_NEON
    } else {
      PoolBasic(pooling_type, ksize, strides, paddings, in_x, out);
    }
...
...
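The 2x2 branch above routes to math::Pool2x2Max / math::Pool2x2Avg on 32-bit ARM NEON. A scalar sketch of the max variant, assuming stride 2 and no padding (the reference name is hypothetical):

#include <algorithm>

// Sketch: 2x2 max pooling, stride 2, no padding.
void Pool2x2MaxRef(const float *in, float *out, int height, int width) {
  const int out_h = height / 2, out_w = width / 2;
  for (int h = 0; h < out_h; ++h) {
    for (int w = 0; w < out_w; ++w) {
      const float *p = in + 2 * h * width + 2 * w;
      out[h * out_w + w] =
          std::max(std::max(p[0], p[1]), std::max(p[width], p[width + 1]));
    }
  }
}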
src/operators/kernel/central-arm-func/sigmoid_arm_func.h
...
...
@@ -68,6 +68,7 @@ void sigmoid(const Tensor *X, Tensor *Y) {
      input_outer_ptr++;
    }
  }
#else
#endif
}
...
...
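The NEON body elided above vectorizes the elementwise logistic function; for reference, the scalar definition it computes is:

#include <cmath>

// Scalar logistic function: y = 1 / (1 + e^(-x)).
inline float SigmoidRef(float x) { return 1.0f / (1.0f + std::exp(-x)); }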
src/operators/kernel/conv_bn_relu_kernel.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef FUSION_CONVBNRELU_OP
#include <vector>
#include "framework/ddim.h"
#include "framework/operator.h"
#include "operators/math/conv_func.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {

using framework::DDim;
using framework::OpKernelBase;

template <typename DeviceType, typename T>
class ConvBNReluKernel
    : public OpKernelBase<DeviceType, FusionConvBNReluParam> {
 public:
  void Compute(const FusionConvBNReluParam &param) const;
  bool Init(FusionConvBNReluParam *param);
};

}  // namespace operators
}  // namespace paddle_mobile

#endif
src/operators/kernel/dwconv_bn_relu_kernel.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef FUSION_DWCONVBNRELU_OP
#include <vector>
#include "framework/ddim.h"
#include "framework/operator.h"
#include "operators/math/conv_func.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {

using framework::DDim;
using framework::OpKernelBase;

template <typename DeviceType, typename T>
class DWConvBNReluKernel
    : public OpKernelBase<DeviceType, FusionDWConvBNReluParam> {
 public:
  void Compute(const FusionDWConvBNReluParam &param) const;
  bool Init(FusionDWConvBNReluParam *param);
};

}  // namespace operators
}  // namespace paddle_mobile

#endif
src/operators/lrn_op.cpp
...
...
@@ -24,7 +24,7 @@ void LrnOp<Dtype, T>::InferShape() const {
  auto x_dims = this->param_.InputX()->dims();
  this->param_.Out()->Resize(x_dims);
}

template class LrnOp<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile
...
...
src/operators/math/depthwise_conv_3x3.cpp
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/math/depthwise_conv_3x3.h"
#ifdef __ARM_NEON
#if __ARM_NEON
#include <arm_neon.h>
#endif
#include <vector>
...
...
@@ -23,7 +23,6 @@ namespace math {
void DepthwiseConv3x3(const Tensor *input, vector<int> strides,
                      vector<int> paddings, const Tensor *filter, Tensor *bias,
                      Tensor *output, bool if_bias) {
#ifdef __ARM_NEON
  const int batch_size = input->dims()[0];
  const int input_height = input->dims()[2];
...
...
@@ -181,7 +180,27 @@ void DepthwiseConv3x3(const Tensor *input, vector<int> strides,
        }
      } else {
#if defined(ARMV17)
#if __ARM_NEON
#if __aarch64__
        const float32x4_t data1 = vld1q_f32(pos1);
        const float32x4_t data2 = vld1q_f32(pos2);
        const float32x4_t data3 = vld1q_f32(pos3);
        const float32x4_t v_filter1 = vld1q_f32(filter1);
        const float32x4_t v_filter2 = vld1q_f32(filter2);
        const float32x4_t v_filter3 = vld1q_f32(filter3);
        float32x4_t mula = vmulq_f32(data1, v_filter1);
        mula = vmlaq_f32(mula, data2, v_filter2);
        mula = vmlaq_f32(mula, data3, v_filter3);
        float32x2_t res = vpadd_f32(
            vget_high_f32(vsetq_lane_f32(0, mula, 3)), vget_low_f32(mula));
        res = vpadd_f32(res, res);
        if (if_bias) {
          output_data[ph * output_width + pw] += vget_lane_f32(res, 0);
        } else {
          output_data[ph * output_width + pw] = vget_lane_f32(res, 0);
        }
#else
        asm volatile(
            "vld1.32 {q1}, [%[pos1]]        \n\t"
...
...
@@ -209,26 +228,10 @@ void DepthwiseConv3x3(const Tensor *input, vector<int> strides,
            [filter2] "r"(filter2), [filter3] "r"(filter3),
            [output_ptr] "r"(output_ptr), [zero] "r"(zero)
            : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6");
#endif  // __aarch64__
#else
        const float32x4_t data1 = vld1q_f32(pos1);
        const float32x4_t data2 = vld1q_f32(pos2);
        const float32x4_t data3 = vld1q_f32(pos3);
        const float32x4_t v_filter1 = vld1q_f32(filter1);
        const float32x4_t v_filter2 = vld1q_f32(filter2);
        const float32x4_t v_filter3 = vld1q_f32(filter3);
        float32x4_t mula = vmulq_f32(data1, v_filter1);
        mula = vmlaq_f32(mula, data2, v_filter2);
        mula = vmlaq_f32(mula, data3, v_filter3);
        float32x2_t res = vpadd_f32(
            vget_high_f32(vsetq_lane_f32(0, mula, 3)), vget_low_f32(mula));
        res = vpadd_f32(res, res);
        if (if_bias) {
          output_data[ph * output_width + pw] += vget_lane_f32(res, 0);
        } else {
          output_data[ph * output_width + pw] = vget_lane_f32(res, 0);
        }
#endif
#endif  // __ARM_NEON
      }
    }
  }
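The vsetq_lane_f32/vpadd_f32 sequence used in both branches above is a horizontal reduction: lane 3 is zeroed so that two pairwise adds leave the sum of the three valid per-row products in lane 0. Extracted as a standalone helper for clarity (a sketch; compiles only where <arm_neon.h> is available):

#include <arm_neon.h>

// Sketch: sum lanes 0..2 of a float32x4_t, ignoring lane 3.
static inline float HorizontalSum3(float32x4_t v) {
  float32x2_t r = vpadd_f32(vget_high_f32(vsetq_lane_f32(0.0f, v, 3)),
                            vget_low_f32(v));
  r = vpadd_f32(r, r);  // lane 0 now holds v[0] + v[1] + v[2]
  return vget_lane_f32(r, 0);
}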
...
...
@@ -239,12 +242,11 @@ void DepthwiseConv3x3(const Tensor *input, vector<int> strides,
    input_data += input_batch_stride;
    output_data += output_batch_stride;
  }
#endif
}
void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
                          Tensor *output, Tensor *bias, bool if_bias) {
#ifdef __ARM_NEON
#if __ARM_NEON
  const float *input_data = input->data<float>();
  const float *filter_data = filter->data<float>();
  float *output_data = output->data<float>();
...
...
@@ -520,7 +522,7 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
                                   Tensor *output, const Tensor *new_scale,
                                   const Tensor *new_bias, bool if_relu) {
#ifdef __ARM_NEON
#if __ARM_NEON
  const float *input_data = input->data<float>();
  const float *filter_data = filter->data<float>();
  float *output_data = output->data<float>();
...
...
@@ -824,7 +826,7 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
void DepthwiseConvAddBNRelu3x3s2p1(const Tensor *input, const Tensor *filter,
                                   Tensor *output, const Tensor *new_scale,
                                   const Tensor *new_bias, bool if_relu) {
#ifdef __ARM_NEON
#if __ARM_NEON
  const int batch_size = input->dims()[0];
...
...
@@ -1022,7 +1024,7 @@ void DepthwiseConvAddBNRelu3x3s2p1(const Tensor *input, const Tensor *filter,
void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
                            Tensor *output, Tensor bias, bool if_bias) {
#ifdef __ARM_NEON
#if __ARM_NEON
  const float *input_data = input->data<float>();
  const float *filter_data = filter->data<float>();
  float *output_data = output->data<float>();
...
...
@@ -1225,7 +1227,7 @@ void DepthwiseConv3x3s2p1v2(const Tensor *input, const Tensor *filter,
void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
                                     Tensor *output, const Tensor *new_scale,
                                     const Tensor *new_bias, bool if_relu) {
#ifdef __ARM_NEON
#if __ARM_NEON
  const float *input_data = input->data<float>();
  const float *filter_data = filter->data<float>();
  float *output_data = output->data<float>();
...
...
src/operators/math/gemm.cpp
(This diff is collapsed.)
src/operators/math/gemm.h
...
...
@@ -28,6 +28,7 @@ namespace paddle_mobile {
namespace operators {
namespace math {

/*
// Copy blocks of matrix A into contiguous memory (ColMajor)
void PackMatrixA(int m, int k, int m_tail, const float *A, int lda,
                 float *buffer);
...
...
@@ -35,6 +36,7 @@ void PackMatrixA(int m, int k, int m_tail, const float *A, int lda,
// Copy blocks of matrix B into contiguous memory (ColMajor)
void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb,
                 float *buffer);
*/
// Copy blocks of matrix A into contiguous memory (RowMajor)
void PackMatrixA_(int m, int k, int m_tail, const float *A, int lda,
...
...
@@ -51,7 +53,7 @@ void InnerKernel(int mc, int nc, float alpha, const float *a, const float *b,
void InnerKernelWithBn(int mc, int nc, float alpha, const float *a,
                       const float *b, float beta, float *c, float *C, int ldc,
                       bool relu, float *new_scale, float *new_bias);
/*
// Vector-matrix multiplication (M = 1)
void VectorKernel(int m, int n, int k, float alpha, const float *A, int lda,
const float *B, int ldb, float beta, float *C, int ldc,
...
...
@@ -60,6 +62,7 @@ void VectorKernel(int m, int n, int k, float alpha, const float *A, int lda,
void VectorKernelWithBn(int m, int n, int k, float alpha, const float *A,
int lda, const float *B, int ldb, float beta, float *C,
int ldc, bool relu, float *new_scale, float *new_bias);
*/
// Compute a smaller block of matrix C
void AddDot4x4(int k, const float *a, const float *b, float *c, int ldc);
...
...
@@ -81,6 +84,7 @@ void WriteWithBn(int mc, int nc, float *c, float *C, int ldc, float *new_scale,
void WriteWithBnRelu(int mc, int nc, float *c, float *C, int ldc,
                     float *new_scale, float *new_bias);
/*
// Write back vector-matrix multiplication results
// C = A * B
void VecWriteBasic(int n, float *c, float *C, int ldc);
...
...
@@ -96,6 +100,7 @@ void VecWriteWithBn(int n, float *c, float *C, int ldc, float *new_scale,
// C = A * B, batchnorm(C), relu(C)
void VecWriteWithBnRelu(int n, float *c, float *C, int ldc, float *new_scale,
float *new_bias);
*/
// 32-bit float matrix multiplication
void Sgemm(int m, int n, int k, float alpha, const float *A, int lda,
...
...
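The WithBn declarations above differ from the plain kernels only in the write-back step: the accumulated panel is scaled and shifted per output row (one batch-norm scale/bias per output channel) and clamped at zero on the way out to C. A scalar sketch of that write-back, assuming the panel c is stored row-major with leading dimension nc (an assumption; the real panel layout is internal to the gemm):

#include <algorithm>

// Sketch: copy the mc x nc accumulator panel c into C with fused
// batch-norm (per-row scale/bias) followed by ReLU.
void WriteWithBnReluRef(int mc, int nc, const float *c, float *C, int ldc,
                        const float *new_scale, const float *new_bias) {
  for (int i = 0; i < mc; ++i) {
    for (int j = 0; j < nc; ++j) {
      float v = c[i * nc + j] * new_scale[i] + new_bias[i];
      C[i * ldc + j] = std::max(v, 0.0f);
    }
  }
}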
src/operators/math/im2col.cpp
...
...
@@ -15,7 +15,7 @@ limitations under the License. */
#include "operators/math/im2col.h"
#include <vector>
#ifdef __ARM_NEON
#include "arm_neon.h"
#include <arm_neon.h>
#endif
#include "common/types.h"

namespace paddle_mobile {
...
...
@@ -69,7 +69,7 @@ class Im2ColFunctor<ColFormat::kCFO, CPU, T> {
    int channels_col = im_channels * filter_height * filter_width;
    const T *im_data = im.data<T>();
    T *col_data = col->data<T>();
#ifdef __ARM_NEON
#if __ARM_NEON
    const int osize = col_height;
    const int isize = im_height;
    bool pad1 = padding[0] > 0;
...
...
src/operators/math/math_function.cpp
...
...
@@ -50,7 +50,7 @@ void matmulWithBn<float>(const framework::Tensor &matrix_a, bool trans_a,
                         const framework::Tensor &matrix_b, bool trans_b,
                         float alpha, framework::Tensor *matrix_out, float beta,
                         bool relu, framework::Tensor *new_scale,
                         framework::Tensor *new_bias) {
                         framework::Tensor *new_bias, int group) {
  auto dim_a = matrix_a.dims();
  auto dim_b = matrix_b.dims();
  auto dim_out = matrix_out->dims();
...
...
@@ -71,7 +71,8 @@ void matmulWithBn<float>(const framework::Tensor &matrix_a, bool trans_a,
  SgemmWithBn(M, N, K, alpha, matrix_a.data<float>(), K, matrix_b.data<float>(),
              N, beta, matrix_out->data<float>(), N, relu,
              new_scale->data<float>(), new_bias->data<float>());
              new_scale->data<float>() + group, new_bias->data<float>() + group);
}
}  // namespace math
...
...
src/operators/math/math_function.h
...
...
@@ -31,7 +31,8 @@ template <typename T>
void matmulWithBn(const framework::Tensor &matrix_a, bool trans_a,
                  const framework::Tensor &matrix_b, bool trans_b, T alpha,
                  framework::Tensor *matrix_out, T beta, bool relu,
                  framework::Tensor *new_scale, framework::Tensor *new_bias);
                  framework::Tensor *new_scale, framework::Tensor *new_bias,
                  int group);

}  // namespace math
}  // namespace operators
}  // namespace paddle_mobile
src/operators/math/pool_2x2.cpp
...
...
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#ifdef POOL_OP
#include "pool_2x2.h"
#include "operators/math/pool_2x2.h"
#include <algorithm>
#include <vector>
namespace paddle_mobile {
namespace operators {
...
...
@@ -21,10 +23,10 @@ namespace math {
void Pool2x2Max(vector<int> strides, vector<int> paddings, const Tensor *input,
                Tensor *output) {
#ifdef __ARM_NEON
#ifdef ARMV7
#if __ARM_NEON
#if __aarch64__
#else
  const int batch_size = input->dims()[0];
  const int input_height = input->dims()[2];
...
...
@@ -93,15 +95,16 @@ void Pool2x2Max(vector<int> strides, vector<int> paddings, const Tensor *input,
    output_data += output_batch_stride;
  }
#endif
#else
#endif
}

void Pool2x2Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
                Tensor *output) {
#ifdef __ARM_NEON
#if __ARM_NEON
#ifdef ARMV7
#if __aarch64__
#else
  const int batch_size = input->dims()[0];
  const int input_height = input->dims()[2];
...
...
@@ -171,12 +174,9 @@ void Pool2x2Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
    input_data += input_batch_stride;
    output_data += output_batch_stride;
  }
#else
// TODO(): to imp other asm
#endif
#else
#endif
}
...
...
src/operators/math/pool_3x3.cpp
...
...
@@ -17,7 +17,7 @@ limitations under the License. */
#include <omp.h>
#endif
#include "framework/tensor.h"
#include "pool_3x3.h"
#include "
operators/math/
pool_3x3.h"
#if __ARM_NEON
#include <arm_neon.h>
#endif // __ARM_NEON
...
...
@@ -518,6 +518,8 @@ void Pool3x3Maxs1p1(const Tensor *input, Tensor *output) {
    input_data += input_batch_stride;
    out_data += output_batch_stride;
  }
#else
#endif
}
...
...
@@ -582,7 +584,18 @@ void Pool3x3Max(vector<int> strides, vector<int> paddings, const Tensor *input,
        }
        output_seg[ph * output_width + pw] = max_value;
      } else {
#if defined(ARMV7)
#if __aarch64__
        const float32x4_t data1 = vld1q_f32(pos1);
        const float32x4_t data2 = vld1q_f32(pos1 + input_width);
        const float32x4_t data3 = vld1q_f32(pos1 + 2 * input_width);
        const float32x4_t max_data =
            vmaxq_f32(vmaxq_f32(data1, data2), data3);
        float32x2_t res =
            vpmax_f32(vget_high_f32(vsetq_lane_f32(-INT_MAX, max_data, 3)),
                      vget_low_f32(max_data));
        res = vpmax_f32(res, res);
        output_seg[ph * output_width + pw] = vget_lane_f32(res, 0);
#else
        asm volatile(
            "vld1.32 {q1}, [%[pos1]]        \n\t"
            "vld1.32 {q2}, [%[pos2]]        \n\t"
...
...
@@ -598,17 +611,6 @@ void Pool3x3Max(vector<int> strides, vector<int> paddings, const Tensor *input,
            [pos2] "r"(pos2), [pos3] "r"(pos3),
            [output_ptr] "r"(output_ptr), [negative_max] "r"(negative_max)
            : "memory", "q1", "q2", "q3", "q4");
#else
        const float32x4_t data1 = vld1q_f32(pos1);
        const float32x4_t data2 = vld1q_f32(pos1 + input_width);
        const float32x4_t data3 = vld1q_f32(pos1 + 2 * input_width);
        const float32x4_t max_data =
            vmaxq_f32(vmaxq_f32(data1, data2), data3);
        float32x2_t res =
            vpmax_f32(vget_high_f32(vsetq_lane_f32(-INT_MAX, max_data, 3)),
                      vget_low_f32(max_data));
        res = vpmax_f32(res, res);
        output_seg[ph * output_width + pw] = vget_lane_f32(res, 0);
#endif
      }
    }
...
...
@@ -676,8 +678,8 @@ void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
        }
        output_seg[ph * output_width + pw] = sum / 9.0;
      } else {
#if defined(ARMV7)
#if __aarch64__
#else
        asm volatile(
            "vld1.32 {q1}, [%[pos1]]        \n\t"
            "vld1.32 {q2}, [%[pos2]]        \n\t"
...
...
@@ -696,7 +698,7 @@ void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
            [output_ptr] "r"(output_ptr), [zero] "r"(zero),
            [nine_ptr] "r"(nine_ptr)
            : "memory", "r6", "q1", "q2", "q3", "q4");
#else
#endif
        const float32x4_t data1 = vld1q_f32(pos1);
        const float32x4_t data2 = vld1q_f32(pos2);
        const float32x4_t data3 = vld1q_f32(pos3);
...
...
@@ -707,7 +709,6 @@ void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
            vget_low_f32(sum_data));
        res = vpadd_f32(res, res);
        output_seg[ph * output_width + pw] = vget_lane_f32(res, 0) / 9.0;
#endif
      }
    }
  }
...
...
@@ -715,6 +716,7 @@ void Pool3x3Avg(vector<int> strides, vector<int> paddings, const Tensor *input,
    input_data += input_batch_stride;
    output_data += output_batch_stride;
  }
#else
#endif
}
}  // namespace math
...
...
src/operators/math/softmax.cpp
...
...
@@ -135,6 +135,7 @@ class SoftmaxFuntor<CPU, T> {
      }
    }
  }
#else
#endif  // ARM_NEON

 public:
...
...
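The non-NEON fallback above is left empty; for reference, the per-row computation SoftmaxFuntor performs is the numerically stabilized softmax (a scalar sketch):

#include <algorithm>
#include <cmath>

// Sketch: one softmax row, subtracting the row max before exponentiating.
void SoftmaxRowRef(const float *in, float *out, int n) {
  const float max_v = *std::max_element(in, in + n);
  float sum = 0.0f;
  for (int i = 0; i < n; ++i) {
    out[i] = std::exp(in[i] - max_v);
    sum += out[i];
  }
  for (int i = 0; i < n; ++i) out[i] /= sum;
}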
src/operators/mul_op.cpp
...
...
@@ -50,7 +50,7 @@ void MulOp<Dtype, T>::InferShape() const {
  framework::DDim ddim = framework::make_ddim(output_dims);
  this->param_.Out()->Resize(ddim);
}

template class MulOp<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile
...
...
src/operators/multiclass_nms_op.cpp
...
...
@@ -34,7 +34,7 @@ void MultiClassNMSOp<Dtype, T>::InferShape() const {
// pre size, will change in Compute.
  this->param_.Out()->Resize(
      framework::make_ddim({input_bboxes_dims[1], 6}));
}

template class MultiClassNMSOp<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile
...
...
src/operators/op_param.h
...
...
@@ -371,7 +371,7 @@ class BatchNormParam : OpParam {
    input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
    epsilon_ = GetAttr<float>("epsilon", attrs);
    momentum_ = GetAttr<float>("momentum", attrs);
    is_test_ = GetAttr<bool>("is_test", attrs);
    //    is_test_ = GetAttr<bool>("is_test", attrs);
  }

  const Tensor *InputX() const { return input_x_; }
...
...
@@ -1059,6 +1059,165 @@ class FusionConvAddBNReluParam : public OpParam {
Print &operator<<(Print &printer, const FusionConvAddParam &conv_param);
#endif

#ifdef FUSION_DWCONVBNRELU_OP
class FusionDWConvBNReluParam : public OpParam {
 public:
  FusionDWConvBNReluParam(const VariableNameMap &inputs,
                          const VariableNameMap &outputs,
                          const AttributeMap &attrs, const Scope &scope) {
    filter_ = FilterFrom<LoDTensor>(inputs, scope);
    input_ = InputFrom<LoDTensor>(inputs, scope);
    output_ = OutFrom<LoDTensor>(outputs, scope);
    strides_ = GetAttr<vector<int>>("strides", attrs);
    paddings_ = GetAttr<vector<int>>("paddings", attrs);
    dilations_ = GetAttr<vector<int>>("dilations", attrs);
    groups = GetAttr<int>("groups", attrs);
    input_bias_ = InputBiasFrom<LoDTensor>(inputs, scope);
    input_mean_ = InputMeanFrom<LoDTensor>(inputs, scope);
    input_scale_ = InputScaleFrom<LoDTensor>(inputs, scope);
    input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
    epsilon_ = GetAttr<float>("epsilon", attrs);
    momentum_ = GetAttr<float>("momentum", attrs);
    //    is_test_ = GetAttr<bool>("is_test", attrs);
  }

  const Tensor *Input() const { return input_; }

  const Tensor *Filter() const { return filter_; }

  Tensor *Output() const { return output_; }

  const vector<int> &Strides() const { return strides_; }

  const vector<int> &Paddings() const { return paddings_; }

  const vector<int> &Dilations() const { return dilations_; }

  const int &Groups() const { return groups; }

  const Tensor *InputBias() const { return input_bias_; }

  const Tensor *InputMean() const { return input_mean_; }

  const Tensor *InputScale() const { return input_scale_; }

  const Tensor *InputVariance() const { return input_variance_; }

  const float &Epsilon() const { return epsilon_; }

  const float &Momentum() const { return momentum_; }

  const bool &IsTest() const { return is_test_; }

  void SetNewScale(Tensor *new_scale) { new_scale_ = new_scale; }

  void SetNewBias(Tensor *new_bias) { new_bias_ = new_bias; }

  const Tensor *NewScale() const { return new_scale_; }

  const Tensor *NewBias() const { return new_bias_; }

 protected:
  Tensor *input_;
  Tensor *output_;
  Tensor *filter_;
  vector<int> strides_;
  vector<int> paddings_;
  vector<int> dilations_;
  int groups;
  Tensor *input_bias_;
  Tensor *input_mean_;
  Tensor *input_scale_;
  Tensor *input_variance_;
  float epsilon_;
  float momentum_;
  bool is_test_;
  Tensor *new_bias_;
  Tensor *new_scale_;
};

Print &operator<<(Print &printer, const FusionConvAddParam &conv_param);
#endif

#ifdef FUSION_CONVBNRELU_OP
class FusionConvBNReluParam : public OpParam {
 public:
  FusionConvBNReluParam(const VariableNameMap &inputs,
                        const VariableNameMap &outputs,
                        const AttributeMap &attrs, const Scope &scope) {
    filter_ = FilterFrom<LoDTensor>(inputs, scope);
    input_ = InputFrom<LoDTensor>(inputs, scope);
    output_ = OutFrom<LoDTensor>(outputs, scope);
    strides_ = GetAttr<vector<int>>("strides", attrs);
    paddings_ = GetAttr<vector<int>>("paddings", attrs);
    dilations_ = GetAttr<vector<int>>("dilations", attrs);
    groups = GetAttr<int>("groups", attrs);
    input_bias_ = InputBiasFrom<LoDTensor>(inputs, scope);
    input_mean_ = InputMeanFrom<LoDTensor>(inputs, scope);
    input_scale_ = InputScaleFrom<LoDTensor>(inputs, scope);
    input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
    epsilon_ = GetAttr<float>("epsilon", attrs);
    momentum_ = GetAttr<float>("momentum", attrs);
    //    is_test_ = GetAttr<bool>("is_test", attrs);
  }

  const Tensor *Input() const { return input_; }

  const Tensor *Filter() const { return filter_; }

  Tensor *Output() const { return output_; }

  const vector<int> &Strides() const { return strides_; }

  const vector<int> &Paddings() const { return paddings_; }

  const vector<int> &Dilations() const { return dilations_; }

  const int &Groups() const { return groups; }

  const Tensor *InputBias() const { return input_bias_; }

  const Tensor *InputMean() const { return input_mean_; }

  const Tensor *InputScale() const { return input_scale_; }

  const Tensor *InputVariance() const { return input_variance_; }

  const float &Epsilon() const { return epsilon_; }

  const float &Momentum() const { return momentum_; }

  const bool &IsTest() const { return is_test_; }

  void SetNewScale(Tensor *new_scale) { new_scale_ = new_scale; }

  void SetNewBias(Tensor *new_bias) { new_bias_ = new_bias; }

  const Tensor *NewScale() const { return new_scale_; }

  const Tensor *NewBias() const { return new_bias_; }

 protected:
  Tensor *input_;
  Tensor *output_;
  Tensor *filter_;
  vector<int> strides_;
  vector<int> paddings_;
  vector<int> dilations_;
  int groups;
  Tensor *input_bias_;
  Tensor *input_mean_;
  Tensor *input_scale_;
  Tensor *input_variance_;
  float epsilon_;
  float momentum_;
  bool is_test_;
  Tensor *new_bias_;
  Tensor *new_scale_;
};
#endif

#ifdef IM2SEQUENCE_OP
class Im2SequenceParam : public OpParam {
 public:
...
...
src/operators/pool_op.cpp
...
...
@@ -54,7 +54,7 @@ void PoolOp<DeviceType, T>::InferShape() const {
  }
  this->param_.Output()->Resize(framework::make_ddim(output_shape));
}

template class PoolOp<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile
...
...
src/operators/prelu_op.cpp
...
...
@@ -23,7 +23,7 @@ void PReluOp<Dtype, T>::InferShape() const {
  auto input_dims = this->param_.InputX()->dims();
  this->param_.Out()->Resize(input_dims);
}

template class PReluOp<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile
...
...
src/operators/prior_box_op.cpp
...
...
@@ -44,7 +44,7 @@ void PriorBoxOp<Dtype, T>::InferShape() const {
  this->param_.OutputBoxes()->Resize(framework::make_ddim(dim_vec));
  this->param_.OutputVariances()->Resize(framework::make_ddim(dim_vec));
}

template class PriorBoxOp<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile
...
...
src/operators/relu_op.cpp
...
...
@@ -23,7 +23,7 @@ void ReluOp<Dtype, T>::InferShape() const {
  auto input_dims = this->param_.InputX()->dims();
  this->param_.Out()->Resize(input_dims);
}

template class ReluOp<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile
...
...
src/operators/reshape_op.cpp
...
...
@@ -27,7 +27,7 @@ void ReshapeOp<Dtype, T>::InferShape() const {
  auto out_dims = ValidateShape(shape, input_x_dims);
  this->param_.Out()->Resize(out_dims);
}

template class ReshapeOp<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile
...
...
src/operators/resize_op.cpp
...
...
@@ -24,7 +24,7 @@ void ResizeOp<Dtype, T>::InferShape() const {
  auto out_dims = CalOutputShape(this->param_);
  this->param_.Out()->Resize(out_dims);
}

template class ResizeOp<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile
...
...
src/operators/scale_op.cpp
...
...
@@ -24,7 +24,7 @@ void ScaleOp<Dtype, T>::InferShape() const {
  auto input_dims = this->param_.InputX()->dims();
  this->param_.Out()->Resize(input_dims);
}

template class ScaleOp<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile
...
...
src/operators/sigmoid_op.cpp
...
...
@@ -22,7 +22,7 @@ template <typename DeviceType, typename T>
void SigmoidOp<DeviceType, T>::InferShape() const {
  this->param_.Out()->Resize(this->param_.InputX()->dims());
}

template class SigmoidOp<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile
...
...
src/operators/slice_op.cpp
...
...
@@ -23,7 +23,7 @@ template <typename Dtype, typename T>
void SliceOp<Dtype, T>::InferShape() const {
  /// todo: add InputShape() detection.
}

template class SliceOp<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile
...
...
src/operators/softmax_op.cpp
...
...
@@ -22,7 +22,7 @@ template <typename DeviceType, typename T>
void SoftmaxOp<DeviceType, T>::InferShape() const {
  this->param_.Out()->Resize(this->param_.InputX()->dims());
}

template class SoftmaxOp<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile
...
...
src/operators/transpose_op.cpp
...
...
@@ -47,7 +47,7 @@ void TransposeOp<Dtype, T>::InferShape() const {
  }
  this->param_.Out()->Resize(out_dims);
}

template class TransposeOp<CPU, float>;

}  // namespace operators
}  // namespace paddle_mobile
...
...
test/framework/test_load.cpp
...
...
@@ -19,7 +19,9 @@ int main() {
  paddle_mobile::Loader<paddle_mobile::CPU> loader;
  //  ../../../test/models/googlenet
  //  ../../../test/models/mobilenet
  auto program = loader.Load(g_googlenet, true);
  //  auto program = loader.Load(g_googlenet, true);
  auto program = loader.Load(g_mobilenet_ssd, true);
// auto program = loader.Load(g_googlenet_combine + "/model",
// g_googlenet_combine +
// "/params", true);
...
...
test/net/test_googlenet.cpp
...
...
@@ -23,7 +23,7 @@ int main() {
  auto time1 = time();
  if (paddle_mobile.Load(g_googlenet, optimize)) {
    auto time2 = time();
    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
    std::vector<float> input;
    std::vector<int64_t> dims{1, 3, 224, 224};
    GetInput<float>(g_test_image_1x3x224x224, &input, dims);
...
...
test/net/test_mobilenet+ssd.cpp
...
...
@@ -12,16 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  paddle_mobile.SetThreadNum(4);
  auto time1 = time();
  if (paddle_mobile.Load(g_mobilenet_ssd, true)) {
  auto isok = paddle_mobile.Load(g_mobilenet_ssd_gesture + "/model",
                                 g_mobilenet_ssd_gesture + "/params", true);
  //  auto isok = paddle_mobile.Load(g_mobilenet_ssd, false);
  if (isok) {
    auto time2 = time();
    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
    std::vector<int64_t> dims{1, 3, 300, 300};
    Tensor input_tensor;
...
...
@@ -33,7 +37,8 @@ int main() {
    auto time3 = time();
    paddle_mobile.Predict(input, dims);
    auto time4 = time();
    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
              << std::endl;
  }
  return 0;
}
test/net/test_mobilenet.cpp
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
...
...
@@ -22,7 +22,7 @@ int main() {
  auto time1 = time();
  if (paddle_mobile.Load(g_mobilenet, true)) {
    auto time2 = time();
    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
    std::vector<int64_t> dims{1, 3, 224, 224};
    Tensor input_tensor;
...
...
@@ -35,7 +35,8 @@ int main() {
    auto vec_result = paddle_mobile.Predict(input, dims);
    auto time4 = time();
    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
              << std::endl;
  }
  return 0;
...
...
test/test_helper.h
...
...
@@ -16,6 +16,8 @@ limitations under the License. */
#include <fstream>
#include <random>
#include <string>
#include <vector>
#include "common/common.h"
#include "common/log.h"
...
...
@@ -23,6 +25,8 @@ limitations under the License. */
#include "framework/tensor.h"
static const std::string g_mobilenet_ssd = "../models/mobilenet+ssd";
static const std::string g_mobilenet_ssd_gesture =
    "../models/mobilenet+ssd_gesture";
static const std::string g_squeezenet = "../models/squeezenet";
static const std::string g_googlenet = "../models/googlenet";
static const std::string g_mobilenet = "../models/mobilenet";
...
...
@@ -62,9 +66,9 @@ void GetInput(const std::string &input_name, std::vector<T> *input,
    size *= dim;
  }

  T *input_ptr = (T *)malloc(sizeof(T) * size);
  T *input_ptr = reinterpret_cast<T *>(malloc(sizeof(T) * size));
  std::ifstream in(input_name, std::ios::in | std::ios::binary);
  in.read((char *)(input_ptr), size * sizeof(T));
  in.read(reinterpret_cast<char *>(input_ptr), size * sizeof(T));
  in.close();
  for (int i = 0; i < size; ++i) {
    input->push_back(input_ptr[i]);
...
...
@@ -79,6 +83,6 @@ void GetInput(const std::string &input_name,
  T *input_ptr = input->mutable_data<T>(dims);

  std::ifstream in(input_name, std::ios::in | std::ios::binary);
  in.read((char *)(input_ptr), input->numel() * sizeof(T));
  in.read(reinterpret_cast<char *>(input_ptr), input->numel() * sizeof(T));
  in.close();
}
tools/build.sh
...
...
@@ -38,7 +38,8 @@ build_for_android() {
    fi

    if [ -z "$PLATFORM" ]; then
        PLATFORM="arm-v7a" # Users could choose "arm-v8a" or other platforms from the command line.
        PLATFORM="arm-v7a" # Users could choose "arm-v8a" platform.
        # PLATFORM="arm-v8a"
    fi

    if [ "${PLATFORM}" = "arm-v7a" ]; then
...
...
@@ -92,23 +93,28 @@ build_for_ios() {
    # rm -rf "../build"
    PLATFORM="ios"
    MODE="Release"
    BUILD_DIR=../build/release/"${PLATFORM}"
    # IOS_ARCH="armv7"
    # IOS_ARCH="armv7s"
    IOS_ARCH="arm64" # Users could choose "armv7" or "armv7s" platforms.
    BUILD_DIR=../build/release/"${PLATFORM}"/"${IOS_ARCH}"
    TOOLCHAIN_FILE="./tools/ios-cmake/ios.toolchain.cmake"
    mkdir -p "${BUILD_DIR}"
    if [ $# -eq 1 ]; then
    cmake .. \
        -B"${BUILD_DIR}" \
        -DCMAKE_BUILD_TYPE="${MODE}" \
        -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
        -DIOS_PLATFORM=OS \
        -DIOS_ARCH="${IOS_ARCH}" \
        -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
        -DNET=$1 \
        -DIS_IOS="true"
    else
    cmake .. \
        -B"${BUILD_DIR}" \
        -DCMAKE_BUILD_TYPE="${MODE}" \
        -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
        -DIOS_PLATFORM=OS \
        -DIOS_ARCH="${IOS_ARCH}" \
        -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
        -DIS_IOS="true"
    fi
    cd "${BUILD_DIR}"
...
...
tools/ios-cmake/ios.toolchain.cmake
...
...
@@ -159,7 +159,6 @@ set (CMAKE_OSX_SYSROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS su
# set the architecture for iOS
if (${IOS_PLATFORM} STREQUAL "OS")
    set (IOS_ARCH armv7 armv7s arm64)
elseif (${IOS_PLATFORM} STREQUAL "SIMULATOR")
    set (IOS_ARCH i386)
elseif (${IOS_PLATFORM} STREQUAL "SIMULATOR64")
...
...
tools/op.cmake
...
...
@@ -42,6 +42,16 @@ elseif (NET STREQUAL "resnet")
    set(MUL_OP ON)
    set(POOL_OP ON)
    set(RELU_OP ON)
elseif (NET STREQUAL "FPGAnets")
    set(FUSION_CONVRELU_OP ON)
    set(FUSION_CONVBNSCALE_OP ON)
    set(FUSION_CONVBNSCALERELU_OP ON)
    set(FUSION_POOLBN_OP ON)
    set(FUSION_ELEMENTWISEADDRELU_OP ON)
    set(REGION_OP ON)
    set(POOL_OP ON)
    set(CONCAT_OP ON)
    set(SOFTMAX_OP ON)
else ()
    set(BATCHNORM_OP ON)
    set(BOXCODER_OP ON)
...
...
@@ -64,6 +74,8 @@ else ()
    set(TRANSPOSE_OP ON)
    set(FUSION_CONVADD_RELU_OP ON)
    set(FUSION_CONVADDBNRELU_OP ON)
    set(FUSION_DWCONVBNRELU_OP ON)
    set(FUSION_CONVBNRELU_OP ON)
    set(PRELU_OP ON)
    set(RESIZE_OP ON)
    set(SCALE_OP ON)
...
...
@@ -155,6 +167,14 @@ endif()
if (FUSION_CONVADDBNRELU_OP)
    add_definitions(-DFUSION_CONVADDBNRELU_OP)
endif()
if (FUSION_DWCONVBNRELU_OP)
    add_definitions(-DFUSION_DWCONVBNRELU_OP)
endif()
if (FUSION_CONVBNRELU_OP)
    add_definitions(-DFUSION_CONVBNRELU_OP)
endif()
if (PRELU_OP)
    add_definitions(-DPRELU_OP)
endif()
...
...
@@ -173,3 +193,23 @@ endif()
if (IM2SEQUENCE_OP)
    add_definitions(-DIM2SEQUENCE_OP)
endif()
if (FUSION_CONVRELU_OP)
    add_definitions(-DFUSION_CONVRELU_OP)
endif()
if (FUSION_CONVBNSCALE_OP)
    add_definitions(-DFUSION_CONVBNSCALE_OP)
endif()
if (FUSION_CONVBNSCALERELU_OP)
    add_definitions(-DFUSION_CONVBNSCALERELU_OP)
endif()
if (FUSION_POOLBN_OP)
    add_definitions(-DFUSION_POOLBN_OP)
endif()
if (FUSION_ELEMENTWISEADDRELU_OP)
    add_definitions(-DFUSION_ELEMENTWISEADDRELU_OP)
endif()
if (REGION_OP)
    add_definitions(-DREGION_OP)
endif()
tools/quantification/CMakeLists.txt
0 → 100644
set(dir ${CMAKE_CURRENT_SOURCE_DIR})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${dir}/build")

ADD_EXECUTABLE(convert convert.cpp)
target_link_libraries(convert paddle-mobile)
\ No newline at end of file
tools/quantification/convert.cpp
0 → 100644
#include "io/paddle_mobile.h"
#include <cstdlib>
using std::string;

static const std::string g_googlenet_combine = "../models/googlenet_combine";
static const std::string g_googlenet = "../models/googlenet";
using paddle_mobile::Executor;
using paddle_mobile::framework::Program;

char *Get_binary_data(std::string filename) {
  FILE *file = fopen(filename.c_str(), "rb");
  PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
                        filename.c_str());
  fseek(file, 0, SEEK_END);
  int64_t size = ftell(file);
  PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
  rewind(file);
  char *data = new char[size];
  size_t bytes_read = fread(data, 1, size, file);
  PADDLE_MOBILE_ENFORCE(bytes_read == size,
                        "read binary file bytes do not match with fseek");
  DLOG << "Get_binary_data end";
  fclose(file);
  return data;
}

void LoadWithDump(const paddle_mobile::framework::VarDesc var_desc,
                  paddle_mobile::framework::LoDTensor *tensor, char **data,
                  FILE *out_file) {
  // 1. version
  uint32_t version = *reinterpret_cast<uint32_t *>(*data);
  // write version
  fwrite(&version, sizeof(uint32_t), 1, out_file);
  (*data) += sizeof(uint32_t);
  // 2. lod information
  uint64_t *lod_level_ptr = new uint64_t();
  memcpy(lod_level_ptr, (*data), sizeof(uint64_t));
  uint64_t lod_level = 0;
  // write lod information
  fwrite(&lod_level, sizeof(uint64_t), 1, out_file);
  delete lod_level_ptr;
  (*data) += sizeof(uint64_t);
  auto &lod = *tensor->mutable_lod();
  lod.resize(lod_level);
  for (uint64_t i = 0; i < lod_level; ++i) {
    uint64_t size = *reinterpret_cast<uint64_t *>(*data);
    // write lod size
    fwrite(&size, sizeof(uint64_t), 1, out_file);
    (*data) += sizeof(uint64_t);
    std::vector<size_t> tmp(size / sizeof(size_t));
    for (int k = 0; k < tmp.size(); ++k) {
      tmp[k] = *reinterpret_cast<size_t *>(*data);
      (*data) += sizeof(size_t);
    }
    // write lod size vector
    fwrite(&tmp, sizeof(size_t), tmp.size(), out_file);
    lod[i] = tmp;
  }
  // 3. tensor version
  uint32_t tensor_version = *reinterpret_cast<uint32_t *>(*data);
  // write tensor version
  fwrite(&tensor_version, sizeof(uint32_t), 1, out_file);
  (*data) += sizeof(uint32_t);
  // 4. tensor desc
  int32_t size = *reinterpret_cast<int32_t *>(*data);
  // write tensor desc
  fwrite(&size, sizeof(int32_t), 1, out_file);
  (*data) += sizeof(int32_t);
  std::unique_ptr<char[]> buf(new char[size]);
  for (int m = 0; m < size; ++m) {
    buf.get()[m] = (*data)[m];
  }
  fwrite(buf.get(), sizeof(char), size, out_file);
  (*data) += (sizeof(char) * size);

  const paddle_mobile::framework::TensorDesc &desc = var_desc.Tensor_desc();
  int memory_size = 1;
  for (auto l : desc.Dims()) {
    memory_size *= l;
  }
  tensor->Resize(paddle_mobile::framework::make_ddim(desc.Dims()));

  void *memory = tensor;
  int type_size = 0;
  switch (desc.DataType()) {
    case paddle_mobile::framework::VARTYPE_TYPE_FP16:
      type_size = 2;
      break;
    case paddle_mobile::framework::VARTYPE_TYPE_FP32:
      type_size = 4;
      memory = tensor->mutable_data<float>();
      break;
    case paddle_mobile::framework::VARTYPE_TYPE_FP64:
      type_size = 8;
      break;
    case paddle_mobile::framework::VARTYPE_TYPE_INT32:
      type_size = 4;
      break;
    case paddle_mobile::framework::VARTYPE_TYPE_INT64:
      type_size = 8;
      break;
    case paddle_mobile::framework::VARTYPE_TYPE_BOOL:
      type_size = 1;
      break;
    default:
      break;
  }
  for (int n = 0; n < memory_size * type_size; ++n) {
    static_cast<char *>(memory)[n] = (*data)[n];
  }
  (*data) += (sizeof(char) * memory_size * type_size);
  // for float 32
  float min_value = std::numeric_limits<float>::max();
  float max_value = std::numeric_limits<float>::min();
  for (int k = 0; k < memory_size; ++k) {
    min_value = std::min(min_value, static_cast<float *>(memory)[k]);
    max_value = std::max(max_value, static_cast<float *>(memory)[k]);
  }
  fwrite(&min_value, sizeof(float), 1, out_file);
  fwrite(&max_value, sizeof(float), 1, out_file);
  for (int g = 0; g < memory_size; ++g) {
    float value = static_cast<float *>(memory)[g];
    uint8_t factor =
        (uint8_t)round((value - min_value) / (max_value - min_value) * 255);
    fwrite(&factor, sizeof(uint8_t), 1, out_file);
  }
}

void quantificate_combined(std::string model_path, std::string param_path,
                           std::string param_min_path) {
  paddle_mobile::Loader<paddle_mobile::CPU, paddle_mobile::Precision::FP32>
      loader;
  bool optimize = true;
  auto program = loader.Load(model_path, param_path, optimize);
  char *origin_data = Get_binary_data(program.para_path);
  char *data = origin_data;
  FILE *out_file = fopen(param_min_path.c_str(), "wb");
  for (const auto &block : program.originProgram->Blocks()) {
    for (const auto &var_desc : block->Vars()) {
      auto var = program.scope->Var(var_desc->Name());
      if (var_desc->Persistable()) {
        auto tensor =
            var->template GetMutable<paddle_mobile::framework::LoDTensor>();
        if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
          continue;
        }
        LoadWithDump(*var_desc, tensor, &data, out_file);
      }
    }
  }
  fclose(out_file);
  delete origin_data;
}

void quantificate_seperated(std::string model_dir,
                            std::string param_min_path) {
  paddle_mobile::Loader<paddle_mobile::CPU, paddle_mobile::Precision::FP32>
      loader;
  bool optimize = true;
  auto program = loader.Load(model_dir, optimize);
  std::string shell_command = "mkdir " + param_min_path;
  system(shell_command.c_str());
  for (const auto &block : program.originProgram->Blocks()) {
    for (const auto &var_desc : block->Vars()) {
      auto var = program.scope->Var(var_desc->Name());
      if (var_desc->Persistable()) {
        auto tensor =
            var->template GetMutable<paddle_mobile::framework::LoDTensor>();
        if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
          continue;
        }
        std::string file_name = param_min_path + "/" + var_desc->Name();
        FILE *out_file = fopen(file_name.c_str(), "wb");
        char *origin_data =
            Get_binary_data(program.model_path + "/" + var_desc->Name());
        char *data = origin_data;
        LoadWithDump(*var_desc, tensor, &data, out_file);
        delete origin_data;
        fclose(out_file);
      }
    }
  }
}

int main() {
  std::string filename = "params_min";
  std::string model_path = g_googlenet_combine + "/model";
  std::string param_path = g_googlenet_combine + "/params";
  std::string dirname = "param_min_dir";
  std::string model_dir = g_googlenet;
  //  quantificate_combined(model_path, param_path, filename);
  quantificate_seperated(model_dir, dirname);
  return 0;
}
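LoadWithDump writes each tensor as a float (min, max) pair followed by one uint8 per element, quantized as factor = round((value - min) / (max - min) * 255). A consumer of the converted file would invert that mapping roughly as below (the helper name is illustrative, not from this tool):

#include <cstdint>

// Sketch: recover an approximate float weight from a stored uint8 factor
// and the tensor's recorded (min, max) range.
inline float Dequantize(uint8_t factor, float min_value, float max_value) {
  return min_value + (max_value - min_value) * (factor / 255.0f);
}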