Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
19308114
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
332
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
19308114
编写于
12月 29, 2018
作者:
R
Ray Liu
提交者:
GitHub
12月 29, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1363 from codeWorm2015/develop
modify ios interface
上级
ded5e27d
b92aa4b6
变更
16
展开全部
隐藏空白更改
内联
并排
Showing
16 changed file
with
407 addition
and
1160 deletion
+407
-1160
metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj
...-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj
+61
-1003
metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift
...addle-mobile-demo/paddle-mobile-demo/ViewController.swift
+6
-8
metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj
metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj
+6
-6
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/FetchKernel.metal
...e/paddle-mobile/Operators/Kernels/metal/FetchKernel.metal
+4
-4
src/io/ios_io/PaddleMobileCPU.h
src/io/ios_io/PaddleMobileCPU.h
+91
-31
src/io/ios_io/PaddleMobileCPU.mm
src/io/ios_io/PaddleMobileCPU.mm
+44
-23
src/io/paddle_mobile.cpp
src/io/paddle_mobile.cpp
+6
-9
src/io/paddle_mobile.h
src/io/paddle_mobile.h
+3
-1
src/operators/kernel/arm/conv_kernel.cpp
src/operators/kernel/arm/conv_kernel.cpp
+3
-3
src/operators/kernel/central-arm-func/conv_add_arm_func.h
src/operators/kernel/central-arm-func/conv_add_arm_func.h
+3
-3
src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
...perators/kernel/central-arm-func/conv_add_relu_arm_func.h
+30
-1
src/operators/math/depthwise_conv3x3.cpp
src/operators/math/depthwise_conv3x3.cpp
+126
-63
src/operators/math/depthwise_conv3x3.h
src/operators/math/depthwise_conv3x3.h
+3
-3
test/CMakeLists.txt
test/CMakeLists.txt
+9
-0
tools/build.sh
tools/build.sh
+2
-2
tools/op.cmake
tools/op.cmake
+10
-0
未找到文件。
metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj
浏览文件 @
19308114
此差异已折叠。
点击以展开。
metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift
浏览文件 @
19308114
...
...
@@ -175,19 +175,17 @@ class ViewController: UIViewController {
override
func
viewDidLoad
()
{
super
.
viewDidLoad
()
// if runner.load() {
// print(" load success ! ")
// } else {
// print(" load error ! ")
// }
//
modelPickerView
.
delegate
=
self
modelPickerView
.
dataSource
=
self
threadPickerView
.
delegate
=
self
threadPickerView
.
dataSource
=
self
if
let
image
=
UIImage
.
init
(
named
:
"test.jpg"
)
{
selectImage
=
image
selectImageView
.
image
=
image
}
else
{
print
(
"请添加测试图片"
)
}
selectImage
=
UIImage
.
init
(
named
:
"hand.jpg"
)
selectImageView
.
image
=
selectImage
// if platform == .CPU {
// inputPointer = runner.preproccess(image: selectImage!.cgImage!)
...
...
metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj
浏览文件 @
19308114
...
...
@@ -902,8 +902,8 @@
baseConfigurationReference
=
CDF58151D902A1CBAE56A0C2
/* Pods-paddle-mobile.debug.xcconfig */
;
buildSettings
=
{
CLANG_ENABLE_MODULES
=
YES
;
CODE_SIGN_IDENTITY
=
""
;
CODE_SIGN_STYLE
=
Manual
;
CODE_SIGN_IDENTITY
=
"
iPhone Developer
"
;
CODE_SIGN_STYLE
=
Automatic
;
DEFINES_MODULE
=
YES
;
DEVELOPMENT_TEAM
=
""
;
DYLIB_COMPATIBILITY_VERSION
=
1
;
...
...
@@ -922,7 +922,7 @@
"$(inherited)"
,
"$(PROJECT_DIR)/paddle-mobile/CPU"
,
);
MACH_O_TYPE
=
static
lib
;
MACH_O_TYPE
=
mh_dy
lib
;
MTL_LANGUAGE_REVISION
=
UseDeploymentTarget
;
PRODUCT_BUNDLE_IDENTIFIER
=
"orange.paddle-mobile"
;
PRODUCT_NAME
=
"$(TARGET_NAME:c99extidentifier)"
;
...
...
@@ -939,8 +939,8 @@
baseConfigurationReference
=
E2A7957C92EDA5C3BEC0FFC2
/* Pods-paddle-mobile.release.xcconfig */
;
buildSettings
=
{
CLANG_ENABLE_MODULES
=
YES
;
CODE_SIGN_IDENTITY
=
""
;
CODE_SIGN_STYLE
=
Manual
;
CODE_SIGN_IDENTITY
=
"
iPhone Developer
"
;
CODE_SIGN_STYLE
=
Automatic
;
DEFINES_MODULE
=
YES
;
DEVELOPMENT_TEAM
=
""
;
DYLIB_COMPATIBILITY_VERSION
=
1
;
...
...
@@ -959,7 +959,7 @@
"$(inherited)"
,
"$(PROJECT_DIR)/paddle-mobile/CPU"
,
);
MACH_O_TYPE
=
static
lib
;
MACH_O_TYPE
=
mh_dy
lib
;
MTL_LANGUAGE_REVISION
=
UseDeploymentTarget
;
PRODUCT_BUNDLE_IDENTIFIER
=
"orange.paddle-mobile"
;
PRODUCT_NAME
=
"$(TARGET_NAME:c99extidentifier)"
;
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/FetchKernel.metal
浏览文件 @
19308114
...
...
@@ -31,8 +31,8 @@ kernel void fetch(texture2d_array<float, access::read> inTexture [[texture(0)]],
int output_to = 4 * input_width * input_height;
output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x;
output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y;
//
output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
//
output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
}
...
...
@@ -52,8 +52,8 @@ kernel void fetch_half(texture2d_array<half, access::read> inTexture [[texture(0
int output_to = 4 * input_width * input_height;
output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x;
output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y;
//
output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
//
output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
}
...
...
src/io/ios_io/PaddleMobileCPU.h
浏览文件 @
19308114
...
...
@@ -27,59 +27,119 @@
@end
/**
 Configuration object read by PaddleMobileCPU when it loads a model.
 */
@interface PaddleMobileCPUConfig : NSObject

/**
 @b Defaults to 1; setting it to 2 is recommended when running multi-threaded.
 */
@property (nonatomic, assign) int threadNum;

/**
 @b Whether to enable runtime infershape.
 */
@property (nonatomic, assign) BOOL loddable;

/**
 @b Whether to enable op-fusion optimization of the model.
 */
@property (nonatomic, assign) BOOL optimize;

@end
@interface
PaddleMobileCPU
:
NSObject
/*
创建对象
*/
-
(
instancetype
)
init
;
/**
@b 创建对象
/*
load 模型, 开辟内存
*/
-
(
BOOL
)
load
:(
NSString
*
)
modelPath
andWeightsPath
:(
NSString
*
)
weighsPath
;
@param config 配置
@return paddlemobile CPU 对象
*/
-
(
instancetype
)
initWithConfig
:(
PaddleMobileCPUConfig
*
)
config
;
/*
加载散开形式的模型, 需传入模型的目录
*/
/**
@b 加载模型
@param modelPath 模型路径
@param weighsPath 权重路径
@return 是否加载成功
*/
-
(
BOOL
)
loadModel
:(
NSString
*
)
modelPath
andWeightsPath
:(
NSString
*
)
weighsPath
;
/**
@b 加载散开形式的模型, 需传入模型的目录
@param modelAndWeightPath 模型和权重的路径
@return 是否加载成功
*/
-
(
BOOL
)
load
:(
NSString
*
)
modelAndWeightPath
;
/*
* 从内存中加载模型
* */
/**
@b 从内存中加载模型
@param modelLen 模型大小(字节数)
@param modelBuf 模型在内存中的位置
@param combinedParamsLen 权重大小(字节数)
@param combinedParamsBuf 权重在内存中的位置
@return 是否加载成功
*/
-
(
BOOL
)
LoadCombinedMemory
:(
size_t
)
modelLen
andModelBuf
:(
const
uint8_t
*
)
modelBuf
andModelParamsLen
:(
size_t
)
combinedParamsLen
andCombinedParamsBuf
:(
const
uint8_t
*
)
combinedParamsBuf
;
/*
*
对图像进行预处理, 需要外部开辟 output 内存, 外部释放 output 内存
*
* */
/**
@b 对图像进行预处理, 需要外部开辟 output 内存, 外部释放 output 内存, 每一个像素经过这样的预处理 (x + means) * scale, 其中 x 为像素值
@param image 输入的图像
@param output 预处理后的输出
@param means 预处理中 means
@param scale 预处理中的 scale
@param dim 预处理后的维度
*/
-
(
void
)
preprocess
:(
CGImageRef
)
image
output
:(
float
*
)
output
means
:(
NSArray
<
NSNumber
*>
*
)
means
scale
:(
float
)
scale
dim
:(
NSArray
<
NSNumber
*>
*
)
dim
;
/*
* 预测预处理后的数据, 返回结果使用结束需要调用其 releaseOutput 函数进行释放
* */
/**
进行预测
@param input 输入
@param dim 输入维度
@return 输出结果
*/
-
(
PaddleMobileCPUResult
*
)
predictInput
:(
float
*
)
input
dim
:(
NSArray
<
NSNumber
*>
*
)
dim
;
/*
进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict
*/
-
(
NSArray
*
)
predict
:(
CGImageRef
)
image
dim
:(
NSArray
<
NSNumber
*>
*
)
dim
means
:(
NSArray
<
NSNumber
*>
*
)
means
scale
:(
float
)
scale
;
/*
进行预测, 默认 means 为 0, scale 为 1.0
*/
-
(
NSArray
*
)
predict
:(
CGImageRef
)
image
dim
:(
NSArray
<
NSNumber
*>
*
)
dim
;
/*
清理内存
*/
/**
@b 进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict, 每一个像素经过这样的预处理 (x + means) * scale, 其中 x 为像素值
@param image 输入图像
@param dim 输入维度
@param means 预处理中 means
@param scale 预处理中 scale
@return 预测结果
*/
-
(
PaddleMobileCPUResult
*
)
predict
:(
CGImageRef
)
image
dim
:(
NSArray
<
NSNumber
*>
*
)
dim
means
:(
NSArray
<
NSNumber
*>
*
)
means
scale
:(
float
)
scale
;
/**
进行预测, 预处理 means 值为 0, scale 值为 1
@param image 输入图像
@param dim 输入维度
@return 预测结果
*/
-
(
PaddleMobileCPUResult
*
)
predict
:(
CGImageRef
)
image
dim
:(
NSArray
<
NSNumber
*>
*
)
dim
;
/**
@b 清理内存
*/
-
(
void
)
clear
;
@end
src/io/ios_io/PaddleMobileCPU.mm
浏览文件 @
19308114
...
...
@@ -45,21 +45,44 @@
@end
@implementation PaddleMobileCPUConfig

/**
 Creates a configuration with threadNum = 1 and optimize = YES.
 loddable is not assigned here.
 */
- (instancetype)init {
  self = [super init];
  if (!self) {
    return self;
  }
  self.threadNum = 1;
  self.optimize = YES;
  return self;
}

@end
@interface
PaddleMobileCPU
()
{
paddle_mobile
::
PaddleMobile
<
paddle_mobile
::
CPU
,
float
>
*
pam_
;
BOOL
loaded_
;
}
@property
(
strong
,
nonatomic
)
PaddleMobileCPUConfig
*
config
;
@end
@implementation
PaddleMobileCPU
static
std
::
mutex
shared_mutex
;
-
(
instancetype
)
init
{
/**
 Creates a PaddleMobileCPU instance that uses the given configuration.

 @param config configuration consulted by the load methods (thread count,
               optimize, loddable). When nil, a default
               PaddleMobileCPUConfig is used instead.
 @return the initialized instance
 */
- (instancetype)initWithConfig:(PaddleMobileCPUConfig *)config {
  self = [super init];
  if (self) {
    pam_ = new paddle_mobile::PaddleMobile<paddle_mobile::CPU, float>();
    // Messaging a nil config yields 0 / NO, so without this fallback the load
    // methods would call SetThreadNum(0) and load with optimize disabled.
    _config = (config != nil) ? config : [[PaddleMobileCPUConfig alloc] init];
  }
  return self;
}
/**
 Creates a PaddleMobileCPU instance backed by a default-initialized
 PaddleMobileCPUConfig.
 */
- (instancetype)init {
  self = [super init];
  if (self == nil) {
    return self;
  }
  _config = [[PaddleMobileCPUConfig alloc] init];
  pam_ = new paddle_mobile::PaddleMobile<paddle_mobile::CPU, float>();
  return self;
}
...
...
@@ -79,11 +102,11 @@ static std::mutex shared_mutex;
return
sharedManager
;
}
-
(
BOOL
)
load
:(
NSString
*
)
modelPath
andWeightsPath
:(
NSString
*
)
weighsPath
{
-
(
BOOL
)
load
Model
:(
NSString
*
)
modelPath
andWeightsPath
:(
NSString
*
)
weighsPath
{
std
::
string
model_path_str
=
std
::
string
([
modelPath
UTF8String
]);
std
::
string
weights_path_str
=
std
::
string
([
weighsPath
UTF8String
]);
pam_
->
SetThreadNum
(
2
);
if
(
loaded_
=
pam_
->
Load
(
model_path_str
,
weights_path_str
,
tru
e
))
{
pam_
->
SetThreadNum
(
self
.
config
.
threadNum
);
if
(
loaded_
=
pam_
->
Load
(
model_path_str
,
weights_path_str
,
self
.
config
.
optimize
,
false
,
1
,
self
.
config
.
loddabl
e
))
{
return
YES
;
}
else
{
return
NO
;
...
...
@@ -94,14 +117,14 @@ static std::mutex shared_mutex;
andModelBuf
:(
const
uint8_t
*
)
modelBuf
andModelParamsLen
:(
size_t
)
combinedParamsLen
andCombinedParamsBuf
:(
const
uint8_t
*
)
combinedParamsBuf
{
pam_
->
SetThreadNum
(
2
);
pam_
->
SetThreadNum
(
self
.
config
.
threadNum
);
return
loaded_
=
pam_
->
LoadCombinedMemory
(
modelLen
,
modelBuf
,
combinedParamsLen
,
const_cast
<
uint8_t
*>
(
combinedParamsBuf
));
const_cast
<
uint8_t
*>
(
combinedParamsBuf
)
,
self
.
config
.
optimize
,
false
,
1
,
self
.
config
.
loddable
);
}
-
(
BOOL
)
load
:(
NSString
*
)
modelAndWeightPath
{
std
::
string
model_path_str
=
std
::
string
([
modelAndWeightPath
UTF8String
]);
if
(
loaded_
=
pam_
->
Load
(
model_path_str
))
{
if
(
loaded_
=
pam_
->
Load
(
model_path_str
,
self
.
config
.
optimize
,
false
,
1
,
self
.
config
.
loddable
))
{
return
YES
;
}
else
{
return
NO
;
...
...
@@ -116,6 +139,10 @@ static std::mutex shared_mutex;
dim
:(
NSArray
<
NSNumber
*>
*
)
dim
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
shared_mutex
);
if
(
means
==
nil
)
{
means
=
@[
@0
,
@0
,
@0
];
}
// dim to c++ vector, get numel
std
::
vector
<
int64_t
>
dim_vec
;
int
numel
=
1
;
...
...
@@ -235,7 +262,7 @@ static std::mutex shared_mutex;
return
cpuResult
;
}
-
(
NSArray
*
)
predict
:(
CGImageRef
)
image
dim
:(
NSArray
<
NSNumber
*>
*
)
dim
means
:(
NSArray
<
NSNumber
*>
*
)
means
scale
:(
float
)
scale
{
-
(
PaddleMobileCPUResult
*
)
predict
:(
CGImageRef
)
image
dim
:(
NSArray
<
NSNumber
*>
*
)
dim
means
:(
NSArray
<
NSNumber
*>
*
)
means
scale
:(
float
)
scale
{
// printf(" predict one ");
std
::
lock_guard
<
std
::
mutex
>
lock
(
shared_mutex
);
if
(
!
loaded_
)
{
...
...
@@ -284,28 +311,22 @@ static std::mutex shared_mutex;
// predict
std
::
vector
<
float
>
cpp_result
=
pam_
->
Predict
(
predict_input
,
dim_vec
);
// result
long
count
=
0
;
count
=
cpp_result
.
size
();
NSMutableArray
*
result
=
[[
NSMutableArray
alloc
]
init
];
for
(
int
i
=
0
;
i
<
count
;
i
++
)
{
[
result
addObject
:[
NSNumber
numberWithFloat
:
cpp_result
[
i
]]];
}
float
*
output_pointer
=
new
float
[
cpp_result
.
size
()];
memcpy
(
output_pointer
,
cpp_result
.
data
(),
cpp_result
.
size
()
*
sizeof
(
float
));
PaddleMobileCPUResult
*
cpuResult
=
[[
PaddleMobileCPUResult
alloc
]
init
];
[
cpuResult
toSetOutput
:
output_pointer
];
[
cpuResult
toSetOutputSize
:
cpp_result
.
size
()];
free
(
output
);
// 待验证
// if ([UIDevice currentDevice].systemVersion.doubleValue < 11.0) {
CFRelease
(
cfData
);
cfData
=
NULL
;
// }
return
r
esult
;
return
cpuR
esult
;
}
-
(
NSArray
*
)
predict
:(
CGImageRef
)
image
dim
:(
NSArray
<
NSNumber
*>
*
)
dim
{
[
self
predict
:
image
dim
:
dim
means
:
nil
scale
:
1
];
/**
 Convenience overload of predict:dim:means:scale: that passes nil for means
 and 1 for scale.

 @param image input image
 @param dim   input dimensions
 @return whatever predict:dim:means:scale: returns for these arguments
 */
- (PaddleMobileCPUResult *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim {
  PaddleMobileCPUResult *prediction = [self predict:image
                                                dim:dim
                                              means:nil
                                              scale:1];
  return prediction;
}
-
(
void
)
clear
{
...
...
src/io/paddle_mobile.cpp
浏览文件 @
19308114
...
...
@@ -74,13 +74,10 @@ PMStatus PaddleMobile<Device, T>::Load(const std::string &model_path,
}
template
<
typename
Device
,
typename
T
>
bool
PaddleMobile
<
Device
,
T
>::
LoadCombinedMemory
(
size_t
model_len
,
const
uint8_t
*
model_buf
,
size_t
combined_params_len
,
uint8_t
*
combined_params_buf
)
{
int
batch_size
=
1
;
bool
optimise
=
true
;
bool
quantification
=
false
;
bool
PaddleMobile
<
Device
,
T
>::
LoadCombinedMemory
(
size_t
model_len
,
const
uint8_t
*
model_buf
,
size_t
combined_params_len
,
uint8_t
*
combined_params_buf
,
bool
optimize
,
bool
quantification
,
int
batch_size
,
bool
loddable
)
{
if
(
loader_
.
get
()
==
nullptr
)
{
loader_
=
std
::
make_shared
<
framework
::
Loader
<
Device
,
T
>>
();
}
else
{
...
...
@@ -89,9 +86,9 @@ bool PaddleMobile<Device, T>::LoadCombinedMemory(size_t model_len,
if
(
executor_
.
get
()
==
nullptr
)
{
executor_
=
std
::
make_shared
<
framework
::
Executor
<
Device
,
T
>>
(
loader_
->
LoadCombinedMemory
(
model_len
,
model_buf
,
combined_params_len
,
combined_params_buf
,
optimi
s
e
,
combined_params_buf
,
optimi
z
e
,
quantification
),
batch_size
,
optimi
s
e
);
batch_size
,
optimi
ze
,
loddabl
e
);
}
else
{
LOG
(
kLOG_INFO
)
<<
"executor inited"
;
}
...
...
src/io/paddle_mobile.h
浏览文件 @
19308114
...
...
@@ -73,7 +73,9 @@ class PaddleMobile {
bool
LoadCombinedMemory
(
size_t
model_len
,
const
uint8_t
*
model_buf
,
size_t
combined_params_len
,
uint8_t
*
combined_params_buf
);
uint8_t
*
combined_params_buf
,
bool
optimize
=
false
,
bool
quantification
=
false
,
int
batch_size
=
1
,
bool
loddable
=
false
);
void
SetThreadNum
(
int
count
);
void
Clear
();
...
...
src/operators/kernel/arm/conv_kernel.cpp
浏览文件 @
19308114
...
...
@@ -77,15 +77,15 @@ void ConvKernel<CPU, float>::Compute(const ConvParam<CPU> ¶m) {
break
;
case
ConvParam
<
CPU
>::
EXEC_DEPTHWISE3x3S1P1_FLOAT
:
math
::
DepthwiseConv3x3s1p1
(
param
.
Input
(),
param
.
Filter
(),
param
.
Output
(),
nullptr
,
false
);
nullptr
,
false
,
false
);
break
;
case
ConvParam
<
CPU
>::
EXEC_DEPTHWISE3x3S2P1_FLOAT
:
math
::
DepthwiseConv3x3s2p1v2
(
param
.
Input
(),
param
.
Filter
(),
param
.
Output
(),
nullptr
,
false
);
param
.
Output
(),
nullptr
,
false
,
false
);
break
;
case
ConvParam
<
CPU
>::
EXEC_DEPTHWISE3x3S2P0_FLOAT
:
math
::
DepthwiseConv3x3s2p0
(
param
.
Input
(),
param
.
Filter
(),
param
.
Output
(),
nullptr
,
false
);
nullptr
,
false
,
false
);
break
;
case
ConvParam
<
CPU
>::
EXEC_WINOGRAD3X3_FLOAT
:
WinogradConv3x3
<
8
,
3
>
(
param
);
...
...
src/operators/kernel/central-arm-func/conv_add_arm_func.h
浏览文件 @
19308114
...
...
@@ -122,7 +122,7 @@ void ConvAddCompute(const FusionConvAddParam<CPU> ¶m) {
param
.
Filter
()
->
dims
()[
2
]
==
param
.
Filter
()
->
dims
()[
3
]
&&
param
.
Filter
()
->
dims
()[
2
]
==
3
&&
param
.
Strides
()[
0
]
==
1
)
{
math
::
DepthwiseConv3x3s1p1
(
param
.
Input
(),
param
.
Filter
(),
param
.
Output
(),
param
.
Bias
(),
true
);
param
.
Bias
(),
true
,
false
);
}
else
if
(
param
.
Groups
()
==
param
.
Input
()
->
dims
()[
1
]
&&
param
.
Input
()
->
dims
()[
1
]
==
param
.
Output
()
->
dims
()[
1
]
&&
param
.
Filter
()
->
dims
()[
2
]
==
param
.
Filter
()
->
dims
()[
3
]
&&
...
...
@@ -133,10 +133,10 @@ void ConvAddCompute(const FusionConvAddParam<CPU> ¶m) {
// param.Output(), false);
if
(
param
.
Paddings
()[
0
]
==
0
)
{
math
::
DepthwiseConv3x3s2p0
(
param
.
Input
(),
param
.
Filter
(),
param
.
Output
(),
param
.
Bias
(),
true
);
param
.
Bias
(),
true
,
false
);
}
else
{
math
::
DepthwiseConv3x3s2p1v2
(
param
.
Input
(),
param
.
Filter
(),
param
.
Output
(),
param
.
Bias
(),
true
);
param
.
Output
(),
param
.
Bias
(),
true
,
false
);
}
}
else
{
ConvAddBasic
(
param
);
...
...
src/operators/kernel/central-arm-func/conv_add_relu_arm_func.h
浏览文件 @
19308114
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef FUSION_CONVADDRELU_OP
#pragma once
#include <operators/math/depthwise_conv3x3.h>
#include <vector>
#include "operators/math/conv_func.h"
#include "operators/math/im2col.h"
...
...
@@ -26,7 +27,7 @@ namespace paddle_mobile {
namespace
operators
{
template
<
typename
Itype
,
typename
Otype
>
void
ConvAddRelu
Compute
(
const
FusionConvAddReluParam
<
CPU
>
&
param
)
{
void
ConvAddRelu
Basic
(
const
FusionConvAddReluParam
<
CPU
>
&
param
)
{
const
Tensor
*
input
=
param
.
Input
();
Tensor
filter
=
*
param
.
Filter
();
Tensor
bias
=
*
param
.
Bias
();
...
...
@@ -118,6 +119,34 @@ void ConvAddReluCompute(const FusionConvAddReluParam<CPU> ¶m) {
}
}
/**
 * Dispatches the fused conv + add + relu computation.
 *
 * When groups == input channels == output channels and the filter is 3x3,
 * one of the specialised depthwise 3x3 routines is called, chosen by
 * stride (and, for stride 2, by padding), with both the bias and relu flags
 * set to true. Every other configuration goes through
 * ConvAddReluBasic<Itype, Otype>.
 *
 * NOTE(review): the stride-1 path calls DepthwiseConv3x3s1p1 without
 * inspecting Paddings() - confirm callers only reach it with padding 1.
 */
template <typename Itype, typename Otype>
void ConvAddReluCompute(const FusionConvAddReluParam<CPU> &param) {
  param.Output()->mutable_data<float>();

  const bool is_depthwise_3x3 =
      param.Groups() == param.Input()->dims()[1] &&
      param.Input()->dims()[1] == param.Output()->dims()[1] &&
      param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
      param.Filter()->dims()[2] == 3;

  if (!is_depthwise_3x3) {
    ConvAddReluBasic<Itype, Otype>(param);
    return;
  }

  // Strides() is only consulted once the depthwise 3x3 shape is confirmed.
  const auto stride = param.Strides()[0];
  if (stride == 1) {
    math::DepthwiseConv3x3s1p1(param.Input(), param.Filter(), param.Output(),
                               param.Bias(), true, true);
  } else if (stride == 2) {
    if (param.Paddings()[0] == 0) {
      math::DepthwiseConv3x3s2p0(param.Input(), param.Filter(), param.Output(),
                                 param.Bias(), true, true);
    } else {
      math::DepthwiseConv3x3s2p1v2(param.Input(), param.Filter(),
                                   param.Output(), param.Bias(), true, true);
    }
  } else {
    ConvAddReluBasic<Itype, Otype>(param);
  }
}
}
// namespace operators
}
// namespace paddle_mobile
...
...
src/operators/math/depthwise_conv3x3.cpp
浏览文件 @
19308114
...
...
@@ -251,27 +251,31 @@ void DepthwiseConv3x3(const framework::Tensor *input,
void
DepthwiseConv3x3s1p1
(
const
framework
::
Tensor
*
input
,
const
framework
::
Tensor
*
filter
,
framework
::
Tensor
*
output
,
framework
::
Tensor
*
bias
,
bool
if_bias
)
{
bool
if_bias
,
bool
if_relu
)
{
#if __ARM_NEON
const
float
*
input_data
=
input
->
data
<
float
>
();
const
float
*
filter_data
=
filter
->
data
<
float
>
();
float
*
output_data
=
output
->
mutable_data
<
float
>
();
const
float
*
bias_data
;
if
(
if_bias
)
{
bias_data
=
bias
->
data
<
float
>
();
}
const
int
h
=
static_cast
<
int
>
(
input
->
dims
()[
2
]);
const
int
w
=
static_cast
<
int
>
(
input
->
dims
()[
3
]);
// const int l = h;
const
float
*
bias_data
=
bias
->
data
<
float
>
();
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
const
int
c
=
static_cast
<
int
>
(
input
->
dims
()[
1
]);
const
int
h
=
static_cast
<
int
>
(
input
->
dims
()[
2
]);
const
int
w
=
static_cast
<
int
>
(
input
->
dims
()[
3
]);
const
int
hxw
=
h
*
w
;
float32x4_t
vbias
=
vdupq_n_f32
(
0.0
);
for
(
int
b
=
0
;
b
<
batch_size
;
++
b
)
{
const
float
*
filter_data_tmp
=
filter_data
;
// const int l = h;
// leftTop, rightTop, leftBottom, rightBottom
const
int
lt
=
0
;
const
int
rt
=
w
-
1
;
const
int
lb
=
(
h
-
1
)
*
w
;
const
int
rb
=
h
*
w
-
1
;
float32x4_t
zero
=
vdupq_n_f32
(
0.0
);
for
(
int
b
=
0
;
b
<
batch_size
;
++
b
)
{
#pragma omp parallel for
for
(
int
j
=
0
;
j
<
c
;
++
j
)
{
const
float
*
filter_data_tmp
=
filter
->
data
<
float
>
()
+
j
*
9
;
const
float
*
input_data
=
input
->
data
<
float
>
()
+
j
*
hxw
;
float
*
output_data
=
output
->
mutable_data
<
float
>
()
+
j
*
hxw
;
float32x4_t
vbias
;
if
(
if_bias
)
{
vbias
=
vdupq_n_f32
(
bias_data
[
j
]);
}
...
...
@@ -287,39 +291,51 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
float
w21
=
filter_data_tmp
[
7
];
float
w22
=
filter_data_tmp
[
8
];
output_data
[
0
]
=
w11
*
input_data
[
0
]
+
w12
*
input_data
[
1
]
+
w21
*
input_data
[
w
]
+
w22
*
input_data
[
w
+
1
];
output_data
[
w
-
1
]
=
w10
*
input_data
[
w
-
2
]
+
w11
*
input_data
[
w
-
1
]
+
w20
*
input_data
[
2
*
w
-
2
]
+
w21
*
input_data
[
2
*
w
-
1
];
output_data
[
(
h
-
1
)
*
w
]
=
output_data
[
lt
]
=
w11
*
input_data
[
0
]
+
w12
*
input_data
[
1
]
+
w21
*
input_data
[
w
]
+
w22
*
input_data
[
w
+
1
];
output_data
[
rt
]
=
w10
*
input_data
[
w
-
2
]
+
w11
*
input_data
[
w
-
1
]
+
w20
*
input_data
[
2
*
w
-
2
]
+
w21
*
input_data
[
2
*
w
-
1
];
output_data
[
lb
]
=
w01
*
input_data
[(
h
-
2
)
*
w
]
+
w02
*
input_data
[(
h
-
2
)
*
w
+
1
]
+
w11
*
input_data
[(
h
-
1
)
*
w
]
+
w12
*
input_data
[(
h
-
1
)
*
w
+
1
];
output_data
[
h
*
w
-
1
]
=
output_data
[
rb
]
=
w00
*
input_data
[
h
*
w
-
w
-
2
]
+
w01
*
input_data
[
h
*
w
-
w
-
1
]
+
w10
*
input_data
[
h
*
w
-
2
]
+
w11
*
input_data
[
h
*
w
-
1
];
if
(
if_bias
)
{
output_data
[
0
]
+=
bias_data
[
j
];
output_data
[
w
-
1
]
+=
bias_data
[
j
];
output_data
[(
h
-
1
)
*
w
]
+=
bias_data
[
j
];
output_data
[
h
*
w
-
1
]
+=
bias_data
[
j
];
output_data
[
lt
]
+=
bias_data
[
j
];
output_data
[
rt
]
+=
bias_data
[
j
];
output_data
[
lb
]
+=
bias_data
[
j
];
output_data
[
rb
]
+=
bias_data
[
j
];
}
if
(
if_relu
)
{
output_data
[
lt
]
=
output_data
[
lt
]
<
0
?
0
:
output_data
[
lt
];
output_data
[
rt
]
=
output_data
[
rt
]
<
0
?
0
:
output_data
[
rt
];
output_data
[
lb
]
=
output_data
[
lb
]
<
0
?
0
:
output_data
[
lb
];
output_data
[
rb
]
=
output_data
[
rb
]
<
0
?
0
:
output_data
[
rb
];
}
for
(
int
i
=
1
;
i
<
h
-
1
;
++
i
)
{
output_data
[
i
*
w
]
=
int
left
=
i
*
w
;
int
right
=
i
*
w
+
w
-
1
;
output_data
[
left
]
=
w01
*
input_data
[
i
*
w
-
w
]
+
w02
*
input_data
[
i
*
w
-
w
+
1
]
+
w11
*
input_data
[
i
*
w
]
+
w12
*
input_data
[
i
*
w
+
1
]
+
w21
*
input_data
[
i
*
w
+
w
]
+
w22
*
input_data
[
i
*
w
+
w
+
1
];
output_data
[
i
*
w
+
w
-
1
]
=
w00
*
input_data
[
i
*
w
+
w
-
1
-
w
-
1
]
+
w01
*
input_data
[
i
*
w
+
w
-
1
-
w
]
+
w10
*
input_data
[
i
*
w
+
w
-
1
-
1
]
+
w11
*
input_data
[
i
*
w
+
w
-
1
]
+
w20
*
input_data
[
i
*
w
+
w
-
1
+
w
-
1
]
+
w21
*
input_data
[
i
*
w
+
w
-
1
+
w
];
output_data
[
right
]
=
w00
*
input_data
[
i
*
w
+
w
-
1
-
w
-
1
]
+
w01
*
input_data
[
i
*
w
+
w
-
1
-
w
]
+
w10
*
input_data
[
i
*
w
+
w
-
1
-
1
]
+
w11
*
input_data
[
i
*
w
+
w
-
1
]
+
w20
*
input_data
[
i
*
w
+
w
-
1
+
w
-
1
]
+
w21
*
input_data
[
i
*
w
+
w
-
1
+
w
];
if
(
if_bias
)
{
output_data
[
i
*
w
]
+=
bias_data
[
j
];
output_data
[
i
*
w
+
w
-
1
]
+=
bias_data
[
j
];
output_data
[
left
]
+=
bias_data
[
j
];
output_data
[
right
]
+=
bias_data
[
j
];
}
if
(
if_relu
)
{
output_data
[
left
]
=
output_data
[
left
]
<
0
?
0
:
output_data
[
left
];
output_data
[
right
]
=
output_data
[
right
]
<
0
?
0
:
output_data
[
right
];
}
}
...
...
@@ -352,7 +368,9 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
out0
=
vmlaq_n_f32
(
out0
,
tmp2
,
w21
);
out0
=
vmlaq_n_f32
(
out0
,
tmp3
,
w22
);
out0
=
vaddq_f32
(
out0
,
vbias
);
if
(
if_relu
)
{
out0
=
vmaxq_f32
(
out0
,
zero
);
}
vst1q_f32
(
output_ptr
,
out0
);
in5
=
vld1q_f32
(
input_tmp_end
+
4
);
...
...
@@ -370,7 +388,9 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
out0
=
vmlaq_n_f32
(
out0
,
tmp2
,
w11
);
out0
=
vmlaq_n_f32
(
out0
,
tmp3
,
w12
);
out0
=
vaddq_f32
(
out0
,
vbias
);
if
(
if_relu
)
{
out0
=
vmaxq_f32
(
out0
,
zero
);
}
vst1q_f32
(
output_ptr
+
(
h
-
1
)
*
w
,
out0
);
// can optimize to each 8 stride.
...
...
@@ -399,6 +419,9 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
out0
=
vmlaq_n_f32
(
out0
,
tmp2
,
w21
);
out0
=
vmlaq_n_f32
(
out0
,
tmp3
,
w22
);
out0
=
vaddq_f32
(
out0
,
vbias
);
if
(
if_relu
)
{
out0
=
vmaxq_f32
(
out0
,
zero
);
}
for
(
int
i
=
0
;
i
<
c_mid
;
++
i
)
{
if
(
i
==
0
)
{
...
...
@@ -428,6 +451,9 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
out0
=
vmlaq_n_f32
(
out0
,
tmp2
,
w11
);
out0
=
vmlaq_n_f32
(
out0
,
tmp3
,
w12
);
out0
=
vaddq_f32
(
out0
,
vbias
);
if
(
if_relu
)
{
out0
=
vmaxq_f32
(
out0
,
zero
);
}
for
(
int
i
=
0
;
i
<
c_mid
;
++
i
)
{
if
(
i
==
0
)
{
...
...
@@ -471,6 +497,9 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
out0
=
vmlaq_n_f32
(
out0
,
tmp4
,
w21
);
out0
=
vmlaq_n_f32
(
out0
,
tmp5
,
w22
);
out0
=
vaddq_f32
(
out0
,
vbias
);
if
(
if_relu
)
{
out0
=
vmaxq_f32
(
out0
,
zero
);
}
vst1q_f32
(
output_ptr
,
out0
);
...
...
@@ -502,6 +531,9 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
out0
=
vmlaq_n_f32
(
out0
,
tmp4
,
w21
);
out0
=
vmlaq_n_f32
(
out0
,
tmp5
,
w22
);
out0
=
vaddq_f32
(
out0
,
vbias
);
if
(
if_relu
)
{
out0
=
vmaxq_f32
(
out0
,
zero
);
}
for
(
int
i
=
0
;
i
<
c_mid
;
++
i
)
{
if
(
i
==
0
)
{
...
...
@@ -515,9 +547,6 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
}
}
}
output_data
+=
hxw
;
input_data
+=
hxw
;
filter_data_tmp
+=
9
;
}
}
#endif
...
...
@@ -1273,7 +1302,7 @@ void DepthwiseConvAddBNRelu3x3s2p1(const framework::Tensor *input,
void
DepthwiseConv3x3s2p1v2
(
const
framework
::
Tensor
*
input
,
const
framework
::
Tensor
*
filter
,
framework
::
Tensor
*
output
,
framework
::
Tensor
*
bias
,
bool
if_bias
)
{
bool
if_bias
,
bool
if_relu
)
{
#if __ARM_NEON
const
float
*
input_data
=
input
->
data
<
float
>
();
const
float
*
filter_data
=
filter
->
data
<
float
>
();
...
...
@@ -1361,6 +1390,9 @@ void DepthwiseConv3x3s2p1v2(const framework::Tensor *input,
res3
=
vaddq_f32
(
vextq_f32
(
elewise_res2
,
zero
,
1
),
vaddq_f32
(
elewise_res0
,
elewise_res1
));
res3
=
vaddq_f32
(
res3
,
vbias
);
if
(
if_relu
)
{
res3
=
vmaxq_f32
(
res3
,
zero
);
}
vst1q_f32
(
output_row_ptr
,
res3
);
input_row_ptr
+=
6
;
...
...
@@ -1395,6 +1427,9 @@ void DepthwiseConv3x3s2p1v2(const framework::Tensor *input,
res3
=
vaddq_f32
(
vextq_f32
(
elewise_res2
,
zero
,
1
),
vaddq_f32
(
elewise_res0
,
elewise_res1
));
res3
=
vaddq_f32
(
res3
,
vbias
);
if
(
if_relu
)
{
res3
=
vmaxq_f32
(
res3
,
zero
);
}
if
((
w4
!=
w_times
))
{
vst1q_f32
(
output_row_ptr
,
res3
);
...
...
@@ -1410,12 +1445,18 @@ void DepthwiseConv3x3s2p1v2(const framework::Tensor *input,
output_row_ptr
+=
3
;
}
output_data_tmp
[
0
]
=
input_const
[
0
]
*
w11
+
input_const
[
1
]
*
w12
+
input_const
[
in_w
]
*
w21
+
input_const
[
in_w
+
1
]
*
w22
;
// leftTop, rightTop, leftBottom, rightBottom
int
lt
=
0
;
int
rt
=
out_w
-
1
;
int
lb
=
out_w
*
(
out_h
-
1
);
int
rb
=
out_h
*
out_w
-
1
;
output_data_tmp
[
lt
]
=
input_const
[
0
]
*
w11
+
input_const
[
1
]
*
w12
+
input_const
[
in_w
]
*
w21
+
input_const
[
in_w
+
1
]
*
w22
;
out2in_mid
=
(
out_w
-
1
)
*
2
;
output_data_tmp
[
out_w
-
1
]
=
output_data_tmp
[
rt
]
=
w10
*
input_const
[
out2in_mid
-
1
]
+
w11
*
input_const
[
out2in_mid
]
+
w20
*
input_const
[
out2in_mid
+
in_w
-
1
]
+
w21
*
input_const
[
out2in_mid
+
in_w
]
+
...
...
@@ -1424,7 +1465,7 @@ void DepthwiseConv3x3s2p1v2(const framework::Tensor *input,
out2in_mid
=
(
out_h
-
1
)
*
2
*
in_w
;
output_data_tmp
[
out_w
*
(
out_h
-
1
)
]
=
output_data_tmp
[
lb
]
=
w01
*
input_const
[
out2in_mid
-
in_w
]
+
w02
*
input_const
[
out2in_mid
-
in_w
+
1
]
+
w11
*
input_const
[
out2in_mid
]
+
w12
*
input_const
[
out2in_mid
+
1
]
+
...
...
@@ -1432,7 +1473,7 @@ void DepthwiseConv3x3s2p1v2(const framework::Tensor *input,
w22
*
input_const
[
out2in_mid
+
in_w
+
1
]);
out2in_mid
=
(
out_h
-
1
)
*
2
*
in_w
+
(
out_w
-
1
)
*
2
;
output_data_tmp
[
out_h
*
out_w
-
1
]
=
output_data_tmp
[
rb
]
=
w00
*
input_const
[
out2in_mid
-
in_w
-
1
]
+
w01
*
input_const
[
out2in_mid
-
in_w
]
+
w10
*
input_const
[
out2in_mid
-
1
]
+
w11
*
input_const
[
out2in_mid
]
+
...
...
@@ -1443,22 +1484,30 @@ void DepthwiseConv3x3s2p1v2(const framework::Tensor *input,
(
1
-
if_pad_r
)
*
(
1
-
if_pad_b
)
*
w22
*
input_const
[
out2in_mid
+
in_w
+
1
];
if
(
if_bias
)
{
output_data_tmp
[
0
]
+=
bias_data
[
j
];
output_data_tmp
[
out_w
-
1
]
+=
bias_data
[
j
];
output_data_tmp
[
out_w
*
(
out_h
-
1
)]
+=
bias_data
[
j
];
output_data_tmp
[
out_h
*
out_w
-
1
]
+=
bias_data
[
j
];
output_data_tmp
[
lt
]
+=
bias_data
[
j
];
output_data_tmp
[
rt
]
+=
bias_data
[
j
];
output_data_tmp
[
lb
]
+=
bias_data
[
j
];
output_data_tmp
[
rb
]
+=
bias_data
[
j
];
}
if
(
if_relu
)
{
output_data_tmp
[
lt
]
=
output_data_tmp
[
lt
]
<
0
?
0
:
output_data_tmp
[
lt
];
output_data_tmp
[
rt
]
=
output_data_tmp
[
rt
]
<
0
?
0
:
output_data_tmp
[
rt
];
output_data_tmp
[
lb
]
=
output_data_tmp
[
lb
]
<
0
?
0
:
output_data_tmp
[
lb
];
output_data_tmp
[
rb
]
=
output_data_tmp
[
rb
]
<
0
?
0
:
output_data_tmp
[
rb
];
}
for
(
int
i
=
1
;
i
<
out_h
-
1
;
i
++
)
{
out2in_mid
=
i
*
2
*
in_w
;
output_data_tmp
[
i
*
out_w
]
=
w01
*
input_const
[
out2in_mid
-
in_w
]
+
w02
*
input_const
[
out2in_mid
-
in_w
+
1
]
+
w11
*
input_const
[
out2in_mid
]
+
w12
*
input_const
[
out2in_mid
+
1
]
+
w21
*
input_const
[
out2in_mid
+
in_w
]
+
w22
*
input_const
[
out2in_mid
+
in_w
+
1
];
int
left
=
i
*
out_w
;
output_data_tmp
[
left
]
=
w01
*
input_const
[
out2in_mid
-
in_w
]
+
w02
*
input_const
[
out2in_mid
-
in_w
+
1
]
+
w11
*
input_const
[
out2in_mid
]
+
w12
*
input_const
[
out2in_mid
+
1
]
+
w21
*
input_const
[
out2in_mid
+
in_w
]
+
w22
*
input_const
[
out2in_mid
+
in_w
+
1
];
out2in_mid
=
i
*
2
*
in_w
+
(
out_w
-
1
)
*
2
;
output_data_tmp
[
i
*
out_w
+
out_w
-
1
]
=
int
right
=
i
*
out_w
+
out_w
-
1
;
output_data_tmp
[
right
]
=
w00
*
input_const
[
out2in_mid
-
in_w
-
1
]
+
w01
*
input_const
[
out2in_mid
-
in_w
]
+
w10
*
input_const
[
out2in_mid
-
1
]
+
w11
*
input_const
[
out2in_mid
]
+
...
...
@@ -1468,8 +1517,14 @@ void DepthwiseConv3x3s2p1v2(const framework::Tensor *input,
w12
*
input_const
[
out2in_mid
+
1
]
+
w22
*
input_const
[
out2in_mid
+
in_w
+
1
]);
if
(
if_bias
)
{
output_data_tmp
[
i
*
out_w
]
+=
bias_data
[
j
];
output_data_tmp
[
i
*
out_w
+
out_w
-
1
]
+=
bias_data
[
j
];
output_data_tmp
[
left
]
+=
bias_data
[
j
];
output_data_tmp
[
right
]
+=
bias_data
[
j
];
}
if
(
if_relu
)
{
output_data_tmp
[
left
]
=
output_data_tmp
[
left
]
<
0
?
0
:
output_data_tmp
[
left
];
output_data_tmp
[
right
]
=
output_data_tmp
[
right
]
<
0
?
0
:
output_data_tmp
[
right
];
}
}
filter_data_tmp
+=
9
;
...
...
@@ -1909,7 +1964,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const framework::Tensor *input,
void
DepthwiseConv3x3s2p0
(
const
framework
::
Tensor
*
input
,
const
framework
::
Tensor
*
filter
,
framework
::
Tensor
*
output
,
framework
::
Tensor
*
bias
,
bool
if_bias
)
{
bool
if_bias
,
bool
if_relu
)
{
#if __ARM_NEON
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
...
...
@@ -1977,6 +2032,9 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input,
if
(
if_bias
)
{
out0
=
vaddq_f32
(
out0
,
biasv
);
}
if
(
if_relu
)
{
out0
=
vmaxq_f32
(
out0
,
zero
);
}
vst1q_lane_f32
(
output_ptr
,
out0
,
0
);
vst1q_lane_f32
(
output_ptr
+
1
,
out0
,
1
);
vst1q_lane_f32
(
output_ptr
+
2
,
out0
,
2
);
...
...
@@ -1985,7 +2043,8 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input,
for
(
m
=
0
;
m
<
output_width
-
2
;
m
+=
3
)
{
}
for
(
int
j
=
m
;
j
<
output_width
;
j
++
)
{
output_data
[
i
*
output_width
+
j
]
=
int
index
=
i
*
output_width
+
j
;
output_data
[
index
]
=
input_data
[(
2
*
i
)
*
input_width
+
2
*
j
]
*
w00
+
input_data
[(
2
*
i
)
*
input_width
+
2
*
j
+
1
]
*
w01
+
input_data
[(
2
*
i
)
*
input_width
+
2
*
j
+
2
]
*
w02
+
...
...
@@ -1996,7 +2055,11 @@ void DepthwiseConv3x3s2p0(const framework::Tensor *input,
input_data
[(
2
*
i
+
2
)
*
input_width
+
2
*
j
+
1
]
*
w21
+
input_data
[(
2
*
i
+
2
)
*
input_width
+
2
*
j
+
2
]
*
w22
;
if
(
if_bias
)
{
output_data
[
i
*
output_width
+
j
]
+=
*
bias_data
;
output_data
[
index
]
+=
*
bias_data
;
}
if
(
if_relu
)
{
output_data
[
index
]
=
output_data
[
index
]
<
0
?
0
:
output_data
[
index
];
}
}
}
...
...
src/operators/math/depthwise_conv3x3.h
浏览文件 @
19308114
...
...
@@ -32,7 +32,7 @@ void DepthwiseConv3x3(const framework::Tensor *input,
void
DepthwiseConv3x3s1p1
(
const
framework
::
Tensor
*
input
,
const
framework
::
Tensor
*
filter
,
framework
::
Tensor
*
output
,
framework
::
Tensor
*
bias
,
bool
if_bias
);
bool
if_bias
,
bool
if_relu
);
void
DepthwiseConvAddBNRelu3x3s1p1
(
const
framework
::
Tensor
*
input
,
const
framework
::
Tensor
*
filter
,
...
...
@@ -51,7 +51,7 @@ void DepthwiseConvAddBNRelu3x3s2p1(const framework::Tensor *input,
void
DepthwiseConv3x3s2p1v2
(
const
framework
::
Tensor
*
input
,
const
framework
::
Tensor
*
filter
,
framework
::
Tensor
*
output
,
framework
::
Tensor
*
bias
,
bool
if_bias
);
bool
if_bias
,
bool
if_relu
);
void
DepthwiseConvAddBNRelu3x3s2p1v2
(
const
framework
::
Tensor
*
input
,
const
framework
::
Tensor
*
filter
,
...
...
@@ -63,7 +63,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const framework::Tensor *input,
void
DepthwiseConv3x3s2p0
(
const
framework
::
Tensor
*
input
,
const
framework
::
Tensor
*
filter
,
framework
::
Tensor
*
output
,
framework
::
Tensor
*
bias
,
bool
if_bias
);
bool
if_bias
,
bool
if_relu
);
// TODO(hjchen2) need to be implemented
// template<typename Itype, typename Otype>
...
...
test/CMakeLists.txt
浏览文件 @
19308114
...
...
@@ -129,6 +129,15 @@ if (CON GREATER -1)
endif
()
list
(
FIND NET
"super"
CON
)
if
(
CON GREATER -1
)
# gen test
ADD_EXECUTABLE
(
test-super net/test_super.cpp test_helper.h test_include.h executor_for_test.h
)
target_link_libraries
(
test-super paddle-mobile
)
set
(
FOUND_MATCH ON
)
endif
()
if
(
NOT FOUND_MATCH
)
# gen test
ADD_EXECUTABLE
(
test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h
)
...
...
tools/build.sh
浏览文件 @
19308114
#!/usr/bin/env bash
NETS
=
""
declare
-a
supportedNets
=(
"googlenet"
"mobilenet"
"yolo"
"squeezenet"
"resnet"
"mobilenetssd"
"nlp"
"mobilenetfssd"
"genet"
)
declare
-a
supportedNets
=(
"googlenet"
"mobilenet"
"yolo"
"squeezenet"
"resnet"
"mobilenetssd"
"nlp"
"mobilenetfssd"
"genet"
"super"
)
build_for_mac
()
{
if
[
!
`
which brew
`
]
;
then
...
...
@@ -162,7 +162,7 @@ build_for_ios() {
fi
cd
"
${
BUILD_DIR
}
"
make
-j
8
cp
../../../src/ios_io/PaddleMobileCPU.h ./build/PaddleMobileCPU.h
cp
../../../src/io
/io
s_io/PaddleMobileCPU.h ./build/PaddleMobileCPU.h
cd
./build
# 生成符号表
ranlib
*
.a
...
...
tools/op.cmake
浏览文件 @
19308114
...
...
@@ -202,6 +202,16 @@ if (CON GREATER -1)
set
(
FOUND_MATCH ON
)
endif
()
list
(
FIND NET
"super"
CON
)
if
(
CON GREATER -1
)
message
(
"super enabled"
)
set
(
FUSION_CONVADD_OP ON
)
set
(
FUSION_CONVADDRELU_OP ON
)
set
(
ELEMENTWISEADD_OP ON
)
set
(
FOUND_MATCH ON
)
endif
()
if
(
NOT FOUND_MATCH
)
message
(
"--default--"
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录