Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
e5e51936
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
e5e51936
编写于
3月 09, 2019
作者:
R
Ray Liu
提交者:
GitHub
3月 09, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1481 from codeWorm2015/develop
add mps support
上级
09ec8398
548723ec
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
200 addition
and
87 deletion
+200
-87
metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj
...-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj
+20
-16
metal/paddle-mobile-demo/paddle-mobile-demo/Net/MobileNetCombined.swift
...obile-demo/paddle-mobile-demo/Net/MobileNetCombined.swift
+26
-1
metal/paddle-mobile-demo/paddle-mobile-demo/Net/YoloNet.swift
...l/paddle-mobile-demo/paddle-mobile-demo/Net/YoloNet.swift
+1
-1
metal/paddle-mobile-metallib/paddle-mobile-metallib/ConvAddMetal.metal
...mobile-metallib/paddle-mobile-metallib/ConvAddMetal.metal
+20
-20
metal/paddle-mobile/paddle-mobile/Src/Framework/Executor.swift
.../paddle-mobile/paddle-mobile/Src/Framework/Executor.swift
+1
-2
metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddKernel.swift
...e/paddle-mobile/Src/Operators/Kernels/ConvAddKernel.swift
+98
-19
test/net/test_mobilenet_GPU.cpp
test/net/test_mobilenet_GPU.cpp
+7
-6
test/net/test_mobilenet_combine.cpp
test/net/test_mobilenet_combine.cpp
+9
-4
test/net/test_yolo_combined.cpp
test/net/test_yolo_combined.cpp
+3
-10
test/net/test_yologpu.cpp
test/net/test_yologpu.cpp
+8
-7
test/test_helper.h
test/test_helper.h
+7
-1
未找到文件。
metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj
浏览文件 @
e5e51936
...
...
@@ -33,8 +33,6 @@
FC5E03B221DCE8D90016C137
/* mingren_input_data in Resources */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FC5E03B121DCE8D90016C137
/* mingren_input_data */
;
};
FC704C1921D2375300F98BAB
/* super_params in Resources */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FC704C1721D2375300F98BAB
/* super_params */
;
};
FC704C1A21D2375300F98BAB
/* super_model in Resources */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FC704C1821D2375300F98BAB
/* super_model */
;
};
FC704C2221D237FC00F98BAB
/* combined_mobilenet_params in Resources */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FC704C1D21D237FC00F98BAB
/* combined_mobilenet_params */
;
};
FC704C2321D237FC00F98BAB
/* combined_mobilenet_model in Resources */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FC704C1E21D237FC00F98BAB
/* combined_mobilenet_model */
;
};
FC704C2421D237FC00F98BAB
/* yolo_params in Resources */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FC704C2021D237FC00F98BAB
/* yolo_params */
;
};
FC704C2521D237FC00F98BAB
/* yolo_model in Resources */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FC704C2121D237FC00F98BAB
/* yolo_model */
;
};
FC803BCD214D27930094B8E5
/* FPSCounter.swift in Sources */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FC803BCB214D27920094B8E5
/* FPSCounter.swift */
;
};
...
...
@@ -49,6 +47,9 @@
FCBCCC552122EF5500D94F7E
/* MetalHelper.swift in Sources */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FCBCCC542122EF5400D94F7E
/* MetalHelper.swift */
;
};
FCC15E15221E716500DC3CB2
/* paddle-mobile-metallib.metallib in Resources */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FCC15E14221E716400DC3CB2
/* paddle-mobile-metallib.metallib */
;
};
FCCED60521D7646E00BE8D5F
/* test_image_super in Resources */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FCCED60421D7646E00BE8D5F
/* test_image_super */
;
};
FCE834AE2232A4AE0057BF43
/* combined_mobilenet_params in Resources */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FCE834AC2232A4AE0057BF43
/* combined_mobilenet_params */
;
};
FCE834AF2232A4AE0057BF43
/* combined_mobilenet_model in Resources */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FCE834AD2232A4AE0057BF43
/* combined_mobilenet_model */
;
};
FCE834B12232B6DC0057BF43
/* vision_synset.txt in Resources */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FCE834B02232B6DC0057BF43
/* vision_synset.txt */
;
};
FCEBEC2C20E1391F00C0B14D
/* paddle_mobile.framework in Frameworks */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FCEBEC2B20E1391F00C0B14D
/* paddle_mobile.framework */
;
};
FCEBEC2D20E1391F00C0B14D
/* paddle_mobile.framework in Embed Frameworks */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FCEBEC2B20E1391F00C0B14D
/* paddle_mobile.framework */
;
settings
=
{
ATTRIBUTES
=
(
CodeSignOnCopy
,
RemoveHeadersOnCopy
,
);
};
};
FCF437E8214B6DDB00943429
/* MultiPredictViewController.swift in Sources */
=
{
isa
=
PBXBuildFile
;
fileRef
=
FCF437E7214B6DDB00943429
/* MultiPredictViewController.swift */
;
};
...
...
@@ -105,8 +106,6 @@
FC5E03B121DCE8D90016C137
/* mingren_input_data */
=
{
isa
=
PBXFileReference
;
lastKnownFileType
=
file
;
path
=
mingren_input_data
;
sourceTree
=
"<group>"
;
};
FC704C1721D2375300F98BAB
/* super_params */
=
{
isa
=
PBXFileReference
;
lastKnownFileType
=
file
;
path
=
super_params
;
sourceTree
=
"<group>"
;
};
FC704C1821D2375300F98BAB
/* super_model */
=
{
isa
=
PBXFileReference
;
lastKnownFileType
=
file
;
path
=
super_model
;
sourceTree
=
"<group>"
;
};
FC704C1D21D237FC00F98BAB
/* combined_mobilenet_params */
=
{
isa
=
PBXFileReference
;
lastKnownFileType
=
file
;
path
=
combined_mobilenet_params
;
sourceTree
=
"<group>"
;
};
FC704C1E21D237FC00F98BAB
/* combined_mobilenet_model */
=
{
isa
=
PBXFileReference
;
lastKnownFileType
=
file
;
path
=
combined_mobilenet_model
;
sourceTree
=
"<group>"
;
};
FC704C2021D237FC00F98BAB
/* yolo_params */
=
{
isa
=
PBXFileReference
;
lastKnownFileType
=
file
;
path
=
yolo_params
;
sourceTree
=
"<group>"
;
};
FC704C2121D237FC00F98BAB
/* yolo_model */
=
{
isa
=
PBXFileReference
;
lastKnownFileType
=
file
;
path
=
yolo_model
;
sourceTree
=
"<group>"
;
};
FC803BCB214D27920094B8E5
/* FPSCounter.swift */
=
{
isa
=
PBXFileReference
;
fileEncoding
=
4
;
lastKnownFileType
=
sourcecode.swift
;
path
=
FPSCounter.swift
;
sourceTree
=
"<group>"
;
};
...
...
@@ -121,6 +120,9 @@
FCBCCC542122EF5400D94F7E
/* MetalHelper.swift */
=
{
isa
=
PBXFileReference
;
fileEncoding
=
4
;
lastKnownFileType
=
sourcecode.swift
;
path
=
MetalHelper.swift
;
sourceTree
=
"<group>"
;
};
FCC15E14221E716400DC3CB2
/* paddle-mobile-metallib.metallib */
=
{
isa
=
PBXFileReference
;
lastKnownFileType
=
"archive.metal-library"
;
name
=
"paddle-mobile-metallib.metallib"
;
path
=
"../../../../Library/Developer/Xcode/DerivedData/paddle-mobile-hdsimtkoxoondndnjczkbkchcwyh/Build/Products/Release-iphoneos/paddle-mobile-metallib.metallib"
;
sourceTree
=
"<group>"
;
};
FCCED60421D7646E00BE8D5F
/* test_image_super */
=
{
isa
=
PBXFileReference
;
lastKnownFileType
=
file
;
path
=
test_image_super
;
sourceTree
=
"<group>"
;
};
FCE834AC2232A4AE0057BF43
/* combined_mobilenet_params */
=
{
isa
=
PBXFileReference
;
lastKnownFileType
=
file
;
path
=
combined_mobilenet_params
;
sourceTree
=
"<group>"
;
};
FCE834AD2232A4AE0057BF43
/* combined_mobilenet_model */
=
{
isa
=
PBXFileReference
;
lastKnownFileType
=
file
;
path
=
combined_mobilenet_model
;
sourceTree
=
"<group>"
;
};
FCE834B02232B6DC0057BF43
/* vision_synset.txt */
=
{
isa
=
PBXFileReference
;
fileEncoding
=
4
;
lastKnownFileType
=
text
;
path
=
vision_synset.txt
;
sourceTree
=
"<group>"
;
};
FCEBEC2B20E1391F00C0B14D
/* paddle_mobile.framework */
=
{
isa
=
PBXFileReference
;
explicitFileType
=
wrapper.framework
;
path
=
paddle_mobile.framework
;
sourceTree
=
BUILT_PRODUCTS_DIR
;
};
FCF437E7214B6DDB00943429
/* MultiPredictViewController.swift */
=
{
isa
=
PBXFileReference
;
lastKnownFileType
=
sourcecode.swift
;
path
=
MultiPredictViewController.swift
;
sourceTree
=
"<group>"
;
};
FCFADE33222F63CB0037DCE8
/* test_big.JPG */
=
{
isa
=
PBXFileReference
;
lastKnownFileType
=
image.jpeg
;
path
=
test_big.JPG
;
sourceTree
=
"<group>"
;
};
...
...
@@ -267,22 +269,13 @@
FC704C1B21D237FC00F98BAB
/* vision_model */
=
{
isa
=
PBXGroup
;
children
=
(
FCE834AB2232A4AE0057BF43
/* vision_mobilenet */
,
FCAFD8482231614200496A36
/* yolo_16 */
,
FC704C1C21D237FC00F98BAB
/* mobilenet */
,
FC704C1F21D237FC00F98BAB
/* yolo */
,
);
path
=
vision_model
;
sourceTree
=
"<group>"
;
};
FC704C1C21D237FC00F98BAB
/* mobilenet */
=
{
isa
=
PBXGroup
;
children
=
(
FC704C1D21D237FC00F98BAB
/* combined_mobilenet_params */
,
FC704C1E21D237FC00F98BAB
/* combined_mobilenet_model */
,
);
path
=
mobilenet
;
sourceTree
=
"<group>"
;
};
FC704C1F21D237FC00F98BAB
/* yolo */
=
{
isa
=
PBXGroup
;
children
=
(
...
...
@@ -336,6 +329,16 @@
path
=
yolo_16
;
sourceTree
=
"<group>"
;
};
FCE834AB2232A4AE0057BF43
/* vision_mobilenet */
=
{
isa
=
PBXGroup
;
children
=
(
FCE834B02232B6DC0057BF43
/* vision_synset.txt */
,
FCE834AC2232A4AE0057BF43
/* combined_mobilenet_params */
,
FCE834AD2232A4AE0057BF43
/* combined_mobilenet_model */
,
);
path
=
vision_mobilenet
;
sourceTree
=
"<group>"
;
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
...
...
@@ -401,8 +404,8 @@
FCCED60521D7646E00BE8D5F
/* test_image_super in Resources */
,
FC039B8C20E11C560081E9F8
/* LaunchScreen.storyboard in Resources */
,
FC9797CF21D6506F00F2FD90
/* mingren.jpg in Resources */
,
FC704C2221D237FC00F98BAB
/* combined_mobilenet_params in Resources */
,
FCAFD84B2231614200496A36
/* yolo_16_param in Resources */
,
FCE834AF2232A4AE0057BF43
/* combined_mobilenet_model in Resources */
,
FC704C1921D2375300F98BAB
/* super_params in Resources */
,
FC2BFCBE21DF15D900C262B2
/* 123.jpg in Resources */
,
FC039B8920E11C560081E9F8
/* Assets.xcassets in Resources */
,
...
...
@@ -411,14 +414,15 @@
FC5E03B221DCE8D90016C137
/* mingren_input_data in Resources */
,
FC704C1A21D2375300F98BAB
/* super_model in Resources */
,
FC039B8720E11C550081E9F8
/* Main.storyboard in Resources */
,
FCE834B12232B6DC0057BF43
/* vision_synset.txt in Resources */
,
FC9797C221D608E000F2FD90
/* mobilenet_model in Resources */
,
FCAFD84C2231614200496A36
/* yolo_16_model in Resources */
,
FC2BFCC021DF279900C262B2
/* classify-img-output.png in Resources */
,
FC203FB221CBFDBA00B37166
/* test.jpg in Resources */
,
FCC15E15221E716500DC3CB2
/* paddle-mobile-metallib.metallib in Resources */
,
FC704C2321D237FC00F98BAB
/* combined_mobilenet_model in Resources */
,
FC9797C321D608E000F2FD90
/* mobilenet_params in Resources */
,
FC704C2421D237FC00F98BAB
/* yolo_params in Resources */
,
FCE834AE2232A4AE0057BF43
/* combined_mobilenet_params in Resources */
,
FC2BFCBC21DF0A8600C262B2
/* 00001.jpg in Resources */
,
FC9797BE21D6045B00F2FD90
/* banana.jpeg in Resources */
,
FC704C2521D237FC00F98BAB
/* yolo_model in Resources */
,
...
...
metal/paddle-mobile-demo/paddle-mobile-demo/Net/MobileNetCombined.swift
浏览文件 @
e5e51936
...
...
@@ -24,10 +24,35 @@ public class MobileNetCombined: Net {
inputDim
=
Dim
.
init
(
inDim
:
[
1
,
224
,
224
,
3
])
metalLoadMode
=
.
LoadMetalInCustomMetalLib
metalLibPath
=
Bundle
.
main
.
path
(
forResource
:
"paddle-mobile-metallib"
,
ofType
:
"metallib"
)
useMPS
=
true
}
let
labels
=
PreWords
.
init
(
fileName
:
"vision_synset"
)
/// Label table read from a text resource inside a bundle.
///
/// Only lines longer than 10 characters are kept, and the first 10 characters
/// of every kept line are stripped before the remainder is stored in `contents`.
class PreWords {
    /// Parsed labels, in file order.
    var contents: [String] = []

    /// Reads and parses the label resource.
    ///
    /// - Parameters:
    ///   - fileName: Resource name without extension.
    ///   - type: Resource extension; defaults to `"txt"`.
    ///   - inBundle: Bundle searched for the resource; defaults to the main bundle.
    ///
    /// Traps with `fatalError` when the resource is not found in the bundle, and
    /// traps via `try!` when the file cannot be read as a string.
    init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
        guard let resourcePath = inBundle.path(forResource: fileName, ofType: type) else {
            fatalError("no file call \(fileName)")
        }

        let rawText = try! String(contentsOfFile: resourcePath)

        var parsedLabels: [String] = []
        for line in rawText.components(separatedBy: CharacterSet.newlines) where line.count > 10 {
            // Keep everything after the 10-character prefix of the line.
            parsedLabels.append(String(line.dropFirst(10)))
        }
        contents = parsedLabels
    }

    /// Returns the label stored at `index` (no bounds checking beyond the array's own).
    subscript(index: Int) -> String {
        return contents[index]
    }
}
override
public
func
resultStr
(
res
:
[
ResultHolder
])
->
String
{
return
"
\(
res
[
0
]
.
result
[
0
]
)
... "
let
firstRes
=
res
[
0
]
let
resPointer
=
firstRes
.
result
var
s
:
[
String
]
=
[]
(
0
..<
firstRes
.
capacity
)
.
map
{
resPointer
[
$0
]
}
.
top
(
r
:
5
)
.
enumerated
()
.
forEach
{
s
.
append
(
String
(
format
:
"%d: %@ (%3.2f%%)"
,
$0
+
1
,
labels
[
$1
.
0
],
$1
.
1
*
100
))
}
return
s
.
joined
(
separator
:
"
\n
"
)
}
}
metal/paddle-mobile-demo/paddle-mobile-demo/Net/YoloNet.swift
浏览文件 @
e5e51936
...
...
@@ -25,7 +25,7 @@ public class YoloNet: Net {
inputDim
=
Dim
.
init
(
inDim
:
[
1
,
416
,
416
,
3
])
metalLoadMode
=
.
LoadMetalInCustomMetalLib
metalLibPath
=
Bundle
.
main
.
path
(
forResource
:
"paddle-mobile-metallib"
,
ofType
:
"metallib"
)
useMPS
=
fals
e
useMPS
=
tru
e
paramPrecision
=
.
Float16
}
...
...
metal/paddle-mobile-metallib/paddle-mobile-metallib/ConvAddMetal.metal
浏览文件 @
e5e51936
...
...
@@ -354,7 +354,7 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z]
;
float4 output = float4(biase[gid.z])
;
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
...
...
@@ -385,7 +385,7 @@ kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[
}
}
// output = output + float4(biase[gid.z]);
outTexture.write(
output
, gid.xy, gid.z);
outTexture.write(
half4(output)
, gid.xy, gid.z);
}
kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
...
...
@@ -406,7 +406,7 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
half4 output = biase[gid.z]
;
float4 output = float4(biase[gid.z])
;
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
...
...
@@ -419,13 +419,13 @@ kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> in
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x +=
input.x * weights[weithTo + 0 * kernelHXW + j]
;
output.y +=
input.y * weights[weithTo + 1 * kernelHXW + j]
;
output.z +=
input.z * weights[weithTo + 2 * kernelHXW + j]
;
output.w +=
input.w * weights[weithTo + 3 * kernelHXW + j]
;
output.x +=
float(input.x) * float(weights[weithTo + 0 * kernelHXW + j])
;
output.y +=
float(input.y) * float(weights[weithTo + 1 * kernelHXW + j])
;
output.z +=
float(input.z) * float(weights[weithTo + 2 * kernelHXW + j])
;
output.w +=
float(input.w) * float(weights[weithTo + 3 * kernelHXW + j])
;
}
// output = output + float4(biase[gid.z]);
outTexture.write(
output
, gid.xy, gid.z);
outTexture.write(
half4(output)
, gid.xy, gid.z);
}
...
...
@@ -453,7 +453,7 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z]
;
float4 output = float4(biase[gid.z])
;
ushort dilation_y = param.dilationY;
half4 input[5];
...
...
@@ -471,20 +471,20 @@ kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[
for (int j = 0; j < 5; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(
input[j], weight_x
);
output.x += dot(
float4(input[j]), float4(weight_x)
);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(
input[j], weight_y
);
output.y += dot(
float4(input[j]), float4(weight_y)
);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(
input[j], weight_z
);
output.z += dot(
float4(input[j]), float4(weight_z)
);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(
input[j], weight_w
);
output.w += dot(
float4(input[j]), float4(weight_w)
);
}
}
// output = output + float4(biase[gid.z]);
outTexture.write(
output
, gid.xy, gid.z);
outTexture.write(
half4(output)
, gid.xy, gid.z);
}
...
...
@@ -512,7 +512,7 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z]
;
float4 output = float4(biase[gid.z])
;
ushort dilation_x = param.dilationX;
half4 input[5];
...
...
@@ -530,20 +530,20 @@ kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[
for (int j = 0; j < 5; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(
input[j], weight_x
);
output.x += dot(
float4(input[j]), float4(weight_x)
);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(
input[j], weight_y
);
output.y += dot(
float4(input[j]), float4(weight_y)
);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(
input[j], weight_z
);
output.z += dot(
float4(input[j]), float4(weight_z)
);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(
input[j], weight_w
);
output.w += dot(
float4(input[j]), float4(weight_w)
);
}
}
// output = output + float4(biase[gid.z]);
outTexture.write(
output
, gid.xy, gid.z);
outTexture.write(
half4(output)
, gid.xy, gid.z);
}
...
...
metal/paddle-mobile/paddle-mobile/Src/Framework/Executor.swift
浏览文件 @
e5e51936
...
...
@@ -117,10 +117,9 @@ public class Executor<P: PrecisionProtocol>: Executorable{
// Write the input to a file
/*
let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2]))
print(dim)
writeToLibrary(fileName: "
yolo
_input", array: inputArr)
writeToLibrary(fileName: "
mobilenet
_input", array: inputArr)
print(" write done ")
return
*/
...
...
metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/ConvAddKernel.swift
浏览文件 @
e5e51936
...
...
@@ -27,6 +27,78 @@ func getUniqueKey() -> String {
return
UUID
.
init
()
.
uuidString
}
/// Supplies convolution weights and bias terms to `MPSCNNConvolution`.
///
/// Weights are exposed through a raw pointer in the precision reported by
/// `dataType()`. Bias terms are always exposed as `Float`:
/// - for `.Float32` the bias pointer aliases the bias tensor's own storage;
/// - for `.Float16` the bias values are widened into a buffer that this object
///   allocates in `load()` and releases in `purge()`.
@available(iOS 11.0, *)
class ConvDataSource<P: PrecisionProtocol>: NSObject, MPSCNNConvolutionDataSource {
    var _descriptor: MPSCNNConvolutionDescriptor
    var _weightsTensor: Tensor<P>
    var _biasTensor: Tensor<P>
    /// Bias values as `Float`. Owned by this object only in the `.Float16` case.
    var _biasTerms: UnsafeMutablePointer<Float>?

    /// - Parameters:
    ///   - inDesc: Convolution descriptor returned from `descriptor()`.
    ///   - inWeights: Tensor whose storage is returned from `weights()`.
    ///   - inBiasTerms: Tensor whose values back `biasTerms()`.
    init(inDesc: MPSCNNConvolutionDescriptor, inWeights: Tensor<P>, inBiasTerms: Tensor<P>) {
        _descriptor = inDesc
        _weightsTensor = inWeights
        _biasTensor = inBiasTerms
        super.init()
    }

    /// Prepares the bias pointer.
    ///
    /// - Returns: `true` on success; `false` when the half-precision bias storage
    ///   cannot be viewed as `Float16`, in which case no bias buffer is published.
    func load() -> Bool {
        switch P.precisionType {
        case .Float32:
            _biasTerms = _biasTensor.data.pointer as? UnsafeMutablePointer<Float>
        case .Float16:
            // A repeated load() without an intervening purge() must not leak the
            // buffer allocated by the previous call.
            releaseOwnedBiasTerms()
            guard let float16Point = _biasTensor.data.pointer as? UnsafeMutablePointer<Float16> else {
                // Never hand out an uninitialized buffer as bias values.
                return false
            }
            let elementCount = _biasTensor.data.count
            let widened = UnsafeMutablePointer<Float>.allocate(capacity: elementCount)
            float16to32(input: float16Point, output: widened, count: elementCount)
            _biasTerms = widened
        }
        return true
    }

    /// Releases the widened bias buffer (`.Float16` only). The `.Float32` pointer
    /// is borrowed from the bias tensor and is left untouched.
    func purge() {
        switch P.precisionType {
        case .Float32:
            return
        case .Float16:
            releaseOwnedBiasTerms()
        }
    }

    func label() -> String? {
        return "conv_add_label"
    }

    /// Returns `self`; no new instance is created.
    func copy(with zone: NSZone? = nil) -> Any {
        return self
    }

    func descriptor() -> MPSCNNConvolutionDescriptor {
        return _descriptor
    }

    /// Element type of the memory returned by `weights()`.
    func dataType() -> MPSDataType {
        switch P.precisionType {
        case .Float32:
            return .float32
        case .Float16:
            return .float16
        }
    }

    func weights() -> UnsafeMutableRawPointer {
        return UnsafeMutableRawPointer.init(_weightsTensor.data.pointer)
    }

    func biasTerms() -> UnsafeMutablePointer<Float>? {
        return _biasTerms
    }

    /// Frees the buffer allocated by `load()` in the `.Float16` case and clears the
    /// pointer so that it can neither be freed twice nor returned after release.
    /// Must only be called on the `.Float16` path, where `_biasTerms` is owned.
    private func releaseOwnedBiasTerms() {
        guard let owned = _biasTerms else { return }
        owned.deinitialize(count: _biasTensor.data.count)
        owned.deallocate()
        _biasTerms = nil
    }
}
class
ConvAddKernel
<
P
:
PrecisionProtocol
>
:
Kernel
,
Computable
{
var
metalParam
:
MetalConvParam
!
...
...
@@ -40,30 +112,37 @@ class ConvAddKernel<P: PrecisionProtocol>: Kernel, Computable {
let
offsetX
=
(
Int
(
param
.
dilations
[
0
])
*
(
param
.
filter
.
tensorDim
[
3
]
-
1
)
+
1
)
/
2
-
Int
(
param
.
paddings
[
0
])
let
key
=
identifyingKey
if
initContext
.
useMPS
{
if
#available(iOS 10.0, *)
{
if
initContext
.
useMPS
{
// Use Apple's MetalPerformanceShaders
if
#available(iOS 11.0, *)
{
var
desc
:
MPSCNNConvolutionDescriptor
?
// If this is not a depthwise convolution and both the input and output tensor channel counts are greater than 4
if
!
(
param
.
filter
.
tensorDim
[
1
]
==
1
&&
param
.
filter
.
tensorDim
[
0
]
==
param
.
input
.
tensorDim
[
1
])
&&
param
.
input
.
tensorDim
[
1
]
>
4
&&
param
.
output
.
tensorDim
[
1
]
>
4
{
let
desc
=
MPSCNNConvolutionDescriptor
(
kernelWidth
:
param
.
filter
.
tensorDim
[
3
],
desc
=
MPSCNNConvolutionDescriptor
(
kernelWidth
:
param
.
filter
.
tensorDim
[
3
],
kernelHeight
:
param
.
filter
.
tensorDim
[
2
],
inputFeatureChannels
:
param
.
input
.
tensorDim
[
1
],
outputFeatureChannels
:
param
.
output
.
tensorDim
[
1
],
neuronFilter
:
nil
)
desc
.
strideInPixelsX
=
Int
(
param
.
stride
[
0
])
desc
.
strideInPixelsY
=
Int
(
param
.
stride
[
1
])
let
tensorPointer
=
param
.
filter
.
convert
(
converter
:
MPSPointerConverter
<
P
>.
init
())
let
yPointer
=
param
.
y
.
data
.
pointer
tensorPointer
.
withMemoryRebound
(
to
:
Float
.
self
,
capacity
:
param
.
filter
.
numel
())
{
(
weightPointer
:
UnsafeMutablePointer
<
Float
>
)
in
yPointer
.
withMemoryRebound
(
to
:
Float
.
self
,
capacity
:
param
.
y
.
numel
(),
{
(
biasePointer
:
UnsafeMutablePointer
<
Float
>
)
in
let
conv
=
MPSCNNConvolution
.
init
(
device
:
device
,
convolutionDescriptor
:
desc
,
kernelWeights
:
weightPointer
,
biasTerms
:
biasePointer
,
flags
:
.
none
)
conv
.
offset
=
MPSOffset
.
init
(
x
:
offsetX
,
y
:
offsetY
,
z
:
0
)
conv
.
edgeMode
=
.
zero
convDic
[
key
]
=
conv
})
}
desc
?
.
strideInPixelsX
=
Int
(
param
.
stride
[
0
])
desc
?
.
strideInPixelsY
=
Int
(
param
.
stride
[
1
])
}
else
if
param
.
input
.
tensorDim
[
1
]
>
4
&&
param
.
output
.
tensorDim
[
1
]
>
4
{
desc
=
MPSCNNDepthWiseConvolutionDescriptor
(
kernelWidth
:
param
.
filter
.
tensorDim
[
3
],
kernelHeight
:
param
.
filter
.
tensorDim
[
2
],
inputFeatureChannels
:
param
.
input
.
tensorDim
[
1
],
outputFeatureChannels
:
param
.
output
.
tensorDim
[
1
],
neuronFilter
:
nil
)
}
desc
?
.
strideInPixelsX
=
Int
(
param
.
stride
[
0
])
desc
?
.
strideInPixelsY
=
Int
(
param
.
stride
[
1
])
if
let
inDesc
=
desc
{
let
_
=
param
.
filter
.
convert
(
converter
:
MPSPointerConverter
<
P
>.
init
())
let
dataSource
=
ConvDataSource
.
init
(
inDesc
:
inDesc
,
inWeights
:
param
.
filter
,
inBiasTerms
:
param
.
y
)
let
conv
=
MPSCNNConvolution
.
init
(
device
:
device
,
weights
:
dataSource
)
conv
.
offset
=
MPSOffset
.
init
(
x
:
offsetX
,
y
:
offsetY
,
z
:
0
)
conv
.
edgeMode
=
.
zero
convDic
[
key
]
=
conv
imageDic
[
identifyingKey
+
"_input"
]
=
MPSImage
.
init
(
texture
:
param
.
input
.
metalTexture
,
featureChannels
:
param
.
input
.
tensorDim
[
1
])
imageDic
[
identifyingKey
+
"_output"
]
=
MPSImage
.
init
(
texture
:
param
.
output
.
metalTexture
,
featureChannels
:
param
.
output
.
tensorDim
[
1
])
super
.
init
(
device
:
device
,
inFunctionName
:
"place_holder"
,
initContext
:
initContext
)
...
...
test/net/test_mobilenet_GPU.cpp
浏览文件 @
e5e51936
...
...
@@ -25,11 +25,11 @@ int main() {
paddle_mobile
.
SetCLPath
(
"/data/local/tmp/bin"
);
#endif
// auto isok =
// paddle_mobile.Load(std::string(g_mobilenet_mul) + "/
model",
// std::string(g_mobilenet_mul) + "/
params", true);
auto
isok
=
paddle_mobile
.
Load
(
std
::
string
(
g_mobilenet_vision
)
+
"/vision_mobilenet_
model"
,
std
::
string
(
g_mobilenet_vision
)
+
"/vision_mobilenet_
params"
,
true
);
auto
isok
=
paddle_mobile
.
Load
(
std
::
string
(
g_mobilenet
),
true
);
//
auto isok = paddle_mobile.Load(std::string(g_mobilenet), true);
if
(
isok
)
{
auto
time2
=
paddle_mobile
::
time
();
std
::
cout
<<
"load cost :"
<<
paddle_mobile
::
time_diff
(
time1
,
time2
)
<<
"ms"
...
...
@@ -37,12 +37,13 @@ int main() {
std
::
vector
<
float
>
input
;
std
::
vector
<
int64_t
>
dims
{
1
,
3
,
224
,
224
};
GetInput
<
float
>
(
g_test_image_1x3x224x224_banana
,
&
input
,
dims
);
GetInput
<
float
>
(
g_test_image_1x3x224x224_vision_mobilenet_input
,
&
input
,
dims
);
std
::
vector
<
float
>
vec_result
=
paddle_mobile
.
Predict
(
input
,
dims
);
auto
time3
=
paddle_mobile
::
time
();
int
max
=
1
0
;
int
max
=
1
;
for
(
int
i
=
0
;
i
<
max
;
++
i
)
{
vec_result
=
paddle_mobile
.
Predict
(
input
,
dims
);
}
...
...
test/net/test_mobilenet_combine.cpp
浏览文件 @
e5e51936
...
...
@@ -20,14 +20,18 @@ int main() {
paddle_mobile
::
PaddleMobile
<
paddle_mobile
::
CPU
>
paddle_mobile
;
paddle_mobile
.
SetThreadNum
(
4
);
auto
time1
=
time
();
if
(
paddle_mobile
.
Load
(
std
::
string
(
g_mobilenet_combined
)
+
"/model"
,
std
::
string
(
g_mobilenet_combined
)
+
"/params"
,
true
))
{
if
(
paddle_mobile
.
Load
(
std
::
string
(
g_mobilenet_vision
)
+
"/vision_mobilenet_model"
,
std
::
string
(
g_mobilenet_vision
)
+
"/vision_mobilenet_params"
,
true
))
{
auto
time2
=
time
();
std
::
cout
<<
"load cost :"
<<
time_diff
(
time1
,
time1
)
<<
"ms"
<<
std
::
endl
;
std
::
vector
<
float
>
input
;
std
::
vector
<
int64_t
>
dims
{
1
,
3
,
224
,
224
};
GetInput
<
float
>
(
g_test_image_1x3x224x224_banana
,
&
input
,
dims
);
GetInput
<
float
>
(
g_test_image_1x3x224x224_vision_mobilenet_input
,
&
input
,
dims
);
auto
vec_result
=
paddle_mobile
.
Predict
(
input
,
dims
);
std
::
vector
<
float
>::
iterator
biggest
=
...
...
@@ -39,8 +43,9 @@ int main() {
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
auto
vec_result
=
paddle_mobile
.
Predict
(
input
,
dims
);
}
auto
time3
=
time
();
for
(
int
i
=
0
;
i
<
1
0
;
++
i
)
{
for
(
int
i
=
0
;
i
<
1
;
++
i
)
{
auto
vec_result
=
paddle_mobile
.
Predict
(
input
,
dims
);
}
auto
time4
=
time
();
...
...
test/net/test_yolo_combined.cpp
浏览文件 @
e5e51936
...
...
@@ -23,15 +23,15 @@ int main() {
// ../../../test/models/mobilenet
auto
time1
=
time
();
if
(
paddle_mobile
.
Load
(
std
::
string
(
g_yolo_
combined
)
+
"/model"
,
std
::
string
(
g_yolo_
combined
)
+
"/params"
,
true
))
{
if
(
paddle_mobile
.
Load
(
std
::
string
(
g_yolo_
vision
)
+
"/model"
,
std
::
string
(
g_yolo_
vision
)
+
"/params"
,
true
))
{
auto
time2
=
time
();
std
::
cout
<<
"load cost :"
<<
time_diff
(
time1
,
time1
)
<<
"ms"
<<
std
::
endl
;
std
::
vector
<
int64_t
>
dims
{
1
,
3
,
416
,
416
};
std
::
vector
<
float
>
input
;
GetInput
<
float
>
(
g_test_image_
desktop_1_3_416_416_nchw_floa
t
,
&
input
,
dims
);
GetInput
<
float
>
(
g_test_image_
1x3x416x416_vision_yolo_inpu
t
,
&
input
,
dims
);
std
::
cout
<<
"input.size(): "
<<
input
.
size
()
<<
std
::
endl
;
for
(
int
j
=
0
;
j
<
100
;
++
j
)
{
std
::
cout
<<
j
<<
" : "
<<
input
[
j
]
<<
std
::
endl
;
...
...
@@ -42,13 +42,6 @@ int main() {
// }
auto
time3
=
time
();
const
vector
<
float
>
vector_out
=
paddle_mobile
.
Predict
(
input
,
dims
);
std
::
cout
<<
"--------------------------------------------"
<<
std
::
endl
;
for
(
float
i
:
vector_out
)
{
std
::
cout
<<
i
<<
std
::
endl
;
}
std
::
cout
<<
"--------------------------------------------"
<<
std
::
endl
;
std
::
cout
<<
"load cost :"
<<
time_diff
(
time1
,
time1
)
<<
"ms"
<<
std
::
endl
;
...
...
test/net/test_yologpu.cpp
浏览文件 @
e5e51936
...
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include <thread>
#include <thread>
// NOLINT
#include "../../src/common/types.h"
#include "../../src/io/paddle_test_inference_api.h"
#include "../test_helper.h"
...
...
@@ -31,8 +31,9 @@ void t1() {
paddle_mobile_gpu
.
SetCLPath
(
"/data/local/tmp/bin"
);
#endif
auto
time1
=
paddle_mobile
::
time
();
auto
isok
=
paddle_mobile_gpu
.
Load
(
std
::
string
(
g_yolo_mul
)
+
"/model"
,
std
::
string
(
g_yolo_mul
)
+
"/params"
,
true
);
auto
isok
=
paddle_mobile_gpu
.
Load
(
std
::
string
(
g_yolo_vision
)
+
"/model"
,
std
::
string
(
g_yolo_vision
)
+
"/params"
,
true
);
// auto isok = paddle_mobile.Load(std::string(g_yolo_mul), true);
if
(
isok
)
{
...
...
@@ -42,13 +43,13 @@ void t1() {
std
::
vector
<
float
>
input
;
std
::
vector
<
int64_t
>
dims
{
1
,
3
,
416
,
416
};
GetInput
<
float
>
(
g_
yolo_img
,
&
input
,
dims
);
GetInput
<
float
>
(
g_
test_image_1x3x416x416_vision_yolo_input
,
&
input
,
dims
);
std
::
vector
<
float
>
vec_result
;
// = paddle_mobile.Predict(input, dims);
auto
time3
=
paddle_mobile
::
time
();
int
max
=
1
0
;
int
max
=
1
;
for
(
int
i
=
0
;
i
<
max
;
++
i
)
{
vec_result
=
paddle_mobile_gpu
.
Predict
(
input
,
dims
);
}
...
...
@@ -129,9 +130,9 @@ void t2() {
void
t3
()
{
paddle_mobile
::
PaddleMobile
<
paddle_mobile
::
CPU
>
paddle_mobile
;
// paddle_mobile.SetThreadNum(4);
//#ifdef PADDLE_MOBILE_CL
//
#ifdef PADDLE_MOBILE_CL
// paddle_mobile.SetCLPath("/data/local/tmp/bin");
//#endif
//
#endif
auto
time1
=
paddle_mobile
::
time
();
auto
isok
=
paddle_mobile
.
Load
(
std
::
string
(
g_yolo_mul
)
+
"/model"
,
std
::
string
(
g_yolo_mul
)
+
"/params"
,
true
);
...
...
test/test_helper.h
浏览文件 @
e5e51936
...
...
@@ -51,6 +51,8 @@ static const char *g_yolo_combined = "../models/yolo_combined";
static
const
char
*
g_yolo_mul
=
"../models/d"
;
static
const
char
*
g_fluid_fssd_new
=
"../models/fluid_fssd_new"
;
static
const
char
*
g_vgg16_ssd_combined
=
"../models/vgg16_ssd_combined"
;
static
const
char
*
g_mobilenet_vision
=
"../models/vision_mobilenet"
;
static
const
char
*
g_yolo_vision
=
"../models/vision_yolo"
;
static
const
char
*
g_test_image_1x3x224x224
=
"../images/test_image_1x3x224x224_float"
;
static
const
char
*
g_test_image_1x3x224x224_banana
=
...
...
@@ -65,10 +67,14 @@ static const char *g_img = "../images/img.bin";
static
const
char
*
g_yolo_img
=
"../images/in_put_1_3_416_416_2"
;
static
const
char
*
g_super_img
=
"../images/mingren_input_data"
;
static
const
char
*
g_mobilenet_img
=
"../images/image"
;
static
const
char
*
g_test_image_1x3x224x224_vision_mobilenet_input
=
"../images/vision_mobilenet_input"
;
static
const
char
*
g_test_image_1x3x416x416_vision_yolo_input
=
"../images/yolo_input"
;
using
paddle_mobile
::
framework
::
DDim
;
using
paddle_mobile
::
framework
::
Tensor
;
using
namespace
paddle_mobile
;
using
namespace
paddle_mobile
;
// NOLINT
template
<
typename
T
>
void
SetupTensor
(
paddle_mobile
::
framework
::
Tensor
*
input
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录