Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
70bc0ed0
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
70bc0ed0
编写于
9月 15, 2018
作者:
D
dolphin8
提交者:
GitHub
9月 15, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #978 from dolphin8/metal
xx
上级
0bb2c1cd
64e6ac2b
变更
20
显示空白变更内容
内联
并排
Showing
20 changed file
with
215 addition
and
92 deletion
+215
-92
metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
...l/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
+134
-24
metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift
metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift
+1
-1
metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift
...l/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift
+7
-5
metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift
...dle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift
+4
-9
metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift
metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift
+4
-9
metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
...ile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
+2
-1
metal/paddle-mobile/paddle-mobile/Operators/Kernels/BilinearInterpKernel.swift
...addle-mobile/Operators/Kernels/BilinearInterpKernel.swift
+9
-3
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal
...ddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal
+3
-3
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal
...e-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal
+4
-3
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal
...addle-mobile/Operators/Kernels/metal/BilinearInterp.metal
+0
-2
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal
...mobile/paddle-mobile/Operators/Kernels/metal/Common.metal
+6
-6
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal
...dle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal
+6
-6
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal
...le-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal
+0
-1
metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
+14
-3
metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
+3
-0
metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
+3
-9
metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift
metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift
+5
-0
metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift
...l/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift
+3
-0
metal/paddle-mobile/paddle-mobile/PaddleMobile.swift
metal/paddle-mobile/paddle-mobile/PaddleMobile.swift
+1
-1
metal/paddle-mobile/paddle-mobile/framework/Executor.swift
metal/paddle-mobile/paddle-mobile/framework/Executor.swift
+6
-6
未找到文件。
metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
浏览文件 @
70bc0ed0
...
@@ -71,7 +71,128 @@ extension MTLDevice {
...
@@ -71,7 +71,128 @@ extension MTLDevice {
return
buffer
!
return
buffer
!
}
}
func
texture2tensor_loop
<
P
>
(
texture
:
MTLTexture
,
cb
:
([
Int
],
P
)
->
Void
)
->
Void
{
let
bpR
=
texture
.
width
*
4
*
MemoryLayout
<
P
>.
size
let
bpI
=
texture
.
height
*
bpR
let
region
=
MTLRegion
.
init
(
origin
:
MTLOrigin
.
init
(
x
:
0
,
y
:
0
,
z
:
0
),
size
:
MTLSize
.
init
(
width
:
texture
.
width
,
height
:
texture
.
height
,
depth
:
1
))
for
i
in
0
..<
texture
.
arrayLength
{
let
pointer
:
UnsafeMutablePointer
<
P
>
=
UnsafeMutablePointer
<
P
>.
allocate
(
capacity
:
bpI
)
texture
.
getBytes
(
pointer
,
bytesPerRow
:
bpR
,
bytesPerImage
:
bpI
,
from
:
region
,
mipmapLevel
:
0
,
slice
:
i
)
for
tx
in
0
..<
texture
.
width
*
texture
.
height
*
4
{
var
k
=
tx
var
xyzn
:
[
Int
]
=
[
0
,
0
,
0
,
0
]
xyzn
[
1
]
=
k
/
(
texture
.
width
*
4
)
k
%=
(
texture
.
width
*
4
)
xyzn
[
3
]
=
k
%
4
xyzn
[
0
]
=
k
/
4
xyzn
[
2
]
=
i
cb
(
xyzn
,
pointer
[
tx
])
}
}
}
func
texture2tensor_3
<
P
>
(
texture
:
MTLTexture
,
dim
:
[
Int
],
transpose
:
[
Int
]
=
[
0
,
1
,
2
,
3
])
->
[
P
]
{
var
tdim
:
[
Int
]
=
[
1
,
1
,
1
,
1
]
for
i
in
0
..<
dim
.
count
{
tdim
[
4
-
dim
.
count
+
i
]
=
dim
[
i
]
}
let
count
=
dim
.
reduce
(
1
)
{
$0
*
$1
}
var
tensor
:
[
P
]
=
.
init
(
repeating
:
Float32
(
0.0
)
as!
P
,
count
:
count
)
let
ndim
:
[
Int
]
=
transpose
.
map
{
tdim
[
$0
]
}
assert
(
dim
.
count
==
3
)
assert
(
texture
.
width
==
ndim
[
3
])
assert
(
texture
.
height
==
ndim
[
2
])
assert
(
ndim
[
0
]
==
1
)
assert
(
texture
.
arrayLength
==
(
ndim
[
1
]
+
3
)
/
4
)
texture2tensor_loop
(
texture
:
texture
)
{
(
xyzn
:
[
Int
],
v
:
P
)
in
var
tg
:
[
Int
]
=
[
0
,
0
,
0
,
0
]
tg
[
1
]
=
xyzn
[
2
]
*
4
+
xyzn
[
3
]
tg
[
2
]
=
xyzn
[
1
]
tg
[
3
]
=
xyzn
[
0
]
var
ig
:
[
Int
]
=
[
0
,
0
,
0
,
0
]
for
k
in
0
..<
4
{
ig
[
transpose
[
k
]]
=
tg
[
k
]
}
let
ix
=
ig
[
0
]
*
tdim
[
1
]
*
tdim
[
2
]
*
tdim
[
3
]
+
ig
[
1
]
*
tdim
[
2
]
*
tdim
[
3
]
+
ig
[
2
]
*
tdim
[
3
]
+
ig
[
3
]
if
ix
<
count
{
tensor
[
ix
]
=
v
}
}
return
tensor
}
func
texture2tensor_2
<
P
>
(
texture
:
MTLTexture
,
dim
:
[
Int
],
transpose
:
[
Int
]
=
[
0
,
1
,
2
,
3
])
->
[
P
]
{
var
tdim
:
[
Int
]
=
[
1
,
1
,
1
,
1
]
for
i
in
0
..<
dim
.
count
{
tdim
[
4
-
dim
.
count
+
i
]
=
dim
[
i
]
}
let
count
=
dim
.
reduce
(
1
)
{
$0
*
$1
}
var
tensor
:
[
P
]
=
.
init
(
repeating
:
Float32
(
0.0
)
as!
P
,
count
:
count
)
let
ndim
:
[
Int
]
=
transpose
.
map
{
tdim
[
$0
]
}
assert
(
dim
.
count
==
2
)
let
w
=
(
ndim
[
3
]
+
3
)
/
4
assert
(
texture
.
width
==
w
)
assert
(
texture
.
height
==
ndim
[
2
])
assert
(
ndim
[
0
]
==
1
)
assert
(
ndim
[
1
]
==
1
)
assert
(
texture
.
arrayLength
==
1
)
texture2tensor_loop
(
texture
:
texture
)
{
(
xyzn
:
[
Int
],
v
:
P
)
in
var
tg
:
[
Int
]
=
[
0
,
0
,
0
,
0
]
tg
[
2
]
=
xyzn
[
1
]
tg
[
3
]
=
xyzn
[
0
]
*
4
+
xyzn
[
3
]
var
ig
:
[
Int
]
=
[
0
,
0
,
0
,
0
]
for
k
in
0
..<
4
{
ig
[
transpose
[
k
]]
=
tg
[
k
]
}
let
ix
=
ig
[
0
]
*
tdim
[
1
]
*
tdim
[
2
]
*
tdim
[
3
]
+
ig
[
1
]
*
tdim
[
2
]
*
tdim
[
3
]
+
ig
[
2
]
*
tdim
[
3
]
+
ig
[
3
]
if
ix
<
count
{
tensor
[
ix
]
=
v
}
}
return
tensor
}
func
texture2tensor_1
<
P
>
(
texture
:
MTLTexture
,
dim
:
[
Int
],
transpose
:
[
Int
]
=
[
0
,
1
,
2
,
3
])
->
[
P
]
{
var
tdim
:
[
Int
]
=
[
1
,
1
,
1
,
1
]
for
i
in
0
..<
dim
.
count
{
tdim
[
4
-
dim
.
count
+
i
]
=
dim
[
i
]
}
let
count
=
dim
.
reduce
(
1
)
{
$0
*
$1
}
var
tensor
:
[
P
]
=
.
init
(
repeating
:
Float32
(
0.0
)
as!
P
,
count
:
count
)
let
ndim
:
[
Int
]
=
transpose
.
map
{
tdim
[
$0
]
}
assert
(
dim
.
count
==
1
)
let
w
=
(
ndim
[
3
]
+
3
)
/
4
assert
(
texture
.
width
==
w
)
assert
(
texture
.
height
==
1
)
assert
(
ndim
[
0
]
==
1
)
assert
(
ndim
[
1
]
==
1
)
assert
(
ndim
[
2
]
==
1
)
assert
(
texture
.
arrayLength
==
1
)
texture2tensor_loop
(
texture
:
texture
)
{
(
xyzn
:
[
Int
],
v
:
P
)
in
var
tg
:
[
Int
]
=
[
0
,
0
,
0
,
0
]
tg
[
3
]
=
xyzn
[
0
]
*
4
+
xyzn
[
3
]
var
ig
:
[
Int
]
=
[
0
,
0
,
0
,
0
]
for
k
in
0
..<
4
{
ig
[
transpose
[
k
]]
=
tg
[
k
]
}
let
ix
=
ig
[
0
]
*
tdim
[
1
]
*
tdim
[
2
]
*
tdim
[
3
]
+
ig
[
1
]
*
tdim
[
2
]
*
tdim
[
3
]
+
ig
[
2
]
*
tdim
[
3
]
+
ig
[
3
]
if
ix
<
count
{
tensor
[
ix
]
=
v
}
}
return
tensor
}
func
texture2tensor
<
P
>
(
texture
:
MTLTexture
,
dim
:
[
Int
],
transpose
:
[
Int
]
=
[
0
,
1
,
2
,
3
])
->
[
P
]
{
func
texture2tensor
<
P
>
(
texture
:
MTLTexture
,
dim
:
[
Int
],
transpose
:
[
Int
]
=
[
0
,
1
,
2
,
3
])
->
[
P
]
{
if
dim
.
count
==
3
{
return
texture2tensor_3
(
texture
:
texture
,
dim
:
dim
,
transpose
:
transpose
)
}
else
if
dim
.
count
==
2
{
return
texture2tensor_2
(
texture
:
texture
,
dim
:
dim
,
transpose
:
transpose
)
}
else
if
dim
.
count
==
1
{
return
texture2tensor_1
(
texture
:
texture
,
dim
:
dim
,
transpose
:
transpose
)
}
var
tdim
:
[
Int
]
=
[
1
,
1
,
1
,
1
]
var
tdim
:
[
Int
]
=
[
1
,
1
,
1
,
1
]
for
i
in
0
..<
dim
.
count
{
for
i
in
0
..<
dim
.
count
{
tdim
[
4
-
dim
.
count
+
i
]
=
dim
[
i
]
tdim
[
4
-
dim
.
count
+
i
]
=
dim
[
i
]
...
@@ -84,30 +205,19 @@ extension MTLDevice {
...
@@ -84,30 +205,19 @@ extension MTLDevice {
assert
(
texture
.
height
==
ndim
[
1
])
assert
(
texture
.
height
==
ndim
[
1
])
assert
(
texture
.
arrayLength
==
(
ndim
[
0
]
*
ndim
[
3
]
+
3
)
/
4
)
assert
(
texture
.
arrayLength
==
(
ndim
[
0
]
*
ndim
[
3
]
+
3
)
/
4
)
let
bpR
=
ndim
[
2
]
*
4
*
MemoryLayout
<
P
>.
size
texture2tensor_loop
(
texture
:
texture
)
{
(
xyzn
:
[
Int
],
v
:
P
)
in
let
bpI
=
ndim
[
1
]
*
bpR
var
tg
:
[
Int
]
=
[
0
,
0
,
0
,
0
]
let
region
=
MTLRegion
.
init
(
origin
:
MTLOrigin
.
init
(
x
:
0
,
y
:
0
,
z
:
0
),
size
:
MTLSize
.
init
(
width
:
ndim
[
2
],
height
:
ndim
[
1
],
depth
:
1
))
tg
[
1
]
=
xyzn
[
1
]
for
i
in
0
..<
texture
.
arrayLength
{
tg
[
2
]
=
xyzn
[
0
]
let
pointer
:
UnsafeMutablePointer
<
P
>
=
UnsafeMutablePointer
<
P
>.
allocate
(
capacity
:
ndim
[
1
]
*
ndim
[
2
]
*
4
*
MemoryLayout
<
P
>.
size
)
tg
[
0
]
=
(
xyzn
[
2
]
*
4
+
xyzn
[
3
])
/
ndim
[
3
]
texture
.
getBytes
(
pointer
,
bytesPerRow
:
bpR
,
bytesPerImage
:
bpI
,
from
:
region
,
mipmapLevel
:
0
,
slice
:
i
)
tg
[
3
]
=
(
xyzn
[
2
]
*
4
+
xyzn
[
3
])
%
ndim
[
3
]
var
ig
:
[
Int
]
=
[
0
,
0
,
0
,
0
]
for
h
in
0
..<
ndim
[
1
]
{
for
w
in
0
..<
ndim
[
2
]
{
for
k
in
0
..<
4
{
for
k
in
0
..<
4
{
let
tx
=
(
h
*
ndim
[
2
]
+
w
)
*
4
+
k
ig
[
transpose
[
k
]]
=
tg
[
k
]
let
n
=
(
i
*
4
+
k
)
/
ndim
[
3
]
let
c
=
(
i
*
4
+
k
)
%
ndim
[
3
]
let
jg
=
[
n
,
h
,
w
,
c
]
var
ig
=
[
0
,
0
,
0
,
0
]
for
d
in
0
..<
4
{
ig
[
transpose
[
d
]]
=
jg
[
d
]
}
}
let
ix
=
ig
[
0
]
*
tdim
[
1
]
*
tdim
[
2
]
*
tdim
[
3
]
+
ig
[
1
]
*
tdim
[
2
]
*
tdim
[
3
]
+
ig
[
2
]
*
tdim
[
3
]
+
ig
[
3
]
let
ix
=
ig
[
0
]
*
tdim
[
1
]
*
tdim
[
2
]
*
tdim
[
3
]
+
ig
[
1
]
*
tdim
[
2
]
*
tdim
[
3
]
+
ig
[
2
]
*
tdim
[
3
]
+
ig
[
3
]
if
ix
<
count
{
if
ix
<
count
{
tensor
[
ix
]
=
pointer
[
tx
]
tensor
[
ix
]
=
v
}
}
}
}
}
}
}
return
tensor
return
tensor
...
...
metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift
浏览文件 @
70bc0ed0
...
@@ -30,7 +30,7 @@ public class MobileNet_ssd_AR: Net{
...
@@ -30,7 +30,7 @@ public class MobileNet_ssd_AR: Net{
class
MobilenetssdPreProccess
:
CusomKernel
{
class
MobilenetssdPreProccess
:
CusomKernel
{
init
(
device
:
MTLDevice
)
{
init
(
device
:
MTLDevice
)
{
let
s
=
CusomKernel
.
Shape
.
init
(
inWidth
:
160
,
inHeight
:
160
,
inChannel
:
3
)
let
s
=
CusomKernel
.
Shape
.
init
(
inWidth
:
160
,
inHeight
:
160
,
inChannel
:
3
)
super
.
init
(
device
:
device
,
inFunctionName
:
"mobilent_ar_preprocess
_half
"
,
outputDim
:
s
,
usePaddleMobileLib
:
false
)
super
.
init
(
device
:
device
,
inFunctionName
:
"mobilent_ar_preprocess"
,
outputDim
:
s
,
usePaddleMobileLib
:
false
)
}
}
}
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift
浏览文件 @
70bc0ed0
...
@@ -56,9 +56,11 @@ class BatchNormOp<P: PrecisionType>: Operator<BatchNormKernel<P>, BatchNormParam
...
@@ -56,9 +56,11 @@ class BatchNormOp<P: PrecisionType>: Operator<BatchNormKernel<P>, BatchNormParam
throw
error
throw
error
}
}
}
}
}
func
delogOutput
()
{
print
(
"
\(
type
)
output: "
)
let
device
=
para
.
output
.
metalTexture
!.
device
let
outputArray
:
[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
output
.
metalTexture
,
dim
:
para
.
output
.
tensorDim
.
dims
,
transpose
:
para
.
output
.
transpose
)
print
(
outputArray
.
strideArray
())
}
}
metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift
浏览文件 @
70bc0ed0
...
@@ -53,15 +53,10 @@ class BilinearInterpOp<P: PrecisionType>: Operator<BilinearInterpKernel<P>, Bili
...
@@ -53,15 +53,10 @@ class BilinearInterpOp<P: PrecisionType>: Operator<BilinearInterpKernel<P>, Bili
func
delogOutput
()
{
func
delogOutput
()
{
print
(
"
\(
type
)
output: "
)
print
(
"
\(
type
)
output: "
)
let
padToFourDim
=
para
.
output
.
padToFourDim
let
device
=
para
.
output
.
metalTexture
!.
device
if
para
.
output
.
transpose
==
[
0
,
1
,
2
,
3
]
{
let
outputArray
:
[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
output
.
metalTexture
,
dim
:
para
.
output
.
tensorDim
.
dims
,
transpose
:
para
.
output
.
transpose
)
let
outputArray
:
[
Float32
]
=
para
.
output
.
metalTexture
.
realNHWC
(
dim
:
(
n
:
padToFourDim
[
0
],
h
:
padToFourDim
[
1
],
w
:
padToFourDim
[
2
],
c
:
padToFourDim
[
3
])
)
// print(outputArray
)
print
(
outputArray
.
strideArray
())
print
(
outputArray
.
strideArray
())
}
else
if
para
.
output
.
transpose
==
[
0
,
2
,
3
,
1
]
{
print
(
para
.
output
.
metalTexture
.
toTensor
(
dim
:
(
n
:
padToFourDim
[
0
],
c
:
padToFourDim
[
1
],
h
:
padToFourDim
[
2
],
w
:
padToFourDim
[
3
]))
.
strideArray
())
}
else
{
fatalError
(
" not implemet"
)
}
}
}
}
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift
浏览文件 @
70bc0ed0
...
@@ -65,15 +65,10 @@ class ConcatOp<P: PrecisionType>: Operator<ConcatKernel<P>, ConcatParam<P>>, Run
...
@@ -65,15 +65,10 @@ class ConcatOp<P: PrecisionType>: Operator<ConcatKernel<P>, ConcatParam<P>>, Run
func
delogOutput
()
{
func
delogOutput
()
{
print
(
"
\(
type
)
output: "
)
print
(
"
\(
type
)
output: "
)
let
padToFourDim
=
para
.
output
.
padToFourDim
if
para
.
output
.
transpose
==
[
0
,
1
,
2
,
3
]
{
let
device
=
para
.
output
.
metalTexture
!.
device
let
outputArray
:
[
Float32
]
=
para
.
output
.
metalTexture
.
realNHWC
(
dim
:
(
n
:
padToFourDim
[
0
],
h
:
padToFourDim
[
1
],
w
:
padToFourDim
[
2
],
c
:
padToFourDim
[
3
])
)
let
outputArray
:
[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
output
.
metalTexture
,
dim
:
para
.
output
.
tensorDim
.
dims
,
transpose
:
para
.
output
.
transpose
)
print
(
outputArray
.
strideArray
())
print
(
outputArray
.
strideArray
())
}
else
if
para
.
output
.
transpose
==
[
0
,
2
,
3
,
1
]
{
print
(
para
.
output
.
metalTexture
.
toTensor
(
dim
:
(
n
:
padToFourDim
[
0
],
c
:
padToFourDim
[
1
],
h
:
padToFourDim
[
2
],
w
:
padToFourDim
[
3
]))
.
strideArray
())
}
else
{
fatalError
(
" not implemet"
)
}
}
}
}
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
浏览文件 @
70bc0ed0
...
@@ -20,12 +20,13 @@ class BatchNormKernel<P: PrecisionType>: Kernel, Computable {
...
@@ -20,12 +20,13 @@ class BatchNormKernel<P: PrecisionType>: Kernel, Computable {
let
varianceP
=
param
.
variance
.
data
.
pointer
let
varianceP
=
param
.
variance
.
data
.
pointer
let
meanP
=
param
.
mean
.
data
.
pointer
let
meanP
=
param
.
mean
.
data
.
pointer
let
scaleP
=
param
.
scale
.
data
.
pointer
let
scaleP
=
param
.
scale
.
data
.
pointer
let
biasP
=
param
.
scale
.
data
.
pointer
let
biasP
=
param
.
bias
.
data
.
pointer
for
i
in
0
..<
count
{
for
i
in
0
..<
count
{
let
invStd
=
P
(
1
/
(
Float32
(
varianceP
[
i
])
+
param
.
epsilon
)
.
squareRoot
())
let
invStd
=
P
(
1
/
(
Float32
(
varianceP
[
i
])
+
param
.
epsilon
)
.
squareRoot
())
biasP
[
i
]
=
biasP
[
i
]
-
meanP
[
i
]
*
invStd
*
scaleP
[
i
]
biasP
[
i
]
=
biasP
[
i
]
-
meanP
[
i
]
*
invStd
*
scaleP
[
i
]
scaleP
[
i
]
=
invStd
*
scaleP
[
i
]
scaleP
[
i
]
=
invStd
*
scaleP
[
i
]
}
}
param
.
bias
.
initBuffer
(
device
:
device
,
precision
:
computePrecision
)
param
.
bias
.
initBuffer
(
device
:
device
,
precision
:
computePrecision
)
param
.
scale
.
initBuffer
(
device
:
device
,
precision
:
computePrecision
)
param
.
scale
.
initBuffer
(
device
:
device
,
precision
:
computePrecision
)
param
.
output
.
initTexture
(
device
:
device
,
inTranspose
:
param
.
input
.
transpose
,
computePrecision
:
computePrecision
)
param
.
output
.
initTexture
(
device
:
device
,
inTranspose
:
param
.
input
.
transpose
,
computePrecision
:
computePrecision
)
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/BilinearInterpKernel.swift
浏览文件 @
70bc0ed0
...
@@ -27,10 +27,16 @@ class BilinearInterpKernel<P: PrecisionType>: Kernel, Computable{
...
@@ -27,10 +27,16 @@ class BilinearInterpKernel<P: PrecisionType>: Kernel, Computable{
encoder
.
setTexture
(
param
.
input
.
metalTexture
,
index
:
0
)
encoder
.
setTexture
(
param
.
input
.
metalTexture
,
index
:
0
)
encoder
.
setTexture
(
param
.
output
.
metalTexture
,
index
:
1
)
encoder
.
setTexture
(
param
.
output
.
metalTexture
,
index
:
1
)
let
ratio_h
:
Float32
=
Float32
(
param
.
input
.
tensorDim
.
dims
[
2
])
/
Float32
(
param
.
output
.
tensorDim
.
dims
[
2
])
var
ratio_h
:
Float32
=
0
let
ratio_w
:
Float32
=
Float32
(
param
.
input
.
tensorDim
.
dims
[
3
])
/
Float32
(
param
.
output
.
tensorDim
.
dims
[
3
])
var
ratio_w
:
Float32
=
0
if
param
.
output
.
tensorDim
.
dims
[
2
]
>
1
{
ratio_h
=
Float32
(
param
.
input
.
tensorDim
.
dims
[
2
]
-
1
)
/
Float32
(
param
.
output
.
tensorDim
.
dims
[
2
]
-
1
)
}
if
param
.
output
.
tensorDim
.
dims
[
3
]
>
1
{
ratio_w
=
Float32
(
param
.
input
.
tensorDim
.
dims
[
3
]
-
1
)
/
Float32
(
param
.
output
.
tensorDim
.
dims
[
3
]
-
1
)
}
var
p
=
BilinearInterpMetalParam
.
init
(
ratio_h
:
ratio_h
,
ratio_w
:
ratio_w
)
var
p
=
BilinearInterpMetalParam
.
init
(
ratio_h
:
ratio_h
,
ratio_w
:
ratio_w
)
encoder
.
setBytes
(
&
p
,
length
:
MemoryLayout
<
Concat
MetalParam
>.
size
,
index
:
0
)
encoder
.
setBytes
(
&
p
,
length
:
MemoryLayout
<
BilinearInterp
MetalParam
>.
size
,
index
:
0
)
encoder
.
dispatch
(
computePipline
:
pipline
,
outTexture
:
param
.
output
.
metalTexture
)
encoder
.
dispatch
(
computePipline
:
pipline
,
outTexture
:
param
.
output
.
metalTexture
)
encoder
.
endEncoding
()
encoder
.
endEncoding
()
}
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal
浏览文件 @
70bc0ed0
...
@@ -17,14 +17,14 @@ using namespace metal;
...
@@ -17,14 +17,14 @@ using namespace metal;
kernel void batchnorm(texture2d_array<float, access::read> inTexture [[texture(0)]],
kernel void batchnorm(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 * n
ewS
cale [[buffer(0)]],
const device float4 * n
s
cale [[buffer(0)]],
const device float4 * n
ewB
ias [[buffer(1)]],
const device float4 * n
b
ias [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
gid.z >= outTexture.get_array_size()) return;
const float4 input = inTexture.read(gid.xy, gid.z);
const float4 input = inTexture.read(gid.xy, gid.z);
float4 output = input * n
ewScale[gid.z] + newB
ias[gid.z];
float4 output = input * n
scale[gid.z] + nb
ias[gid.z];
outTexture.write(output, gid.xy, gid.z);
outTexture.write(output, gid.xy, gid.z);
}
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal
浏览文件 @
70bc0ed0
...
@@ -14,8 +14,8 @@ kernel void FUNC(bilinear_interp, P)(texture2d_array<P, access::read> input [[te
...
@@ -14,8 +14,8 @@ kernel void FUNC(bilinear_interp, P)(texture2d_array<P, access::read> input [[te
if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
r = input.read(gid.xy, gid.z);
r = input.read(gid.xy, gid.z);
} else {
} else {
float
w = gid.x * pm.ratio_w;
P
w = gid.x * pm.ratio_w;
float
h = gid.y * pm.ratio_h;
P
h = gid.y * pm.ratio_h;
uint w0 = w, h0 = h;
uint w0 = w, h0 = h;
uint w1 = w0 + 1, h1 = h0 + 1;
uint w1 = w0 + 1, h1 = h0 + 1;
P w1lambda = w - w0, h1lambda = h - h0;
P w1lambda = w - w0, h1lambda = h - h0;
...
@@ -26,7 +26,8 @@ kernel void FUNC(bilinear_interp, P)(texture2d_array<P, access::read> input [[te
...
@@ -26,7 +26,8 @@ kernel void FUNC(bilinear_interp, P)(texture2d_array<P, access::read> input [[te
VECTOR(P, 4) r1 = input.read(uint2(w1, h0), gid.z);
VECTOR(P, 4) r1 = input.read(uint2(w1, h0), gid.z);
VECTOR(P, 4) r2 = input.read(uint2(w0, h1), gid.z);
VECTOR(P, 4) r2 = input.read(uint2(w0, h1), gid.z);
VECTOR(P, 4) r3 = input.read(uint2(w1, h1), gid.z);
VECTOR(P, 4) r3 = input.read(uint2(w1, h1), gid.z);
r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3);
r = h2lambda * (w2lambda * r0 + w1lambda * r1)
+ h1lambda * (w2lambda * r2 + w1lambda * r3);
}
}
output.write(r, gid.xy, gid.z);
output.write(r, gid.xy, gid.z);
}
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal
浏览文件 @
70bc0ed0
...
@@ -16,8 +16,6 @@
...
@@ -16,8 +16,6 @@
using namespace metal;
using namespace metal;
struct bilinear_interp_param {
struct bilinear_interp_param {
// int32_t out_h;
// int32_t out_w;
float ratio_h;
float ratio_h;
float ratio_w;
float ratio_w;
};
};
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal
浏览文件 @
70bc0ed0
...
@@ -17,16 +17,16 @@ using namespace metal;
...
@@ -17,16 +17,16 @@ using namespace metal;
inline void xyzn2abcd_1(int xyzn[4], int abcd[4]) {
inline void xyzn2abcd_1(int xyzn[4], int abcd[4]) {
abcd[0] = abcd[1] = abcd[2] =
1
;
abcd[0] = abcd[1] = abcd[2] =
0
;
abcd[3] = xyzn[0] * 4 + xyzn[3];
abcd[3] = xyzn[0] * 4 + xyzn[3];
}
}
inline void xyzn2abcd_2(int xyzn[4], int abcd[4]) {
inline void xyzn2abcd_2(int xyzn[4], int abcd[4]) {
abcd[0] = abcd[1] =
1
;
abcd[0] = abcd[1] =
0
;
abcd[2] = xyzn[1];
abcd[2] = xyzn[1];
abcd[3] = xyzn[0] * 4 + xyzn[3];
abcd[3] = xyzn[0] * 4 + xyzn[3];
}
}
inline void xyzn2abcd_3(int xyzn[4], int abcd[4]) {
inline void xyzn2abcd_3(int xyzn[4], int abcd[4]) {
abcd[0] =
1
;
abcd[0] =
0
;
abcd[3] = xyzn[0];
abcd[3] = xyzn[0];
abcd[2] = xyzn[1];
abcd[2] = xyzn[1];
abcd[1] = xyzn[2] * 4 + xyzn[3];
abcd[1] = xyzn[2] * 4 + xyzn[3];
...
@@ -40,15 +40,15 @@ inline void xyzn2abcd_4(int C, int xyzn[4], int abcd[4]) {
...
@@ -40,15 +40,15 @@ inline void xyzn2abcd_4(int C, int xyzn[4], int abcd[4]) {
}
}
inline void abcd2xyzn_1(int abcd[4], int xyzn[4]) {
inline void abcd2xyzn_1(int abcd[4], int xyzn[4]) {
xyzn[1] = xyzn[2] =
1
;
xyzn[1] = xyzn[2] =
0
;
xyzn[0] = abcd[3] / 4;
xyzn[0] = abcd[3] / 4;
xyzn[1] = abcd[3] % 4;
xyzn[1] = abcd[3] % 4;
}
}
inline void abcd2xyzn_2(int abcd[4], int xyzn[4]) {
inline void abcd2xyzn_2(int abcd[4], int xyzn[4]) {
xyzn[2] =
1
;
xyzn[2] =
0
;
xyzn[1] = abcd[2];
xyzn[1] = abcd[2];
xyzn[0] = abcd[3] / 4;
xyzn[0] = abcd[3] / 4;
xyzn[
1
] = abcd[3] % 4;
xyzn[
3
] = abcd[3] % 4;
}
}
inline void abcd2xyzn_3(int abcd[4], int xyzn[4]) {
inline void abcd2xyzn_3(int abcd[4], int xyzn[4]) {
xyzn[0] = abcd[3];
xyzn[0] = abcd[3];
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal
浏览文件 @
70bc0ed0
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal
浏览文件 @
70bc0ed0
...
@@ -36,7 +36,6 @@ kernel void FUNC(reshape, RIN, ROUT, P)(texture2d_array<P, access::read> inTextu
...
@@ -36,7 +36,6 @@ kernel void FUNC(reshape, RIN, ROUT, P)(texture2d_array<P, access::read> inTextu
if (index < count) {
if (index < count) {
index2abcd(lrp.idim, index, tabcd);
index2abcd(lrp.idim, index, tabcd);
trans(lrp.itrans, tabcd, iabcd);
trans(lrp.itrans, tabcd, iabcd);
abcd2xyzn(iC, iabcd, ixyzn);
#if RIN == 4
#if RIN == 4
abcd2xyzn_4(iC, iabcd, ixyzn);
abcd2xyzn_4(iC, iabcd, ixyzn);
#else
#else
...
...
metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
浏览文件 @
70bc0ed0
...
@@ -72,10 +72,21 @@ class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>
...
@@ -72,10 +72,21 @@ class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>
print
(
"
\(
type
)
output: "
)
print
(
"
\(
type
)
output: "
)
// output
// output
let
outputArray
=
para
.
output
.
metalTexture
.
float32Array
()
// let outputArray = para.output.metalTexture.float32Array()
print
(
outputArray
)
// print(outputArray.strideArray())
let
device
=
para
.
input
.
metalTexture
!.
device
let
boxes
:[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
output
.
metalTexture
!
,
dim
:
para
.
output
.
tensorDim
.
dims
,
transpose
:
[
2
,
0
,
1
,
3
])
let
variances
:[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
outputVariances
.
metalTexture
!
,
dim
:
para
.
outputVariances
.
tensorDim
.
dims
,
transpose
:
[
2
,
0
,
1
,
3
])
print
(
"boxes: "
)
print
(
boxes
.
strideArray
())
print
(
"variances: "
)
print
(
variances
.
strideArray
())
// output
// output
// print(" \(type) output: ")
print
(
"
\(
type
)
output: "
)
print
(
para
.
output
.
metalTexture
.
realNHWC
(
dim
:
(
para
.
output
.
dim
[
0
],
para
.
output
.
dim
[
1
],
para
.
output
.
dim
[
2
],
para
.
output
.
dim
[
3
]))
.
strideArray
())
// print(para.output.realNHWC().strideArray())
// let padToFourDim = para.output.padToFourDim
// let padToFourDim = para.output.padToFourDim
// if para.output.transpose == [0, 1, 2, 3] {
// if para.output.transpose == [0, 1, 2, 3] {
// let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]), texturePrecision: computePrecision)
// let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]), texturePrecision: computePrecision)
...
...
metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
浏览文件 @
70bc0ed0
...
@@ -47,6 +47,9 @@ class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable,
...
@@ -47,6 +47,9 @@ class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable,
func
delogOutput
()
{
func
delogOutput
()
{
print
(
"
\(
type
)
output: "
)
print
(
"
\(
type
)
output: "
)
print
(
para
.
output
.
metalTexture
.
toTensor
(
dim
:
(
n
:
para
.
output
.
tensorDim
[
0
],
c
:
para
.
output
.
tensorDim
[
1
],
h
:
para
.
output
.
tensorDim
[
2
],
w
:
para
.
output
.
tensorDim
[
3
]))
.
strideArray
())
print
(
para
.
output
.
metalTexture
.
toTensor
(
dim
:
(
n
:
para
.
output
.
tensorDim
[
0
],
c
:
para
.
output
.
tensorDim
[
1
],
h
:
para
.
output
.
tensorDim
[
2
],
w
:
para
.
output
.
tensorDim
[
3
]))
.
strideArray
())
let
device
=
para
.
output
.
metalTexture
!.
device
let
outputArray
:
[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
output
.
metalTexture
,
dim
:
para
.
output
.
tensorDim
.
dims
,
transpose
:
para
.
output
.
transpose
)
print
(
outputArray
.
strideArray
())
}
}
}
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
浏览文件 @
70bc0ed0
...
@@ -69,15 +69,9 @@ class ReshapeOp<P: PrecisionType>: Operator<ReshapeKernel<P>, ReshapeParam<P>>,
...
@@ -69,15 +69,9 @@ class ReshapeOp<P: PrecisionType>: Operator<ReshapeKernel<P>, ReshapeParam<P>>,
}
}
func
delogOutput
()
{
func
delogOutput
()
{
print
(
"reshape delog"
)
print
(
"reshape delog"
)
// let _: P? = para.input.metalTexture.logDesc(header: "reshape input: ", stridable: false)
let
device
=
para
.
output
.
metalTexture
!.
device
//
let
outputArray
:
[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
output
.
metalTexture
,
dim
:
para
.
output
.
tensorDim
.
dims
,
transpose
:
para
.
output
.
transpose
)
// let _: P? = para.output.metalTexture.logDesc(header: "reshape output: ", stridable: false)
let
padToFourDim
=
para
.
output
.
padToFourDim
let
outputArray
:
[
Float32
]
=
para
.
output
.
metalTexture
.
realNHWC
(
dim
:
(
n
:
padToFourDim
[
0
],
h
:
padToFourDim
[
1
],
w
:
padToFourDim
[
2
],
c
:
padToFourDim
[
3
]))
// print(para.output.metalTexture.toTensor(dim: (n: padToFourDim[0], c: padToFourDim[1], h: padToFourDim[2], w: padToFourDim[3])).strideArray())
print
(
outputArray
.
strideArray
())
print
(
outputArray
.
strideArray
())
// print(outputArray)
}
}
}
}
metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift
浏览文件 @
70bc0ed0
...
@@ -64,6 +64,11 @@ class SplitOp<P: PrecisionType>: Operator<SplitKernel<P>, SplitParam<P>>, Runabl
...
@@ -64,6 +64,11 @@ class SplitOp<P: PrecisionType>: Operator<SplitKernel<P>, SplitParam<P>>, Runabl
func
delogOutput
()
{
func
delogOutput
()
{
print
(
"
\(
type
)
output: "
)
print
(
"
\(
type
)
output: "
)
let
device
=
para
.
input
.
metalTexture
!.
device
for
out
in
para
.
outputList
{
let
arr
:
[
Float32
]
=
device
.
texture2tensor
(
texture
:
out
.
metalTexture
,
dim
:
out
.
tensorDim
.
dims
,
transpose
:
out
.
transpose
)
print
(
arr
.
strideArray
())
}
}
}
}
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift
浏览文件 @
70bc0ed0
...
@@ -57,6 +57,9 @@ class TransposeOp<P: PrecisionType>: Operator<TransposeKernel<P>, TransposeParam
...
@@ -57,6 +57,9 @@ class TransposeOp<P: PrecisionType>: Operator<TransposeKernel<P>, TransposeParam
}
else
{
}
else
{
print
(
" not implement"
)
print
(
" not implement"
)
}
}
let
device
=
para
.
output
.
metalTexture
!.
device
let
outputArray
:
[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
output
.
metalTexture
,
dim
:
para
.
output
.
tensorDim
.
dims
,
transpose
:
para
.
output
.
transpose
)
print
(
outputArray
.
strideArray
())
}
}
}
}
...
...
metal/paddle-mobile/paddle-mobile/PaddleMobile.swift
浏览文件 @
70bc0ed0
...
@@ -16,7 +16,7 @@ import Foundation
...
@@ -16,7 +16,7 @@ import Foundation
class
ScaleKernel
:
CusomKernel
{
class
ScaleKernel
:
CusomKernel
{
init
(
device
:
MTLDevice
,
shape
:
Shape
)
{
init
(
device
:
MTLDevice
,
shape
:
Shape
)
{
super
.
init
(
device
:
device
,
inFunctionName
:
"scale
_half
"
,
outputDim
:
shape
,
usePaddleMobileLib
:
false
)
super
.
init
(
device
:
device
,
inFunctionName
:
"scale"
,
outputDim
:
shape
,
usePaddleMobileLib
:
false
)
}
}
}
}
...
...
metal/paddle-mobile/paddle-mobile/framework/Executor.swift
浏览文件 @
70bc0ed0
...
@@ -14,10 +14,10 @@
...
@@ -14,10 +14,10 @@
import
Foundation
import
Foundation
let
testTo
=
2
let
testTo
=
113
var
isTest
=
false
var
isTest
=
false
let
computePrecision
:
ComputePrecision
=
.
Float
16
let
computePrecision
:
ComputePrecision
=
.
Float
32
public
class
ResultHolder
{
public
class
ResultHolder
{
public
let
dim
:
[
Int
]
public
let
dim
:
[
Int
]
...
@@ -120,10 +120,10 @@ public class Executor<P: PrecisionType> {
...
@@ -120,10 +120,10 @@ public class Executor<P: PrecisionType> {
let
inputArr
=
resInput
.
toTensor
(
dim
:
(
n
:
dim
[
0
],
c
:
dim
[
3
],
h
:
dim
[
1
],
w
:
dim
[
2
]))
let
inputArr
=
resInput
.
toTensor
(
dim
:
(
n
:
dim
[
0
],
c
:
dim
[
3
],
h
:
dim
[
1
],
w
:
dim
[
2
]))
print
(
inputArr
.
strideArray
())
print
(
inputArr
.
strideArray
())
//
print(dim)
print
(
dim
)
//
writeToLibrary(fileName: "test_image_ssd_ar", array: inputArr)
writeToLibrary
(
fileName
:
"test_image_ssd_ar"
,
array
:
inputArr
)
//
//
print("write to library done")
print
(
"write to library done"
)
// return
// return
// print(inputArr)
// print(inputArr)
//
//
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录