Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
76a6a4c5
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
76a6a4c5
编写于
9月 15, 2018
作者:
D
dolphin8
提交者:
GitHub
9月 15, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #978 from dolphin8/metal
xx
上级
362534f9
b1035346
变更
20
隐藏空白更改
内联
并排
Showing
20 changed file
with
215 addition
and
92 deletion
+215
-92
metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
...l/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
+134
-24
metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift
metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift
+1
-1
metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift
...l/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift
+7
-5
metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift
...dle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift
+4
-9
metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift
metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift
+4
-9
metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
...ile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
+2
-1
metal/paddle-mobile/paddle-mobile/Operators/Kernels/BilinearInterpKernel.swift
...addle-mobile/Operators/Kernels/BilinearInterpKernel.swift
+9
-3
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal
...ddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal
+3
-3
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal
...e-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal
+4
-3
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal
...addle-mobile/Operators/Kernels/metal/BilinearInterp.metal
+0
-2
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal
...mobile/paddle-mobile/Operators/Kernels/metal/Common.metal
+6
-6
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal
...dle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal
+6
-6
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal
...le-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal
+0
-1
metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
+14
-3
metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
+3
-0
metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
+3
-9
metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift
metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift
+5
-0
metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift
...l/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift
+3
-0
metal/paddle-mobile/paddle-mobile/PaddleMobile.swift
metal/paddle-mobile/paddle-mobile/PaddleMobile.swift
+1
-1
metal/paddle-mobile/paddle-mobile/framework/Executor.swift
metal/paddle-mobile/paddle-mobile/framework/Executor.swift
+6
-6
未找到文件。
metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
浏览文件 @
76a6a4c5
...
...
@@ -71,7 +71,128 @@ extension MTLDevice {
return
buffer
!
}
/// Reads every slice of `texture` back to the CPU and calls `cb` once per
/// stored component, in memory order.
///
/// Each texel is read as four consecutive values of type `P`
/// (`bytesPerRow = width * 4 * MemoryLayout<P>.size`).
/// NOTE(review): this assumes `P` matches the texture's component type and
/// that the texture has four components per texel — nothing here checks it.
///
/// - Parameters:
///   - texture: Texture to read. Slices `0..<texture.arrayLength` are visited
///     in ascending order, mipmap level 0 only.
///   - cb: Invoked with `[x, y, slice, component]` and the value found there,
///     where `x` is the texel column, `y` the texel row and `component` is
///     in `0..<4`. Within a slice the order is row by row, texel by texel,
///     component by component.
func texture2tensor_loop<P>(texture: MTLTexture, cb: ([Int], P) -> Void) -> Void {
    let width = texture.width
    let height = texture.height
    let bpR = width * 4 * MemoryLayout<P>.size
    let bpI = height * bpR
    let region = MTLRegion(origin: MTLOrigin(x: 0, y: 0, z: 0),
                           size: MTLSize(width: width, height: height, depth: 1))

    // `allocate(capacity:)` counts elements of `P`, not bytes, so one slice
    // needs width * height * 4 elements. The previous code passed the byte
    // count and never released the buffer, leaking one allocation per slice.
    let componentsPerSlice = width * height * 4
    let pointer = UnsafeMutablePointer<P>.allocate(capacity: componentsPerSlice)
    defer { pointer.deallocate() }

    for i in 0..<texture.arrayLength {
        // The staging buffer is reused: getBytes overwrites the whole region
        // for every slice before anything is read from it.
        texture.getBytes(pointer, bytesPerRow: bpR, bytesPerImage: bpI, from: region, mipmapLevel: 0, slice: i)

        // Walk the buffer linearly; tx == (y * width + x) * 4 + component.
        var tx = 0
        for y in 0..<height {
            for x in 0..<width {
                for component in 0..<4 {
                    cb([x, y, i, component], pointer[tx])
                    tx += 1
                }
            }
        }
    }
}
/// Converts a texture into a flat array for a 3-entry `dim`.
///
/// `dim` is right-aligned into a 4-entry shape padded with leading 1s. Each
/// texture component is placed at logical position
/// `[0, slice * 4 + component, y, x]`, scattered through `transpose`
/// (`index[transpose[k]] = logical[k]`), and written at the row-major offset
/// of that index in the padded shape. Offsets at or beyond the element count
/// are dropped.
///
/// - Parameters:
///   - texture: Source texture, read through `texture2tensor_loop`.
///   - dim: Tensor shape; asserted to have exactly 3 entries.
///   - transpose: Axis permutation applied to the padded shape and to each index.
/// - Returns: An array holding `dim.reduce(1, *)` elements.
/// - Note: The fill value is produced with `Float32(0.0) as! P`, so this traps
///   when `P` is not `Float32`.
func texture2tensor_3<P>(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] {
    // Right-align `dim` inside a 4-entry shape.
    var paddedShape = [Int](repeating: 1, count: 4)
    let leading = 4 - dim.count
    for (offset, extent) in dim.enumerated() {
        paddedShape[leading + offset] = extent
    }

    let elementCount = dim.reduce(1, *)
    let fill = Float32(0.0) as! P
    var tensor = [P](repeating: fill, count: elementCount)
    let permutedShape: [Int] = transpose.map { paddedShape[$0] }

    assert(dim.count == 3)
    assert(texture.width == permutedShape[3])
    assert(texture.height == permutedShape[2])
    assert(permutedShape[0] == 1)
    assert(texture.arrayLength == (permutedShape[1] + 3) / 4)

    texture2tensor_loop(texture: texture) { (coord: [Int], value: P) in
        // coord is [x, y, slice, component].
        let logical = [0, coord[2] * 4 + coord[3], coord[1], coord[0]]

        var index = [Int](repeating: 0, count: 4)
        (0..<4).forEach { axis in
            index[transpose[axis]] = logical[axis]
        }

        let flat = index[0] * paddedShape[1] * paddedShape[2] * paddedShape[3]
            + index[1] * paddedShape[2] * paddedShape[3]
            + index[2] * paddedShape[3]
            + index[3]
        guard flat < elementCount else { return }
        tensor[flat] = value
    }
    return tensor
}
/// Converts a texture into a flat array for a 2-entry `dim`.
///
/// `dim` is right-aligned into a 4-entry shape padded with leading 1s. Each
/// texture component is placed at logical position
/// `[0, 0, y, x * 4 + component]`, scattered through `transpose`
/// (`index[transpose[k]] = logical[k]`), and written at the row-major offset
/// of that index in the padded shape. Offsets at or beyond the element count
/// are dropped.
///
/// - Parameters:
///   - texture: Source texture, read through `texture2tensor_loop`; asserted
///     to have a single slice.
///   - dim: Tensor shape; asserted to have exactly 2 entries.
///   - transpose: Axis permutation applied to the padded shape and to each index.
/// - Returns: An array holding `dim.reduce(1, *)` elements.
/// - Note: The fill value is produced with `Float32(0.0) as! P`, so this traps
///   when `P` is not `Float32`.
func texture2tensor_2<P>(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] {
    // Right-align `dim` inside a 4-entry shape.
    var paddedShape = [Int](repeating: 1, count: 4)
    let leading = 4 - dim.count
    for (offset, extent) in dim.enumerated() {
        paddedShape[leading + offset] = extent
    }

    let elementCount = dim.reduce(1, *)
    let fill = Float32(0.0) as! P
    var tensor = [P](repeating: fill, count: elementCount)
    let permutedShape: [Int] = transpose.map { paddedShape[$0] }

    assert(dim.count == 2)
    // Four components are packed per texel along the last axis.
    let expectedWidth = (permutedShape[3] + 3) / 4
    assert(texture.width == expectedWidth)
    assert(texture.height == permutedShape[2])
    assert(permutedShape[0] == 1)
    assert(permutedShape[1] == 1)
    assert(texture.arrayLength == 1)

    texture2tensor_loop(texture: texture) { (coord: [Int], value: P) in
        // coord is [x, y, slice, component].
        let logical = [0, 0, coord[1], coord[0] * 4 + coord[3]]

        var index = [Int](repeating: 0, count: 4)
        (0..<4).forEach { axis in
            index[transpose[axis]] = logical[axis]
        }

        let flat = index[0] * paddedShape[1] * paddedShape[2] * paddedShape[3]
            + index[1] * paddedShape[2] * paddedShape[3]
            + index[2] * paddedShape[3]
            + index[3]
        guard flat < elementCount else { return }
        tensor[flat] = value
    }
    return tensor
}
/// Converts a texture into a flat array for a 1-entry `dim`.
///
/// `dim` is right-aligned into a 4-entry shape padded with leading 1s. Each
/// texture component is placed at logical position
/// `[0, 0, 0, x * 4 + component]`, scattered through `transpose`
/// (`index[transpose[k]] = logical[k]`), and written at the row-major offset
/// of that index in the padded shape. Offsets at or beyond the element count
/// are dropped.
///
/// - Parameters:
///   - texture: Source texture, read through `texture2tensor_loop`; asserted
///     to be one row high with a single slice.
///   - dim: Tensor shape; asserted to have exactly 1 entry.
///   - transpose: Axis permutation applied to the padded shape and to each index.
/// - Returns: An array holding `dim.reduce(1, *)` elements.
/// - Note: The fill value is produced with `Float32(0.0) as! P`, so this traps
///   when `P` is not `Float32`.
func texture2tensor_1<P>(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] {
    // Right-align `dim` inside a 4-entry shape.
    var paddedShape = [Int](repeating: 1, count: 4)
    let leading = 4 - dim.count
    for (offset, extent) in dim.enumerated() {
        paddedShape[leading + offset] = extent
    }

    let elementCount = dim.reduce(1, *)
    let fill = Float32(0.0) as! P
    var tensor = [P](repeating: fill, count: elementCount)
    let permutedShape: [Int] = transpose.map { paddedShape[$0] }

    assert(dim.count == 1)
    // Four components are packed per texel along the last axis.
    let expectedWidth = (permutedShape[3] + 3) / 4
    assert(texture.width == expectedWidth)
    assert(texture.height == 1)
    assert(permutedShape[0] == 1)
    assert(permutedShape[1] == 1)
    assert(permutedShape[2] == 1)
    assert(texture.arrayLength == 1)

    texture2tensor_loop(texture: texture) { (coord: [Int], value: P) in
        // coord is [x, y, slice, component]; only x and component are used.
        let logical = [0, 0, 0, coord[0] * 4 + coord[3]]

        var index = [Int](repeating: 0, count: 4)
        (0..<4).forEach { axis in
            index[transpose[axis]] = logical[axis]
        }

        let flat = index[0] * paddedShape[1] * paddedShape[2] * paddedShape[3]
            + index[1] * paddedShape[2] * paddedShape[3]
            + index[2] * paddedShape[3]
            + index[3]
        guard flat < elementCount else { return }
        tensor[flat] = value
    }
    return tensor
}
func
texture2tensor
<
P
>
(
texture
:
MTLTexture
,
dim
:
[
Int
],
transpose
:
[
Int
]
=
[
0
,
1
,
2
,
3
])
->
[
P
]
{
if
dim
.
count
==
3
{
return
texture2tensor_3
(
texture
:
texture
,
dim
:
dim
,
transpose
:
transpose
)
}
else
if
dim
.
count
==
2
{
return
texture2tensor_2
(
texture
:
texture
,
dim
:
dim
,
transpose
:
transpose
)
}
else
if
dim
.
count
==
1
{
return
texture2tensor_1
(
texture
:
texture
,
dim
:
dim
,
transpose
:
transpose
)
}
var
tdim
:
[
Int
]
=
[
1
,
1
,
1
,
1
]
for
i
in
0
..<
dim
.
count
{
tdim
[
4
-
dim
.
count
+
i
]
=
dim
[
i
]
...
...
@@ -84,30 +205,19 @@ extension MTLDevice {
assert
(
texture
.
height
==
ndim
[
1
])
assert
(
texture
.
arrayLength
==
(
ndim
[
0
]
*
ndim
[
3
]
+
3
)
/
4
)
let
bpR
=
ndim
[
2
]
*
4
*
MemoryLayout
<
P
>.
size
let
bpI
=
ndim
[
1
]
*
bpR
let
region
=
MTLRegion
.
init
(
origin
:
MTLOrigin
.
init
(
x
:
0
,
y
:
0
,
z
:
0
),
size
:
MTLSize
.
init
(
width
:
ndim
[
2
],
height
:
ndim
[
1
],
depth
:
1
))
for
i
in
0
..<
texture
.
arrayLength
{
let
pointer
:
UnsafeMutablePointer
<
P
>
=
UnsafeMutablePointer
<
P
>.
allocate
(
capacity
:
ndim
[
1
]
*
ndim
[
2
]
*
4
*
MemoryLayout
<
P
>.
size
)
texture
.
getBytes
(
pointer
,
bytesPerRow
:
bpR
,
bytesPerImage
:
bpI
,
from
:
region
,
mipmapLevel
:
0
,
slice
:
i
)
for
h
in
0
..<
ndim
[
1
]
{
for
w
in
0
..<
ndim
[
2
]
{
for
k
in
0
..<
4
{
let
tx
=
(
h
*
ndim
[
2
]
+
w
)
*
4
+
k
let
n
=
(
i
*
4
+
k
)
/
ndim
[
3
]
let
c
=
(
i
*
4
+
k
)
%
ndim
[
3
]
let
jg
=
[
n
,
h
,
w
,
c
]
var
ig
=
[
0
,
0
,
0
,
0
]
for
d
in
0
..<
4
{
ig
[
transpose
[
d
]]
=
jg
[
d
]
}
let
ix
=
ig
[
0
]
*
tdim
[
1
]
*
tdim
[
2
]
*
tdim
[
3
]
+
ig
[
1
]
*
tdim
[
2
]
*
tdim
[
3
]
+
ig
[
2
]
*
tdim
[
3
]
+
ig
[
3
]
if
ix
<
count
{
tensor
[
ix
]
=
pointer
[
tx
]
}
}
}
texture2tensor_loop
(
texture
:
texture
)
{
(
xyzn
:
[
Int
],
v
:
P
)
in
var
tg
:
[
Int
]
=
[
0
,
0
,
0
,
0
]
tg
[
1
]
=
xyzn
[
1
]
tg
[
2
]
=
xyzn
[
0
]
tg
[
0
]
=
(
xyzn
[
2
]
*
4
+
xyzn
[
3
])
/
ndim
[
3
]
tg
[
3
]
=
(
xyzn
[
2
]
*
4
+
xyzn
[
3
])
%
ndim
[
3
]
var
ig
:
[
Int
]
=
[
0
,
0
,
0
,
0
]
for
k
in
0
..<
4
{
ig
[
transpose
[
k
]]
=
tg
[
k
]
}
let
ix
=
ig
[
0
]
*
tdim
[
1
]
*
tdim
[
2
]
*
tdim
[
3
]
+
ig
[
1
]
*
tdim
[
2
]
*
tdim
[
3
]
+
ig
[
2
]
*
tdim
[
3
]
+
ig
[
3
]
if
ix
<
count
{
tensor
[
ix
]
=
v
}
}
return
tensor
...
...
metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift
浏览文件 @
76a6a4c5
...
...
@@ -30,7 +30,7 @@ public class MobileNet_ssd_AR: Net{
class
MobilenetssdPreProccess
:
CusomKernel
{
init
(
device
:
MTLDevice
)
{
let
s
=
CusomKernel
.
Shape
.
init
(
inWidth
:
160
,
inHeight
:
160
,
inChannel
:
3
)
super
.
init
(
device
:
device
,
inFunctionName
:
"mobilent_ar_preprocess
_half
"
,
outputDim
:
s
,
usePaddleMobileLib
:
false
)
super
.
init
(
device
:
device
,
inFunctionName
:
"mobilent_ar_preprocess"
,
outputDim
:
s
,
usePaddleMobileLib
:
false
)
}
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift
浏览文件 @
76a6a4c5
...
...
@@ -56,9 +56,11 @@ class BatchNormOp<P: PrecisionType>: Operator<BatchNormKernel<P>, BatchNormParam
throw
error
}
}
func
delogOutput
()
{
print
(
"
\(
type
)
output: "
)
let
device
=
para
.
output
.
metalTexture
!.
device
let
outputArray
:
[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
output
.
metalTexture
,
dim
:
para
.
output
.
tensorDim
.
dims
,
transpose
:
para
.
output
.
transpose
)
print
(
outputArray
.
strideArray
())
}
}
metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift
浏览文件 @
76a6a4c5
...
...
@@ -53,15 +53,10 @@ class BilinearInterpOp<P: PrecisionType>: Operator<BilinearInterpKernel<P>, Bili
func
delogOutput
()
{
print
(
"
\(
type
)
output: "
)
let
padToFourDim
=
para
.
output
.
padToFourDim
if
para
.
output
.
transpose
==
[
0
,
1
,
2
,
3
]
{
let
outputArray
:
[
Float32
]
=
para
.
output
.
metalTexture
.
realNHWC
(
dim
:
(
n
:
padToFourDim
[
0
],
h
:
padToFourDim
[
1
],
w
:
padToFourDim
[
2
],
c
:
padToFourDim
[
3
]))
print
(
outputArray
.
strideArray
())
}
else
if
para
.
output
.
transpose
==
[
0
,
2
,
3
,
1
]
{
print
(
para
.
output
.
metalTexture
.
toTensor
(
dim
:
(
n
:
padToFourDim
[
0
],
c
:
padToFourDim
[
1
],
h
:
padToFourDim
[
2
],
w
:
padToFourDim
[
3
]))
.
strideArray
())
}
else
{
fatalError
(
" not implemet"
)
}
let
device
=
para
.
output
.
metalTexture
!.
device
let
outputArray
:
[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
output
.
metalTexture
,
dim
:
para
.
output
.
tensorDim
.
dims
,
transpose
:
para
.
output
.
transpose
)
// print(outputArray)
print
(
outputArray
.
strideArray
())
}
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift
浏览文件 @
76a6a4c5
...
...
@@ -65,15 +65,10 @@ class ConcatOp<P: PrecisionType>: Operator<ConcatKernel<P>, ConcatParam<P>>, Run
func
delogOutput
()
{
print
(
"
\(
type
)
output: "
)
let
padToFourDim
=
para
.
output
.
padToFourDim
if
para
.
output
.
transpose
==
[
0
,
1
,
2
,
3
]
{
let
outputArray
:
[
Float32
]
=
para
.
output
.
metalTexture
.
realNHWC
(
dim
:
(
n
:
padToFourDim
[
0
],
h
:
padToFourDim
[
1
],
w
:
padToFourDim
[
2
],
c
:
padToFourDim
[
3
]))
print
(
outputArray
.
strideArray
())
}
else
if
para
.
output
.
transpose
==
[
0
,
2
,
3
,
1
]
{
print
(
para
.
output
.
metalTexture
.
toTensor
(
dim
:
(
n
:
padToFourDim
[
0
],
c
:
padToFourDim
[
1
],
h
:
padToFourDim
[
2
],
w
:
padToFourDim
[
3
]))
.
strideArray
())
}
else
{
fatalError
(
" not implemet"
)
}
let
device
=
para
.
output
.
metalTexture
!.
device
let
outputArray
:
[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
output
.
metalTexture
,
dim
:
para
.
output
.
tensorDim
.
dims
,
transpose
:
para
.
output
.
transpose
)
print
(
outputArray
.
strideArray
())
}
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
浏览文件 @
76a6a4c5
...
...
@@ -20,12 +20,13 @@ class BatchNormKernel<P: PrecisionType>: Kernel, Computable {
let
varianceP
=
param
.
variance
.
data
.
pointer
let
meanP
=
param
.
mean
.
data
.
pointer
let
scaleP
=
param
.
scale
.
data
.
pointer
let
biasP
=
param
.
scale
.
data
.
pointer
let
biasP
=
param
.
bias
.
data
.
pointer
for
i
in
0
..<
count
{
let
invStd
=
P
(
1
/
(
Float32
(
varianceP
[
i
])
+
param
.
epsilon
)
.
squareRoot
())
biasP
[
i
]
=
biasP
[
i
]
-
meanP
[
i
]
*
invStd
*
scaleP
[
i
]
scaleP
[
i
]
=
invStd
*
scaleP
[
i
]
}
param
.
bias
.
initBuffer
(
device
:
device
,
precision
:
computePrecision
)
param
.
scale
.
initBuffer
(
device
:
device
,
precision
:
computePrecision
)
param
.
output
.
initTexture
(
device
:
device
,
inTranspose
:
param
.
input
.
transpose
,
computePrecision
:
computePrecision
)
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/BilinearInterpKernel.swift
浏览文件 @
76a6a4c5
...
...
@@ -27,10 +27,16 @@ class BilinearInterpKernel<P: PrecisionType>: Kernel, Computable{
encoder
.
setTexture
(
param
.
input
.
metalTexture
,
index
:
0
)
encoder
.
setTexture
(
param
.
output
.
metalTexture
,
index
:
1
)
let
ratio_h
:
Float32
=
Float32
(
param
.
input
.
tensorDim
.
dims
[
2
])
/
Float32
(
param
.
output
.
tensorDim
.
dims
[
2
])
let
ratio_w
:
Float32
=
Float32
(
param
.
input
.
tensorDim
.
dims
[
3
])
/
Float32
(
param
.
output
.
tensorDim
.
dims
[
3
])
var
ratio_h
:
Float32
=
0
var
ratio_w
:
Float32
=
0
if
param
.
output
.
tensorDim
.
dims
[
2
]
>
1
{
ratio_h
=
Float32
(
param
.
input
.
tensorDim
.
dims
[
2
]
-
1
)
/
Float32
(
param
.
output
.
tensorDim
.
dims
[
2
]
-
1
)
}
if
param
.
output
.
tensorDim
.
dims
[
3
]
>
1
{
ratio_w
=
Float32
(
param
.
input
.
tensorDim
.
dims
[
3
]
-
1
)
/
Float32
(
param
.
output
.
tensorDim
.
dims
[
3
]
-
1
)
}
var
p
=
BilinearInterpMetalParam
.
init
(
ratio_h
:
ratio_h
,
ratio_w
:
ratio_w
)
encoder
.
setBytes
(
&
p
,
length
:
MemoryLayout
<
Concat
MetalParam
>.
size
,
index
:
0
)
encoder
.
setBytes
(
&
p
,
length
:
MemoryLayout
<
BilinearInterp
MetalParam
>.
size
,
index
:
0
)
encoder
.
dispatch
(
computePipline
:
pipline
,
outTexture
:
param
.
output
.
metalTexture
)
encoder
.
endEncoding
()
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal
浏览文件 @
76a6a4c5
...
...
@@ -17,14 +17,14 @@ using namespace metal;
kernel void batchnorm(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 * n
ewS
cale [[buffer(0)]],
const device float4 * n
ewB
ias [[buffer(1)]],
const device float4 * n
s
cale [[buffer(0)]],
const device float4 * n
b
ias [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
const float4 input = inTexture.read(gid.xy, gid.z);
float4 output = input * n
ewScale[gid.z] + newB
ias[gid.z];
float4 output = input * n
scale[gid.z] + nb
ias[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal
浏览文件 @
76a6a4c5
...
...
@@ -14,8 +14,8 @@ kernel void FUNC(bilinear_interp, P)(texture2d_array<P, access::read> input [[te
if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
r = input.read(gid.xy, gid.z);
} else {
float
w = gid.x * pm.ratio_w;
float
h = gid.y * pm.ratio_h;
P
w = gid.x * pm.ratio_w;
P
h = gid.y * pm.ratio_h;
uint w0 = w, h0 = h;
uint w1 = w0 + 1, h1 = h0 + 1;
P w1lambda = w - w0, h1lambda = h - h0;
...
...
@@ -26,7 +26,8 @@ kernel void FUNC(bilinear_interp, P)(texture2d_array<P, access::read> input [[te
VECTOR(P, 4) r1 = input.read(uint2(w1, h0), gid.z);
VECTOR(P, 4) r2 = input.read(uint2(w0, h1), gid.z);
VECTOR(P, 4) r3 = input.read(uint2(w1, h1), gid.z);
r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3);
r = h2lambda * (w2lambda * r0 + w1lambda * r1)
+ h1lambda * (w2lambda * r2 + w1lambda * r3);
}
output.write(r, gid.xy, gid.z);
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal
浏览文件 @
76a6a4c5
...
...
@@ -16,8 +16,6 @@
using namespace metal;
struct bilinear_interp_param {
// int32_t out_h;
// int32_t out_w;
float ratio_h;
float ratio_w;
};
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal
浏览文件 @
76a6a4c5
...
...
@@ -17,16 +17,16 @@ using namespace metal;
inline void xyzn2abcd_1(int xyzn[4], int abcd[4]) {
abcd[0] = abcd[1] = abcd[2] =
1
;
abcd[0] = abcd[1] = abcd[2] =
0
;
abcd[3] = xyzn[0] * 4 + xyzn[3];
}
inline void xyzn2abcd_2(int xyzn[4], int abcd[4]) {
abcd[0] = abcd[1] =
1
;
abcd[0] = abcd[1] =
0
;
abcd[2] = xyzn[1];
abcd[3] = xyzn[0] * 4 + xyzn[3];
}
inline void xyzn2abcd_3(int xyzn[4], int abcd[4]) {
abcd[0] =
1
;
abcd[0] =
0
;
abcd[3] = xyzn[0];
abcd[2] = xyzn[1];
abcd[1] = xyzn[2] * 4 + xyzn[3];
...
...
@@ -40,15 +40,15 @@ inline void xyzn2abcd_4(int C, int xyzn[4], int abcd[4]) {
}
inline void abcd2xyzn_1(int abcd[4], int xyzn[4]) {
xyzn[1] = xyzn[2] =
1
;
xyzn[1] = xyzn[2] =
0
;
xyzn[0] = abcd[3] / 4;
xyzn[1] = abcd[3] % 4;
}
inline void abcd2xyzn_2(int abcd[4], int xyzn[4]) {
xyzn[2] =
1
;
xyzn[2] =
0
;
xyzn[1] = abcd[2];
xyzn[0] = abcd[3] / 4;
xyzn[
1
] = abcd[3] % 4;
xyzn[
3
] = abcd[3] % 4;
}
inline void abcd2xyzn_3(int abcd[4], int xyzn[4]) {
xyzn[0] = abcd[3];
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal
浏览文件 @
76a6a4c5
...
...
@@ -122,20 +122,20 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex
int x = gid.x - pm.offset;
if (x < 0) return;
if (x < pm.vdim[0]) {
VECTOR(P, 4)
r = in0.read(gid.xy, gid.z);
VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z);
return;
}
x -= pm.vdim[0];
if (x < pm.vdim[1]) {
VECTOR(P, 4)
r = in1.read(uint2(x, gid.y), gid.z);
VECTOR(P, 4) r = in1.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#if N >= 3
x -= pm.vdim[1];
if (x < pm.vdim[2]) {
VECTOR(P, 4)
r = in2.read(uint2(x, gid.y), gid.z);
VECTOR(P, 4) r = in2.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
...
...
@@ -143,7 +143,7 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex
#if N >= 4
x -= pm.vdim[2];
if (x < pm.vdim[3]) {
VECTOR(P, 4)
r = in3.read(uint2(x, gid.y), gid.z);
VECTOR(P, 4) r = in3.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
...
...
@@ -151,7 +151,7 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex
#if N >= 5
x -= pm.vdim[3];
if (x < pm.vdim[4]) {
VECTOR(P, 4)
r = in4.read(uint2(x, gid.y), gid.z);
VECTOR(P, 4) r = in4.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
...
...
@@ -159,7 +159,7 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex
#if N >= 6
x -= pm.vdim[4];
if (x < pm.vdim[5]) {
VECTOR(P, 4)
r = in5.read(uint2(x, gid.y), gid.z);
VECTOR(P, 4) r = in5.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal
浏览文件 @
76a6a4c5
...
...
@@ -36,7 +36,6 @@ kernel void FUNC(reshape, RIN, ROUT, P)(texture2d_array<P, access::read> inTextu
if (index < count) {
index2abcd(lrp.idim, index, tabcd);
trans(lrp.itrans, tabcd, iabcd);
abcd2xyzn(iC, iabcd, ixyzn);
#if RIN == 4
abcd2xyzn_4(iC, iabcd, ixyzn);
#else
...
...
metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
浏览文件 @
76a6a4c5
...
...
@@ -72,10 +72,21 @@ class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>
print
(
"
\(
type
)
output: "
)
// output
let
outputArray
=
para
.
output
.
metalTexture
.
float32Array
()
print
(
outputArray
)
// let outputArray = para.output.metalTexture.float32Array()
// print(outputArray.strideArray())
let
device
=
para
.
input
.
metalTexture
!.
device
let
boxes
:[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
output
.
metalTexture
!
,
dim
:
para
.
output
.
tensorDim
.
dims
,
transpose
:
[
2
,
0
,
1
,
3
])
let
variances
:[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
outputVariances
.
metalTexture
!
,
dim
:
para
.
outputVariances
.
tensorDim
.
dims
,
transpose
:
[
2
,
0
,
1
,
3
])
print
(
"boxes: "
)
print
(
boxes
.
strideArray
())
print
(
"variances: "
)
print
(
variances
.
strideArray
())
// output
// print(" \(type) output: ")
print
(
"
\(
type
)
output: "
)
print
(
para
.
output
.
metalTexture
.
realNHWC
(
dim
:
(
para
.
output
.
dim
[
0
],
para
.
output
.
dim
[
1
],
para
.
output
.
dim
[
2
],
para
.
output
.
dim
[
3
]))
.
strideArray
())
// print(para.output.realNHWC().strideArray())
// let padToFourDim = para.output.padToFourDim
// if para.output.transpose == [0, 1, 2, 3] {
// let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]), texturePrecision: computePrecision)
...
...
metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
浏览文件 @
76a6a4c5
...
...
@@ -47,6 +47,9 @@ class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable,
func
delogOutput
()
{
print
(
"
\(
type
)
output: "
)
print
(
para
.
output
.
metalTexture
.
toTensor
(
dim
:
(
n
:
para
.
output
.
tensorDim
[
0
],
c
:
para
.
output
.
tensorDim
[
1
],
h
:
para
.
output
.
tensorDim
[
2
],
w
:
para
.
output
.
tensorDim
[
3
]))
.
strideArray
())
let
device
=
para
.
output
.
metalTexture
!.
device
let
outputArray
:
[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
output
.
metalTexture
,
dim
:
para
.
output
.
tensorDim
.
dims
,
transpose
:
para
.
output
.
transpose
)
print
(
outputArray
.
strideArray
())
}
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
浏览文件 @
76a6a4c5
...
...
@@ -69,15 +69,9 @@ class ReshapeOp<P: PrecisionType>: Operator<ReshapeKernel<P>, ReshapeParam<P>>,
}
func
delogOutput
()
{
print
(
"reshape delog"
)
// let _: P? = para.input.metalTexture.logDesc(header: "reshape input: ", stridable: false)
//
// let _: P? = para.output.metalTexture.logDesc(header: "reshape output: ", stridable: false)
let
padToFourDim
=
para
.
output
.
padToFourDim
let
outputArray
:
[
Float32
]
=
para
.
output
.
metalTexture
.
realNHWC
(
dim
:
(
n
:
padToFourDim
[
0
],
h
:
padToFourDim
[
1
],
w
:
padToFourDim
[
2
],
c
:
padToFourDim
[
3
]))
// print(para.output.metalTexture.toTensor(dim: (n: padToFourDim[0], c: padToFourDim[1], h: padToFourDim[2], w: padToFourDim[3])).strideArray())
let
device
=
para
.
output
.
metalTexture
!.
device
let
outputArray
:
[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
output
.
metalTexture
,
dim
:
para
.
output
.
tensorDim
.
dims
,
transpose
:
para
.
output
.
transpose
)
print
(
outputArray
.
strideArray
())
// print(outputArray)
}
}
metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift
浏览文件 @
76a6a4c5
...
...
@@ -64,6 +64,11 @@ class SplitOp<P: PrecisionType>: Operator<SplitKernel<P>, SplitParam<P>>, Runabl
func
delogOutput
()
{
print
(
"
\(
type
)
output: "
)
let
device
=
para
.
input
.
metalTexture
!.
device
for
out
in
para
.
outputList
{
let
arr
:
[
Float32
]
=
device
.
texture2tensor
(
texture
:
out
.
metalTexture
,
dim
:
out
.
tensorDim
.
dims
,
transpose
:
out
.
transpose
)
print
(
arr
.
strideArray
())
}
}
}
...
...
metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift
浏览文件 @
76a6a4c5
...
...
@@ -57,6 +57,9 @@ class TransposeOp<P: PrecisionType>: Operator<TransposeKernel<P>, TransposeParam
}
else
{
print
(
" not implement"
)
}
let
device
=
para
.
output
.
metalTexture
!.
device
let
outputArray
:
[
Float32
]
=
device
.
texture2tensor
(
texture
:
para
.
output
.
metalTexture
,
dim
:
para
.
output
.
tensorDim
.
dims
,
transpose
:
para
.
output
.
transpose
)
print
(
outputArray
.
strideArray
())
}
}
...
...
metal/paddle-mobile/paddle-mobile/PaddleMobile.swift
浏览文件 @
76a6a4c5
...
...
@@ -16,7 +16,7 @@ import Foundation
class
ScaleKernel
:
CusomKernel
{
init
(
device
:
MTLDevice
,
shape
:
Shape
)
{
super
.
init
(
device
:
device
,
inFunctionName
:
"scale
_half
"
,
outputDim
:
shape
,
usePaddleMobileLib
:
false
)
super
.
init
(
device
:
device
,
inFunctionName
:
"scale"
,
outputDim
:
shape
,
usePaddleMobileLib
:
false
)
}
}
...
...
metal/paddle-mobile/paddle-mobile/framework/Executor.swift
浏览文件 @
76a6a4c5
...
...
@@ -14,10 +14,10 @@
import
Foundation
let
testTo
=
2
let
testTo
=
113
var
isTest
=
false
let
computePrecision
:
ComputePrecision
=
.
Float
16
let
computePrecision
:
ComputePrecision
=
.
Float
32
public
class
ResultHolder
{
public
let
dim
:
[
Int
]
...
...
@@ -120,10 +120,10 @@ public class Executor<P: PrecisionType> {
let
inputArr
=
resInput
.
toTensor
(
dim
:
(
n
:
dim
[
0
],
c
:
dim
[
3
],
h
:
dim
[
1
],
w
:
dim
[
2
]))
print
(
inputArr
.
strideArray
())
//
print(dim)
//
writeToLibrary(fileName: "test_image_ssd_ar", array: inputArr)
//
//
print("write to library done")
print
(
dim
)
writeToLibrary
(
fileName
:
"test_image_ssd_ar"
,
array
:
inputArr
)
print
(
"write to library done"
)
// return
// print(inputArr)
//
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录