Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
cead34f5
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
cead34f5
编写于
7月 19, 2018
作者:
R
Ruilong Liu
提交者:
GitHub
7月 19, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #609 from codeWorm2015/metal
add conv add imp
上级
4e101470
8737957f
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
43 addition
and
29 deletion
+43
-29
metal/paddle-mobile/paddle-mobile/Executor.swift
metal/paddle-mobile/paddle-mobile/Executor.swift
+3
-3
metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift
...bile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift
+4
-5
metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift
...mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift
+2
-2
metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift
...obile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift
+20
-0
metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal
...e-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal
+8
-16
metal/paddle-mobile/paddle-mobile/framework/Tensor.swift
metal/paddle-mobile/paddle-mobile/framework/Tensor.swift
+6
-3
未找到文件。
metal/paddle-mobile/paddle-mobile/Executor.swift
浏览文件 @
cead34f5
...
...
@@ -55,6 +55,7 @@ public class Executor<P: PrecisionType> {
device
=
inDevice
queue
=
inQueue
for
block
in
inProgram
.
programDesc
.
blocks
{
//block.ops.count
for
i
in
0
..<
block
.
ops
.
count
{
let
op
=
block
.
ops
[
i
]
do
{
...
...
@@ -65,6 +66,7 @@ public class Executor<P: PrecisionType> {
throw
error
}
}
// for op in block.ops {
// do {
// let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
...
...
@@ -94,16 +96,14 @@ public class Executor<P: PrecisionType> {
}
buffer
.
addCompletedHandler
{
(
commandbuffer
)
in
for
op
in
self
.
ops
{
op
.
delogOutput
()
}
let
afterDate
=
Date
.
init
()
print
(
" encoder end ! time:
\(
afterDate
.
timeIntervalSince
(
beforeDate
)
)
"
)
}
buffer
.
commit
()
guard
let
outputVar
=
program
.
scope
.
output
()
else
{
...
...
metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift
浏览文件 @
cead34f5
...
...
@@ -107,17 +107,16 @@ class ConvAddBatchNormReluOp<P: PrecisionType>: Operator<ConvAddBatchNormReluKer
}
func
delogOutput
()
{
// let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable:
fals
e)
// let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable:
tru
e)
// para.filter.logDataPointer(header: "filter data pointer: ")
//
// print("filter: \(para.filter)")
// print("biase: \(para.bias)")
// print("padding: \(para.paddings)")
// print("stride: \(para.stride)")
//
// let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: false)
// let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false)
// let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable:
tru
e)
// let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable:
fals
e)
}
}
metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift
浏览文件 @
cead34f5
...
...
@@ -22,7 +22,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable {
if
param
.
filter
.
width
==
1
&&
param
.
filter
.
height
==
1
{
super
.
init
(
device
:
device
,
inFunctionName
:
"conv_add_batch_norm_relu_1x1"
)
}
else
if
param
.
filter
.
channel
==
1
{
super
.
init
(
device
:
device
,
inFunctionName
:
"depthwise_conv_add_batch_norm_relu_
1x1
"
)
super
.
init
(
device
:
device
,
inFunctionName
:
"depthwise_conv_add_batch_norm_relu_
3x3
"
)
}
else
{
super
.
init
(
device
:
device
,
inFunctionName
:
"conv_add_batch_norm_relu_3x3"
)
}
...
...
@@ -75,7 +75,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable {
encoder
.
setTexture
(
param
.
output
.
metalTexture
,
index
:
1
)
encoder
.
setBytes
(
&
metalParam
,
length
:
MemoryLayout
<
MetalConvParam
>.
size
,
index
:
0
)
encoder
.
setBuffer
(
param
.
filter
.
buffer
,
offset
:
0
,
index
:
1
)
encoder
.
setBuffer
(
param
.
bias
.
buffer
,
offset
:
0
,
index
:
2
)
encoder
.
setBuffer
(
param
.
y
.
buffer
,
offset
:
0
,
index
:
2
)
encoder
.
setBuffer
(
param
.
newScale
!
,
offset
:
0
,
index
:
3
)
encoder
.
setBuffer
(
param
.
newBiase
!
,
offset
:
0
,
index
:
4
)
encoder
.
dispatch
(
computePipline
:
pipline
,
outTexture
:
param
.
output
.
metalTexture
)
...
...
metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift
浏览文件 @
cead34f5
...
...
@@ -15,11 +15,31 @@
import
Foundation
/// Kernel for the fused convolution + add operator.
///
/// The initializer asks the `Kernel` superclass for the compute function named
/// "conv_add_1x1" and precomputes a `MetalConvParam` from the operator's
/// `ConvAddParam`. `compute` then encodes one compute pass that binds the input
/// and output textures, the precomputed parameters, and the filter and `y`
/// buffers.
class
ConvAddKernel
<
P
:
PrecisionType
>
:
Kernel
,
Computable
{
// Convolution parameters uploaded to buffer index 0 in `compute`.
// Declared implicitly unwrapped because it is assigned only after `super.init`.
var
metalParam
:
MetalConvParam
!
/// Builds the kernel and derives the convolution parameters.
///
/// - Parameters:
///   - device: Metal device forwarded to the superclass initializer.
///   - param: Operator parameters; this initializer reads `filter`, `paddings`,
///     `stride` and `input`.
required
init
(
device
:
MTLDevice
,
param
:
ConvAddParam
<
P
>
)
{
// NOTE(review): the function name is fixed to the 1x1 variant regardless of
// `param.filter` size — confirm whether other filter sizes are expected here.
super
.
init
(
device
:
device
,
inFunctionName
:
"conv_add_1x1"
)
// Offsets are half the filter extent minus the padding on that axis.
let
offsetX
=
param
.
filter
.
width
/
2
-
Int
(
param
.
paddings
[
0
])
let
offsetY
=
param
.
filter
.
height
/
2
-
Int
(
param
.
paddings
[
1
])
// Debug output of the computed offsets.
print
(
"offset x:
\(
offsetX
)
"
)
print
(
"offset y:
\(
offsetY
)
"
)
// Floating-point literal; converted to Int16 when the parameter struct is built.
let
offsetZ
=
0.0
// The Int16/UInt16 initializers trap at runtime if a value is out of range.
// paddedZ is `arrayLength * 4 - dim[3]`; presumably the number of unused
// channels when channels are packed four per texture slice — TODO confirm.
metalParam
=
MetalConvParam
.
init
(
offsetX
:
Int16
(
offsetX
),
offsetY
:
Int16
(
offsetY
),
offsetZ
:
Int16
(
offsetZ
),
strideX
:
UInt16
(
param
.
stride
[
0
]),
strideY
:
UInt16
(
param
.
stride
[
1
]),
paddedZ
:
UInt16
(
param
.
input
.
metalTexture
.
arrayLength
*
4
-
param
.
input
.
dim
[
3
]))
}
/// Encodes the conv-add compute pass into `commandBuffer`.
///
/// - Parameters:
///   - commandBuffer: Command buffer that supplies the compute command encoder.
///   - param: Operator parameters; this method reads `input`, `output`, `filter` and `y`.
/// - Throws: `PaddleMobileError.predictError` when no compute command encoder
///   can be created.
func
compute
(
commandBuffer
:
MTLCommandBuffer
,
param
:
ConvAddParam
<
P
>
)
throws
{
// Bail out before binding anything if the encoder is unavailable.
guard
let
encoder
=
commandBuffer
.
makeComputeCommandEncoder
()
else
{
throw
PaddleMobileError
.
predictError
(
message
:
" encode is nil"
)
}
// Debug trace emitted on every call.
print
(
"Conv Add compute"
)
// Texture slots: 0 = input, 1 = output.
encoder
.
setTexture
(
param
.
input
.
metalTexture
,
index
:
0
)
encoder
.
setTexture
(
param
.
output
.
metalTexture
,
index
:
1
)
// Buffer slot 0: the precomputed convolution parameters, copied by value.
// NOTE(review): `metalParam` is an implicitly unwrapped optional, so this
// passes the address of the optional's storage — confirm that is intended.
encoder
.
setBytes
(
&
metalParam
,
length
:
MemoryLayout
<
MetalConvParam
>.
size
,
index
:
0
)
// Buffer slot 1: filter buffer.
encoder
.
setBuffer
(
param
.
filter
.
buffer
,
offset
:
0
,
index
:
1
)
// Buffer slot 2: the `y` operand buffer.
encoder
.
setBuffer
(
param
.
y
.
buffer
,
offset
:
0
,
index
:
2
)
// `dispatch(computePipline:outTexture:)` and `pipline` are declared outside this block.
encoder
.
dispatch
(
computePipline
:
pipline
,
outTexture
:
param
.
output
.
metalTexture
)
// Close the encoder so the command buffer can accept further work.
encoder
.
endEncoding
()
}
}
metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal
浏览文件 @
cead34f5
...
...
@@ -66,7 +66,6 @@ struct MetalConvParam {
//
//}
kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
...
...
@@ -120,8 +119,6 @@ kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample>
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_batch_norm_relu_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
...
...
@@ -165,14 +162,11 @@ kernel void conv_add_batch_norm_relu_1x1(texture2d_array<float, access::sample>
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
...
...
@@ -210,10 +204,10 @@ kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[text
}
kernel void depthwise_conv_add_batch_norm_relu_
1x1
(texture2d_array<float, access::sample> inTexture [[texture(0)]],
kernel void depthwise_conv_add_batch_norm_relu_
3x3
(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float
4
*weights [[buffer(1)]],
const device float *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
...
...
@@ -225,11 +219,10 @@ kernel void depthwise_conv_add_batch_norm_relu_1x1(texture2d_array<float, access
return;
}
uint output_slice = gid.z;
short2 posInInput = short2(gid.xy) + short2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW;
uint weithTo = gid.z * kernelHXW
* 4
;
float4 output = float4(0.0);
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
...
...
@@ -243,13 +236,12 @@ kernel void depthwise_conv_add_batch_norm_relu_1x1(texture2d_array<float, access
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
float4 weight = weights[weithTo + j];
output.x += input.x * weight.x;
output.y += input.y * weight.y;
output.z += input.z * weight.z;
output.w += input.w * weight.w;
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output =
fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0)
;
output =
(output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z]
;
outTexture.write(output, gid.xy, gid.z);
}
metal/paddle-mobile/paddle-mobile/framework/Tensor.swift
浏览文件 @
cead34f5
...
...
@@ -95,12 +95,14 @@ class Tensor<P: PrecisionType>: Tensorial {
let
cSlices
=
(
C
+
3
)
/
4
let
paddedC
=
cSlices
*
4
let
count
=
paddedC
*
dim
[
0
]
*
dim
[
1
]
*
dim
[
2
]
buffer
=
device
.
makeBuffer
(
length
:
count
*
MemoryLayout
<
P
>.
stride
)
if
C
==
paddedC
{
buffer
=
device
.
makeBuffer
(
length
:
count
*
MemoryLayout
<
P
>.
stride
)
buffer
?
.
contents
()
.
copyMemory
(
from
:
data
.
pointer
,
byteCount
:
count
*
MemoryLayout
<
P
>.
stride
)
}
else
if
C
==
1
{
buffer
?
.
contents
()
.
copyMemory
(
from
:
data
.
pointer
,
byteCount
:
count
*
MemoryLayout
<
P
>.
stride
)
buffer
=
device
.
makeBuffer
(
length
:
numel
()
*
MemoryLayout
<
P
>.
stride
)
buffer
?
.
contents
()
.
copyMemory
(
from
:
data
.
pointer
,
byteCount
:
numel
()
*
MemoryLayout
<
P
>.
stride
)
}
else
{
buffer
=
device
.
makeBuffer
(
length
:
count
*
MemoryLayout
<
P
>.
stride
)
var
tmpPointer
=
data
.
pointer
var
dstPtr
=
buffer
?
.
contents
()
.
bindMemory
(
to
:
P
.
self
,
capacity
:
count
)
for
_
in
0
..<
dim
[
0
]
*
dim
[
1
]
*
dim
[
2
]
{
...
...
@@ -120,7 +122,8 @@ class Tensor<P: PrecisionType>: Tensorial {
}
else
{
fatalError
(
" not support !"
)
}
data
.
release
()
//TODO: release
// data.release()
}
var
width
:
Int
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录