Commit 49884b2a authored by dolphin8

compute -> init

Parent 0d0c9582
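In short, each kernel's Metal parameter struct is now built once in `init` and stored in a `metalParam` property, instead of being rebuilt inside `compute` on every call; `compute` only binds the stored struct with `setBytes` and dispatches. The diffs below apply this to ElementwiseAddKernel, PoolKernel, ReshapeKernel, and SoftmaxKernel, and additionally silence debug prints in TransposeKernel. A minimal sketch of the pattern (hypothetical `ExampleMetalParam`/`ExampleKernel` names, not the actual classes touched by this commit):

```swift
import Metal

// Hypothetical, simplified stand-ins for the kernels changed in this commit.
struct ExampleMetalParam {
  var fast: Int32 = 0
}

class ExampleKernel {
  // Computed once at construction time (previously rebuilt on every compute call).
  var metalParam: ExampleMetalParam

  enum ExampleError: Error { case encoderIsNil }

  init(device: MTLDevice, inputsMatch: Bool) {
    metalParam = ExampleMetalParam()
    if inputsMatch {
      metalParam.fast = 1  // take the fast path when shapes/transposes match
    }
  }

  func compute(commandBuffer: MTLCommandBuffer) throws {
    guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
      throw ExampleError.encoderIsNil
    }
    // Only bind the precomputed parameters; no per-call recomputation.
    encoder.setBytes(&metalParam, length: MemoryLayout<ExampleMetalParam>.size, index: 0)
    // ... the real kernels also set textures and pipeline state, then dispatch ...
    encoder.endEncoding()
  }
}
```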
@@ -25,8 +25,31 @@ struct ElementwiseAddMetalParam {
 }
 class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable {
+  var metalParam: ElementwiseAddMetalParam
   required init(device: MTLDevice, param: ElementwiseAddParam<P>) {
     param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision)
+    metalParam = ElementwiseAddMetalParam.init()
+    let xdim: [Int32] = (0..<4).map { Int32(param.inputX.dim[$0]) }
+    let ydim: [Int32] = (0..<4).map { Int32(param.inputY.dim[$0]) }
+    let xtrans: [Int32] = (0..<4).map { Int32(param.inputX.transpose[$0]) }
+    let ytrans: [Int32] = (0..<4).map { Int32(param.inputY.transpose[$0]) }
+    metalParam.xdim = (xdim[0], xdim[1], xdim[2], xdim[3])
+    metalParam.ydim = (ydim[0], ydim[1], ydim[2], ydim[3])
+    metalParam.xtrans = (xtrans[0], xtrans[1], xtrans[2], xtrans[3])
+    metalParam.ytrans = (ytrans[0], ytrans[1], ytrans[2], ytrans[3])
+    if param.axis == -1 {
+      metalParam.axis = 4 - Int32(param.inputY.tensorDim.cout())
+    } else {
+      metalParam.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis)
+    }
+    metalParam.ylen = Int32(param.inputY.tensorDim.cout())
+    if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) {
+      // print("===> elementwise_add fast!!!")
+      metalParam.fast = 1
+    }
     if computePrecision == .Float32 {
       super.init(device: device, inFunctionName: "elementwise_add")
     } else if computePrecision == .Float16 {
@@ -40,32 +63,10 @@ class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable {
     guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
       throw PaddleMobileError.predictError(message: " encode is nil")
     }
-    var emp = ElementwiseAddMetalParam.init()
     encoder.setTexture(param.inputX.metalTexture, index: 0)
     encoder.setTexture(param.inputY.metalTexture, index: 1)
     encoder.setTexture(param.output.metalTexture, index: 2)
-    let xdim: [Int32] = (0..<4).map { Int32(param.inputX.dim[$0]) }
-    let ydim: [Int32] = (0..<4).map { Int32(param.inputY.dim[$0]) }
-    let xtrans: [Int32] = (0..<4).map { Int32(param.inputX.transpose[$0]) }
-    let ytrans: [Int32] = (0..<4).map { Int32(param.inputY.transpose[$0]) }
-    emp.xdim = (xdim[0], xdim[1], xdim[2], xdim[3])
-    emp.ydim = (ydim[0], ydim[1], ydim[2], ydim[3])
-    emp.xtrans = (xtrans[0], xtrans[1], xtrans[2], xtrans[3])
-    emp.ytrans = (ytrans[0], ytrans[1], ytrans[2], ytrans[3])
-    if param.axis == -1 {
-      emp.axis = 4 - Int32(param.inputY.tensorDim.cout())
-    } else {
-      emp.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis)
-    }
-    emp.ylen = Int32(param.inputY.tensorDim.cout())
-    if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) {
-      // print("===> elementwise_add fast!!!")
-      emp.fast = 1
-    }
-    encoder.setBytes(&emp, length: MemoryLayout<ElementwiseAddMetalParam>.size, index: 0)
+    encoder.setBytes(&metalParam, length: MemoryLayout<ElementwiseAddMetalParam>.size, index: 0)
     encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
     encoder.endEncoding()
   }
...
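A note on the broadcast-axis logic now computed in ElementwiseAddKernel's `init`: dimensions are padded to rank 4, so when `param.axis == -1` the Y tensor aligns with the trailing dimensions of X and the effective axis is `4 - yRank`; otherwise it is `4 - xRank + axis`. A small standalone sketch of that mapping (hypothetical helper, not part of the diff):

```swift
// Hypothetical helper mirroring the axis resolution added to ElementwiseAddKernel.init.
func resolvedAxis(xRank: Int32, yRank: Int32, axis: Int32) -> Int32 {
  // axis == -1 means Y aligns with the trailing dims of X once both are padded to rank 4.
  return axis == -1 ? 4 - yRank : 4 - xRank + axis
}

// X is rank 4 (e.g. NCHW), Y is a rank-1 per-channel tensor:
assert(resolvedAxis(xRank: 4, yRank: 1, axis: 1) == 1)   // explicit axis 1 -> dim 1 of the padded shape
assert(resolvedAxis(xRank: 4, yRank: 1, axis: -1) == 3)  // axis -1 -> trailing dim (4 - 1 = 3)
```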
@@ -25,24 +25,10 @@ struct PoolMetalParam {
 }
 class PoolKernel<P: PrecisionType>: Kernel, Computable{
+  var metalParam: PoolMetalParam
   required init(device: MTLDevice, param: PoolParam<P>) {
     param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
-    if computePrecision == .Float32 {
-      super.init(device: device, inFunctionName: "pool")
-    } else if computePrecision == .Float16 {
-      super.init(device: device, inFunctionName: "pool_half")
-    } else {
-      fatalError()
-    }
-  }
-  func compute(commandBuffer: MTLCommandBuffer, param: PoolParam<P>) throws {
-    guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
-      throw PaddleMobileError.predictError(message: " encoder is nil")
-    }
-    encoder.setTexture(param.input.metalTexture, index: 0)
-    encoder.setTexture(param.output.metalTexture, index: 1)
     var poolType: Int32
     switch param.poolType {
     case "max":
@@ -50,9 +36,9 @@ class PoolKernel<P: PrecisionType>: Kernel, Computable{
     case "avg":
       poolType = 1
     default:
-      throw PaddleMobileError.predictError(message: " unknown pooltype " + param.poolType)
+      fatalError()
     }
-    var pmp = PoolMetalParam.init(
+    metalParam = PoolMetalParam.init(
       ksizeX: param.ksize[0],
       ksizeY: param.ksize[1],
       strideX: param.stride[0],
@@ -61,7 +47,24 @@ class PoolKernel<P: PrecisionType>: Kernel, Computable{
       paddingY: param.padding[1],
       poolType: poolType
     )
-    encoder.setBytes(&pmp, length: MemoryLayout<PoolMetalParam>.size, index: 0)
+    if computePrecision == .Float32 {
+      super.init(device: device, inFunctionName: "pool")
+    } else if computePrecision == .Float16 {
+      super.init(device: device, inFunctionName: "pool_half")
+    } else {
+      fatalError()
+    }
+  }
+  func compute(commandBuffer: MTLCommandBuffer, param: PoolParam<P>) throws {
+    guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+      throw PaddleMobileError.predictError(message: " encoder is nil")
+    }
+    encoder.setTexture(param.input.metalTexture, index: 0)
+    encoder.setTexture(param.output.metalTexture, index: 1)
+    encoder.setBytes(&metalParam, length: MemoryLayout<PoolMetalParam>.size, index: 0)
     encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
     encoder.endEncoding()
   }
...
@@ -28,8 +28,27 @@ struct ReshapeTestParam: TestParam {
 }
 class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
+  var metalParam: ReshapeMetalParam
   required init(device: MTLDevice, param: ReshapeParam<P>) {
     param.output.initTexture(device: device, computePrecision: computePrecision)
+    var id: [Int32] = [1, 1, 1, 1]
+    for i in 0..<param.input.tensorDim.cout() {
+      id[4-param.input.tensorDim.cout()+i] = Int32(param.input.tensorDim[i])
+    }
+    let it: [Int32] = param.input.transpose.map { Int32($0) }
+    var od: [Int32] = [1, 1, 1, 1]
+    for i in 0..<param.output.tensorDim.cout() {
+      od[4-param.output.tensorDim.cout()+i] = Int32(param.output.tensorDim[i])
+    }
+    let ot: [Int32] = param.output.transpose.map { Int32($0) }
+    metalParam = ReshapeMetalParam.init(
+      idim: (id[0], id[1], id[2], id[3]),
+      itrans: (it[0], it[1], it[2], it[3]),
+      odim: (od[0], od[1], od[2], od[3]),
+      otrans: (ot[0], ot[1], ot[2], ot[3])
+    )
     if computePrecision == .Float32 {
       super.init(device: device, inFunctionName: "reshape")
     } else if computePrecision == .Float16 {
@@ -40,6 +59,12 @@ class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
   }
   required init(device: MTLDevice, testParam: ReshapeTestParam) {
+    metalParam = ReshapeMetalParam.init(
+      idim: (0, 0, 0, 0),
+      itrans: (0, 0, 0, 0),
+      odim: (0, 0, 0, 0),
+      otrans: (0, 0, 0, 0)
+    )
     super.init(device: device, inFunctionName: "reshape")
   }
@@ -50,23 +75,8 @@ class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
     encoder.setTexture(param.input.metalTexture, index: 0)
     encoder.setTexture(param.output.metalTexture, index: 1)
-    var id: [Int32] = [1, 1, 1, 1]
-    for i in 0..<param.input.tensorDim.cout() {
-      id[4-param.input.tensorDim.cout()+i] = Int32(param.input.tensorDim[i])
-    }
-    let it: [Int32] = param.input.transpose.map { Int32($0) }
-    var od: [Int32] = [1, 1, 1, 1]
-    for i in 0..<param.output.tensorDim.cout() {
-      od[4-param.output.tensorDim.cout()+i] = Int32(param.output.tensorDim[i])
-    }
-    let ot: [Int32] = param.output.transpose.map { Int32($0) }
-    var rmp = ReshapeMetalParam.init(
-      idim: (id[0], id[1], id[2], id[3]),
-      itrans: (it[0], it[1], it[2], it[3]),
-      odim: (od[0], od[1], od[2], od[3]),
-      otrans: (ot[0], ot[1], ot[2], ot[3])
-    )
-    encoder.setBytes(&rmp, length: MemoryLayout<ReshapeMetalParam>.size, index: 0)
+    encoder.setBytes(&metalParam, length: MemoryLayout<ReshapeMetalParam>.size, index: 0)
     encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
     encoder.endEncoding()
   }
...
@@ -21,8 +21,13 @@ struct SoftmaxMetalParam {
 class SoftmaxKernel<P: PrecisionType>: Kernel, Computable{
+  var metalParam: SoftmaxMetalParam
   required init(device: MTLDevice, param: SoftmaxParam<P>) {
     param.output.initTexture(device: device, computePrecision: computePrecision)
+    metalParam = SoftmaxMetalParam.init(
+      N: Int32(param.input.tensorDim[0]),
+      K: Int32(param.input.tensorDim[1])
+    )
     if computePrecision == .Float32 {
       super.init(device: device, inFunctionName: "softmax")
     } else if computePrecision == .Float16 {
@@ -39,14 +44,7 @@ class SoftmaxKernel<P: PrecisionType>: Kernel, Computable{
     encoder.setTexture(param.input.metalTexture, index: 0)
     encoder.setTexture(param.output.metalTexture, index: 1)
-    var smp = SoftmaxMetalParam.init(
-      N: Int32(param.input.tensorDim[0]),
-      K: Int32(param.input.tensorDim[1])
-    )
-    print(" soft max param: ")
-    print(smp)
-    encoder.setBytes(&smp, length: MemoryLayout<SoftmaxMetalParam>.size, index: 0)
+    encoder.setBytes(&metalParam, length: MemoryLayout<SoftmaxMetalParam>.size, index: 0)
     encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
     encoder.endEncoding()
   }
...
@@ -58,12 +58,6 @@ class TransposeKernel<P: PrecisionType>: Kernel, Computable, Testable {
     }
     var axis: [Int] = [0, 1, 2, 3]
-    // var doNothing = false
-    // if param.axis.count == param.input.transpose.count {
-    //   doNothing = param.axis == param.input.transpose.map { Int32($0) }
-    // }
     for i in 0..<param.axis.count {
       axis[4-param.axis.count+i] = 4 - param.axis.count + Int(param.axis[i])
     }
@@ -72,9 +66,9 @@ class TransposeKernel<P: PrecisionType>: Kernel, Computable, Testable {
     tmp.iC = Int32(param.input.dim[param.input.transpose[3]])
     tmp.oC = Int32(param.output.dim[3])
     if realAxis == [0, 1, 2, 3] {
-      print("====> transpose! FAST :)")
+      // print("====> transpose! FAST :)")
     } else {
-      print("====> transpose! SLOW :(")
+      // print("====> transpose! SLOW :(")
     }
     metalParam = tmp
   }
...