Commit 882f5dae authored by liuruilong

Merge remote-tracking branch 'upstream/metal' into metal

@@ -25,8 +25,31 @@ struct ElementwiseAddMetalParam {
}
class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: ElementwiseAddMetalParam
required init(device: MTLDevice, param: ElementwiseAddParam<P>) {
param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision)
metalParam = ElementwiseAddMetalParam.init()
let xdim: [Int32] = (0..<4).map { Int32(param.inputX.dim[$0]) }
let ydim: [Int32] = (0..<4).map { Int32(param.inputY.dim[$0]) }
let xtrans: [Int32] = (0..<4).map { Int32(param.inputX.transpose[$0]) }
let ytrans: [Int32] = (0..<4).map { Int32(param.inputY.transpose[$0]) }
metalParam.xdim = (xdim[0], xdim[1], xdim[2], xdim[3])
metalParam.ydim = (ydim[0], ydim[1], ydim[2], ydim[3])
metalParam.xtrans = (xtrans[0], xtrans[1], xtrans[2], xtrans[3])
metalParam.ytrans = (ytrans[0], ytrans[1], ytrans[2], ytrans[3])
if param.axis == -1 {
metalParam.axis = 4 - Int32(param.inputY.tensorDim.cout())
} else {
metalParam.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis)
}
metalParam.ylen = Int32(param.inputY.tensorDim.cout())
if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) {
// print("===> elementwise_add fast!!!")
metalParam.fast = 1
}
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "elementwise_add")
} else if computePrecision == .Float16 {
@@ -40,32 +63,10 @@ class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encoder is nil")
}
var emp = ElementwiseAddMetalParam.init()
encoder.setTexture(param.inputX.metalTexture, index: 0)
encoder.setTexture(param.inputY.metalTexture, index: 1)
encoder.setTexture(param.output.metalTexture, index: 2)
let xdim: [Int32] = (0..<4).map { Int32(param.inputX.dim[$0]) }
let ydim: [Int32] = (0..<4).map { Int32(param.inputY.dim[$0]) }
let xtrans: [Int32] = (0..<4).map { Int32(param.inputX.transpose[$0]) }
let ytrans: [Int32] = (0..<4).map { Int32(param.inputY.transpose[$0]) }
emp.xdim = (xdim[0], xdim[1], xdim[2], xdim[3])
emp.ydim = (ydim[0], ydim[1], ydim[2], ydim[3])
emp.xtrans = (xtrans[0], xtrans[1], xtrans[2], xtrans[3])
emp.ytrans = (ytrans[0], ytrans[1], ytrans[2], ytrans[3])
if param.axis == -1 {
emp.axis = 4 - Int32(param.inputY.tensorDim.cout())
} else {
emp.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis)
}
emp.ylen = Int32(param.inputY.tensorDim.cout())
if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) {
// print("===> elementwise_add fast!!!")
emp.fast = 1
}
encoder.setBytes(&emp, length: MemoryLayout<ElementwiseAddMetalParam>.size, index: 0)
encoder.setBytes(&metalParam, length: MemoryLayout<ElementwiseAddMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
......
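In the two hunks above, ElementwiseAddMetalParam is now assembled once in init (dims, transposes, axis, fast-path flag) and compute only binds the textures and the cached metalParam. The axis value follows the kernel's right-aligned 4-slot layout: -1 means Y lines up with the trailing dims of X. A minimal sketch of that rule, with effectiveAxis as an illustrative helper that is not part of this commit:

// Illustrative helper (not part of this commit) mirroring the axis rule above.
// xRank / yRank are the original tensor ranks; axis is the elementwise_add
// axis attribute, where -1 means "align Y against the trailing dims of X".
func effectiveAxis(xRank: Int, yRank: Int, axis: Int) -> Int32 {
    if axis == -1 {
        return 4 - Int32(yRank)
    }
    return 4 - Int32(xRank) + Int32(axis)
}

// Example: X is rank 4, Y is a rank-1 per-channel tensor added at axis 1:
// effectiveAxis(xRank: 4, yRank: 1, axis: 1) == 1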
@@ -25,24 +25,10 @@ struct PoolMetalParam {
}
class PoolKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: PoolMetalParam
required init(device: MTLDevice, param: PoolParam<P>) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "pool")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "pool_half")
} else {
fatalError()
}
}
func compute(commandBuffer: MTLCommandBuffer, param: PoolParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encoder is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
var poolType: Int32
switch param.poolType {
case "max":
@@ -50,9 +36,9 @@ class PoolKernel<P: PrecisionType>: Kernel, Computable{
case "avg":
poolType = 1
default:
throw PaddleMobileError.predictError(message: " unknown pooltype " + param.poolType)
fatalError()
}
var pmp = PoolMetalParam.init(
metalParam = PoolMetalParam.init(
ksizeX: param.ksize[0],
ksizeY: param.ksize[1],
strideX: param.stride[0],
@@ -61,7 +47,24 @@ class PoolKernel<P: PrecisionType>: Kernel, Computable{
paddingY: param.padding[1],
poolType: poolType
)
encoder.setBytes(&pmp, length: MemoryLayout<PoolMetalParam>.size, index: 0)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "pool")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "pool_half")
} else {
fatalError()
}
}
func compute(commandBuffer: MTLCommandBuffer, param: PoolParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encoder is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<PoolMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
......
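The pool kernel gets the same treatment: PoolMetalParam is built once in init from ksize, stride and padding, the "avg" pool type maps to 1, and an unknown pool type now hits fatalError() because the non-throwing init replaces the old throw. For reference, ksize, stride and padding determine the output extent via the usual floor-mode pooling size rule; pooledExtent below is an illustrative helper, not code from this commit:

// Illustrative helper (not from this commit): the common floor-mode
// pooling output-size rule that ksize / stride / padding feed into.
func pooledExtent(input: Int, ksize: Int, stride: Int, padding: Int) -> Int {
    return (input + 2 * padding - ksize) / stride + 1
}

// Example: a 112-wide feature map with 2x2 pooling, stride 2, padding 0 gives 56.
// pooledExtent(input: 112, ksize: 2, stride: 2, padding: 0) == 56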
@@ -28,8 +28,27 @@ struct ReshapeTestParam: TestParam {
}
class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: ReshapeMetalParam
required init(device: MTLDevice, param: ReshapeParam<P>) {
param.output.initTexture(device: device, computePrecision: computePrecision)
var id: [Int32] = [1, 1, 1, 1]
for i in 0..<param.input.tensorDim.cout() {
id[4-param.input.tensorDim.cout()+i] = Int32(param.input.tensorDim[i])
}
let it: [Int32] = param.input.transpose.map { Int32($0) }
var od: [Int32] = [1, 1, 1, 1]
for i in 0..<param.output.tensorDim.cout() {
od[4-param.output.tensorDim.cout()+i] = Int32(param.output.tensorDim[i])
}
let ot: [Int32] = param.output.transpose.map { Int32($0) }
metalParam = ReshapeMetalParam.init(
idim: (id[0], id[1], id[2], id[3]),
itrans: (it[0], it[1], it[2], it[3]),
odim: (od[0], od[1], od[2], od[3]),
otrans: (ot[0], ot[1], ot[2], ot[3])
)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "reshape")
} else if computePrecision == .Float16 {
@@ -40,6 +59,12 @@ class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
}
required init(device: MTLDevice, testParam: ReshapeTestParam) {
metalParam = ReshapeMetalParam.init(
idim: (0, 0, 0, 0),
itrans: (0, 0, 0, 0),
odim: (0, 0, 0, 0),
otrans: (0, 0, 0, 0)
)
super.init(device: device, inFunctionName: "reshape")
}
@@ -50,23 +75,8 @@ class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
var id: [Int32] = [1, 1, 1, 1]
for i in 0..<param.input.tensorDim.cout() {
id[4-param.input.tensorDim.cout()+i] = Int32(param.input.tensorDim[i])
}
let it: [Int32] = param.input.transpose.map { Int32($0) }
var od: [Int32] = [1, 1, 1, 1]
for i in 0..<param.output.tensorDim.cout() {
od[4-param.output.tensorDim.cout()+i] = Int32(param.output.tensorDim[i])
}
let ot: [Int32] = param.output.transpose.map { Int32($0) }
var rmp = ReshapeMetalParam.init(
idim: (id[0], id[1], id[2], id[3]),
itrans: (it[0], it[1], it[2], it[3]),
odim: (od[0], od[1], od[2], od[3]),
otrans: (ot[0], ot[1], ot[2], ot[3])
)
encoder.setBytes(&rmp, length: MemoryLayout<ReshapeMetalParam>.size, index: 0)
encoder.setBytes(&metalParam, length: MemoryLayout<ReshapeMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
......
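ReshapeMetalParam moves into init as well. The two loops above right-align the input and output tensorDim values into a fixed 4-element layout, filling the leading slots with 1. A self-contained sketch of that padding step, with padTo4 as an illustrative name rather than an API from this repository:

// Illustrative helper (not part of this commit) matching the right-aligned
// padding loops above: a rank-n shape (n <= 4) is packed into the trailing
// slots of a fixed 4-element layout, with leading slots left as 1.
func padTo4(_ dims: [Int]) -> (Int32, Int32, Int32, Int32) {
    var d: [Int32] = [1, 1, 1, 1]
    for i in 0..<dims.count {
        d[4 - dims.count + i] = Int32(dims[i])
    }
    return (d[0], d[1], d[2], d[3])
}

// Example: padTo4([3, 224]) == (1, 1, 3, 224)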
@@ -21,8 +21,13 @@ struct SoftmaxMetalParam {
class SoftmaxKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: SoftmaxMetalParam
required init(device: MTLDevice, param: SoftmaxParam<P>) {
param.output.initTexture(device: device, computePrecision: computePrecision)
metalParam = SoftmaxMetalParam.init(
N: Int32(param.input.tensorDim[0]),
K: Int32(param.input.tensorDim[1])
)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "softmax")
} else if computePrecision == .Float16 {
@@ -38,13 +43,7 @@ class SoftmaxKernel<P: PrecisionType>: Kernel, Computable{
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
var smp = SoftmaxMetalParam.init(
N: Int32(param.input.tensorDim[0]),
K: Int32(param.input.tensorDim[1])
)
encoder.setBytes(&smp, length: MemoryLayout<SoftmaxMetalParam>.size, index: 0)
encoder.setBytes(&metalParam, length: MemoryLayout<SoftmaxMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
......
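SoftmaxMetalParam reduces to two numbers taken from the input's tensorDim: N rows of K logits each, which the kernel normalizes row by row. A CPU reference of that computation, for illustration only (this is not the Metal kernel):

import Foundation

// CPU reference (illustration only) of row-wise softmax over an N x K matrix:
// out[n][k] = exp(x[n][k]) / sum_j exp(x[n][j])
func softmaxRows(_ x: [[Float]]) -> [[Float]] {
    return x.map { row in
        let m = row.max() ?? 0                 // subtract the row max for numerical stability
        let e = row.map { exp($0 - m) }
        let s = e.reduce(0, +)
        return e.map { $0 / s }
    }
}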
@@ -57,13 +57,7 @@ class TransposeKernel<P: PrecisionType>: Kernel, Computable, Testable {
invT[v] = i
}
var axis: [Int] = [0, 1, 2, 3]
// var doNothing = false
// if param.axis.count == param.input.transpose.count {
// doNothing = param.axis == param.input.transpose.map { Int32($0) }
// }
for i in 0..<param.axis.count {
axis[4-param.axis.count+i] = 4 - param.axis.count + Int(param.axis[i])
}
@@ -72,9 +66,9 @@ class TransposeKernel<P: PrecisionType>: Kernel, Computable, Testable {
tmp.iC = Int32(param.input.dim[param.input.transpose[3]])
tmp.oC = Int32(param.output.dim[3])
if realAxis == [0, 1, 2, 3] {
print("====> transpose! FAST :)")
// print("====> transpose! FAST :)")
} else {
print("====> transpose! SLOW :(")
// print("====> transpose! SLOW :(")
}
metalParam = tmp
}
......
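The transpose kernel first inverts the input's stored transpose (invT[v] = i), pads param.axis into a 4-element realAxis, and treats the identity permutation [0, 1, 2, 3] as a fast path; the debug prints for both paths are commented out here. A small sketch of the inversion step, with inversePermutation as an illustrative helper that does not exist in the repository:

// Illustrative helper (not part of this commit) for the inverse-permutation
// step above: if perm maps source axis i to destination axis perm[i], the
// inverse maps each destination axis back to its source.
func inversePermutation(_ perm: [Int]) -> [Int] {
    var inv = [Int](repeating: 0, count: perm.count)
    for (i, v) in perm.enumerated() {
        inv[v] = i
    }
    return inv
}

// Example: inversePermutation([0, 2, 3, 1]) == [0, 3, 1, 2]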
@@ -84,6 +84,7 @@ kernel void elementwise_add_half(texture2d_array<half, access::read> inputX [[te
int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
int32_t yshift = 4 - pm.ylen - pm.axis;
for (int n = 0; n < 4; n++) {
x_xyzn[3] = n;
xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
invtrans(xtrans, x_abcd, t_abcd);
for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
......
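In the half-precision shader, each output texel carries four channel slots (n = 0..3); xyzn2abcd converts the texture coordinate plus slot into a padded 4-d index, invtrans undoes X's transpose, and yshift = 4 - ylen - axis relocates the dims covered by Y into its right-aligned shape. The hunk is truncated before the lookup itself, so the sketch below is a hypothetical CPU-side illustration of that broadcast indexing, assuming the loop copies the dims in axis..<axis+ylen into Y's trailing slots:

// Hypothetical CPU-side illustration (the loop body is assumed, since the
// shader hunk is truncated): given X's padded 4-d index, pick the Y index by
// keeping only the ylen dims starting at axis, shifted into Y's 4-slot shape.
func broadcastYIndex(xIndex: [Int], axis: Int, ylen: Int) -> [Int] {
    let yshift = 4 - ylen - axis               // same shift the shader computes
    var yIndex = [0, 0, 0, 0]
    for k in axis..<(axis + ylen) {
        yIndex[k + yshift] = xIndex[k]
    }
    return yIndex
}

// Example: axis == 1, ylen == 1 (per-channel Y): [n, c, h, w] maps to [0, 0, 0, c]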