提交 e4554f0a 编写于 作者: D dolphin8 提交者: GitHub

Merge pull request #805 from dolphin8/metal

transpose op
...@@ -71,6 +71,105 @@ extension MTLDevice { ...@@ -71,6 +71,105 @@ extension MTLDevice {
return buffer! return buffer!
} }
/// Reads a 2D-array texture back into a flat, row-major tensor.
///
/// `dim` is left-padded with 1s to rank 4 (`tdim`) and permuted by `transpose`
/// to give the texture-side shape `ndim`. The texture is expected to be
/// `ndim[2]` wide, `ndim[1]` high and to hold `ceil(ndim[0] * ndim[3] / 4)`
/// slices; the four lanes of each texel carry four consecutive values of the
/// combined `(ndim[0], ndim[3])` index. This is the inverse of the packing
/// performed by `tensor2texture`.
///
/// - Parameters:
///   - texture: Source texture; its size is only checked with `assert`.
///   - dim: Logical tensor shape (rank 4 or lower).
///   - transpose: Permutation mapping texture axes to logical axes.
/// - Returns: `dim.reduce(1, *)` elements in the logical (un-transposed) order.
/// - Note: The zero fill is a forced cast from `Float32`, so `P` is effectively
///   expected to be `Float32`.
func texture2tensor<P>(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] {
    var tdim: [Int] = [1, 1, 1, 1]
    for i in 0..<dim.count {
        tdim[4 - dim.count + i] = dim[i]
    }
    let count = dim.reduce(1) { $0 * $1 }
    var tensor: [P] = .init(repeating: Float32(0.0) as! P, count: count)
    let ndim: [Int] = transpose.map { tdim[$0] }
    assert(texture.width == ndim[2])
    assert(texture.height == ndim[1])
    assert(texture.arrayLength == (ndim[0] * ndim[3] + 3) / 4)
    let bpR = ndim[2] * 4 * MemoryLayout<P>.size
    let bpI = ndim[1] * bpR
    let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: ndim[2], height: ndim[1], depth: 1))

    // One staging buffer, sized in ELEMENTS (not bytes), reused for every
    // slice and released on exit. The previous code allocated a fresh,
    // oversized buffer per slice and never freed it.
    let sliceElements = ndim[1] * ndim[2] * 4
    let pointer = UnsafeMutablePointer<P>.allocate(capacity: sliceElements)
    defer { pointer.deallocate() }

    for i in 0..<texture.arrayLength {
        texture.getBytes(pointer, bytesPerRow: bpR, bytesPerImage: bpI, from: region, mipmapLevel: 0, slice: i)
        for h in 0..<ndim[1] {
            for w in 0..<ndim[2] {
                for k in 0..<4 {
                    let tx = (h * ndim[2] + w) * 4 + k
                    let n = (i * 4 + k) / ndim[3]
                    // Lanes past ndim[0] * ndim[3] are padding. With a
                    // non-identity transpose their flat index can alias a
                    // valid element, so they must be skipped explicitly.
                    if n >= ndim[0] {
                        continue
                    }
                    let c = (i * 4 + k) % ndim[3]
                    let jg = [n, h, w, c]
                    var ig = [0, 0, 0, 0]
                    for d in 0..<4 {
                        ig[transpose[d]] = jg[d]
                    }
                    let ix = ig[0] * tdim[1] * tdim[2] * tdim[3] + ig[1] * tdim[2] * tdim[3] + ig[2] * tdim[3] + ig[3]
                    if ix < count {
                        tensor[ix] = pointer[tx]
                    }
                }
            }
        }
    }
    return tensor
}
/// Creates a shared `rgba32Float` 2D-array texture and, when `value` is not
/// empty, fills it with the tensor data.
///
/// `dim` is left-padded with 1s to rank 4 (`tdim`) and permuted by `transpose`
/// to give the texture-side shape `ndim`: width `ndim[2]`, height `ndim[1]`,
/// and `ceil(ndim[0] * ndim[3] / 4)` slices. The combined `(ndim[0], ndim[3])`
/// index is packed four values per texel; unused trailing lanes are zero.
///
/// - Parameters:
///   - value: Row-major tensor data in logical order. Pass an empty array to
///     get a texture of the right size with no data uploaded.
///   - dim: Logical tensor shape (rank 4 or lower).
///   - transpose: Permutation mapping texture axes to logical axes.
/// - Returns: The newly created texture.
/// - Note: The zero fill is a forced cast from `Float32`, so `P` is effectively
///   expected to be `Float32`.
func tensor2texture<P>(value: [P], dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> MTLTexture {
    if value.count > 0 {
        assert(value.count == dim.reduce(1) { $0 * $1 })
    }

    var tdim: [Int] = [1, 1, 1, 1]
    for i in 0..<dim.count {
        tdim[4 - dim.count + i] = dim[i]
    }
    let ndim: [Int] = transpose.map { tdim[$0] }

    let textureDesc = MTLTextureDescriptor.init()
    textureDesc.width = ndim[2]
    textureDesc.height = ndim[1]
    textureDesc.depth = 1
    textureDesc.usage = [.shaderRead, .shaderWrite]
    textureDesc.pixelFormat = .rgba32Float
    textureDesc.textureType = .type2DArray
    textureDesc.storageMode = .shared
    textureDesc.cpuCacheMode = .defaultCache
    textureDesc.arrayLength = (ndim[0] * ndim[3] + 3) / 4
    let texture = makeTexture(descriptor: textureDesc)!

    if value.count > 0 {
        var rcount: Int = (ndim[0] * ndim[3] + 3) / 4
        rcount = rcount * 4 * ndim[1] * ndim[2]
        var nvalue: [P] = .init(repeating: Float32(0.0) as! P, count: rcount)

        // Scatter every logical element to its packed texture position.
        for i0 in 0..<tdim[0] {
            for i1 in 0..<tdim[1] {
                for i2 in 0..<tdim[2] {
                    for i3 in 0..<tdim[3] {
                        let ig = [i0, i1, i2, i3]
                        let ix = (i0 * tdim[1] * tdim[2] * tdim[3]) + (i1 * tdim[2] * tdim[3]) + (i2 * tdim[3]) + i3
                        let jg = transpose.map { ig[$0] }
                        let k = jg[0] * ndim[3] + jg[3]
                        let jx = ((k / 4) * ndim[1] * ndim[2] * 4) + (jg[1] * ndim[2] * 4) + (jg[2] * 4) + (k % 4)
                        nvalue[jx] = value[ix]
                    }
                }
            }
        }

        let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: ndim[2], height: ndim[1], depth: 1))
        let bpR = ndim[2] * 4 * MemoryLayout<P>.size
        let bpI = ndim[1] * bpR

        // Upload inside withUnsafeBufferPointer so the array storage is
        // guaranteed to stay valid for every replace() call. A pointer obtained
        // through `UnsafeMutablePointer(mutating: nvalue)` is only valid for
        // the duration of that initializer call.
        nvalue.withUnsafeBufferPointer { (packed: UnsafeBufferPointer<P>) -> Void in
            guard let base = packed.baseAddress else {
                return
            }
            for i in 0..<textureDesc.arrayLength {
                let p = base + texture.width * texture.height * 4 * i
                texture.replace(region: region, mipmapLevel: 0, slice: i, withBytes: p, bytesPerRow: bpR, bytesPerImage: bpI)
            }
        }
    }
    return texture
}
func makeFloatTexture<P>(value: [P], textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture{ func makeFloatTexture<P>(value: [P], textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture{
let textureDesc = MTLTextureDescriptor.init() let textureDesc = MTLTextureDescriptor.init()
...@@ -85,19 +184,25 @@ extension MTLDevice { ...@@ -85,19 +184,25 @@ extension MTLDevice {
textureDesc.arrayLength = arrayLength textureDesc.arrayLength = arrayLength
let texture = makeTexture(descriptor: textureDesc)! let texture = makeTexture(descriptor: textureDesc)!
if arrayLength == 1 && value.count >= 4{ if value.count >= 4{
let pointer: UnsafeMutablePointer<P> = UnsafeMutablePointer<P>.allocate(capacity: value.count * MemoryLayout<P>.size) let counts = arrayLength * 4 * textureWidth * textureHeight
let pointer: UnsafeMutablePointer<P> = UnsafeMutablePointer<P>.allocate(capacity: counts * MemoryLayout<P>.size)
for i in 0..<value.count { for i in 0..<value.count {
pointer[i] = value[i] pointer[i] = value[i]
} }
for i in value.count..<counts {
pointer[i] = 0 as! P
}
let bytesPerRow = texture.width * texture.depth * 4 * MemoryLayout<P>.size let bytesPerRow = texture.width * texture.depth * 4 * MemoryLayout<P>.size
let bytesPerImage = texture.height * bytesPerRow
let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: texture.depth)) let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: texture.depth))
texture.replace(region: region, mipmapLevel: 0, withBytes: pointer, bytesPerRow: bytesPerRow) for i in 0..<arrayLength {
let p = pointer + texture.width * texture.height * 4 * i
texture.replace(region: region, mipmapLevel: 0, slice: i, withBytes: p, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage)
}
} else { } else {
} }
return texture return texture
...@@ -112,16 +217,16 @@ extension MTLComputeCommandEncoder { ...@@ -112,16 +217,16 @@ extension MTLComputeCommandEncoder {
let height = computePipline.maxTotalThreadsPerThreadgroup/width let height = computePipline.maxTotalThreadsPerThreadgroup/width
let threadsPerGroup = MTLSize.init(width: width, height: height, depth: 1) let threadsPerGroup = MTLSize.init(width: width, height: height, depth: 1)
// print(" thread: threads per group: \(threadsPerGroup) ") print(" thread: threads per group: \(threadsPerGroup) ")
// print(" thread: out texture width: \(outTexture.width) , out texture height: \(outTexture.height)") print(" thread: out texture width: \(outTexture.width) , out texture height: \(outTexture.height)")
let groupWidth = (outTexture.width + width - 1)/width let groupWidth = (outTexture.width + width - 1)/width
let groupHeight = (outTexture.height + height - 1)/height let groupHeight = (outTexture.height + height - 1)/height
let groupDepth = slices let groupDepth = slices
let groups = MTLSize.init(width: groupWidth, height: groupHeight, depth: groupDepth) let groups = MTLSize.init(width: groupWidth, height: groupHeight, depth: groupDepth)
// print("groups: \(groups) ") print("groups: \(groups) ")
// print("threads per group: \(threadsPerGroup)") print("threads per group: \(threadsPerGroup)")
setComputePipelineState(computePipline) setComputePipelineState(computePipline)
...@@ -183,54 +288,54 @@ public extension MTLTexture { ...@@ -183,54 +288,54 @@ public extension MTLTexture {
func logDesc<T>(header: String = "", stridable: Bool = true) -> T? { func logDesc<T>(header: String = "", stridable: Bool = true) -> T? {
print(header) print(header)
print("texture: \(self)") print("texture: \(self)")
let res: [(index: Int, value: T)] = stridableFloatArray(stridable: stridable) // let res: [(index: Int, value: T)] = stridableFloatArray(stridable: stridable)
print(res) // print(res)
// if textureType == .type2DArray { if textureType == .type2DArray {
// for i in 0..<arrayLength{ for i in 0..<arrayLength{
// var str: String = "slice: \(i): \n" var str: String = "slice: \(i): \n"
// let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment) let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment)
// let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size
// let bytesPerImage = width * height * depth * 4 * MemoryLayout<T>.size let bytesPerImage = width * height * depth * 4 * MemoryLayout<T>.size
// let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth))
// getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i) getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i)
// let p = bytes.assumingMemoryBound(to: T.self) let p = bytes.assumingMemoryBound(to: T.self)
// str += "2d array count : \(width * height * depth * 4) \n" str += "2d array count : \(width * height * depth * 4) \n"
// if stridable && width * height * depth * 4 > 100 { if stridable && width * height * depth * 4 > 100 {
// for j in stride(from: 0, to: width * height * depth * 4 , by: width * height * depth * 4 / 100){ for j in stride(from: 0, to: width * height * depth * 4 , by: width * height * depth * 4 / 100){
// str += " index \(j): \(p[j])" str += " index \(j): \(p[j])"
// } }
// } else { } else {
// for j in 0..<width * height * depth * 4 { for j in 0..<width * height * depth * 4 {
// str += " index \(j): \(p[j])" str += " index \(j): \(p[j])"
// } }
// } }
//
// bytes.deallocate() bytes.deallocate()
// print(str) print(str)
// } }
// } else if textureType == .type2D { } else if textureType == .type2D {
// var str: String = "texture 2D: " var str: String = "texture 2D: "
// let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment) let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment)
// let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size
// let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth)) let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth))
// getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0) getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0)
// let p = bytes.assumingMemoryBound(to: T.self) let p = bytes.assumingMemoryBound(to: T.self)
// str += "2d count : \(width * width * 4) \n" str += "2d count : \(width * width * 4) \n"
//
// if stridable { if stridable {
// for j in stride(from: 0, to: width * height * 4, by: width * height * 4 / 100){ for j in stride(from: 0, to: width * height * 4, by: width * height * 4 / 100){
// str += "index \(j): \(p[j]) " str += "index \(j): \(p[j]) "
// } }
// } else { } else {
// for j in 0..<width * height * 4 { for j in 0..<width * height * 4 {
// str += "index \(j): \(p[j]) " str += "index \(j): \(p[j]) "
// } }
// } }
//
// print(str) print(str)
// bytes.deallocate() bytes.deallocate()
// } }
return nil return nil
} }
......
...@@ -17,6 +17,103 @@ public class PaddleMobileUnitTest { ...@@ -17,6 +17,103 @@ public class PaddleMobileUnitTest {
queue = inQueue queue = inQueue
} }
/// Recursively prints one level of a row-major tensor as nested bracketed
/// lists, indenting each level by one space.
///
/// - Parameters:
///   - tensor: Flat tensor data.
///   - dim: Fully resolved shape of `tensor`.
///   - ix: Index prefix chosen by the enclosing levels (same length as `dim`).
///   - indentLevel: Axis handled by this call; also the indentation width.
private func indentPrintTensor(tensor: [Float32], dim: [Int], ix: [Int], indentLevel: Int) {
    let pad = String(repeating: " ", count: indentLevel)
    var closing: String

    if dim.count == indentLevel + 1 {
        // Innermost axis: emit all values of this row on a single line.
        var cells: [String] = []
        for i in 0..<dim[indentLevel] {
            var scaled = ix
            scaled[indentLevel] = i
            // Multiply each index by the extents of all later axes so that the
            // sum of the entries is the flat row-major offset.
            for inner in dim.indices.dropFirst() {
                for outer in 0..<inner {
                    scaled[outer] *= dim[inner]
                }
            }
            cells.append(tensor[scaled.reduce(0, +)].description)
        }
        closing = pad + "[" + cells.joined(separator: ", ") + "]"
    } else {
        // Outer axis: open a bracket, recurse for every index, then close it.
        print(pad + "[")
        var child = ix
        for i in 0..<dim[indentLevel] {
            child[indentLevel] = i
            indentPrintTensor(tensor: tensor, dim: dim, ix: child, indentLevel: indentLevel + 1)
        }
        closing = pad + "]"
    }

    // A trailing comma is added unless this is the last entry of its parent.
    if indentLevel > 0 && ix[indentLevel - 1] < dim[indentLevel - 1] - 1 {
        closing += ","
    }
    print(closing)
}
/// Pretty-prints `tensor` with the given shape.
///
/// At most one entry of `dim` may be `-1`; it is replaced by whatever extent
/// makes the shape match `tensor.count`. Any other non-positive entry, or more
/// than one `-1`, trips an assertion.
///
/// - Parameters:
///   - tensor: Flat, row-major tensor data.
///   - dim: Shape, optionally containing a single `-1` wildcard.
private func tensorPrint(tensor: [Float32], dim: [Int]) {
    // -1: no wildcard seen, >= 0: wildcard position,
    // -2: wildcard seen while another wildcard or an error was already recorded,
    // -3: invalid extent.
    var wildcard = -1
    var known = 1
    for (axis, extent) in dim.enumerated() {
        if extent == -1 {
            wildcard = (wildcard == -1) ? axis : -2
        } else if extent <= 0 {
            wildcard = -3
        } else {
            known *= extent
        }
    }
    assert(wildcard >= -1)

    var resolved = dim
    if wildcard == -1 {
        assert(tensor.count == known)
    } else {
        assert(tensor.count % known == 0)
        resolved[wildcard] = tensor.count / known
    }

    let origin = [Int](repeating: 0, count: dim.count)
    indentPrintTensor(tensor: tensor, dim: resolved, ix: origin, indentLevel: 0)
}
/// Runs the transpose kernel on a [4, 3, 2, 3] ramp tensor (values 0...71) with
/// axis [3, 1, 2, 0] and prints the input and the [3, 3, 2, 4] result once the
/// command buffer completes.
///
/// Earlier variants of this test built the textures with `makeFloatTexture`
/// and tried axis [3, 0, 2, 1]; they are no longer kept inline.
public func testTranspose() {
    let buffer = queue.makeCommandBuffer() ?! "buffer is nil"

    let input: [Float32] = (0..<72).map { Float32($0) }
    let inputTexture = device.tensor2texture(value: input, dim: [4, 3, 2, 3])
    // An empty value array only allocates the texture; the kernel fills it.
    let outputTexture = device.tensor2texture(value: [Float32](), dim: [3, 3, 2, 4])

    let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 3, oC: 4, axis: [3, 1, 2, 0])
    let transposeKernel = TransposeKernel<Float32>.init(device: device, testParam: param)
    transposeKernel.test(commandBuffer: buffer, param: param)

    buffer.addCompletedHandler { (buffer) in
        // Raw texture dumps first, then the logical tensors before and after.
        let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false)
        let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false)
        self.tensorPrint(tensor: input, dim: [4, 3, 2, 3])
        let result: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 3, 2, 4])
        self.tensorPrint(tensor: result, dim: [3, 3, 2, 4])
    }

    buffer.commit()
}
public func testConvAddBnRelu() { public func testConvAddBnRelu() {
let buffer = queue.makeCommandBuffer() ?! " buffer is nil " let buffer = queue.makeCommandBuffer() ?! " buffer is nil "
...@@ -132,16 +229,6 @@ public class PaddleMobileUnitTest { ...@@ -132,16 +229,6 @@ public class PaddleMobileUnitTest {
} }
buffer.commit() buffer.commit()
// let inputTexture = device.makeFloatTexture(value: <#T##[P]#>, textureWidth: <#T##Int#>, textureHeight: <#T##Int#>, arrayLength: <#T##Int#>)
// let param = ConvAddBatchNormReluTestParam.init(inInputTexture: <#T##MTLTexture#>, inOutputTexture: <#T##MTLTexture#>, inMetalParam: <#T##MetalConvParam#>, inFilterBuffer: <#T##MTLBuffer#>, inBiaseBuffer: <#T##MTLBuffer#>, inNewScaleBuffer: <#T##MTLBuffer#>, inNewBiaseBuffer: <#T##MTLBuffer#>, inFilterSize: <#T##(width: Int, height: Int, channel: Int)#>)
// ConvAddBatchNormReluKernel.init(device: <#T##MTLDevice#>, testParam: <#T##ConvAddBatchNormReluTestParam#>)
} }
} }
......
...@@ -68,16 +68,6 @@ public class Executor<P: PrecisionType> { ...@@ -68,16 +68,6 @@ public class Executor<P: PrecisionType> {
throw error throw error
} }
} }
// for op in block.ops {
// do {
// let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
// op.inferShape()
// ops.append(op)
// } catch let error {
// throw error
// }
// }
} }
} }
......
...@@ -141,10 +141,6 @@ public class Loader<P: PrecisionType> { ...@@ -141,10 +141,6 @@ public class Loader<P: PrecisionType> {
throw PaddleMobileError.loaderError(message: "get tensor desc failed") throw PaddleMobileError.loaderError(message: "get tensor desc failed")
} }
// guard (try? tensorDesc.dataType.dataTypeSize()) == MemoryLayout<P>.size else {
// throw PaddleMobileError.memoryError(message: "PrecisionType not support")
// }
if (varDesc.persistable if (varDesc.persistable
&& varDesc.type != .FeedMiniBatch && varDesc.type != .FeedMiniBatch
&& varDesc.type != .FetchList) { && varDesc.type != .FetchList) {
......
...@@ -56,7 +56,9 @@ class OpCreator<P: PrecisionType> { ...@@ -56,7 +56,9 @@ class OpCreator<P: PrecisionType> {
gBoxcoderType : BoxcoderOp<P>.creat, gBoxcoderType : BoxcoderOp<P>.creat,
gConvBnReluType : ConvBNReluOp<P>.creat, gConvBnReluType : ConvBNReluOp<P>.creat,
gDwConvBnReluType : DwConvBNReluOp<P>.creat, gDwConvBnReluType : DwConvBNReluOp<P>.creat,
gMulticlassNMSType : MulticlassNMSOp<P>.creat] gMulticlassNMSType : MulticlassNMSOp<P>.creat,
gTransposeType : TransposeOp<P>.creat,
gPriorBoxType : PriorBoxOp<P>.creat]
private init(){} private init(){}
} }
...@@ -18,19 +18,28 @@ class BoxcoderParam<P: PrecisionType>: OpParam { ...@@ -18,19 +18,28 @@ class BoxcoderParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
fatalError() priorBox = try BoxcoderParam.getFirstTensor(key: "PriorBox", map: opDesc.inputs, from: inScope)
priorBoxVar = try BoxcoderParam.getFirstTensor(key: "PriorBoxVar", map: opDesc.inputs, from: inScope)
targetBox = try BoxcoderParam.getFirstTensor(key: "TargetBox", map: opDesc.inputs, from: inScope)
output = try BoxcoderParam.getFirstTensor(key: "OutputBox", map: opDesc.outputs, from: inScope)
codeType = try BoxcoderParam.getAttr(key: "code_type", attrs: opDesc.attrs)
boxNormalized = try BoxcoderParam.getAttr(key: "box_normalized", attrs: opDesc.attrs)
} catch let error { } catch let error {
throw error throw error
} }
} }
let input: Texture<P> let priorBox: Texture<P>
let priorBoxVar: Texture<P>
let targetBox: Texture<P>
var output: Texture<P> var output: Texture<P>
let codeType: String
let boxNormalized: Bool
} }
class BoxcoderOp<P: PrecisionType>: Operator<BoxcoderKernel<P>, BoxcoderParam<P>>, Runable, Creator, InferShaperable{ class BoxcoderOp<P: PrecisionType>: Operator<BoxcoderKernel<P>, BoxcoderParam<P>>, Runable, Creator, InferShaperable{
func inferShape() { func inferShape() {
para.output.dim = para.input.dim // para.output.dim = para.input.dim
} }
typealias OpType = BoxcoderOp<P> typealias OpType = BoxcoderOp<P>
......
...@@ -18,19 +18,31 @@ class ConcatParam<P: PrecisionType>: OpParam { ...@@ -18,19 +18,31 @@ class ConcatParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
fatalError() guard let xlist = opDesc.inputs["X"] else {
fatalError()
}
for x in xlist {
guard let variant = inScope[x], let v = variant as? Texture<P> else {
fatalError()
}
input.append(v)
}
axis = try ConcatParam.getAttr(key: "axis", attrs: opDesc.attrs)
output = try ConcatParam.outputOut(outputs: opDesc.outputs, from: inScope)
} catch let error { } catch let error {
throw error throw error
} }
} }
let input: Texture<P> var input: [Texture<P>] = []
var output: Texture<P> var output: Texture<P>
let axis: Int
} }
class ConcatOp<P: PrecisionType>: Operator<ConcatKernel<P>, ConcatParam<P>>, Runable, Creator, InferShaperable{ class ConcatOp<P: PrecisionType>: Operator<ConcatKernel<P>, ConcatParam<P>>, Runable, Creator, InferShaperable{
func inferShape() { func inferShape() {
para.output.dim = para.input.dim let dim = para.input.reduce([0, 0]) {[$0[0] + $1.dim[0], $1.dim[1]]}
para.output.dim = Dim.init(inDim: dim)
} }
typealias OpType = ConcatOp<P> typealias OpType = ConcatOp<P>
......
...@@ -19,7 +19,7 @@ class BoxcoderKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -19,7 +19,7 @@ class BoxcoderKernel<P: PrecisionType>: Kernel, Computable{
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil") throw PaddleMobileError.predictError(message: " encode is nil")
} }
encoder.setTexture(param.input.metalTexture, index: 0) // encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1) encoder.setTexture(param.output.metalTexture, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding() encoder.endEncoding()
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
/// Compute kernel wrapper for the `concat` Metal function.
class ConcatKernel<P: PrecisionType>: Kernel, Computable {
    /// Loads the "concat" pipeline; `param` is not used during construction.
    required init(device: MTLDevice, param: ConcatParam<P>) {
        super.init(device: device, inFunctionName: "concat")
    }

    /// Encodes one dispatch sized to the output texture.
    ///
    /// - Throws: `PaddleMobileError.predictError` when no compute encoder can
    ///   be created from `commandBuffer`.
    func compute(commandBuffer: MTLCommandBuffer, param: ConcatParam<P>) throws {
        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
            throw PaddleMobileError.predictError(message: " encoder is nil")
        }

        // NOTE(review): elsewhere in this change ConcatParam.input appears to
        // be declared as an array of textures; confirm this binding still
        // matches the parameter type.
        encoder.setTexture(param.input.metalTexture, index: 0)
        encoder.setTexture(param.output.metalTexture, index: 1)

        encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
        encoder.endEncoding()
    }
}
...@@ -19,13 +19,13 @@ class ConcatKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -19,13 +19,13 @@ class ConcatKernel<P: PrecisionType>: Kernel, Computable{
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil") throw PaddleMobileError.predictError(message: " encode is nil")
} }
encoder.setTexture(param.input.metalTexture, index: 0) // encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1) // encoder.setTexture(param.output.metalTexture, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding() encoder.endEncoding()
} }
required init(device: MTLDevice, param: ConcatParam<P>) { required init(device: MTLDevice, param: ConcatParam<P>) {
super.init(device: device, inFunctionName: "priorbox") super.init(device: device, inFunctionName: "concat")
} }
} }
...@@ -57,7 +57,7 @@ class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable { ...@@ -57,7 +57,7 @@ class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
} else { } else {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3")
} }
param.output.initTexture(device: device, transpose: [0, 2, 3, 1])
param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
param.variance.initBuffer(device: device) param.variance.initBuffer(device: device)
......
...@@ -202,7 +202,7 @@ kernel void reshape_half(texture2d_array<half, access::read> inTexture [[texture ...@@ -202,7 +202,7 @@ kernel void reshape_half(texture2d_array<half, access::read> inTexture [[texture
gid.y >= outTexture.get_height() || gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return; gid.z >= outTexture.get_array_size()) return;
half4 r = inTexture.read(uint2(0, 0), gid.z); half4 r = inTexture.read(uint2(0, 0), gid.x);
outTexture.write(r, gid.xy, gid.z); outTexture.write(r, gid.xy, gid.z);
} }
...@@ -321,8 +321,53 @@ kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0 ...@@ -321,8 +321,53 @@ kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0
} }
} }
// Converts a texture coordinate {x, y, slice, lane} into a logical 4-D
// coordinate {a, b, c, d}: b = x, c = y, and the packed index slice * 4 + lane
// is split into a = index / C and d = index % C.
// Statement order is kept as-is so behaviour is unchanged even if the two
// arrays were to alias.
void xyzn2abcd(uint C, uint xyzn[4], uint abcd[4]) {
    abcd[1] = xyzn[0];
    abcd[2] = xyzn[1];
    const uint packed = xyzn[2] * 4 + xyzn[3];
    abcd[0] = packed / C;
    abcd[3] = packed % C;
}
// Converts a logical 4-D coordinate {a, b, c, d} into a texture coordinate
// {x, y, slice, lane}: x = b, y = c, and the packed index a * C + d is split
// into slice = index / 4 and lane = index % 4.
// Statement order is kept as-is so behaviour is unchanged even if the two
// arrays were to alias.
void abcd2xyzn(uint C, uint abcd[4], uint xyzn[4]) {
    xyzn[0] = abcd[1];
    xyzn[1] = abcd[2];
    const uint packed = abcd[0] * C + abcd[3];
    xyzn[2] = packed / 4;
    xyzn[3] = packed % 4;
}
// Parameters of the `transpose` kernel, bound at buffer index 0.
// The host uploads the raw bytes of a struct of six 32-bit integers, so the
// field order and widths here must stay in sync with that host-side struct.
struct TransposeParam {
// Channel count used to convert the permuted logical coordinate into an input texture coordinate.
int iC;
// Channel count used to convert the output texture coordinate into a logical coordinate.
int oC;
// Permutation: logical output axis k is read from logical input axis axis[k].
int axis[4];
};
// Transposes a tensor stored as a 2D-array texture (four packed values per
// texel). For every output texel the four lanes are gathered one at a time:
// the output texture coordinate is converted to a logical coordinate using
// pm.oC, permuted through pm.axis, converted to an input texture coordinate
// using pm.iC, and the matching lane of that input texel is read.
// When pm.axis is the identity permutation the texel is copied unchanged.
kernel void transpose(texture2d_array<float, access::read> inTexture [[texture(0)]],
                      texture2d_array<float, access::write> outTexture [[texture(1)]],
                      constant TransposeParam &pm [[buffer(0)]],
                      uint3 gid [[thread_position_in_grid]]) {
    // The grid is rounded up to whole threadgroups, so threads that fall
    // outside the output texture must not read or write anything.
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) return;

    if ((pm.axis[0] == 0) && (pm.axis[1] == 1) && (pm.axis[2] == 2) && (pm.axis[3] == 3)) {
        // Identity permutation: plain copy.
        float4 r = inTexture.read(gid.xy, gid.z);
        outTexture.write(r, gid.xy, gid.z);
    } else {
        float4 r;
        for (uint i = 0; i < 4; i++) {
            // ixyzn / iabcd describe the output element handled by this lane;
            // oabcd / oxyzn describe the input element it is read from.
            uint ixyzn[] = {gid.x, gid.y, gid.z, i};
            uint iabcd[4], oabcd[4], oxyzn[4];
            xyzn2abcd(pm.oC, ixyzn, iabcd);
            oabcd[pm.axis[0]] = iabcd[0];
            oabcd[pm.axis[1]] = iabcd[1];
            oabcd[pm.axis[2]] = iabcd[2];
            oabcd[pm.axis[3]] = iabcd[3];
            abcd2xyzn(pm.iC, oabcd, oxyzn);
            float4 rt = inTexture.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2]);
            r[i] = rt[oxyzn[3]];
        }
        outTexture.write(r, gid.xy, gid.z);
    }
}
...@@ -19,8 +19,8 @@ class MulticlassNMSKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -19,8 +19,8 @@ class MulticlassNMSKernel<P: PrecisionType>: Kernel, Computable{
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil") throw PaddleMobileError.predictError(message: " encode is nil")
} }
encoder.setTexture(param.input.metalTexture, index: 0) // encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1) // encoder.setTexture(param.output.metalTexture, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding() encoder.endEncoding()
} }
......
...@@ -14,18 +14,73 @@ ...@@ -14,18 +14,73 @@
import Foundation import Foundation
class TransposeKernel<P: PrecisionType>: Kernel, Computable{ struct TransposeMetalParam {
var iC: Int32 = 0
var oC: Int32 = 0
var i0: Int32
var i1: Int32
var i2: Int32
var i3: Int32
init(_ i0: Int32, _ i1: Int32, _ i2: Int32, _ i3: Int32) {
self.i0 = i0
self.i1 = i1
self.i2 = i2
self.i3 = i3
}
init(_ axis: [Int]) {
self.init(Int32(axis[0]), Int32(axis[1]), Int32(axis[2]), Int32(axis[3]))
}
}
/// Inputs for exercising the transpose kernel directly on raw textures.
/// The property order defines the memberwise initializer used by the unit test.
struct TransposeTestParam: TestParam {
/// Texture the kernel reads from (bound at texture index 0).
let inputTexture: MTLTexture
/// Texture the kernel writes to (bound at texture index 1).
let outputTexture: MTLTexture
/// Forwarded to the kernel's `iC` parameter.
let iC: Int
/// Forwarded to the kernel's `oC` parameter.
let oC: Int
/// Axis permutation forwarded to the kernel; the first four entries are used.
let axis: [Int]
}
class TransposeKernel<P: PrecisionType>: Kernel, Computable, Testable {
func compute(commandBuffer: MTLCommandBuffer, param: TransposeParam<P>) throws { func compute(commandBuffer: MTLCommandBuffer, param: TransposeParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil") throw PaddleMobileError.predictError(message: " encode is nil")
} }
var invT: [Int] = [0, 1, 2, 3]
for (i, v) in param.input.transpose.enumerated() {
invT[v] = i
}
let realAxis = param.axis.map {invT[$0]}
var tmp = TransposeMetalParam.init(realAxis)
tmp.iC = Int32(param.input.dim[param.input.transpose[3]])
tmp.oC = Int32(param.output.dim[3])
encoder.setTexture(param.input.metalTexture, index: 0) encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1) encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&tmp, length: MemoryLayout<TransposeMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding() encoder.endEncoding()
} }
required init(device: MTLDevice, param: TransposeParam<P>) { required init(device: MTLDevice, param: TransposeParam<P>) {
super.init(device: device, inFunctionName: "priorbox") param.output.initTexture(device: device, transpose: [0, 1, 2, 3])
super.init(device: device, inFunctionName: "transpose")
} }
} required init(device: MTLDevice, testParam: TransposeTestParam) {
super.init(device: device, inFunctionName: "transpose")
}
public func test(commandBuffer: MTLCommandBuffer, param: TransposeTestParam) {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
fatalError()
}
encoder.setTexture(param.inputTexture, index: 0)
encoder.setTexture(param.outputTexture, index: 1)
var tmp = TransposeMetalParam.init(param.axis)
tmp.iC = Int32(param.iC)
tmp.oC = Int32(param.oC)
encoder.setBytes(&tmp, length: MemoryLayout<TransposeMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture)
encoder.endEncoding()
}}
...@@ -18,19 +18,22 @@ class MulticlassNMSParam<P: PrecisionType>: OpParam { ...@@ -18,19 +18,22 @@ class MulticlassNMSParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
fatalError() scores = try MulticlassNMSParam.getFirstTensor(key: "Scores", map: opDesc.inputs, from: inScope)
bboxes = try MulticlassNMSParam.getFirstTensor(key: "BBoxes", map: opDesc.inputs, from: inScope)
output = try MulticlassNMSParam.outputOut(outputs: opDesc.outputs, from: inScope)
} catch let error { } catch let error {
throw error throw error
} }
} }
let input: Texture<P> let scores: Texture<P>
let bboxes: Texture<P>
var output: Texture<P> var output: Texture<P>
} }
class MulticlassNMSOp<P: PrecisionType>: Operator<MulticlassNMSKernel<P>, MulticlassNMSParam<P>>, Runable, Creator, InferShaperable{ class MulticlassNMSOp<P: PrecisionType>: Operator<MulticlassNMSKernel<P>, MulticlassNMSParam<P>>, Runable, Creator, InferShaperable{
func inferShape() { func inferShape() {
para.output.dim = para.input.dim // para.output.dim = para.input.dim
} }
typealias OpType = MulticlassNMSOp<P> typealias OpType = MulticlassNMSOp<P>
......
...@@ -18,13 +18,16 @@ class PriorBoxParam<P: PrecisionType>: OpParam { ...@@ -18,13 +18,16 @@ class PriorBoxParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
fatalError() input = try PriorBoxParam.input(inputs: opDesc.inputs, from: inScope)
output = try PriorBoxParam.getFirstTensor(key: "Boxes", map: opDesc.outputs, from: inScope)
variances = try PriorBoxParam.getFirstTensor(key: "Variances", map: opDesc.outputs, from: inScope)
} catch let error { } catch let error {
throw error throw error
} }
} }
let input: Texture<P> let input: Texture<P>
var output: Texture<P> var output: Texture<P>
let variances: Texture<P>
} }
class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>>, Runable, Creator, InferShaperable{ class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>>, Runable, Creator, InferShaperable{
...@@ -36,7 +39,7 @@ class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P> ...@@ -36,7 +39,7 @@ class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>
typealias OpType = PriorBoxOp<P> typealias OpType = PriorBoxOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do { do {
try kernel.compute(commandBuffer: buffer, param: para) // try kernel.compute(commandBuffer: buffer, param: para)
} catch let error { } catch let error {
throw error throw error
} }
......
...@@ -18,13 +18,16 @@ class TransposeParam<P: PrecisionType>: OpParam { ...@@ -18,13 +18,16 @@ class TransposeParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
fatalError() input = try TransposeParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try TransposeParam.outputOut(outputs: opDesc.outputs, from: inScope)
axis = try TransposeParam.getAttr(key: "axis", attrs: opDesc.attrs)
} catch let error { } catch let error {
throw error throw error
} }
} }
let input: Texture<P> let input: Texture<P>
var output: Texture<P> var output: Texture<P>
let axis: [Int32]
} }
class TransposeOp<P: PrecisionType>: Operator<TransposeKernel<P>, TransposeParam<P>>, Runable, Creator, InferShaperable{ class TransposeOp<P: PrecisionType>: Operator<TransposeKernel<P>, TransposeParam<P>>, Runable, Creator, InferShaperable{
......
...@@ -39,7 +39,6 @@ public struct Dim { ...@@ -39,7 +39,6 @@ public struct Dim {
return dims[index]; return dims[index];
} }
private(set) var dims: [Int] private(set) var dims: [Int]
private init(){ private init(){
fatalError() fatalError()
......
...@@ -95,43 +95,6 @@ public class Texture<P: PrecisionType>: Tensorial { ...@@ -95,43 +95,6 @@ public class Texture<P: PrecisionType>: Tensorial {
layout = DataLayout.init([(.N, fourDim[0]), (.C, fourDim[1]), (.H, fourDim[2]), (.W, fourDim[3])]) layout = DataLayout.init([(.N, fourDim[0]), (.C, fourDim[1]), (.H, fourDim[2]), (.W, fourDim[3])])
} }
// required public init(inDim: Dim, inLayout: DataLayout = .NHWC, inTexture: MTLTexture) {
// dim = inDim
// layout = inLayout
// metalTexture = inTexture
// let tmpTextureDes = MTLTextureDescriptor.init()
//
// if inDim.cout() == 1 {
// tmpTextureDes.width = inDim[0]
// tmpTextureDes.textureType = .type1D
// } else if inDim.cout() == 2 {
// tmpTextureDes.height = inDim[0]
// tmpTextureDes.width = inDim[1]
// tmpTextureDes.textureType = .type2D
// } else if inDim.cout() == 3 {
// fatalError(" not support texture dim 3")
// } else if inDim.cout() == 4 {
// tmpTextureDes.height = inDim[1]
// tmpTextureDes.width = inDim[2]
// tmpTextureDes.depth = inDim[3] * inDim[1]
// tmpTextureDes.textureType = .type2DArray
// }
//
// tmpTextureDes.pixelFormat = .r32Float
// tmpTextureDes.storageMode = .shared
// textureDesc = tmpTextureDes
// let device = MTLCreateSystemDefaultDevice()
// metalTexture = device!.makeTexture(descriptor: tmpTextureDes)!
// }
// init() {
// dim = Dim.init(inDim: [])
// layout = .NCHW
// let device = MTLCreateSystemDefaultDevice()
// textureDesc = MTLTextureDescriptor.init()
// metalTexture = device!.makeTexture(descriptor: textureDesc)!
// }
private(set) var layout: DataLayout private(set) var layout: DataLayout
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册