diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift
index c0be8996f1a698d9398fd2b58aef3280b81894fb..8d173d6bcc546916ceb1c011110340e6ebf2e95a 100644
--- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift
@@ -7,11 +7,21 @@
 //
 
 import UIKit
+import Metal
+//import MetalKit
 import paddle_mobile
 
 class ViewController: UIViewController {
     override func viewDidLoad() {
         super.viewDidLoad()
+        let device = Metal.MTLCreateSystemDefaultDevice()!
+        let queue = device.makeCommandQueue()!
+        let test = PaddleMobileUnitTest.init(
+            inDevice: device,
+            inQueue: queue
+        )
+        test.testReshape()
+//        test.testTranspose()
         print(" done ")
     }
 
diff --git a/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift b/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift
index d2dd3e8f9ef2abd36a9bd2b1e112f496cc7ef36b..f4bd21452e2a011c7dd0ab09479df94d4b1f17a6 100644
--- a/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift
+++ b/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift
@@ -82,23 +82,95 @@ public class PaddleMobileUnitTest {
         indentPrintTensor(tensor: tensor, dim: ndim, ix: dim.map { $0 * 0 }, indentLevel: 0)
     }
 
+    /// Runs the `reshape` kernel on a 24-element ramp laid out as [2, 3, 4],
+    /// reshapes it to [24], and prints both tensors when the command buffer completes.
+    public func testReshape() {
+        let buffer = queue.makeCommandBuffer() ?! "buffer is nil"
+//        let input: [Float32] = (0..<24).map { Float32($0) }
+//        let inTexture = device.tensor2texture(value: input, dim: [2, 3, 4])
+//        let outTexture = device.tensor2texture(value: [Float32](), dim: [4, 6])
+//        let mp = ReshapeMetalParam.init(
+//            idim: (1, 2, 3, 4),
+//            itrans: (0, 1, 2, 3),
+//            odim: (1, 1, 4, 6),
+//            otrans: (0, 1, 2, 3)
+//        )
+//        let param = ReshapeTestParam.init(
+//            inputTexture: inTexture,
+//            outputTexture: outTexture,
+//            param: mp
+//        )
+//        let reshapeKernel = ReshapeKernel<Float32>.init(device: device, testParam: param)
+//        reshapeKernel.test(commandBuffer: buffer, testParam: param)
+//        buffer.addCompletedHandler { (buffer) in
+//            let _: Float32? = inTexture.logDesc()
+//            let _: Float32? = outTexture.logDesc()
+//            self.tensorPrint(tensor: input, dim: [2, 3, 4])
+//            let tx: [Float32] = self.device.texture2tensor(texture: outTexture, dim: [4, 6])
+//            self.tensorPrint(tensor: tx, dim: [4, 6])
+//        }
+
+        let input: [Float32] = (0..<24).map { Float32($0) }
+        let inTexture = device.tensor2texture(value: input, dim: [2, 3, 4])
+        let outTexture = device.tensor2texture(value: [Float32](), dim: [24])
+        let mp = ReshapeMetalParam.init(
+            idim: (1, 2, 3, 4),
+            itrans: (0, 1, 2, 3),
+            odim: (1, 1, 1, 24),
+            otrans: (0, 1, 2, 3)
+        )
+        let param = ReshapeTestParam.init(
+            inputTexture: inTexture,
+            outputTexture: outTexture,
+            param: mp
+        )
+        let reshapeKernel = ReshapeKernel<Float32>.init(device: device, testParam: param)
+        reshapeKernel.test(commandBuffer: buffer, testParam: param)
+        buffer.addCompletedHandler { (buffer) in
+            let _: Float32? = inTexture.logDesc()
+            let _: Float32? = outTexture.logDesc()
+            self.tensorPrint(tensor: input, dim: [2, 3, 4])
+            let tx: [Float32] = self.device.texture2tensor(texture: outTexture, dim: [24])
+            self.tensorPrint(tensor: tx, dim: [24])
+        }
+
+
+        buffer.commit()
+    }
+
     public func testTranspose() {
         let buffer = queue.makeCommandBuffer() ?! "buffer is nil"
 
-        var input: [Float32] = []
-        for i in 0..<72 {
-            input.append(Float32(i))
-        }
-//        let inputTexture = device.makeFloatTexture(value: input, textureWidth: 3, textureHeight: 2, arrayLength: 3)
-        let inputTexture = device.tensor2texture(value: input, dim: [4, 3, 2, 3]);
-        // group 1
-        let outputTexture = device.tensor2texture(value: [Float32](), dim: [3, 3, 2, 4])
-        let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 3, oC: 4, axis: [3, 1, 2, 0])
-//        let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [3, 0, 2, 1])
-//        // group 2
-//        let outputTexture = device.makeFloatTexture(value: [Float32](), textureWidth: 3, textureHeight: 3, arrayLength: 6)
-//        let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 4, axis: [3, 0, 2, 1])
+//        var input: [Float32] = []
+//        for i in 0..<72 {
+//            input.append(Float32(i))
+//        }
+////        let inputTexture = device.makeFloatTexture(value: input, textureWidth: 3, textureHeight: 2, arrayLength: 3)
+//        let inputTexture = device.tensor2texture(value: input, dim: [4, 3, 2, 3]);
+//        // group 1
+//        let outputTexture = device.tensor2texture(value: [Float32](), dim: [3, 3, 2, 4])
+//        let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 3, oC: 4, axis: [3, 1, 2, 0])
+////        let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [3, 0, 2, 1])
+////        // group 2
+////        let outputTexture = device.makeFloatTexture(value: [Float32](), textureWidth: 3, textureHeight: 3, arrayLength: 6)
+////        let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 4, axis: [3, 0, 2, 1])
+////
+//        let transposeKernel = TransposeKernel<Float32>.init(device: device, testParam: param)
+//
+//        transposeKernel.test(commandBuffer: buffer, param: param)
 //
+//        buffer.addCompletedHandler { (buffer) in
+//            let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false)
+//            let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false)
+//            self.tensorPrint(tensor: input, dim: [4, 3, 2, 3])
+//            let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 3, 2, 4])
+//            self.tensorPrint(tensor: tx, dim: [3, 3, 2, 4])
+//        }
+
+        let input: [Float32] = (0..<24).map { Float32($0) }
+        let inputTexture = device.tensor2texture(value: input, dim: [2, 3, 4])
+        let outputTexture = device.tensor2texture(value: [Float](), dim: [3, 4, 2])
+        let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [0, 2, 3, 1])
         let transposeKernel = TransposeKernel<Float32>.init(device: device, testParam: param)
 
         transposeKernel.test(commandBuffer: buffer, param: param)
@@ -106,11 +178,11 @@ public class PaddleMobileUnitTest {
         buffer.addCompletedHandler { (buffer) in
             let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false)
             let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false)
-            self.tensorPrint(tensor: input, dim: [4, 3, 2, 3])
-            let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 3, 2, 4])
-            self.tensorPrint(tensor: tx, dim: [3, 3, 2, 4])
+            self.tensorPrint(tensor: input, dim: [2, 3, 4])
+            let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 4, 2])
+            self.tensorPrint(tensor: tx, dim: [3, 4, 2])
         }
-        
+
         buffer.commit()
     }
 
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal
index 851dd35faf670282a06f5cf06282efa0bed9cde1..f8c9099415b3675c5f583d2905a1fa52e46b05a3 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal
@@ -184,27 +184,6 @@ kernel void pool_half(texture2d_array<half, access::read> inTexture [[texture(0)
   outTexture.write(r, gid.xy, gid.z);
 }
 
-kernel void reshape(texture2d_array<float, access::read> inTexture [[texture(0)]],
-                    texture2d_array<float, access::write> outTexture [[texture(1)]],
-                    uint3 gid [[thread_position_in_grid]]) {
-  if (gid.x >= outTexture.get_width() ||
-      gid.y >= outTexture.get_height() ||
-      gid.z >= outTexture.get_array_size()) return;
-
-  float4 r = inTexture.read(uint2(0, 0), gid.z);
-  outTexture.write(r, gid.xy, gid.z);
-}
-
-kernel void reshape_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
-                         texture2d_array<half, access::write> outTexture [[texture(1)]],
-                         uint3 gid [[thread_position_in_grid]]) {
-  if (gid.x >= outTexture.get_width() ||
-      gid.y >= outTexture.get_height() ||
-      gid.z >= outTexture.get_array_size()) return;
-
-  half4 r = inTexture.read(uint2(0, 0), gid.x);
-  outTexture.write(r, gid.xy, gid.z);
-}
 
 kernel void softmax(texture2d_array<float, access::read> inTexture [[texture(0)]],
                     texture2d_array<float, access::write> outTexture [[texture(1)]],
@@ -321,22 +300,51 @@ kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0
   }
 }
 
-void xyzn2abcd(uint C, uint xyzn[4], uint abcd[4]) {
-  abcd[1] = xyzn[0];
-  abcd[2] = xyzn[1];
+inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) {
+  abcd[2] = xyzn[0];
+  abcd[1] = xyzn[1];
   uint t = xyzn[2] * 4 + xyzn[3];
   abcd[0] = t / C;
   abcd[3] = t % C;
-  return;
 }
 
-void abcd2xyzn(uint C, uint abcd[4], uint xyzn[4]) {
-  xyzn[0] = abcd[1];
-  xyzn[1] = abcd[2];
+inline void abcd2xyzn(int C, int abcd[4], int xyzn[4]) {
+  xyzn[0] = abcd[2];
+  xyzn[1] = abcd[1];
   uint t = abcd[0] * C + abcd[3];
   xyzn[2] = t / 4;
   xyzn[3] = t % 4;
-  return;
+}
+
+// Linearizes the 4-D coordinate `abcd` into an element index, row-major over `dim`.
+inline int32_t abcd2index(int32_t dim[4], int32_t abcd[4]) {
+  int32_t r = abcd[0];
+  r = r * dim[1] + abcd[1];
+  r = r * dim[2] + abcd[2];
+  r = r * dim[3] + abcd[3];
+  return r;
+}
+
+// Inverse of abcd2index: expands the linear index `ind` into a 4-D coordinate over `dim`.
+inline void index2abcd(int32_t dim[4], int32_t ind, int32_t abcd[4]) {
+  abcd[3] = ind % dim[3]; ind /= dim[3];
+  abcd[2] = ind % dim[2]; ind /= dim[2];
+  abcd[1] = ind % dim[1]; ind /= dim[1];
+  abcd[0] = ind;
+}
+
+// Applies the permutation as a gather: opos[i] = ipos[trans[i]].
+inline void trans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
+  for (int i = 0; i < 4; i++) {
+    opos[i] = ipos[trans[i]];
+  }
+}
+
+// Applies the inverse permutation as a scatter: opos[trans[i]] = ipos[i].
+inline void invtrans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
+  for (int i = 0; i < 4; i++) {
+    opos[trans[i]] = ipos[i];
+  }
 }
 
 struct TransposeParam {
@@ -356,9 +364,9 @@ kernel void transpose(texture2d_array<float, access::read> inTexture [[texture(0
     outTexture.write(r, gid.xy, gid.z);
   } else {
     float4 r;
-    for (uint i = 0; i < 4; i++) {
-      uint ixyzn[] = {gid.x, gid.y, gid.z, i};
-      uint iabcd[4], oabcd[4], oxyzn[4];
+    for (int n = 0; n < 4; n++) {
+      int ixyzn[] = {int(gid.x), int(gid.y), int(gid.z), n};
+      int iabcd[4], oabcd[4], oxyzn[4];
       xyzn2abcd(pm.oC, ixyzn, iabcd);
       oabcd[pm.axis[0]] = iabcd[0];
       oabcd[pm.axis[1]] = iabcd[1];
@@ -366,8 +374,66 @@ kernel void transpose(texture2d_array<float, access::read> inTexture [[texture(0
       oabcd[pm.axis[3]] = iabcd[3];
       abcd2xyzn(pm.iC, oabcd, oxyzn);
       float4 rt = inTexture.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2]);
-      r[i] = rt[oxyzn[3]];
+      r[n] = rt[oxyzn[3]];
     }
     outTexture.write(r, gid.xy, gid.z);
   }
 }
+
+// Shapes and axis permutations for the reshape kernel. The field order and
+// types mirror the Swift-side `ReshapeMetalParam` that is bound at buffer(0).
+struct ReshapeParam {
+  int32_t idim[4];
+  int32_t itrans[4];
+  int32_t odim[4];
+  int32_t otrans[4];
+};
+
+// Fills each channel of an output texel by converting its texture position to
+// a linear element index of the output tensor, then converting that index back
+// to a position in the input texture. Channels whose index falls past the
+// element count are written as 0.
+kernel void reshape(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                    texture2d_array<float, access::write> outTexture [[texture(1)]],
+                    constant ReshapeParam &rp [[buffer(0)]],
+                    uint3 gid [[thread_position_in_grid]]) {
+  if (gid.x >= outTexture.get_width() ||
+      gid.y >= outTexture.get_height() ||
+      gid.z >= outTexture.get_array_size()) return;
+
+  int oxyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, oabcd[4], ixyzn[4], iabcd[4];
+  ReshapeParam lrp = rp;
+  int oC = lrp.odim[lrp.otrans[3]];
+  int iC = lrp.idim[lrp.itrans[3]];
+  int count = lrp.odim[0] * lrp.odim[1] * lrp.odim[2] * lrp.odim[3];
+  float4 r;
+  for (int n = 0; n < 4; n++) {
+    oxyzn[3] = n;
+    xyzn2abcd(oC, oxyzn, oabcd);
+    int tabcd[4];
+    invtrans(lrp.otrans, oabcd, tabcd);
+    int index = abcd2index(lrp.odim, tabcd);
+    if (index < count) {
+      index2abcd(lrp.idim, index, tabcd);
+      trans(lrp.itrans, tabcd, iabcd);
+      // Address the input texture with the itrans-permuted coordinates
+      // (iabcd), not the unpermuted tabcd, so a non-identity itrans is honoured.
+      abcd2xyzn(iC, iabcd, ixyzn);
+      r[n] = inTexture.read(uint2(ixyzn[0], ixyzn[1]), ixyzn[2])[ixyzn[3]];
+    } else {
+      r[n] = 0;
+    }
+  }
+  outTexture.write(r, gid.xy, gid.z);
+}
+//
+//kernel void reshape_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+//                         texture2d_array<half, access::write> outTexture [[texture(1)]],
+//                         uint3 gid [[thread_position_in_grid]]) {
+//  if (gid.x >= outTexture.get_width() ||
+//      gid.y >= outTexture.get_height() ||
+//      gid.z >= outTexture.get_array_size()) return;
+//
+//  half4 r = inTexture.read(uint2(0, 0), gid.x);
+//  outTexture.write(r, gid.xy, gid.z);
+//}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift
index 438c89e59eb7e9a2ef315997b9d8d1f3a44a5462..96a1abb6df964ee24d74ca9979ca59512f4e4265 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift
@@ -14,8 +14,28 @@
 
 import Foundation
 
+/// Reshape parameters handed to the `reshape` Metal kernel via `setBytes`.
+struct ReshapeMetalParam {
+    var idim: (Int32, Int32, Int32, Int32)
+    var itrans: (Int32, Int32, Int32, Int32)
+    var odim: (Int32, Int32, Int32, Int32)
+    var otrans: (Int32, Int32, Int32, Int32)
+}
+
+/// Textures and raw kernel parameters consumed by `ReshapeKernel.test`.
+struct ReshapeTestParam: TestParam {
+    let inputTexture: MTLTexture
+    let outputTexture: MTLTexture
+    let param: ReshapeMetalParam
+}
+
 class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
     required init(device: MTLDevice, param: ReshapeParam<P>) {
+        param.output.initTexture(device: device)
+        super.init(device: device, inFunctionName: "reshape")
+    }
+
+    required init(device: MTLDevice, testParam: ReshapeTestParam) {
         super.init(device: device, inFunctionName: "reshape")
     }
 
@@ -25,7 +45,31 @@ class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
         }
         encoder.setTexture(param.input.metalTexture, index: 0)
         encoder.setTexture(param.output.metalTexture, index: 1)
+        let id: [Int32] = (0..<4).map { Int32(param.input.dim[$0]) }
+        let it: [Int32] = param.input.transpose.map { Int32($0) }
+        let od: [Int32] = (0..<4).map { Int32(param.output.dim[$0]) }
+        let ot: [Int32] = param.output.transpose.map { Int32($0) }
+        var rmp = ReshapeMetalParam.init(
+            idim: (id[0], id[1], id[2], id[3]),
+            itrans: (it[0], it[1], it[2], it[3]),
+            odim: (od[0], od[1], od[2], od[3]),
+            otrans: (ot[0], ot[1], ot[2], ot[3])
+        )
+        encoder.setBytes(&rmp, length: MemoryLayout<ReshapeMetalParam>.size, index: 0)
         encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
         encoder.endEncoding()
     }
+
+    /// Encodes one reshape pass using the raw textures and parameters in `testParam`.
+    func test(commandBuffer: MTLCommandBuffer, testParam: ReshapeTestParam) {
+        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+            fatalError()
+        }
+        encoder.setTexture(testParam.inputTexture, index: 0)
+        encoder.setTexture(testParam.outputTexture, index: 1)
+        var pm: ReshapeMetalParam = testParam.param
+        encoder.setBytes(&pm, length: MemoryLayout<ReshapeMetalParam>.size, index: 0)
+        encoder.dispatch(computePipline: pipline, outTexture: testParam.outputTexture)
+        encoder.endEncoding()
+    }
 }
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift
index d6bdc06f59417af088a9913a12fc792ae376d942..8d7fee1c6ed586616a3ab281ed7221c52107d0f9 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift
@@ -53,7 +53,11 @@ class TransposeKernel<P: PrecisionType>: Kernel, Computable, Testable {
         var tmp = TransposeMetalParam.init(realAxis)
         tmp.iC = Int32(param.input.dim[param.input.transpose[3]])
         tmp.oC = Int32(param.output.dim[3])
-        
+        if realAxis == [0, 1, 2, 3] {
+            print("====> transpose! FAST :)")
+        } else {
+            print("====> transpose! SLOW :(")
+        }
         encoder.setTexture(param.input.metalTexture, index: 0)
         encoder.setTexture(param.output.metalTexture, index: 1)
         encoder.setBytes(&tmp, length: MemoryLayout<TransposeMetalParam>.size, index: 0)
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
index 759ffd4b8b46673e5245f8bbc67dbcc0956666aa..5ef52c407c9ed8268f7117a6b52ef24211eb6bb3 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
@@ -20,11 +20,15 @@ class ReshapeParam<P: PrecisionType>: OpParam {
         do {
             input = try ReshapeParam.inputX(inputs: opDesc.inputs, from: inScope)
             output = try ReshapeParam.outputOut(outputs: opDesc.outputs, from: inScope)
+//            shape = output.dim
+            inplace = try ReshapeParam.getAttr(key: "inplace", attrs: opDesc.attrs)
         } catch let error {
             throw error
         }
     }
     let input: Texture<P>
+//    let shape: [Int]
+    let inplace: Bool
     var output: Texture<P>
 }
 
diff --git a/metal/paddle-mobile/paddle-mobile/framework/Texture.swift b/metal/paddle-mobile/paddle-mobile/framework/Texture.swift
index 3e8cdf9328e0046a6d2b50131ff20dcafe056b4b..f34cebbfc9bf344fe52b3a5c77a9ab20288516c8 100644
--- a/metal/paddle-mobile/paddle-mobile/framework/Texture.swift
+++ b/metal/paddle-mobile/paddle-mobile/framework/Texture.swift
@@ -45,7 +45,7 @@ public class Texture<P: PrecisionType>: Tensorial {
     var metalTexture: MTLTexture!
     var transpose: [Int] = [0, 1, 2, 3]
 
-    func initTexture(device: MTLDevice, transpose: [Int]) {
+    func initTexture(device: MTLDevice, transpose: [Int] = [0, 1, 2, 3]) {
         let newDim = transpose.map { originDim[$0] }
         let newLayout = transpose.map {layout.layoutWithDim[$0] }
 