未验证 提交 8b214a7b 编写于 作者: D dolphin8 提交者: GitHub

Merge pull request #819 from dolphin8/metal

reshape op
......@@ -7,11 +7,21 @@
//
import UIKit
import Metal
//import MetalKit
import paddle_mobile
class ViewController: UIViewController {
/// Sets up the default Metal device and a command queue, then launches the reshape unit test.
///
/// Both Metal factory calls return optionals. When either one is unavailable the
/// method logs the reason and returns instead of crashing on a forced unwrap.
override func viewDidLoad() {
    super.viewDidLoad()
    guard let device = Metal.MTLCreateSystemDefaultDevice() else {
        print(" Metal device is not available ")
        return
    }
    guard let queue = device.makeCommandQueue() else {
        print(" failed to create Metal command queue ")
        return
    }
    let test = PaddleMobileUnitTest.init(
        inDevice: device,
        inQueue: queue
    )
    test.testReshape()
    // test.testTranspose()
    // testReshape only commits its command buffer; the tensors themselves are
    // printed later from the buffer's completion handler.
    print(" done ")
}
......
......@@ -82,23 +82,93 @@ public class PaddleMobileUnitTest {
indentPrintTensor(tensor: tensor, dim: ndim, ix: dim.map { $0 * 0 }, indentLevel: 0)
}
/// Runs the `reshape` kernel on a ramp tensor (values 0..<24) uploaded with dim [2, 3, 4]
/// and reads it back with dim [24].
///
/// The input and the read-back output are printed from the command buffer's
/// completion handler, i.e. after `commit()` has handed the work to the GPU.
/// A 2-D target can be exercised instead by using dim [4, 6] with odim (1, 1, 4, 6).
public func testReshape() {
    let commandBuffer = queue.makeCommandBuffer() ?! "buffer is nil"

    // Source data and the two textures the kernel reads from / writes to.
    let input: [Float32] = (0..<24).map { Float32($0) }
    let inTexture = device.tensor2texture(value: input, dim: [2, 3, 4])
    let outTexture = device.tensor2texture(value: [Float32](), dim: [24])

    // Shapes are padded to four dimensions; both permutations are the identity.
    let param = ReshapeTestParam(
        inputTexture: inTexture,
        outputTexture: outTexture,
        param: ReshapeMetalParam(
            idim: (1, 2, 3, 4),
            itrans: (0, 1, 2, 3),
            odim: (1, 1, 1, 24),
            otrans: (0, 1, 2, 3)
        )
    )

    let reshapeKernel = ReshapeKernel<Float32>(device: device, testParam: param)
    reshapeKernel.test(commandBuffer: commandBuffer, testParam: param)

    commandBuffer.addCompletedHandler { _ in
        let _: Float32? = inTexture.logDesc()
        let _: Float32? = outTexture.logDesc()
        self.tensorPrint(tensor: input, dim: [2, 3, 4])
        let reshaped: [Float32] = self.device.texture2tensor(texture: outTexture, dim: [24])
        self.tensorPrint(tensor: reshaped, dim: [24])
    }
    commandBuffer.commit()
}
public func testTranspose() {
let buffer = queue.makeCommandBuffer() ?! "buffer is nil"
var input: [Float32] = []
for i in 0..<72 {
input.append(Float32(i))
}
// let inputTexture = device.makeFloatTexture(value: input, textureWidth: 3, textureHeight: 2, arrayLength: 3)
let inputTexture = device.tensor2texture(value: input, dim: [4, 3, 2, 3]);
// group 1
let outputTexture = device.tensor2texture(value: [Float32](), dim: [3, 3, 2, 4])
let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 3, oC: 4, axis: [3, 1, 2, 0])
// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [3, 0, 2, 1])
// // group 2
// let outputTexture = device.makeFloatTexture(value: [Float32](), textureWidth: 3, textureHeight: 3, arrayLength: 6)
// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 4, axis: [3, 0, 2, 1])
// var input: [Float32] = []
// for i in 0..<72 {
// input.append(Float32(i))
// }
//// let inputTexture = device.makeFloatTexture(value: input, textureWidth: 3, textureHeight: 2, arrayLength: 3)
// let inputTexture = device.tensor2texture(value: input, dim: [4, 3, 2, 3]);
// // group 1
// let outputTexture = device.tensor2texture(value: [Float32](), dim: [3, 3, 2, 4])
// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 3, oC: 4, axis: [3, 1, 2, 0])
//// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [3, 0, 2, 1])
//// // group 2
//// let outputTexture = device.makeFloatTexture(value: [Float32](), textureWidth: 3, textureHeight: 3, arrayLength: 6)
//// let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 4, axis: [3, 0, 2, 1])
////
// let transposeKernel = TransposeKernel<Float32>.init(device: device, testParam: param)
//
// transposeKernel.test(commandBuffer: buffer, param: param)
//
// buffer.addCompletedHandler { (buffer) in
// let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false)
// let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false)
// self.tensorPrint(tensor: input, dim: [4, 3, 2, 3])
// let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 3, 2, 4])
// self.tensorPrint(tensor: tx, dim: [3, 3, 2, 4])
// }
let input: [Float32] = (0..<24).map { Float32($0) }
let inputTexture = device.tensor2texture(value: input, dim: [2, 3, 4])
let outputTexture = device.tensor2texture(value: [Float](), dim: [3, 4, 2])
let param = TransposeTestParam.init(inputTexture: inputTexture, outputTexture: outputTexture, iC: 4, oC: 2, axis: [0, 2, 3, 1])
let transposeKernel = TransposeKernel<Float32>.init(device: device, testParam: param)
transposeKernel.test(commandBuffer: buffer, param: param)
......@@ -106,11 +176,11 @@ public class PaddleMobileUnitTest {
buffer.addCompletedHandler { (buffer) in
let _: Float32? = inputTexture.logDesc(header: "input texture", stridable: false)
let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false)
self.tensorPrint(tensor: input, dim: [4, 3, 2, 3])
let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 3, 2, 4])
self.tensorPrint(tensor: tx, dim: [3, 3, 2, 4])
self.tensorPrint(tensor: input, dim: [2, 3, 4])
let tx: [Float32] = self.device.texture2tensor(texture: outputTexture, dim: [3, 4, 2])
self.tensorPrint(tensor: tx, dim: [3, 4, 2])
}
buffer.commit()
}
......
......@@ -184,27 +184,6 @@ kernel void pool_half(texture2d_array<half, access::read> inTexture [[texture(0)
outTexture.write(r, gid.xy, gid.z);
}
// Placeholder reshape: every texel of output slice gid.z receives the (0, 0)
// texel of input slice gid.z. Threads outside the output texture do nothing.
kernel void reshape(texture2d_array<float, access::read> inTexture [[texture(0)]],
                    texture2d_array<float, access::write> outTexture [[texture(1)]],
                    uint3 gid [[thread_position_in_grid]]) {
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) return;

    const float4 texel = inTexture.read(uint2(0, 0), gid.z);
    outTexture.write(texel, gid.xy, gid.z);
}
// Half-precision placeholder reshape: every in-range output texel receives the
// (0, 0) texel of input slice gid.x. Threads outside the output texture do nothing.
// NOTE(review): the float `reshape` kernel indexes the input slice with gid.z,
// this one with gid.x - confirm which of the two is intended.
kernel void reshape_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
                         texture2d_array<half, access::write> outTexture [[texture(1)]],
                         uint3 gid [[thread_position_in_grid]]) {
    const bool insideOutput = gid.x < outTexture.get_width() &&
                              gid.y < outTexture.get_height() &&
                              gid.z < outTexture.get_array_size();
    if (!insideOutput) return;

    const half4 texel = inTexture.read(uint2(0, 0), gid.x);
    outTexture.write(texel, gid.xy, gid.z);
}
kernel void softmax(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
......@@ -321,22 +300,47 @@ kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0
}
}
// Converts a texture coordinate xyzn = (x, y, array slice, component) into a
// 4-D tensor position abcd, for a tensor whose last dimension has size C.
//   x -> abcd[2], y -> abcd[1]
//   the packed index (slice * 4 + component) is split into
//   abcd[0] (quotient by C) and abcd[3] (remainder by C).
// Only the int-based signature is kept because that is what the callers pass;
// the earlier uint signature and its swapped x/y assignments were stale.
// Signed arithmetic is used throughout so no int/uint mixing takes place.
inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) {
    abcd[2] = xyzn[0];
    abcd[1] = xyzn[1];
    int t = xyzn[2] * 4 + xyzn[3];
    abcd[0] = t / C;
    abcd[3] = t % C;
}
// Converts a 4-D tensor position abcd into a texture coordinate
// xyzn = (x, y, array slice, component), for a tensor whose last dimension has size C.
//   abcd[2] -> x, abcd[1] -> y
//   the packed index (abcd[0] * C + abcd[3]) is split into
//   the slice (quotient by 4) and the component (remainder by 4).
// Only the int-based signature is kept because that is what the callers pass;
// the earlier uint signature and its swapped x/y assignments were stale.
// Signed arithmetic is used throughout so no int/uint mixing takes place.
inline void abcd2xyzn(int C, int abcd[4], int xyzn[4]) {
    xyzn[0] = abcd[2];
    xyzn[1] = abcd[1];
    int t = abcd[0] * C + abcd[3];
    xyzn[2] = t / 4;
    xyzn[3] = t % 4;
}
// Flattens the 4-D position abcd into a linear row-major index for a tensor
// with dimensions dim. dim[0] is never read: the leading axis has no upper
// neighbour to scale by.
inline int32_t abcd2index(int32_t dim[4], int32_t abcd[4]) {
    int32_t flat = abcd[0];
    for (int axis = 1; axis < 4; axis++) {
        flat = flat * dim[axis] + abcd[axis];
    }
    return flat;
}
// Expands the linear row-major index ind into a 4-D position abcd for a tensor
// with dimensions dim. Axes 3, 2 and 1 are peeled off by remainder/division;
// whatever is left becomes abcd[0] (dim[0] is never consulted).
inline void index2abcd(int32_t dim[4], int32_t ind, int32_t abcd[4]) {
    int32_t rest = ind;
    for (int axis = 3; axis > 0; axis--) {
        abcd[axis] = rest % dim[axis];
        rest /= dim[axis];
    }
    abcd[0] = rest;
}
// Applies the axis permutation `trans` to a position:
// output axis i takes the value of input axis trans[i].
inline void trans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
    opos[0] = ipos[trans[0]];
    opos[1] = ipos[trans[1]];
    opos[2] = ipos[trans[2]];
    opos[3] = ipos[trans[3]];
}
// Undoes the axis permutation `trans`:
// input axis i is written to output axis trans[i].
inline void invtrans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
    opos[trans[0]] = ipos[0];
    opos[trans[1]] = ipos[1];
    opos[trans[2]] = ipos[2];
    opos[trans[3]] = ipos[3];
}
struct TransposeParam {
......@@ -356,9 +360,9 @@ kernel void transpose(texture2d_array<float, access::read> inTexture [[texture(0
outTexture.write(r, gid.xy, gid.z);
} else {
float4 r;
for (uint i = 0; i < 4; i++) {
uint ixyzn[] = {gid.x, gid.y, gid.z, i};
uint iabcd[4], oabcd[4], oxyzn[4];
for (int n = 0; n < 4; n++) {
int ixyzn[] = {int(gid.x), int(gid.y), int(gid.z), n};
int iabcd[4], oabcd[4], oxyzn[4];
xyzn2abcd(pm.oC, ixyzn, iabcd);
oabcd[pm.axis[0]] = iabcd[0];
oabcd[pm.axis[1]] = iabcd[1];
......@@ -366,8 +370,58 @@ kernel void transpose(texture2d_array<float, access::read> inTexture [[texture(0
oabcd[pm.axis[3]] = iabcd[3];
abcd2xyzn(pm.iC, oabcd, oxyzn);
float4 rt = inTexture.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2]);
r[i] = rt[oxyzn[3]];
r[n] = rt[oxyzn[3]];
}
outTexture.write(r, gid.xy, gid.z);
}
}
// Parameter block read by the `reshape` kernel from buffer index 0.
// The Swift side fills it by copying the bytes of `ReshapeMetalParam` with
// `setBytes`, so field order and element type must stay in sync with that struct.
struct ReshapeParam {
// Input tensor dimensions, four entries.
int32_t idim[4];
// Axis permutation of the input (taken from the input texture's `transpose` on the Swift side).
int32_t itrans[4];
// Output tensor dimensions, four entries.
int32_t odim[4];
// Axis permutation of the output (taken from the output texture's `transpose` on the Swift side).
int32_t otrans[4];
};
// General reshape: for each of the four components of the output texel at gid,
// work out which input element holds the same linear index and copy it over.
//
// Per component n:
//   1. output texture coordinate -> output position in the permuted layout (xyzn2abcd)
//   2. undo the output permutation (invtrans) and flatten against odim (abcd2index)
//   3. expand that linear index against idim (index2abcd)
//   4. apply the input permutation (trans) and convert to an input texture
//      coordinate (abcd2xyzn)
// Components whose linear index is not below the element count are written as 0.
kernel void reshape(texture2d_array<float, access::read> inTexture [[texture(0)]],
                    texture2d_array<float, access::write> outTexture [[texture(1)]],
                    constant ReshapeParam &rp [[buffer(0)]],
                    uint3 gid [[thread_position_in_grid]]) {
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) return;

    int oxyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, oabcd[4], ixyzn[4], iabcd[4];
    // Local copy of the constant-buffer parameters; presumably needed so the
    // arrays can be handed to the helper functions.
    ReshapeParam lrp = rp;
    // Size of the last axis of each tensor in its permuted layout.
    int oC = lrp.odim[lrp.otrans[3]];
    int iC = lrp.idim[lrp.itrans[3]];
    int count = lrp.odim[0] * lrp.odim[1] * lrp.odim[2] * lrp.odim[3];

    float4 r;
    for (int n = 0; n < 4; n++) {
        oxyzn[3] = n;
        xyzn2abcd(oC, oxyzn, oabcd);
        int tabcd[4];
        invtrans(lrp.otrans, oabcd, tabcd);
        int index = abcd2index(lrp.odim, tabcd);
        if (index < count) {
            index2abcd(lrp.idim, index, tabcd);
            trans(lrp.itrans, tabcd, iabcd);
            // Use the permuted position: iC above is taken from the permuted
            // layout, and passing tabcd here would ignore itrans altogether.
            abcd2xyzn(iC, iabcd, ixyzn);
            r[n] = inTexture.read(uint2(ixyzn[0], ixyzn[1]), ixyzn[2])[ixyzn[3]];
        } else {
            r[n] = 0;
        }
    }
    outTexture.write(r, gid.xy, gid.z);
}
//
//kernel void reshape_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
// texture2d_array<half, access::write> outTexture [[texture(1)]],
// uint3 gid [[thread_position_in_grid]]) {
// if (gid.x >= outTexture.get_width() ||
// gid.y >= outTexture.get_height() ||
// gid.z >= outTexture.get_array_size()) return;
//
// half4 r = inTexture.read(uint2(0, 0), gid.x);
// outTexture.write(r, gid.xy, gid.z);
//}
......@@ -14,8 +14,26 @@
import Foundation
/// Parameter block for the `reshape` Metal kernel.
///
/// `ReshapeKernel` copies the bytes of this struct to buffer index 0 with
/// `setBytes`. The kernel reads them as `ReshapeParam` (four `int32_t[4]`
/// arrays in the same order), so the field order and the `Int32` element type
/// must stay in sync with the shader.
struct ReshapeMetalParam {
/// Input tensor dimensions, four entries.
var idim: (Int32, Int32, Int32, Int32)
/// Axis permutation of the input texture.
var itrans: (Int32, Int32, Int32, Int32)
/// Output tensor dimensions, four entries.
var odim: (Int32, Int32, Int32, Int32)
/// Axis permutation of the output texture.
var otrans: (Int32, Int32, Int32, Int32)
}
/// Bundle of everything `ReshapeKernel.test(commandBuffer:testParam:)` needs to
/// run the reshape kernel directly on raw textures.
struct ReshapeTestParam: TestParam {
/// Texture bound at texture index 0 (the kernel's input).
let inputTexture: MTLTexture
/// Texture bound at texture index 1 (the kernel's output); also used to size the dispatch.
let outputTexture: MTLTexture
/// Dimension and permutation values copied to the kernel at buffer index 0.
let param: ReshapeMetalParam
}
class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
/// Creates the kernel for regular op execution.
///
/// The output texture of `param` is initialised on `device` before the
/// "reshape" function is handed to the base `Kernel` initialiser.
required init(device: MTLDevice, param: ReshapeParam<P>) {
    let functionName = "reshape"
    param.output.initTexture(device: device)
    super.init(device: device, inFunctionName: functionName)
}
/// Creates the kernel for the unit-test path.
///
/// `testParam` is not read here; the textures it carries are supplied by the
/// caller and bound later in `test(commandBuffer:testParam:)`.
required init(device: MTLDevice, testParam: ReshapeTestParam) {
    let functionName = "reshape"
    super.init(device: device, inFunctionName: functionName)
}
......@@ -25,7 +43,30 @@ class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
let id: [Int32] = (0..<4).map { Int32(param.input.dim[$0]) }
let it: [Int32] = param.input.transpose.map { Int32($0) }
let od: [Int32] = (0..<4).map { Int32(param.output.dim[$0]) }
let ot: [Int32] = param.output.transpose.map { Int32($0) }
var rmp = ReshapeMetalParam.init(
idim: (id[0], id[1], id[2], id[3]),
itrans: (it[0], it[1], it[2], it[3]),
odim: (od[0], od[1], od[2], od[3]),
otrans: (ot[0], ot[1], ot[2], ot[3])
)
encoder.setBytes(&rmp, length: MemoryLayout<ReshapeMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
/// Encodes one reshape pass that works directly on the textures in `testParam`.
///
/// - Parameters:
///   - commandBuffer: Buffer the compute pass is encoded into; the caller commits it.
///   - testParam: Input/output textures plus the dimension and permutation values.
///
/// Traps via `fatalError()` when no compute command encoder can be created.
func test(commandBuffer: MTLCommandBuffer, testParam: ReshapeTestParam) {
    guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
        fatalError()
    }

    // setBytes takes its argument inout, so a mutable copy is required.
    var metalParam: ReshapeMetalParam = testParam.param
    let paramLength = MemoryLayout<ReshapeMetalParam>.size

    computeEncoder.setTexture(testParam.inputTexture, index: 0)
    computeEncoder.setTexture(testParam.outputTexture, index: 1)
    computeEncoder.setBytes(&metalParam, length: paramLength, index: 0)
    computeEncoder.dispatch(computePipline: pipline, outTexture: testParam.outputTexture)
    computeEncoder.endEncoding()
}
}
......@@ -53,7 +53,11 @@ class TransposeKernel<P: PrecisionType>: Kernel, Computable, Testable {
var tmp = TransposeMetalParam.init(realAxis)
tmp.iC = Int32(param.input.dim[param.input.transpose[3]])
tmp.oC = Int32(param.output.dim[3])
if realAxis == [0, 1, 2, 3] {
print("====> transpose! FAST :)")
} else {
print("====> transpose! SLOW :(")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&tmp, length: MemoryLayout<TransposeMetalParam>.size, index: 0)
......
......@@ -20,11 +20,15 @@ class ReshapeParam<P: PrecisionType>: OpParam {
do {
input = try ReshapeParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try ReshapeParam.outputOut(outputs: opDesc.outputs, from: inScope)
// shape = output.dim
inplace = try ReshapeParam.getAttr(key: "inplace", attrs: opDesc.attrs)
} catch let error {
throw error
}
}
let input: Texture<P>
// let shape: [Int]
let inplace: Bool
var output: Texture<P>
}
......
......@@ -45,7 +45,7 @@ public class Texture<P: PrecisionType>: Tensorial {
var metalTexture: MTLTexture!
var transpose: [Int] = [0, 1, 2, 3]
func initTexture(device: MTLDevice, transpose: [Int]) {
func initTexture(device: MTLDevice, transpose: [Int] = [0, 1, 2, 3]) {
let newDim = transpose.map { originDim[$0] }
let newLayout = transpose.map {layout.layoutWithDim[$0] }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册