diff --git a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift index afbdccce5d54eff69d07ce7546679cf3781a27d2..01c9c6c1fc277be1ed5fa6ace6774fc7f03f2de9 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift @@ -145,7 +145,7 @@ extension MTLDevice { if value.count > 0 { var rcount: Int = (ndim[0] * ndim[3] + 3) / 4 rcount = rcount * 4 * ndim[1] * ndim[2] - var nvalue: [P] = .init(repeating: Float32(0.0) as! P, count: rcount) + var nvalue: [Float32] = .init(repeating: 0.0, count: rcount) for i0 in 0.. = UnsafeMutablePointer(mutating: nvalue) let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: ndim[2], height: ndim[1], depth: 1)) - let bpR = ndim[2] * 4 * MemoryLayout

.size - let bpI = ndim[1] * bpR - for i in 0.. = UnsafeMutablePointer(mutating: nvalue) + let outputP: UnsafeMutablePointer = UnsafeMutablePointer(mutating: xvalue) + float32ToFloat16(input: pointer, output: outputP, count: rcount) + let bpR = ndim[2] * 4 * 2 + let bpI = ndim[1] * bpR + for i in 0.. = UnsafeMutablePointer(mutating: nvalue) + let bpR = ndim[2] * 4 * MemoryLayout

.size + let bpI = ndim[1] * bpR + for i in 0..: OpParam { let device = inputX.metalTexture!.device inputY = Texture.init(device: device, inDim: tensorY.dim) let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel())) - inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims) + inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: computePrecision) } +// required init(device: MTLDevice, param: ElementwiseAddParam

) { +// param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision) +// if computePrecision == .Float32 { +// super.init(device: device, inFunctionName: "elementwise_add") +// } else if computePrecision == .Float16 { +// super.init(device: device, inFunctionName: "elementwise_add_half") +// } else { +// fatalError() +// } +// } + var offset = axis if axis == -1 { offset = inputX.tensorDim.cout() - inputY.tensorDim.cout() @@ -65,14 +76,8 @@ class ElementwiseAddOp: Operator, Elem } func delogOutput() { -// print(" \(type) inputX: ") -// print(para.inputX.metalTexture.toTensor(dim: (n: para.inputX.tensorDim[0], c: para.inputX.tensorDim[1], h: para.inputX.tensorDim[2], w: para.inputX.tensorDim[3])).strideArray()) -// print(" \(type) inputY: ") -// print(para.inputY.metalTexture.toTensor(dim: (n: para.inputY.tensorDim[0], c: para.inputY.tensorDim[1], h: para.inputY.tensorDim[2], w: para.inputY.tensorDim[3])).strideArray()) - print(" \(type) output: ") - - print(para.inputY) + print(para.output) let padToFourDim = para.output.padToFourDim if para.output.transpose == [0, 1, 2, 3] { diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift index a4c88016b3a442d8d6937214755db9e33e9cb28f..d67876d307ce30a6c31f5d1e09eb2f960da1e0b5 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift @@ -15,10 +15,9 @@ import Foundation struct ElementwiseAddMetalParam { - var unsafe_one_dim: Int32 = 0 var fast: Int32 = 0 var axis: Int32 = 0 - var yoff: Int32 = 0 + var ylen: Int32 = 0 var xdim: (Int32, Int32, Int32, Int32) = (0, 0, 0, 0) var xtrans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3) var ydim: (Int32, Int32, Int32, Int32) = (0, 0, 0, 0) @@ -60,17 +59,12 @@ class ElementwiseAddKernel: Kernel, Computable { } else { emp.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis) } - emp.yoff = 4 - Int32(param.inputY.tensorDim.cout()) + emp.ylen = Int32(param.inputY.tensorDim.cout()) if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) { // print("===> elementwise_add fast!!!") emp.fast = 1 } - // TODO: - if param.inputY.tensorDim.cout() == 1 { - emp.unsafe_one_dim = 1; - } - encoder.setBytes(&emp, length: MemoryLayout.size, index: 0) encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) encoder.endEncoding() diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift index 3916c07ce5e8d4f3179a8a3100563a77e68eb53b..6967b5ed72ce18f500305dfe1fdc0e3d3f6ecfd4 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift @@ -50,9 +50,15 @@ class ReshapeKernel: Kernel, Computable{ encoder.setTexture(param.input.metalTexture, index: 0) encoder.setTexture(param.output.metalTexture, index: 1) - let id: [Int32] = (0..<4).map { Int32(param.input.dim[$0]) } + var id: [Int32] = [1, 1, 1, 1] + for i in 0.. inputX [[textur gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size()) return; float4 rx, ry; - if (pm.unsafe_one_dim == 1) { - rx = inputX.read(gid.xy, gid.z); - ry = inputY.read(uint2(0, 0), gid.z); - } else if (pm.fast == 1) { + + if (pm.fast == 1) { rx = inputX.read(gid.xy, gid.z); ry = inputY.read(gid.xy, gid.z); } else { rx = inputX.read(gid.xy, gid.z); int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; - int32_t y_abcd[4] = {1, 1, 1, 1}, y_xyzn[4]; + int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]}; int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]}; + int32_t yshift = 4 - pm.ylen - pm.axis; for (int n = 0; n < 4; n++) { + x_xyzn[3] = n; xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd); invtrans(xtrans, x_abcd, t_abcd); - for (int k = pm.axis; k < (4 - pm.yoff); k++) { - y_abcd[k+pm.yoff] = t_abcd[k]; + for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) { + y_abcd[yshift+k] = t_abcd[k]; } trans(ytrans, y_abcd, t_abcd); abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn); @@ -73,23 +72,22 @@ kernel void elementwise_add_half(texture2d_array inputX [[te gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size()) return; half4 rx, ry; - if (pm.unsafe_one_dim == 1) { - rx = inputX.read(gid.xy, gid.z); - ry = inputY.read(uint2(0, 0), gid.z); - } else if (pm.fast == 1) { + + if (pm.fast == 1) { rx = inputX.read(gid.xy, gid.z); ry = inputY.read(gid.xy, gid.z); } else { rx = inputX.read(gid.xy, gid.z); int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; - int32_t y_abcd[4] = {1, 1, 1, 1}, y_xyzn[4]; + int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4]; int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]}; int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]}; + int32_t yshift = 4 - pm.ylen - pm.axis; for (int n = 0; n < 4; n++) { xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd); invtrans(xtrans, x_abcd, t_abcd); - for (int k = pm.axis; k < (4 - pm.yoff); k++) { - y_abcd[k+pm.yoff] = t_abcd[k]; + for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) { + y_abcd[yshift+k] = t_abcd[k]; } trans(ytrans, y_abcd, t_abcd); abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn); diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift index b37026d6a81c6eed4493953a84afc05d4a6b980f..3fd9ebfb883d43c51b5ede4f4c6d91b8a59cbeda 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift @@ -73,10 +73,13 @@ class ReshapeOp: Operator, ReshapeParam

>, func delogOutput() { print("reshape delog") // let _: P? = para.input.metalTexture.logDesc(header: "reshape input: ", stridable: false) - +// +// let _: P? = para.output.metalTexture.logDesc(header: "reshape output: ", stridable: false) let padToFourDim = para.output.padToFourDim let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) +// print(para.output.metalTexture.toTensor(dim: (n: padToFourDim[0], c: padToFourDim[1], h: padToFourDim[2], w: padToFourDim[3])).strideArray()) + print(outputArray.strideArray()) }