diff --git a/metal/paddle-mobile/paddle-mobile/Executor.swift b/metal/paddle-mobile/paddle-mobile/Executor.swift index c3520723ff02d13d364fe71328f3a6853e0dad33..ec21b5148734f25017ca5580a3d5be09b0a450f7 100644 --- a/metal/paddle-mobile/paddle-mobile/Executor.swift +++ b/metal/paddle-mobile/paddle-mobile/Executor.swift @@ -55,6 +55,7 @@ public class Executor { device = inDevice queue = inQueue for block in inProgram.programDesc.blocks { + //block.ops.count for i in 0.. { throw error } } + // for op in block.ops { // do { // let op = try OpCreator

.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope) @@ -94,16 +96,14 @@ public class Executor { } buffer.addCompletedHandler { (commandbuffer) in - for op in self.ops { op.delogOutput() } + let afterDate = Date.init() print(" encoder end ! time: \(afterDate.timeIntervalSince(beforeDate))") - } - buffer.commit() guard let outputVar = program.scope.output() else { diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift index b4d471c68efc2ec79dfbeeb967d60c8fb38e3430..5c78b03f9fc5dafadc60d25975d952271ba3848a 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift @@ -107,17 +107,16 @@ class ConvAddBatchNormReluOp: Operator: Kernel, Computable { if param.filter.width == 1 && param.filter.height == 1 { super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") } else if param.filter.channel == 1 { - super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_1x1") + super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3") } else { super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") } @@ -75,7 +75,7 @@ class ConvAddBatchNormReluKernel: Kernel, Computable { encoder.setTexture(param.output.metalTexture, index: 1) encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) - encoder.setBuffer(param.bias.buffer, offset: 0, index: 2) + encoder.setBuffer(param.y.buffer, offset: 0, index: 2) encoder.setBuffer(param.newScale!, offset: 0, index: 3) encoder.setBuffer(param.newBiase!, offset: 0, index: 4) encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift index 950abd47f3f98c3f1404c25bd0a572043086df5e..57726c5fb07d4644cce9c37ba6e6eafa81e661b4 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift @@ -15,11 +15,31 @@ import Foundation class ConvAddKernel: Kernel, Computable { + var metalParam: MetalConvParam! required init(device: MTLDevice, param: ConvAddParam

) { super.init(device: device, inFunctionName: "conv_add_1x1") + let offsetX = param.filter.width/2 - Int(param.paddings[0]) + let offsetY = param.filter.height/2 - Int(param.paddings[1]) + print("offset x: \(offsetX)") + print("offset y: \(offsetY)") + + let offsetZ = 0.0 + metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) } func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + print("Conv Add compute") + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) + encoder.setBuffer(param.y.buffer, offset: 0, index: 2) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal index 7286e11d9a618ef6943b5d8462dc3a3e07072e1f..660235eb1431b716c499ca729cf4300cbe228309 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal @@ -66,7 +66,6 @@ struct MetalConvParam { // //} - kernel void conv_add_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], texture2d_array outTexture [[texture(1)]], constant MetalConvParam ¶m [[buffer(0)]], @@ -120,8 +119,6 @@ kernel void conv_add_batch_norm_relu_3x3(texture2d_array outTexture.write(output, gid.xy, gid.z); } - - kernel void conv_add_batch_norm_relu_1x1(texture2d_array inTexture [[texture(0)]], texture2d_array outTexture [[texture(1)]], constant MetalConvParam ¶m [[buffer(0)]], @@ -165,14 +162,11 @@ kernel void conv_add_batch_norm_relu_1x1(texture2d_array outTexture.write(output, gid.xy, gid.z); } - kernel void conv_add_1x1(texture2d_array inTexture [[texture(0)]], texture2d_array outTexture [[texture(1)]], constant MetalConvParam ¶m [[buffer(0)]], const device float4 *weights [[buffer(1)]], const device float4 *biase [[buffer(2)]], - const device float4 *new_scale [[buffer(3)]], - const device float4 *new_biase [[buffer(4)]], uint3 gid [[thread_position_in_grid]]) { if (gid.x >= outTexture.get_width() || @@ -210,10 +204,10 @@ kernel void conv_add_1x1(texture2d_array inTexture [[text } -kernel void depthwise_conv_add_batch_norm_relu_1x1(texture2d_array inTexture [[texture(0)]], +kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], texture2d_array outTexture [[texture(1)]], constant MetalConvParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], + const device float *weights [[buffer(1)]], const device float4 *biase [[buffer(2)]], const device float4 *new_scale [[buffer(3)]], const device float4 *new_biase [[buffer(4)]], @@ -225,11 +219,10 @@ kernel void depthwise_conv_add_batch_norm_relu_1x1(texture2d_array: Tensorial { let cSlices = (C + 3) / 4 let paddedC = cSlices * 4 let count = paddedC * dim[0] * dim[1] * dim[2] - buffer = device.makeBuffer(length: count * MemoryLayout

.stride) if C == paddedC { + buffer = device.makeBuffer(length: count * MemoryLayout

.stride) buffer?.contents().copyMemory(from: data.pointer, byteCount: count * MemoryLayout

.stride) } else if C == 1 { - buffer?.contents().copyMemory(from: data.pointer, byteCount: count * MemoryLayout

.stride) + buffer = device.makeBuffer(length: numel() * MemoryLayout

.stride) + buffer?.contents().copyMemory(from: data.pointer, byteCount: numel() * MemoryLayout

.stride) } else { + buffer = device.makeBuffer(length: count * MemoryLayout

.stride) var tmpPointer = data.pointer var dstPtr = buffer?.contents().bindMemory(to: P.self, capacity: count) for _ in 0..: Tensorial { } else { fatalError(" not support !") } - data.release() + //TODO: release +// data.release() } var width: Int {