diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BoxcoderKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BoxcoderKernel.swift index 26d9c2cdf64ced43ec459b4a2de8304ff37dc222..722ab6b64c953c1fef28082f75794d9e581251ef 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BoxcoderKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BoxcoderKernel.swift @@ -34,6 +34,6 @@ class BoxcoderKernel: Kernel, Computable{ required init(device: MTLDevice, param: BoxcoderParam

) { param.output.initTexture(device: device) - super.init(device: device, inFunctionName: "priorbox") + super.init(device: device, inFunctionName: "boxcoder") } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift index 721f231dbb5522aaa496481621d5966391825d83..de77121c1a92b8ec13dade910ab0c37d74048d13 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift @@ -16,124 +16,126 @@ import Foundation import MetalPerformanceShaders struct ConvBNReluTestParam: TestParam { - let inputTexture: MTLTexture - let outputTexture: MTLTexture - var metalParam: MetalConvParam - let filterBuffer: MTLBuffer - let biaseBuffer: MTLBuffer - let newScaleBuffer: MTLBuffer - let newBiaseBuffer: MTLBuffer - let filterSize: (width: Int, height: Int, channel: Int) - init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) { - - inputTexture = inInputTexture - outputTexture = inOutputTexture - metalParam = inMetalParam - filterBuffer = inFilterBuffer - biaseBuffer = inBiaseBuffer - newScaleBuffer = inNewScaleBuffer - newBiaseBuffer = inNewBiaseBuffer - filterSize = inFilterSize - } + let inputTexture: MTLTexture + let outputTexture: MTLTexture + var metalParam: MetalConvParam + let filterBuffer: MTLBuffer + let biaseBuffer: MTLBuffer + let newScaleBuffer: MTLBuffer + let newBiaseBuffer: MTLBuffer + let filterSize: (width: Int, height: Int, channel: Int) + init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) { + + inputTexture = inInputTexture + outputTexture = inOutputTexture + metalParam = inMetalParam + filterBuffer = inFilterBuffer + biaseBuffer = inBiaseBuffer + newScaleBuffer = inNewScaleBuffer + newBiaseBuffer = inNewBiaseBuffer + filterSize = inFilterSize + } } class ConvBNReluKernel: Kernel, Computable, Testable { - required init(device: MTLDevice, testParam: ConvBNReluTestParam) { - if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 { - super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") - } else if testParam.filterSize.channel == 1 { - super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3") - } else { - super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") - } + required init(device: MTLDevice, testParam: ConvBNReluTestParam) { + if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 { + super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1") + } else if testParam.filterSize.channel == 1 { + super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3") + } else { + super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3") + } + } + + var metalParam: MetalConvParam! + + required init(device: MTLDevice, param: ConvBNReluParam

) { + + if param.filter.width == 1 && param.filter.height == 1 { + super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1") + } else if param.filter.channel == 1 { + super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3") + } else { + super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3") + } + param.output.initTexture(device: device, transpose: [0, 2, 3, 1]) + param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) + + param.variance.initBuffer(device: device) + param.mean.initBuffer(device: device) + param.scale.initBuffer(device: device) + param.bias.initBuffer(device: device) + + let offsetX = param.filter.width/2 - Int(param.paddings[0]) + let offsetY = param.filter.height/2 - Int(param.paddings[1]) + + print("offset x: \(offsetX)") + print("offset y: \(offsetY)") + + let offsetZ = 0.0 + + print(" fuck ") + metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) + + var invs: [P] = [] + let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self) + + for i in 0...stride { + let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5) + invs.append(P(inv)) } - var metalParam: MetalConvParam! - - required init(device: MTLDevice, param: ConvBNReluParam

) { - - if param.filter.width == 1 && param.filter.height == 1 { - super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") - } else if param.filter.channel == 1 { - super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3") - } else { - super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") - } - param.output.initTexture(device: device, transpose: [0, 2, 3, 1]) - param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) - - param.variance.initBuffer(device: device) - param.mean.initBuffer(device: device) - param.scale.initBuffer(device: device) - param.bias.initBuffer(device: device) - - let offsetX = param.filter.width/2 - Int(param.paddings[0]) - let offsetY = param.filter.height/2 - Int(param.paddings[1]) - - print("offset x: \(offsetX)") - print("offset y: \(offsetY)") - - let offsetZ = 0.0 - metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) - - var invs: [P] = [] - let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self) - - for i in 0...stride { - let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5) - invs.append(P(inv)) - } - - let newScale: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.scale.buffer.length) - let newBiase: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.bias.buffer.length) - - let scaleContents = param.scale.buffer.contents().assumingMemoryBound(to: P.self) - let biaseContents = param.bias.buffer.contents().assumingMemoryBound(to: P.self) - let meanContents = param.mean.buffer.contents().assumingMemoryBound(to: P.self) - for i in 0...stride { - newScale[i] = invs[i] * scaleContents[i] - newBiase[i] = biaseContents[i] - meanContents[i] * invs[i] * scaleContents[i] - } - - param.newBiase = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length) - param.newScale = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length) - - newScale.deinitialize(count: param.scale.buffer.length) - newScale.deallocate() - - newBiase.deinitialize(count: param.bias.buffer.length) - newBiase.deallocate() + let newScale: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.scale.buffer.length) + let newBiase: UnsafeMutablePointer

= UnsafeMutablePointer

.allocate(capacity: param.bias.buffer.length) + + let scaleContents = param.scale.buffer.contents().assumingMemoryBound(to: P.self) + let biaseContents = param.bias.buffer.contents().assumingMemoryBound(to: P.self) + let meanContents = param.mean.buffer.contents().assumingMemoryBound(to: P.self) + for i in 0...stride { + newScale[i] = invs[i] * scaleContents[i] + newBiase[i] = biaseContents[i] - meanContents[i] * invs[i] * scaleContents[i] } - func compute(commandBuffer: MTLCommandBuffer, param: ConvBNReluParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) - encoder.setBuffer(param.newScale!, offset: 0, index: 3) - encoder.setBuffer(param.newBiase!, offset: 0, index: 4) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() + param.newBiase = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length) + param.newScale = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length) + + newScale.deinitialize(count: param.scale.buffer.length) + newScale.deallocate() + + newBiase.deinitialize(count: param.bias.buffer.length) + newBiase.deallocate() + } + + func compute(commandBuffer: MTLCommandBuffer, param: ConvBNReluParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") } - public func test(commandBuffer: MTLCommandBuffer, param: ConvBNReluTestParam) { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - fatalError() - } - - encoder.setTexture(param.inputTexture, index: 0) - encoder.setTexture(param.outputTexture, index: 1) - var inMetalParam = param.metalParam - encoder.setBytes(&inMetalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filterBuffer, offset: 0, index: 1) - encoder.setBuffer(param.biaseBuffer, offset: 0, index: 2) - encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 3) - encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 4) - encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture) - encoder.endEncoding() + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) + encoder.setBuffer(param.newScale!, offset: 0, index: 3) + encoder.setBuffer(param.newBiase!, offset: 0, index: 4) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } + + public func test(commandBuffer: MTLCommandBuffer, param: ConvBNReluTestParam) { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + fatalError() } + + encoder.setTexture(param.inputTexture, index: 0) + encoder.setTexture(param.outputTexture, index: 1) + var inMetalParam = param.metalParam + encoder.setBytes(&inMetalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filterBuffer, offset: 0, index: 1) + encoder.setBuffer(param.biaseBuffer, offset: 0, index: 2) + encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 3) + encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 4) + encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture) + encoder.endEncoding() + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/MulticlassNMSKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/MulticlassNMSKernel.swift index 7db6bcca8b013bc8396b879e52ec68a3f2abf20b..91517143819eff9d3564f3315fbf08310d03b081 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/MulticlassNMSKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/MulticlassNMSKernel.swift @@ -26,6 +26,6 @@ class MulticlassNMSKernel: Kernel, Computable{ } required init(device: MTLDevice, param: MulticlassNMSParam

) { - super.init(device: device, inFunctionName: "priorbox") + super.init(device: device, inFunctionName: "prior_box") } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift index 6228741ef7c0694dae1c8abf7bdbfb1b7f7b8343..029fdf0a45db067d874757bad38d91f1b6c70d59 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift @@ -33,7 +33,7 @@ class PriorBoxKernel: Kernel, Computable{ var metalParam: PriorBoxMetalParam! required init(device: MTLDevice, param: PriorBoxParam

) { - super.init(device: device, inFunctionName: "priorbox") + super.init(device: device, inFunctionName: "prior_box") param.output.initTexture(device: device, transpose: [2, 0, 1, 3]) param.outputVariances.initTexture(device: device, transpose: [2, 0, 1, 3]) diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Texture2DTo2DArrayKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Texture2DTo2DArrayKernel.swift index b524c3ac80fac6fa98ac6c9d4e680fee1af4e46a..f82516459ea61bd61b35b953a61a4f75d1bd9629 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Texture2DTo2DArrayKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Texture2DTo2DArrayKernel.swift @@ -15,23 +15,24 @@ import Foundation struct Texture2DTo2DArrayParam { - let input: MTLTexture - let output: MTLTexture - let expectDim: Dim + let input: MTLTexture + let output: MTLTexture + let expectDim: Dim } class Texture2DTo2DArrayKernel: Kernel, Computable{ - func compute(commandBuffer: MTLCommandBuffer, param: FeedParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - encoder.setTexture(param.input.mtlTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.input.mtlTexture) - encoder.endEncoding() - } - - required init(device: MTLDevice, param: FeedParam

) { - super.init(device: device, inFunctionName: "texture2d_to_2d_array") + func compute(commandBuffer: MTLCommandBuffer, param: FeedParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") } + encoder.setTexture(param.input.mtlTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.input.mtlTexture) + encoder.endEncoding() + } + + required init(device: MTLDevice, param: FeedParam

) { + param.output.initTexture(device: device, transpose: [0, 2, 3, 1]) + super.init(device: device, inFunctionName: "texture2d_to_2d_array") + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal index 273c21c1a27862f6dbdddd0fb45bcb2a6cb1488a..5a059d89bcc98d9be658d8f3346f31ad747b68d9 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal @@ -699,3 +699,144 @@ kernel void depthwise_conv_add_3x3(texture2d_array inText outTexture.write(output, gid.xy, gid.z); } +#pragma mark - conv bn relu +kernel void conv_batch_norm_relu_1x1(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float4 *weights [[buffer(1)]], + const device float4 *biase [[buffer(2)]], + const device float4 *new_scale [[buffer(3)]], + const device float4 *new_biase [[buffer(4)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 1; + + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + float4 input; + for (uint i = 0; i < input_arr_size; ++i) { + input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; + output.x += dot(input, weight_x); + + float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; + output.y += dot(input, weight_y); + + float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; + output.z += dot(input, weight_z); + + float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; + output.w += dot(input, weight_w); + } + output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0); + outTexture.write(output, gid.xy, gid.z); +} + +kernel void conv_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float4 *weights [[buffer(1)]], + const device float4 *biase [[buffer(2)]], + const device float4 *new_scale [[buffer(3)]], + const device float4 *new_biase [[buffer(4)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + float4 input[9]; + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); + input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); + input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); + input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); + input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); + input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); + for (int j = 0; j < 9; ++j) { + float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } + output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0); + outTexture.write(output, gid.xy, gid.z); +} + +kernel void depthwise_conv_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device float *weights [[buffer(1)]], + const device float4 *new_scale [[buffer(3)]], + const device float4 *new_biase [[buffer(4)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + uint output_slice = gid.z; + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint weithTo = gid.z * kernelHXW * 4; + float4 output = float4(0.0); + float4 inputs[9]; + inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); + inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); + inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); + inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); + inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); + inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); + inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); + inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); + inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); + for (int j = 0; j < 9; ++j) { + float4 input = inputs[j]; + output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; + output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; + output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; + output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; + } + output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0); + outTexture.write(output, gid.xy, gid.z); +} + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift index 37215dba591ed84c8df1036fdaee94828f0b5534..7b6817ee06e04947ce53f3dbdbc33ca6e54dedd0 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift @@ -27,7 +27,7 @@ class PriorBoxParam: OpParam { aspectRatios = try PriorBoxParam.getAttr(key: "aspect_ratios", attrs: opDesc.attrs) variances = try PriorBoxParam.getAttr(key: "variances", attrs: opDesc.attrs) flip = try PriorBoxParam.getAttr(key: "flip", attrs: opDesc.attrs) - clip = try PriorBoxParam.getAttr(key: "clop", attrs: opDesc.attrs) + clip = try PriorBoxParam.getAttr(key: "clip", attrs: opDesc.attrs) stepW = try PriorBoxParam.getAttr(key: "step_w", attrs: opDesc.attrs) stepH = try PriorBoxParam.getAttr(key: "step_h", attrs: opDesc.attrs) offset = try PriorBoxParam.getAttr(key: "offset", attrs: opDesc.attrs)