diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift index 426e91cfbe26385a7b30931d155513633d2bc988..8947e275597d68237e00cc188586d60dd6866119 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift @@ -38,13 +38,27 @@ class PriorBoxKernel: Kernel, Computable{ param.outputVariances.initTexture(device: device, inTranspose: [2, 0, 1, 3], computePrecision: computePrecision) if computePrecision == .Float32 { - super.init(device: device, inFunctionName: "prior_box") + if param.min_max_aspect_ratios_order { + super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder") + } else { + super.init(device: device, inFunctionName: "prior_box") + } + } else if computePrecision == .Float16 { - super.init(device: device, inFunctionName: "prior_box_half") + if param.min_max_aspect_ratios_order { + super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder_half") + } else { + super.init(device: device, inFunctionName: "prior_box_half") + } } else { fatalError() } + + guard param.minSizes.count == 1 else { + fatalError(" need implement ") + } + let n = 1 let h = param.output.dim[1] let w = param.output.dim[2] diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal index 794f0ea6770688f0468d9ab4f9716adf3e93dd0c..7630febf77210bb364f0191e8b10a5a6923d6c95 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal @@ -161,3 +161,207 @@ kernel void prior_box_half(texture2d_array inTexture [[textu } } + + +kernel void prior_box_MinMaxAspectRatiosOrder(texture2d_array inTexture [[texture(0)]], + texture2d_array outBoxTexture [[texture(1)]], + texture2d_array varianceTexture [[texture(2)]], + const device float *aspect_ratios [[buffer(0)]], + constant PriorBoxMetalParam ¶m [[buffer(1)]], + const device float4 *variances [[buffer(2)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outBoxTexture.get_width() || + gid.y >= outBoxTexture.get_height() || + gid.z >= outBoxTexture.get_array_size()) return; + + float center_x = (gid.x + param.offset) * param.stepWidth; + float center_y = (gid.y + param.offset) * param.stepHeight; + + float box_width, box_height; + + + + if (gid.z == 0) { + box_width = box_height = param.minSize / 2; + + float4 box; + box.x = (center_x - box_width) / param.imageWidth; + box.y = (center_y - box_height) / param.imageHeight; + box.z = (center_x + box_width) / param.imageWidth; + box.w = (center_y + box_height) / param.imageHeight; + + float4 res; + if (param.clip) { + res = fmin(fmax(box, 0.0), 1.0); + } else { + res = box; + } + + outBoxTexture.write(res, gid.xy, gid.z); + } + + if (gid.z == 1 && param.maxSizeSize > 0) { + + box_width = box_height = sqrt(param.minSize * param.maxSize) / 2; + float4 max_box; + max_box.x = (center_x - box_width) / param.imageWidth; + max_box.y = (center_y - box_height) / param.imageHeight; + max_box.z = (center_x + box_width) / param.imageWidth; + max_box.w = (center_y + box_height) / param.imageHeight; + + float4 res; + if (param.clip) { + res = min(max(max_box, 0.0), 1.0); + } else { + res = max_box; + } + outBoxTexture.write(res, gid.xy, gid.z); + } + + int aspect_to = 0; + if (param.maxSizeSize > 0) { + aspect_to = gid.z - 2; + } else { + aspect_to = gid.z - 1; + } + + + + + if (aspect_to >= 0 && aspect_to < int(param.aspecRatiosSize)) { + + int skip = 0; + for (int i = 0; i < aspect_to + 1; ++i) { + if (fabs(aspect_ratios[i] - 1.) < 1e-6) { + skip += 1; + } + } + aspect_to += skip; + + float ar = aspect_ratios[aspect_to]; + + box_width = param.minSize * sqrt(ar) / 2; + box_height = param.minSize / sqrt(ar) / 2; + float4 box; + box.x = (center_x - box_width) / param.imageWidth; + box.y = (center_y - box_height) / param.imageHeight; + box.z = (center_x + box_width) / param.imageWidth; + box.w = (center_y + box_height) / param.imageHeight; + + float4 res; + if (param.clip) { + res = fmin(fmax(box, 0.0), 1.0); + } else { + res = box; + } + + outBoxTexture.write(res, gid.xy, gid.z); + } + + float4 variance = variances[0]; + if (gid.z < param.numPriors) { + float4 variances_output; + variances_output.x = variance.x; + variances_output.y = variance.y; + variances_output.z = variance.z; + variances_output.w = variance.w; + varianceTexture.write(variances_output, gid.xy, gid.z); + } +} + + +kernel void prior_box_MinMaxAspectRatiosOrder_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outBoxTexture [[texture(1)]], + texture2d_array varianceTexture [[texture(2)]], + const device half *aspect_ratios [[buffer(0)]], + constant PriorBoxMetalParam ¶m [[buffer(1)]], + const device float4 *variances [[buffer(2)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outBoxTexture.get_width() || + gid.y >= outBoxTexture.get_height() || + gid.z >= outBoxTexture.get_array_size()) return; + + float center_x = (gid.x + param.offset) * param.stepWidth; + float center_y = (gid.y + param.offset) * param.stepHeight; + + float box_width, box_height; + + + + if (gid.z == 0) { + box_width = box_height = param.minSize / 2; + + float4 box; + box.x = (center_x - box_width) / param.imageWidth; + box.y = (center_y - box_height) / param.imageHeight; + box.z = (center_x + box_width) / param.imageWidth; + box.w = (center_y + box_height) / param.imageHeight; + + float4 res; + if (param.clip) { + res = fmin(fmax(box, 0.0), 1.0); + } else { + res = box; + } + + outBoxTexture.write(half4(res), gid.xy, gid.z); + } + + if (gid.z == 1 && param.maxSizeSize > 0) { + + box_width = box_height = sqrt(param.minSize * param.maxSize) / 2; + float4 max_box; + max_box.x = (center_x - box_width) / param.imageWidth; + max_box.y = (center_y - box_height) / param.imageHeight; + max_box.z = (center_x + box_width) / param.imageWidth; + max_box.w = (center_y + box_height) / param.imageHeight; + + float4 res; + if (param.clip) { + res = min(max(max_box, 0.0), 1.0); + } else { + res = max_box; + } + outBoxTexture.write(half4(res), gid.xy, gid.z); + } + + int aspect_to = 0; + if (param.maxSizeSize > 0) { + aspect_to = gid.z - 2; + } else { + aspect_to = gid.z - 1; + } + + if (aspect_to > 0 && aspect_to < int(param.aspecRatiosSize) && fabs(aspect_ratios[aspect_to] - 1.) > 1e-6) { + float ar = aspect_ratios[aspect_to]; + + box_width = param.minSize * sqrt(ar) / 2; + box_height = param.minSize / sqrt(ar) / 2; + float4 box; + box.x = (center_x - box_width) / param.imageWidth; + box.y = (center_y - box_height) / param.imageHeight; + box.z = (center_x + box_width) / param.imageWidth; + box.w = (center_y + box_height) / param.imageHeight; + + float4 res; + if (param.clip) { + res = fmin(fmax(box, 0.0), 1.0); + } else { + res = box; + } + + outBoxTexture.write(half4(res), gid.xy, gid.z); + } + + float4 variance = variances[0]; + if (gid.z < param.numPriors) { + float4 variances_output; + variances_output.x = variance.x; + variances_output.y = variance.y; + variances_output.z = variance.z; + variances_output.w = variance.w; + varianceTexture.write(half4(variances_output), gid.xy, gid.z); + } +} + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift index 72665c471ff0d1b1a2b7966f45afbf847438138b..c681eadcd19ab4f091d1fbb5e2e315492e0b8a49 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift @@ -17,6 +17,11 @@ import Foundation class PriorBoxParam: OpParam { typealias ParamPrecisionType = P required init(opDesc: OpDesc, inScope: Scope) throws { + do { + min_max_aspect_ratios_order = try PriorBoxParam.getAttr(key: "min_max_aspect_ratios_order", attrs: opDesc.attrs) + } catch _ { + } + do { input = try PriorBoxParam.input(inputs: opDesc.inputs, from: inScope) output = try PriorBoxParam.outputBoxes(outputs: opDesc.outputs, from: inScope) @@ -36,6 +41,7 @@ class PriorBoxParam: OpParam { } } + var min_max_aspect_ratios_order: Bool = false let minSizes: [Float32] let maxSizes: [Float32] let aspectRatios: [Float32] @@ -74,17 +80,20 @@ class PriorBoxOp: Operator, PriorBoxParam

// output // let outputArray = para.output.metalTexture.float32Array() // print(outputArray.strideArray()) - let device = para.input.metalTexture!.device - let boxes:[Float32] = device.texture2tensor(texture: para.output.metalTexture!, dim: para.output.tensorDim.dims, transpose: [2,0,1,3]) - let variances:[Float32] = device.texture2tensor(texture: para.outputVariances.metalTexture!, dim: para.outputVariances.tensorDim.dims, transpose: [2,0,1,3]) - print("boxes: ") - print(boxes.strideArray()) - print("variances: ") - print(variances.strideArray()) +// let device = para.input.metalTexture!.device +// let boxes:[Float32] = device.texture2tensor(texture: para.output.metalTexture!, dim: para.output.tensorDim.dims, transpose: [2,0,1,3]) +// let variances:[Float32] = device.texture2tensor(texture: para.outputVariances.metalTexture!, dim: para.outputVariances.tensorDim.dims, transpose: [2,0,1,3]) +// print("boxes: ") +// print(boxes.strideArray()) +// print("variances: ") +// print(variances.strideArray()) // output print(" \(type) output: ") - print(para.output.metalTexture.realNHWC(dim: (para.output.dim[0], para.output.dim[1], para.output.dim[2], para.output.dim[3])).strideArray()) + let box = para.output.metalTexture.realNHWC(dim: (para.output.dim[0], para.output.dim[1], para.output.dim[2], para.output.dim[3])) + print(" dim: \(para.output.dim)") + print(box.strideArray()) +// print((0..