From e003756c8752b1e648652b66712f3a304efc0a2d Mon Sep 17 00:00:00 2001 From: liuruilong Date: Fri, 31 Aug 2018 19:38:07 +0800 Subject: [PATCH] add half compute --- .../paddle-mobile-demo/Net/MobileNetSSD.swift | 6 ++--- .../paddle-mobile-demo/Net/Net.swift | 4 +++- .../paddle-mobile-demo/ViewController.swift | 1 + .../paddle-mobile/Common/MetalExtension.swift | 18 +++++++++++++- .../paddle-mobile/Executor.swift | 7 +++--- .../paddle-mobile/Operators/BoxcoderOp.swift | 7 +++++- .../paddle-mobile/Operators/FeedOp.swift | 5 ++-- .../Operators/Kernels/ConvBNReluKernel.swift | 4 ++-- .../Operators/Kernels/PriorBoxKernel.swift | 2 +- .../Kernels/Texture2DTo2DArrayKernel.swift | 11 +++++++-- .../Operators/Kernels/metal/Kernels.metal | 19 ++------------- .../Kernels/metal/PriorBoxKernel.metal | 4 ++-- .../paddle-mobile/Operators/PriorBoxOp.swift | 24 +++++++++++-------- .../paddle-mobile/Operators/TransposeOp.swift | 12 +++++++--- 14 files changed, 74 insertions(+), 50 deletions(-) diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/MobileNetSSD.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/MobileNetSSD.swift index 322a6b9f67..888df15716 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/MobileNetSSD.swift +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/MobileNetSSD.swift @@ -25,7 +25,7 @@ class MobileNet_ssd_hand: Net{ class MobilenetssdPreProccess: CusomKernel { init(device: MTLDevice) { let s = CusomKernel.Shape.init(inWidth: 300, inHeight: 300, inChannel: 3) - super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess", outputDim: s, usePaddleMobileLib: false) + super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess_half", outputDim: s, usePaddleMobileLib: false) } } @@ -49,9 +49,7 @@ class MobileNet_ssd_hand: Net{ var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.originDim[0], h: score.originDim[1], w: score.originDim[2], c: score.originDim[3])) - var bboxArr = 
bbox.metalTexture.floatArray { (f) -> Float32 in - return f - } + var bboxArr = bbox.metalTexture.float32Array() let nmsCompute = NMSCompute.init() nmsCompute.scoreThredshold = 0.01 diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/Net.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/Net.swift index 0b8b428c73..b12b57d2dd 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/Net.swift +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/Net.swift @@ -22,7 +22,7 @@ import MetalPerformanceShaders class ScaleKernel: CusomKernel { init(device: MTLDevice, shape: Shape) { - super.init(device: device, inFunctionName: "scale", outputDim: shape, usePaddleMobileLib: false) + super.init(device: device, inFunctionName: "scale_half", outputDim: shape, usePaddleMobileLib: false) } } @@ -79,6 +79,8 @@ extension Net { func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) { let texture = try? MetalHelper.shared.textureLoader.newTexture(cgImage: image, options: [:]) ?! " texture loader error" MetalHelper.scaleTexture(queue: MetalHelper.shared.queue, input: texture!, size: (dim.w, dim.h)) { (resTexture) in + print("after scale") + print(resTexture.float32Array().strideArray()) getTexture(resTexture) } } diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift index a9fd4dd515..d2f68d3725 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift @@ -112,6 +112,7 @@ class ViewController: UIViewController { selectImage = UIImage.init(named: "hand.jpg") selectImageView.image = selectImage net.getTexture(image: selectImage!.cgImage!) 
{[weak self] (texture) in + self?.toPredictTexture = texture } } diff --git a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift index 352d036e00..6cf5d789eb 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift @@ -285,6 +285,23 @@ public extension MTLTexture { return fArr } + func float32Array() -> [Float32] { + if pixelFormat == .rgba32Float { + let float32Array = floatArray { (f: Float32) -> Float32 in + return f + } + return float32Array + } else if pixelFormat == .rgba16Float { + + var float16Array = floatArray { (f: Float16) -> Float16 in + return f + } + return float16To32(input: &float16Array, count: float16Array.count) + } else { + fatalError() + } + } + func logDesc(header: String = "", stridable: Bool = true) -> T? { print(header) print("texture: \(self)") @@ -385,7 +402,6 @@ public extension MTLTexture { // print(self) var textureArray: [Float32] - // if texturePrecision == .Float16 if pixelFormat == .rgba32Float { textureArray = floatArray { (i : Float32) -> Float32 in return i diff --git a/metal/paddle-mobile/paddle-mobile/Executor.swift b/metal/paddle-mobile/paddle-mobile/Executor.swift index efdc0b2164..bebf414301 100644 --- a/metal/paddle-mobile/paddle-mobile/Executor.swift +++ b/metal/paddle-mobile/paddle-mobile/Executor.swift @@ -16,7 +16,7 @@ import Foundation let testTo = 54 -let computePrecision: ComputePrecision = .Float32 +let computePrecision: ComputePrecision = .Float16 public class ResultHolder { public let dim: [Int] @@ -111,10 +111,11 @@ public class Executor { } buffer.addCompletedHandler { (commandbuffer) in - + // let inputArr = resInput.floatArray(res: { (p:P) -> P in // return p // }) +// print(inputArr.strideArray()) // // writeToLibrary(fileName: "genet_input_hand", array: inputArr) // print("write to library done") @@ -130,7 +131,7 @@ public class Executor { // 
print(" 第 \(i) 个 op: ") // op.delogOutput() // } -// +// // return let afterDate = Date.init() diff --git a/metal/paddle-mobile/paddle-mobile/Operators/BoxcoderOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/BoxcoderOp.swift index 1bf5cde92e..193a271ccf 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/BoxcoderOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/BoxcoderOp.swift @@ -74,9 +74,14 @@ class BoxcoderOp: Operator, BoxcoderParam

// print(" target box ") // print(targetBoxArray.strideArray()) - let originDim = para.output.originDim + let targetBoxOriginDim = para.targetBox.originDim + let targetBoxArray = para.targetBox.metalTexture.realNHWC(dim: (n: targetBoxOriginDim[0], h: targetBoxOriginDim[1], w: targetBoxOriginDim[2], c: targetBoxOriginDim[3]), texturePrecision: computePrecision) + print(" target box ") + print(targetBoxArray.strideArray()) + let originDim = para.output.originDim let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), texturePrecision: computePrecision) + print(" output ") print(outputArray.strideArray()) } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift index 59e5443a0f..b6075a807d 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift @@ -60,9 +60,8 @@ class FeedOp: Operator, FeedParam< } func delogOutput() { - // para.input.mtlTexture.logDesc() - // let _: P? = para.input.mtlTexture.logDesc(header: "feed input: ", stridable: true) - // let _: P? 
= para.output.metalTexture.logDesc(header: "feed output: ", stridable: false) + print(" \(type) output: ") + print(para.output.metalTexture.toTensor(dim: (n: para.output.originDim[0], c: para.output.originDim[1], h: para.output.originDim[2], w: para.output.originDim[3]), texturePrecision: computePrecision).strideArray()) } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift index 350c81cece..cd528bb588 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift @@ -117,10 +117,10 @@ class ConvBNReluKernel: Kernel, Computable, Testable { var newBiaseBuffer: MTLBuffer var newScaleBuffer: MTLBuffer - if computePrecision == .Float16 { + if computePrecision == .Float32 { newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)! newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)! - } else if computePrecision == .Float32 { + } else if computePrecision == .Float16 { newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! 
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift index 08a489ab22..ece3e3915d 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift @@ -85,7 +85,7 @@ class PriorBoxKernel: Kernel, Computable{ } if computePrecision == .Float16 { - let buffer = device.makeBuffer(length: outputAspectRatior.count) + let buffer = device.makeBuffer(length: outputAspectRatior.count * MemoryLayout.size) float32ToFloat16(input: &outputAspectRatior, output:(buffer?.contents())!, count: outputAspectRatior.count) param.newAspectRatios = buffer diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Texture2DTo2DArrayKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Texture2DTo2DArrayKernel.swift index 8554beea2b..0943686660 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Texture2DTo2DArrayKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Texture2DTo2DArrayKernel.swift @@ -32,7 +32,14 @@ class Texture2DTo2DArrayKernel: Kernel, Computable{ } required init(device: MTLDevice, param: FeedParam

) { - param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1]) - super.init(device: device, inFunctionName: "texture2d_to_2d_array") + param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) + if computePrecision == .Float16 { + super.init(device: device, inFunctionName: "texture2d_to_2d_array_half") + } else if computePrecision == .Float32 { + super.init(device: device, inFunctionName: "texture2d_to_2d_array") + } else { + fatalError() + } + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal index 8bce97f21e..368509f001 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal @@ -44,18 +44,6 @@ kernel void resize(texture2d inTexture [[texture(0)]], } - -//kernel void texture2d_to_2d_array(texture2d inTexture [[texture(0)]], -// texture2d_array outTexture [[texture(1)]], -// uint3 gid [[thread_position_in_grid]]) { -// if (gid.x >= inTexture.get_width() || -// gid.y >= inTexture.get_height()){ -// return; -// } -// const half4 input = inTexture.read(gid.xy); -// outTexture.write(input, gid.xy, 0); -//} - kernel void texture2d_to_2d_array(texture2d inTexture [[texture(0)]], texture2d_array outTexture [[texture(1)]], uint3 gid [[thread_position_in_grid]]) { @@ -67,10 +55,9 @@ kernel void texture2d_to_2d_array(texture2d inTexture [[tex outTexture.write(input, gid.xy, 0); } - kernel void texture2d_to_2d_array_half(texture2d inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { + texture2d_array outTexture [[texture(1)]], + uint3 gid [[thread_position_in_grid]]) { if (gid.x >= inTexture.get_width() || gid.y >= inTexture.get_height()){ return; @@ -80,5 +67,3 @@ kernel void texture2d_to_2d_array_half(texture2d inTexture [ } - - diff --git 
a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal index 4107f0e308..794f0ea677 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal @@ -100,8 +100,8 @@ kernel void prior_box(texture2d_array inTexture [[texture(0 kernel void prior_box_half(texture2d_array inTexture [[texture(0)]], texture2d_array outBoxTexture [[texture(1)]], texture2d_array varianceTexture [[texture(2)]], - constant PriorBoxMetalParam &param [[buffer(0)]], - const device half *aspect_ratios [[buffer(1)]], + const device half *aspect_ratios [[buffer(0)]], + constant PriorBoxMetalParam &param [[buffer(1)]], const device float4 *variances [[buffer(2)]], uint3 gid [[thread_position_in_grid]]) { if (gid.x >= outBoxTexture.get_width() || diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift index 6999043ab7..d48fc4cd1c 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift @@ -70,17 +70,21 @@ class PriorBoxOp: Operator, PriorBoxParam

func delogOutput() { - // output print(" \(type) output: ") - let originDim = para.output.originDim - if para.output.transpose == [0, 1, 2, 3] { - let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), texturePrecision: computePrecision) - print(outputArray.strideArray()) - } else if para.output.transpose == [0, 2, 3, 1] { - print(para.output.metalTexture.toTensor(dim: (n: originDim[0], c: originDim[1], h: originDim[2], w: originDim[3]), texturePrecision: computePrecision).strideArray()) - } else { - print(" not implement") - } + // output + let outputArray = para.output.metalTexture.float32Array() + print(outputArray) + // output +// print(" \(type) output: ") +// let originDim = para.output.originDim +// if para.output.transpose == [0, 1, 2, 3] { +// let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), texturePrecision: computePrecision) +// print(outputArray.strideArray()) +// } else if para.output.transpose == [0, 2, 3, 1] { +// print(para.output.metalTexture.toTensor(dim: (n: originDim[0], c: originDim[1], h: originDim[2], w: originDim[3]), texturePrecision: computePrecision).strideArray()) +// } else { +// print(" not implement") +// } // writeToLibrary(fileName: "box_out", array: outputArray) diff --git a/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift index dc658793a3..8281ba5433 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift @@ -48,9 +48,15 @@ class TransposeOp: Operator, TransposeParam func delogOutput() { print(" \(type) output: ") - let originDim = para.output.tensorDim - let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), 
texturePrecision: computePrecision) - print(outputArray.strideArray()) + let originDim = para.output.originDim + if para.output.transpose == [0, 1, 2, 3] { + let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3])) + print(outputArray.strideArray()) + } else if para.output.transpose == [0, 2, 3, 1] { + print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) + } else { + print(" not implement") + } } } -- GitLab