提交 e003756c 编写于 作者: L liuruilong

add half compute

上级 e2337dd2
...@@ -25,7 +25,7 @@ class MobileNet_ssd_hand: Net{ ...@@ -25,7 +25,7 @@ class MobileNet_ssd_hand: Net{
class MobilenetssdPreProccess: CusomKernel { class MobilenetssdPreProccess: CusomKernel {
init(device: MTLDevice) { init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 300, inHeight: 300, inChannel: 3) let s = CusomKernel.Shape.init(inWidth: 300, inHeight: 300, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess", outputDim: s, usePaddleMobileLib: false) super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess_half", outputDim: s, usePaddleMobileLib: false)
} }
} }
...@@ -49,9 +49,7 @@ class MobileNet_ssd_hand: Net{ ...@@ -49,9 +49,7 @@ class MobileNet_ssd_hand: Net{
var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.originDim[0], h: score.originDim[1], w: score.originDim[2], c: score.originDim[3])) var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.originDim[0], h: score.originDim[1], w: score.originDim[2], c: score.originDim[3]))
var bboxArr = bbox.metalTexture.floatArray { (f) -> Float32 in var bboxArr = bbox.metalTexture.float32Array()
return f
}
let nmsCompute = NMSCompute.init() let nmsCompute = NMSCompute.init()
nmsCompute.scoreThredshold = 0.01 nmsCompute.scoreThredshold = 0.01
......
...@@ -22,7 +22,7 @@ import MetalPerformanceShaders ...@@ -22,7 +22,7 @@ import MetalPerformanceShaders
class ScaleKernel: CusomKernel { class ScaleKernel: CusomKernel {
init(device: MTLDevice, shape: Shape) { init(device: MTLDevice, shape: Shape) {
super.init(device: device, inFunctionName: "scale", outputDim: shape, usePaddleMobileLib: false) super.init(device: device, inFunctionName: "scale_half", outputDim: shape, usePaddleMobileLib: false)
} }
} }
...@@ -79,6 +79,8 @@ extension Net { ...@@ -79,6 +79,8 @@ extension Net {
func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) { func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) {
let texture = try? MetalHelper.shared.textureLoader.newTexture(cgImage: image, options: [:]) ?! " texture loader error" let texture = try? MetalHelper.shared.textureLoader.newTexture(cgImage: image, options: [:]) ?! " texture loader error"
MetalHelper.scaleTexture(queue: MetalHelper.shared.queue, input: texture!, size: (dim.w, dim.h)) { (resTexture) in MetalHelper.scaleTexture(queue: MetalHelper.shared.queue, input: texture!, size: (dim.w, dim.h)) { (resTexture) in
print("after scale")
print(resTexture.float32Array().strideArray())
getTexture(resTexture) getTexture(resTexture)
} }
} }
......
...@@ -112,6 +112,7 @@ class ViewController: UIViewController { ...@@ -112,6 +112,7 @@ class ViewController: UIViewController {
selectImage = UIImage.init(named: "hand.jpg") selectImage = UIImage.init(named: "hand.jpg")
selectImageView.image = selectImage selectImageView.image = selectImage
net.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in net.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in
self?.toPredictTexture = texture self?.toPredictTexture = texture
} }
} }
......
...@@ -285,6 +285,23 @@ public extension MTLTexture { ...@@ -285,6 +285,23 @@ public extension MTLTexture {
return fArr return fArr
} }
/// Reads the texture's contents back as an array of `Float32`.
///
/// - `.rgba32Float` textures are read directly via `floatArray`.
/// - `.rgba16Float` textures are read as `Float16` via `floatArray` and then
///   converted with `float16To32`.
///
/// Any other pixel format is treated as a programmer error and traps with a
/// message naming the offending format.
///
/// - Returns: The texture values as `Float32`.
func float32Array() -> [Float32] {
    switch pixelFormat {
    case .rgba32Float:
        return floatArray { (value: Float32) -> Float32 in
            return value
        }
    case .rgba16Float:
        // `float16To32` receives its input by reference (`&`), so the
        // half-precision values must live in mutable storage.
        var halfValues = floatArray { (value: Float16) -> Float16 in
            return value
        }
        return float16To32(input: &halfValues, count: halfValues.count)
    default:
        fatalError("float32Array() supports only .rgba32Float and .rgba16Float textures; got pixel format raw value \(pixelFormat.rawValue)")
    }
}
func logDesc<T>(header: String = "", stridable: Bool = true) -> T? { func logDesc<T>(header: String = "", stridable: Bool = true) -> T? {
print(header) print(header)
print("texture: \(self)") print("texture: \(self)")
...@@ -385,7 +402,6 @@ public extension MTLTexture { ...@@ -385,7 +402,6 @@ public extension MTLTexture {
// print(self) // print(self)
var textureArray: [Float32] var textureArray: [Float32]
// if texturePrecision == .Float16
if pixelFormat == .rgba32Float { if pixelFormat == .rgba32Float {
textureArray = floatArray { (i : Float32) -> Float32 in textureArray = floatArray { (i : Float32) -> Float32 in
return i return i
......
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
let testTo = 54 let testTo = 54
let computePrecision: ComputePrecision = .Float32 let computePrecision: ComputePrecision = .Float16
public class ResultHolder<P: PrecisionType> { public class ResultHolder<P: PrecisionType> {
public let dim: [Int] public let dim: [Int]
...@@ -111,10 +111,11 @@ public class Executor<P: PrecisionType> { ...@@ -111,10 +111,11 @@ public class Executor<P: PrecisionType> {
} }
buffer.addCompletedHandler { (commandbuffer) in buffer.addCompletedHandler { (commandbuffer) in
// let inputArr = resInput.floatArray(res: { (p:P) -> P in // let inputArr = resInput.floatArray(res: { (p:P) -> P in
// return p // return p
// }) // })
// print(inputArr.strideArray())
// //
// writeToLibrary(fileName: "genet_input_hand", array: inputArr) // writeToLibrary(fileName: "genet_input_hand", array: inputArr)
// print("write to library done") // print("write to library done")
...@@ -130,7 +131,7 @@ public class Executor<P: PrecisionType> { ...@@ -130,7 +131,7 @@ public class Executor<P: PrecisionType> {
// print(" 第 \(i) 个 op: ") // print(" 第 \(i) 个 op: ")
// op.delogOutput() // op.delogOutput()
// } // }
// //
// return // return
let afterDate = Date.init() let afterDate = Date.init()
......
...@@ -74,9 +74,14 @@ class BoxcoderOp<P: PrecisionType>: Operator<BoxcoderKernel<P>, BoxcoderParam<P> ...@@ -74,9 +74,14 @@ class BoxcoderOp<P: PrecisionType>: Operator<BoxcoderKernel<P>, BoxcoderParam<P>
// print(" target box ") // print(" target box ")
// print(targetBoxArray.strideArray()) // print(targetBoxArray.strideArray())
let originDim = para.output.originDim let targetBoxOriginDim = para.targetBox.originDim
let targetBoxArray = para.targetBox.metalTexture.realNHWC(dim: (n: targetBoxOriginDim[0], h: targetBoxOriginDim[1], w: targetBoxOriginDim[2], c: targetBoxOriginDim[3]), texturePrecision: computePrecision)
print(" target box ")
print(targetBoxArray.strideArray())
let originDim = para.output.originDim
let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), texturePrecision: computePrecision) let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), texturePrecision: computePrecision)
print(" output ")
print(outputArray.strideArray()) print(outputArray.strideArray())
} }
......
...@@ -60,9 +60,8 @@ class FeedOp<P: PrecisionType>: Operator<Texture2DTo2DArrayKernel<P>, FeedParam< ...@@ -60,9 +60,8 @@ class FeedOp<P: PrecisionType>: Operator<Texture2DTo2DArrayKernel<P>, FeedParam<
} }
func delogOutput() { func delogOutput() {
// para.input.mtlTexture.logDesc() print(" \(type) output: ")
// let _: P? = para.input.mtlTexture.logDesc(header: "feed input: ", stridable: true) print(para.output.metalTexture.toTensor(dim: (n: para.output.originDim[0], c: para.output.originDim[1], h: para.output.originDim[2], w: para.output.originDim[3]), texturePrecision: computePrecision).strideArray())
// let _: P? = para.output.metalTexture.logDesc(header: "feed output: ", stridable: false)
} }
} }
...@@ -117,10 +117,10 @@ class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable { ...@@ -117,10 +117,10 @@ class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
var newBiaseBuffer: MTLBuffer var newBiaseBuffer: MTLBuffer
var newScaleBuffer: MTLBuffer var newScaleBuffer: MTLBuffer
if computePrecision == .Float16 { if computePrecision == .Float32 {
newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)! newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)!
newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)! newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)!
} else if computePrecision == .Float32 { } else if computePrecision == .Float16 {
newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
......
...@@ -85,7 +85,7 @@ class PriorBoxKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -85,7 +85,7 @@ class PriorBoxKernel<P: PrecisionType>: Kernel, Computable{
} }
if computePrecision == .Float16 { if computePrecision == .Float16 {
let buffer = device.makeBuffer(length: outputAspectRatior.count) let buffer = device.makeBuffer(length: outputAspectRatior.count * MemoryLayout<Float16>.size)
float32ToFloat16(input: &outputAspectRatior, output:(buffer?.contents())!, count: outputAspectRatior.count) float32ToFloat16(input: &outputAspectRatior, output:(buffer?.contents())!, count: outputAspectRatior.count)
param.newAspectRatios = buffer param.newAspectRatios = buffer
......
...@@ -32,7 +32,14 @@ class Texture2DTo2DArrayKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -32,7 +32,14 @@ class Texture2DTo2DArrayKernel<P: PrecisionType>: Kernel, Computable{
} }
required init(device: MTLDevice, param: FeedParam<P>) { required init(device: MTLDevice, param: FeedParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1]) param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
super.init(device: device, inFunctionName: "texture2d_to_2d_array") if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "texture2d_to_2d_array_half")
} else if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "texture2d_to_2d_array")
} else {
fatalError()
}
} }
} }
...@@ -44,18 +44,6 @@ kernel void resize(texture2d<half, access::read> inTexture [[texture(0)]], ...@@ -44,18 +44,6 @@ kernel void resize(texture2d<half, access::read> inTexture [[texture(0)]],
} }
//kernel void texture2d_to_2d_array(texture2d<half, access::read> inTexture [[texture(0)]],
// texture2d_array<half, access::write> outTexture [[texture(1)]],
// uint3 gid [[thread_position_in_grid]]) {
// if (gid.x >= inTexture.get_width() ||
// gid.y >= inTexture.get_height()){
// return;
// }
// const half4 input = inTexture.read(gid.xy);
// outTexture.write(input, gid.xy, 0);
//}
kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[texture(0)]], kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]], texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) { uint3 gid [[thread_position_in_grid]]) {
...@@ -67,10 +55,9 @@ kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[tex ...@@ -67,10 +55,9 @@ kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[tex
outTexture.write(input, gid.xy, 0); outTexture.write(input, gid.xy, 0);
} }
kernel void texture2d_to_2d_array_half(texture2d<half, access::read> inTexture [[texture(0)]], kernel void texture2d_to_2d_array_half(texture2d<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]], texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) { uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() || if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height()){ gid.y >= inTexture.get_height()){
return; return;
...@@ -80,5 +67,3 @@ kernel void texture2d_to_2d_array_half(texture2d<half, access::read> inTexture [ ...@@ -80,5 +67,3 @@ kernel void texture2d_to_2d_array_half(texture2d<half, access::read> inTexture [
} }
...@@ -100,8 +100,8 @@ kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0 ...@@ -100,8 +100,8 @@ kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0
kernel void prior_box_half(texture2d_array<half, access::read> inTexture [[texture(0)]], kernel void prior_box_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outBoxTexture [[texture(1)]], texture2d_array<half, access::write> outBoxTexture [[texture(1)]],
texture2d_array<half, access::write> varianceTexture [[texture(2)]], texture2d_array<half, access::write> varianceTexture [[texture(2)]],
constant PriorBoxMetalParam &param [[buffer(0)]], const device half *aspect_ratios [[buffer(0)]],
const device half *aspect_ratios [[buffer(1)]], constant PriorBoxMetalParam &param [[buffer(1)]],
const device float4 *variances [[buffer(2)]], const device float4 *variances [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) { uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outBoxTexture.get_width() || if (gid.x >= outBoxTexture.get_width() ||
......
...@@ -70,17 +70,21 @@ class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P> ...@@ -70,17 +70,21 @@ class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>
func delogOutput() { func delogOutput() {
// output
print(" \(type) output: ") print(" \(type) output: ")
let originDim = para.output.originDim // output
if para.output.transpose == [0, 1, 2, 3] { let outputArray = para.output.metalTexture.float32Array()
let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), texturePrecision: computePrecision) print(outputArray)
print(outputArray.strideArray()) // output
} else if para.output.transpose == [0, 2, 3, 1] { // print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: originDim[0], c: originDim[1], h: originDim[2], w: originDim[3]), texturePrecision: computePrecision).strideArray()) // let originDim = para.output.originDim
} else { // if para.output.transpose == [0, 1, 2, 3] {
print(" not implement") // let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), texturePrecision: computePrecision)
} // print(outputArray.strideArray())
// } else if para.output.transpose == [0, 2, 3, 1] {
// print(para.output.metalTexture.toTensor(dim: (n: originDim[0], c: originDim[1], h: originDim[2], w: originDim[3]), texturePrecision: computePrecision).strideArray())
// } else {
// print(" not implement")
// }
// writeToLibrary(fileName: "box_out", array: outputArray) // writeToLibrary(fileName: "box_out", array: outputArray)
......
...@@ -48,9 +48,15 @@ class TransposeOp<P: PrecisionType>: Operator<TransposeKernel<P>, TransposeParam ...@@ -48,9 +48,15 @@ class TransposeOp<P: PrecisionType>: Operator<TransposeKernel<P>, TransposeParam
func delogOutput() { func delogOutput() {
print(" \(type) output: ") print(" \(type) output: ")
let originDim = para.output.tensorDim let originDim = para.output.originDim
let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), texturePrecision: computePrecision) if para.output.transpose == [0, 1, 2, 3] {
print(outputArray.strideArray()) let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]))
print(outputArray.strideArray())
} else if para.output.transpose == [0, 2, 3, 1] {
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
} else {
print(" not implement")
}
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册