提交 ed8fe141 编写于 作者: L liuruilong

add half compute

上级 8a3867e1
......@@ -25,7 +25,7 @@ class MobileNet_ssd_hand: Net{
class MobilenetssdPreProccess: CusomKernel {
init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 300, inHeight: 300, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess", outputDim: s, usePaddleMobileLib: false)
super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess_half", outputDim: s, usePaddleMobileLib: false)
}
}
......@@ -49,9 +49,7 @@ class MobileNet_ssd_hand: Net{
var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.originDim[0], h: score.originDim[1], w: score.originDim[2], c: score.originDim[3]))
var bboxArr = bbox.metalTexture.floatArray { (f) -> Float32 in
return f
}
var bboxArr = bbox.metalTexture.float32Array()
let nmsCompute = NMSCompute.init()
nmsCompute.scoreThredshold = 0.01
......
......@@ -22,7 +22,7 @@ import MetalPerformanceShaders
class ScaleKernel: CusomKernel {
init(device: MTLDevice, shape: Shape) {
super.init(device: device, inFunctionName: "scale", outputDim: shape, usePaddleMobileLib: false)
super.init(device: device, inFunctionName: "scale_half", outputDim: shape, usePaddleMobileLib: false)
}
}
......@@ -79,6 +79,8 @@ extension Net {
func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) {
let texture = try? MetalHelper.shared.textureLoader.newTexture(cgImage: image, options: [:]) ?! " texture loader error"
MetalHelper.scaleTexture(queue: MetalHelper.shared.queue, input: texture!, size: (dim.w, dim.h)) { (resTexture) in
print("after scale")
print(resTexture.float32Array().strideArray())
getTexture(resTexture)
}
}
......
......@@ -112,6 +112,7 @@ class ViewController: UIViewController {
selectImage = UIImage.init(named: "hand.jpg")
selectImageView.image = selectImage
net.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in
self?.toPredictTexture = texture
}
}
......
......@@ -285,6 +285,23 @@ public extension MTLTexture {
return fArr
}
func float32Array() -> [Float32] {
if pixelFormat == .rgba32Float {
let float32Array = floatArray { (f: Float32) -> Float32 in
return f
}
return float32Array
} else if pixelFormat == .rgba16Float {
var float16Array = floatArray { (f: Float16) -> Float16 in
return f
}
return float16To32(input: &float16Array, count: float16Array.count)
} else {
fatalError()
}
}
func logDesc<T>(header: String = "", stridable: Bool = true) -> T? {
print(header)
print("texture: \(self)")
......@@ -385,7 +402,6 @@ public extension MTLTexture {
// print(self)
var textureArray: [Float32]
// if texturePrecision == .Float16
if pixelFormat == .rgba32Float {
textureArray = floatArray { (i : Float32) -> Float32 in
return i
......
......@@ -16,7 +16,7 @@ import Foundation
let testTo = 54
let computePrecision: ComputePrecision = .Float32
let computePrecision: ComputePrecision = .Float16
public class ResultHolder<P: PrecisionType> {
public let dim: [Int]
......@@ -111,10 +111,11 @@ public class Executor<P: PrecisionType> {
}
buffer.addCompletedHandler { (commandbuffer) in
// let inputArr = resInput.floatArray(res: { (p:P) -> P in
// return p
// })
// print(inputArr.strideArray())
//
// writeToLibrary(fileName: "genet_input_hand", array: inputArr)
// print("write to library done")
......@@ -130,7 +131,7 @@ public class Executor<P: PrecisionType> {
// print(" 第 \(i) 个 op: ")
// op.delogOutput()
// }
//
//
// return
let afterDate = Date.init()
......
......@@ -74,9 +74,14 @@ class BoxcoderOp<P: PrecisionType>: Operator<BoxcoderKernel<P>, BoxcoderParam<P>
// print(" target box ")
// print(targetBoxArray.strideArray())
let originDim = para.output.originDim
let targetBoxOriginDim = para.targetBox.originDim
let targetBoxArray = para.targetBox.metalTexture.realNHWC(dim: (n: targetBoxOriginDim[0], h: targetBoxOriginDim[1], w: targetBoxOriginDim[2], c: targetBoxOriginDim[3]), texturePrecision: computePrecision)
print(" target box ")
print(targetBoxArray.strideArray())
let originDim = para.output.originDim
let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), texturePrecision: computePrecision)
print(" output ")
print(outputArray.strideArray())
}
......
......@@ -60,9 +60,8 @@ class FeedOp<P: PrecisionType>: Operator<Texture2DTo2DArrayKernel<P>, FeedParam<
}
func delogOutput() {
// para.input.mtlTexture.logDesc()
// let _: P? = para.input.mtlTexture.logDesc(header: "feed input: ", stridable: true)
// let _: P? = para.output.metalTexture.logDesc(header: "feed output: ", stridable: false)
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.originDim[0], c: para.output.originDim[1], h: para.output.originDim[2], w: para.output.originDim[3]), texturePrecision: computePrecision).strideArray())
}
}
......@@ -117,10 +117,10 @@ class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
var newBiaseBuffer: MTLBuffer
var newScaleBuffer: MTLBuffer
if computePrecision == .Float16 {
if computePrecision == .Float32 {
newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)!
newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)!
} else if computePrecision == .Float32 {
} else if computePrecision == .Float16 {
newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
......
......@@ -85,7 +85,7 @@ class PriorBoxKernel<P: PrecisionType>: Kernel, Computable{
}
if computePrecision == .Float16 {
let buffer = device.makeBuffer(length: outputAspectRatior.count)
let buffer = device.makeBuffer(length: outputAspectRatior.count * MemoryLayout<Float16>.size)
float32ToFloat16(input: &outputAspectRatior, output:(buffer?.contents())!, count: outputAspectRatior.count)
param.newAspectRatios = buffer
......
......@@ -32,7 +32,14 @@ class Texture2DTo2DArrayKernel<P: PrecisionType>: Kernel, Computable{
}
required init(device: MTLDevice, param: FeedParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1])
super.init(device: device, inFunctionName: "texture2d_to_2d_array")
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "texture2d_to_2d_array_half")
} else if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "texture2d_to_2d_array")
} else {
fatalError()
}
}
}
......@@ -44,18 +44,6 @@ kernel void resize(texture2d<half, access::read> inTexture [[texture(0)]],
}
//kernel void texture2d_to_2d_array(texture2d<half, access::read> inTexture [[texture(0)]],
// texture2d_array<half, access::write> outTexture [[texture(1)]],
// uint3 gid [[thread_position_in_grid]]) {
// if (gid.x >= inTexture.get_width() ||
// gid.y >= inTexture.get_height()){
// return;
// }
// const half4 input = inTexture.read(gid.xy);
// outTexture.write(input, gid.xy, 0);
//}
kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
......@@ -67,10 +55,9 @@ kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[tex
outTexture.write(input, gid.xy, 0);
}
kernel void texture2d_to_2d_array_half(texture2d<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height()){
return;
......@@ -80,5 +67,3 @@ kernel void texture2d_to_2d_array_half(texture2d<half, access::read> inTexture [
}
......@@ -100,8 +100,8 @@ kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0
kernel void prior_box_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outBoxTexture [[texture(1)]],
texture2d_array<half, access::write> varianceTexture [[texture(2)]],
constant PriorBoxMetalParam &param [[buffer(0)]],
const device half *aspect_ratios [[buffer(1)]],
const device half *aspect_ratios [[buffer(0)]],
constant PriorBoxMetalParam &param [[buffer(1)]],
const device float4 *variances [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outBoxTexture.get_width() ||
......
......@@ -70,17 +70,21 @@ class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>
func delogOutput() {
// output
print(" \(type) output: ")
let originDim = para.output.originDim
if para.output.transpose == [0, 1, 2, 3] {
let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), texturePrecision: computePrecision)
print(outputArray.strideArray())
} else if para.output.transpose == [0, 2, 3, 1] {
print(para.output.metalTexture.toTensor(dim: (n: originDim[0], c: originDim[1], h: originDim[2], w: originDim[3]), texturePrecision: computePrecision).strideArray())
} else {
print(" not implement")
}
// output
let outputArray = para.output.metalTexture.float32Array()
print(outputArray)
// output
// print(" \(type) output: ")
// let originDim = para.output.originDim
// if para.output.transpose == [0, 1, 2, 3] {
// let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), texturePrecision: computePrecision)
// print(outputArray.strideArray())
// } else if para.output.transpose == [0, 2, 3, 1] {
// print(para.output.metalTexture.toTensor(dim: (n: originDim[0], c: originDim[1], h: originDim[2], w: originDim[3]), texturePrecision: computePrecision).strideArray())
// } else {
// print(" not implement")
// }
// writeToLibrary(fileName: "box_out", array: outputArray)
......
......@@ -48,9 +48,15 @@ class TransposeOp<P: PrecisionType>: Operator<TransposeKernel<P>, TransposeParam
func delogOutput() {
print(" \(type) output: ")
let originDim = para.output.tensorDim
let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), texturePrecision: computePrecision)
print(outputArray.strideArray())
let originDim = para.output.originDim
if para.output.transpose == [0, 1, 2, 3] {
let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]))
print(outputArray.strideArray())
} else if para.output.transpose == [0, 2, 3, 1] {
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
} else {
print(" not implement")
}
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册