.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
-// op.inferShape()
ops.append(op)
} catch let error {
throw error
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/BoxcoderOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/BoxcoderOp.swift
index eaa596e071b7628339be185a7e3599a370763041..1bf5cde92eba79dbd8be2ca8cbd17e0398c428d5 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/BoxcoderOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/BoxcoderOp.swift
@@ -58,28 +58,26 @@ class BoxcoderOp
}
func delogOutput() {
-
print(" \(type) output: ")
- let priorBoxOriginDim = para.priorBox.originDim
- let priorBoxArray = para.priorBox.metalTexture.realNHWC(dim: (n: priorBoxOriginDim[0], h: priorBoxOriginDim[1], w: priorBoxOriginDim[2], c: priorBoxOriginDim[3]))
- print(" prior box ")
- print(priorBoxArray.strideArray())
-
- let priorBoxVarOriginDim = para.priorBoxVar.originDim
- let priorBoxVarArray = para.priorBoxVar.metalTexture.realNHWC(dim: (n: priorBoxVarOriginDim[0], h: priorBoxVarOriginDim[1], w: priorBoxVarOriginDim[2], c: priorBoxVarOriginDim[3]))
- print(" prior box var ")
- print(priorBoxVarArray.strideArray())
-
- let targetBoxOriginDim = para.targetBox.originDim
- let targetBoxArray = para.targetBox.metalTexture.realNHWC(dim: (n: targetBoxOriginDim[0], h: targetBoxOriginDim[1], w: targetBoxOriginDim[2], c: targetBoxOriginDim[3]))
- print(" target box ")
- print(targetBoxArray.strideArray())
+// let priorBoxOriginDim = para.priorBox.originDim
+// let priorBoxArray: [Float32] = para.priorBox.metalTexture.realNHWC(dim: (n: priorBoxOriginDim[0], h: priorBoxOriginDim[1], w: priorBoxOriginDim[2], c: priorBoxOriginDim[3]))
+// print(" prior box ")
+// print(priorBoxArray.strideArray())
+//
+// let priorBoxVarOriginDim = para.priorBoxVar.originDim
+// let priorBoxVarArray: [Float32] = para.priorBoxVar.metalTexture.realNHWC(dim: (n: priorBoxVarOriginDim[0], h: priorBoxVarOriginDim[1], w: priorBoxVarOriginDim[2], c: priorBoxVarOriginDim[3]))
+// print(" prior box var ")
+// print(priorBoxVarArray.strideArray())
+//
+// let targetBoxOriginDim = para.targetBox.originDim
+// let targetBoxArray: [Float32] = para.targetBox.metalTexture.realNHWC(dim: (n: targetBoxOriginDim[0], h: targetBoxOriginDim[1], w: targetBoxOriginDim[2], c: targetBoxOriginDim[3]))
+// print(" target box ")
+// print(targetBoxArray.strideArray())
let originDim = para.output.originDim
- let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]))
+ let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), texturePrecision: computePrecision)
print(outputArray.strideArray())
-
}
}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift
index 117fd8c39fadec55b88cc1aeaf3b91e5a0dd966d..aac56ef4f9ffd9711791c3bc7e2ca11702fc4e7b 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift
@@ -66,9 +66,16 @@ class ConcatOp >, Run
func delogOutput() {
print(" \(type) output: ")
let originDim = para.output.originDim
- let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]))
- print(outputArray.strideArray())
- print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
+
+ if para.output.transpose == [0, 1, 2, 3] {
+ let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), texturePrecision: computePrecision)
+ print(outputArray.strideArray())
+ } else if para.output.transpose == [0, 2, 3, 1] {
+ print(para.output.metalTexture.toTensor(dim: (n: originDim[0], c: originDim[1], h: originDim[2], w: originDim[3]), texturePrecision: computePrecision).strideArray())
+ } else {
+ fatalError(" not implemet")
+ }
+
}
}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift
index 6f67014444e5ef82fe4cdc30f99bc371fef2d417..7bced214bd11bfef61eb405d59073f004e765e03 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift
@@ -125,13 +125,6 @@ class ConvAddBatchNormReluOp >, Runable, Creator, InferShaperable, Fusion{
typealias OpType = ConvAddOp
-
-
-
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gConvType)
_ = beginNode
@@ -64,7 +61,6 @@ class ConvAddOp >,
return gConvAddType
}
-
func inferShape() {
let inDims = para.input.dim
@@ -101,10 +97,8 @@ class ConvAddOp >,
print(para.stride)
print("dilations: ")
print(para.dilations)
-
-
-
print(" \(type) output: ")
- print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
+
+ print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3]), texturePrecision: computePrecision).strideArray())
}
}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvBNReluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvBNReluOp.swift
index 3c521a2210614550577369c603dbbdc5e2cb6692..be8c57d3ace01dabd652e0e80a43c5a053213e28 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/ConvBNReluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvBNReluOp.swift
@@ -110,7 +110,7 @@ class ConvBNReluOp >, Runa
func delogOutput() {
print(" \(type) output: ")
- print(para.output.metalTexture.toTensor(dim: (n: para.output.originDim[0], c: para.output.originDim[1], h: para.output.originDim[2], w: para.output.originDim[3])).strideArray())
+ print(para.output.metalTexture.toTensor(dim: (n: para.output.originDim[0], c: para.output.originDim[1], h: para.output.originDim[2], w: para.output.originDim[3]), texturePrecision: computePrecision).strideArray())
}
}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/DwConvBNReluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/DwConvBNReluOp.swift
index 16a42d5c7b24e7b3a26cab35f68decd226076876..0ea8a62c5c0bf30da200add2a96410136d2f40fb 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/DwConvBNReluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/DwConvBNReluOp.swift
@@ -65,6 +65,6 @@ class DwConvBNReluOp ) throws {
- guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
- throw PaddleMobileError.predictError(message: " encode is nil")
- }
- encoder.setTexture(param.priorBox.metalTexture, index: 0)
- encoder.setTexture(param.priorBoxVar.metalTexture, index: 1)
- encoder.setTexture(param.targetBox.metalTexture, index: 2)
- encoder.setTexture(param.output.metalTexture, index: 3)
- var bmp = BoxcoderMetalParam.init()
- encoder.setBytes(&bmp, length: MemoryLayout ) throws {
+ guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+ throw PaddleMobileError.predictError(message: " encode is nil")
}
-
- required init(device: MTLDevice, param: BoxcoderParam ) {
- param.output.initTexture(device: device)
- super.init(device: device, inFunctionName: "boxcoder")
+ encoder.setTexture(param.priorBox.metalTexture, index: 0)
+ encoder.setTexture(param.priorBoxVar.metalTexture, index: 1)
+ encoder.setTexture(param.targetBox.metalTexture, index: 2)
+ encoder.setTexture(param.output.metalTexture, index: 3)
+ var bmp = BoxcoderMetalParam.init()
+ encoder.setBytes(&bmp, length: MemoryLayout ) {
+ param.output.initTexture(device: device, computePrecision: computePrecision)
+ if computePrecision == .Float32 {
+ super.init(device: device, inFunctionName: "boxcoder")
+ } else if computePrecision == .Float16 {
+ super.init(device: device, inFunctionName: "boxcoder_half")
+ } else {
+ fatalError()
}
+ }
+
}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConcatKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConcatKernel.swift
index 60f1437e7fabf0ae088b41f37cc01e2981cbf236..644476ad9dbb471786611fe25a30ed9c4833edbd 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConcatKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConcatKernel.swift
@@ -121,8 +121,14 @@ class ConcatKernel ) {
- param.output.initTexture(device: device, inTranspose: param.transpose)
- super.init(device: device, inFunctionName: "concat")
+ param.output.initTexture(device: device, inTranspose: param.transpose, computePrecision: computePrecision)
+ if computePrecision == .Float32 {
+ super.init(device: device, inFunctionName: "concat")
+ } else if computePrecision == .Float16 {
+ super.init(device: device, inFunctionName: "concat_half")
+ } else {
+ fatalError()
+ }
}
required init(device: MTLDevice, testParam: ConcatTestParam) {
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift
index eabadc9d44e7b98fccb0f87e73dd2ffd8da931d7..092207cfb7b9fda63cd6b5aa7082640bae515149 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift
@@ -15,127 +15,155 @@
import Foundation
struct ConvAddBatchNormReluTestParam: TestParam {
- let inputTexture: MTLTexture
- let outputTexture: MTLTexture
- var metalParam: MetalConvParam
- let filterBuffer: MTLBuffer
- let biaseBuffer: MTLBuffer
- let newScaleBuffer: MTLBuffer
- let newBiaseBuffer: MTLBuffer
- let filterSize: (width: Int, height: Int, channel: Int)
- init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) {
- inputTexture = inInputTexture
- outputTexture = inOutputTexture
- metalParam = inMetalParam
- filterBuffer = inFilterBuffer
- biaseBuffer = inBiaseBuffer
- newScaleBuffer = inNewScaleBuffer
- newBiaseBuffer = inNewBiaseBuffer
- filterSize = inFilterSize
- }
+ let inputTexture: MTLTexture
+ let outputTexture: MTLTexture
+ var metalParam: MetalConvParam
+ let filterBuffer: MTLBuffer
+ let biaseBuffer: MTLBuffer
+ let newScaleBuffer: MTLBuffer
+ let newBiaseBuffer: MTLBuffer
+ let filterSize: (width: Int, height: Int, channel: Int)
+ init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) {
+ inputTexture = inInputTexture
+ outputTexture = inOutputTexture
+ metalParam = inMetalParam
+ filterBuffer = inFilterBuffer
+ biaseBuffer = inBiaseBuffer
+ newScaleBuffer = inNewScaleBuffer
+ newBiaseBuffer = inNewBiaseBuffer
+ filterSize = inFilterSize
+ }
}
class ConvAddBatchNormReluKernel ) {
- var metalParam: MetalConvParam!
-
- required init(device: MTLDevice, param: ConvAddBatchNormReluParam ) {
-
- param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1])
-
- if param.filter.width == 1 && param.filter.height == 1 {
- super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1")
- } else if param.filter.channel == 1 {
- super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3")
- } else {
- super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3")
- }
-
- param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
- param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
- param.variance.initBuffer(device: device)
- param.mean.initBuffer(device: device)
- param.scale.initBuffer(device: device)
- param.bias.initBuffer(device: device)
-
-
- let offsetX = param.filter.width/2 - Int(param.paddings[0])
- let offsetY = param.filter.height/2 - Int(param.paddings[1])
-
- print("offset x: \(offsetX)")
- print("offset y: \(offsetY)")
-
- let offsetZ = 0.0
- metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
-
- var invs: [P] = []
- let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self)
-
- for i in 0.. = UnsafeMutablePointer .allocate(capacity: param.scale.buffer.length)
- let newBiase: UnsafeMutablePointer = UnsafeMutablePointer .allocate(capacity: param.bias.buffer.length)
-
- let scaleContents = param.scale.buffer.contents().assumingMemoryBound(to: P.self)
- let biaseContents = param.bias.buffer.contents().assumingMemoryBound(to: P.self)
- let meanContents = param.mean.buffer.contents().assumingMemoryBound(to: P.self)
- for i in 0.. ) throws {
- guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
- throw PaddleMobileError.predictError(message: " encode is nil")
- }
+ param.filter.initBuffer(device: device, precision: computePrecision)
+
+ param.y.initBuffer(device: device, precision: computePrecision)
+
+ param.variance.initBuffer(device: device, precision: .Float32)
+ param.mean.initBuffer(device: device, precision: .Float32)
+ param.scale.initBuffer(device: device, precision: .Float32)
+ param.bias.initBuffer(device: device, precision: .Float32)
+
+
+ let offsetX = param.filter.width/2 - Int(param.paddings[0])
+ let offsetY = param.filter.height/2 - Int(param.paddings[1])
+
+ print("offset x: \(offsetX)")
+ print("offset y: \(offsetY)")
+
+ let offsetZ = 0.0
+ metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
+
+ var invs: [P] = []
+ let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self)
+
+ for i in 0.. = UnsafeMutablePointer .allocate(capacity: param.scale.buffer.length)
+ let newBiase: UnsafeMutablePointer = UnsafeMutablePointer .allocate(capacity: param.bias.buffer.length)
+
+ let scaleContents = param.scale.buffer.contents().assumingMemoryBound(to: P.self)
+ let biaseContents = param.bias.buffer.contents().assumingMemoryBound(to: P.self)
+ let meanContents = param.mean.buffer.contents().assumingMemoryBound(to: P.self)
+ for i in 0.. .size)
+
+
+// let newBiaseFloat16 = device.makeBuffer(length: <#T##Int#>, options: <#T##MTLResourceOptions#>)
+
+ var newBiaseBuffer: MTLBuffer
+ var newScaleBuffer: MTLBuffer
+
+ if computePrecision == .Float16 {
+ newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)!
+ newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)!
+ } else if computePrecision == .Float32 {
-
- encoder.setTexture(param.input.metalTexture, index: 0)
- encoder.setTexture(param.output.metalTexture, index: 1)
- encoder.setBytes(&metalParam, length: MemoryLayout .size)
+
+ float32ToFloat16(input: newScale as! UnsafeMutablePointer .size)
+ } else {
+ fatalError(" unsupport ")
}
- public func test(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluTestParam) {
- guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
- fatalError()
- }
-
- encoder.setTexture(param.inputTexture, index: 0)
- encoder.setTexture(param.outputTexture, index: 1)
- var inMetalParam = param.metalParam
- encoder.setBytes(&inMetalParam, length: MemoryLayout ) throws {
+ guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+ throw PaddleMobileError.predictError(message: " encode is nil")
}
+
+
+ encoder.setTexture(param.input.metalTexture, index: 0)
+ encoder.setTexture(param.output.metalTexture, index: 1)
+ encoder.setBytes(&metalParam, length: MemoryLayout ) {
- if param.filter.width == 1 && param.filter.height == 1 {
- super.init(device: device, inFunctionName: "conv_add_1x1")
- } else if param.filter.channel == 1 {
- super.init(device: device, inFunctionName: "depthwise_conv_add_3x3")
+
+ if computePrecision == .Float16 {
+ if param.filter.width == 1 && param.filter.height == 1 {
+ super.init(device: device, inFunctionName: "conv_add_1x1_half")
+ } else if param.filter.channel == 1 {
+ super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_half")
+ } else {
+ super.init(device: device, inFunctionName: "conv_add_3x3_half")
+ }
+ } else if computePrecision == .Float32 {
+ if param.filter.width == 1 && param.filter.height == 1 {
+ super.init(device: device, inFunctionName: "conv_add_1x1")
+ } else if param.filter.channel == 1 {
+ super.init(device: device, inFunctionName: "depthwise_conv_add_3x3")
+ } else {
+ super.init(device: device, inFunctionName: "conv_add_3x3")
+ }
} else {
- super.init(device: device, inFunctionName: "conv_add_3x3")
+ fatalError()
}
- param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1])
+ param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0])
let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1])
- param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
- param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
+ param.filter.initBuffer(device: device, precision: computePrecision)
+ param.y.initBuffer(device: device, precision: computePrecision)
print("offset x: \(offsetX)")
print("offset y: \(offsetY)")
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift
index c5d3ffe6c944ab9019f5b80e66b4691057209529..350c81cece15a242e1c6b7bb91cf515a4eaf2335 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift
@@ -51,21 +51,33 @@ class ConvBNReluKernel ) {
-
- if param.filter.width == 1 && param.filter.height == 1 {
- super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1")
- } else if param.filter.channel == 1 {
- super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3")
+ if computePrecision == .Float32 {
+ if param.filter.width == 1 && param.filter.height == 1 {
+ super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1")
+ } else if param.filter.channel == 1 {
+ super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3")
+ } else {
+ super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3")
+ }
+ } else if computePrecision == .Float16 {
+ if param.filter.width == 1 && param.filter.height == 1 {
+ super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1_half")
+ } else if param.filter.channel == 1 {
+ super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3_half")
+ } else {
+ super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3_half")
+ }
} else {
- super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3")
+ fatalError()
}
- param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1])
- param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
- param.variance.initBuffer(device: device)
- param.mean.initBuffer(device: device)
- param.scale.initBuffer(device: device)
- param.bias.initBuffer(device: device)
+ param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
+ param.filter.initBuffer(device: device, precision: computePrecision)
+
+ param.variance.initBuffer(device: device, precision: .Float32)
+ param.mean.initBuffer(device: device, precision: .Float32)
+ param.scale.initBuffer(device: device, precision: .Float32)
+ param.bias.initBuffer(device: device, precision: .Float32)
let offsetX = param.filter.width/2 - Int(param.paddings[0])
let offsetY = param.filter.height/2 - Int(param.paddings[1])
@@ -102,8 +114,26 @@ class ConvBNReluKernel .size)
+
+ float32ToFloat16(input: newScale as! UnsafeMutablePointer .size)
+ } else {
+ fatalError(" unsupport ")
+ }
+
+ param.newBiase = newBiaseBuffer
+ param.newScale = newScaleBuffer
newScale.deinitialize(count: param.scale.buffer.length)
newScale.deallocate()
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift
index 680beba1ea711b389dd6117fc84f00b6079c9a60..e0485851fd610781f475eb43be1ce6fd4937a4ef 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift
@@ -39,7 +39,7 @@ class ConvKernel ) {
super.init(device: device, inFunctionName: "pool")
- param.output.initTexture(device: device, inTranspose: param.input.transpose)
+ param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
}
func compute(commandBuffer: MTLCommandBuffer, param: PoolParam ) throws {
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PreluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PreluKernel.swift
index ad925eb174414ed8f48cc8dd5bf090bc2ed0aed2..1545a848dacb4f11a2a68df31f7ea49a23799a87 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PreluKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PreluKernel.swift
@@ -17,8 +17,8 @@ class PreluKernel ) throws {
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift
index e2363e44d3a3d81b430f82303b2b1017ddfc5200..08a489ab2298c937f8878af94b557c2fa60d18d0 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift
@@ -33,11 +33,16 @@ class PriorBoxKernel ) {
- super.init(device: device, inFunctionName: "prior_box")
- param.output.initTexture(device: device, inTranspose: [2, 0, 1, 3])
+ if computePrecision == .Float32 {
+ super.init(device: device, inFunctionName: "prior_box")
+ } else if computePrecision == .Float16 {
+ super.init(device: device, inFunctionName: "prior_box_half")
+ } else {
+ fatalError()
+ }
-
- param.outputVariances.initTexture(device: device, inTranspose: [2, 0, 1, 3])
+ param.output.initTexture(device: device, inTranspose: [2, 0, 1, 3], computePrecision: computePrecision)
+ param.outputVariances.initTexture(device: device, inTranspose: [2, 0, 1, 3], computePrecision: computePrecision)
let n = 1
let h = param.output.dim[1]
@@ -79,7 +84,18 @@ class PriorBoxKernel ) {
- param.output.initTexture(device: device)
- super.init(device: device, inFunctionName: "reshape")
+ required init(device: MTLDevice, param: ReshapeParam ) {
+ param.output.initTexture(device: device, computePrecision: computePrecision)
+ if computePrecision == .Float32 {
+ super.init(device: device, inFunctionName: "reshape")
+ } else if computePrecision == .Float16 {
+ super.init(device: device, inFunctionName: "reshape_half")
+ } else {
+ fatalError()
}
-
- required init(device: MTLDevice, testParam: ReshapeTestParam) {
- super.init(device: device, inFunctionName: "reshape")
- }
-
- func compute(commandBuffer: MTLCommandBuffer, param: ReshapeParam ) throws {
- guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
- throw PaddleMobileError.predictError(message: " encoder is nil")
- }
- encoder.setTexture(param.input.metalTexture, index: 0)
- encoder.setTexture(param.output.metalTexture, index: 1)
- let id: [Int32] = (0..<4).map { Int32(param.input.dim[$0]) }
- let it: [Int32] = param.input.transpose.map { Int32($0) }
- let od: [Int32] = (0..<4).map { Int32(param.output.dim[$0]) }
- let ot: [Int32] = param.output.transpose.map { Int32($0) }
- var rmp = ReshapeMetalParam.init(
- idim: (id[0], id[1], id[2], id[3]),
- itrans: (it[0], it[1], it[2], it[3]),
- odim: (od[0], od[1], od[2], od[3]),
- otrans: (ot[0], ot[1], ot[2], ot[3])
- )
- encoder.setBytes(&rmp, length: MemoryLayout ) throws {
+ guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+ throw PaddleMobileError.predictError(message: " encoder is nil")
}
- func test(commandBuffer: MTLCommandBuffer, testParam: ReshapeTestParam) {
- guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
- fatalError()
- }
- encoder.setTexture(testParam.inputTexture, index: 0)
- encoder.setTexture(testParam.outputTexture, index: 1)
- var pm: ReshapeMetalParam = testParam.param
- encoder.setBytes(&pm, length: MemoryLayout ) {
- param.output.initTexture(device: device)
- super.init(device: device, inFunctionName: "softmax")
+ param.output.initTexture(device: device, computePrecision: computePrecision)
+ if computePrecision == .Float32 {
+ super.init(device: device, inFunctionName: "softmax")
+ } else if computePrecision == .Float16 {
+ super.init(device: device, inFunctionName: "softmax_half")
+ } else {
+ fatalError()
+ }
}
}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift
index 33e1219b4d0fff972d8db3d16fc7ce1477841351..6594b3474f0abb04364246830f79302f487af499 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift
@@ -41,33 +41,27 @@ struct TransposeTestParam: TestParam {
}
class TransposeKernel ) throws {
- guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
- throw PaddleMobileError.predictError(message: " encode is nil")
- }
-
- encoder.setTexture(param.input.metalTexture, index: 0)
- encoder.setTexture(param.output.metalTexture, index: 1)
- encoder.setBytes(&metalParam, length: MemoryLayout ) {
- param.output.initTexture(device: device, inTranspose: [0, 1, 2, 3])
- super.init(device: device, inFunctionName: "transpose")
+ param.output.initTexture(device: device, inTranspose: [0, 1, 2, 3], computePrecision: computePrecision)
+ if computePrecision == .Float16 {
+ super.init(device: device, inFunctionName: "transpose_half")
+ } else if computePrecision == .Float32 {
+ super.init(device: device, inFunctionName: "transpose")
+ } else {
+ fatalError()
+ }
var invT: [Int] = [0, 1, 2, 3]
for (i, v) in param.input.transpose.enumerated() {
invT[v] = i
}
var axis: [Int] = [0, 1, 2, 3]
-// var doNothing = false
-// if param.axis.count == param.input.transpose.count {
-// doNothing = param.axis == param.input.transpose.map { Int32($0) }
-// }
+ // var doNothing = false
+ // if param.axis.count == param.input.transpose.count {
+ // doNothing = param.axis == param.input.transpose.map { Int32($0) }
+ // }
for i in 0.. ) throws {
+ guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+ throw PaddleMobileError.predictError(message: " encode is nil")
+ }
+
+ encoder.setTexture(param.input.metalTexture, index: 0)
+ encoder.setTexture(param.output.metalTexture, index: 1)
+ encoder.setBytes(&metalParam, length: MemoryLayout >, Runable,
func delogOutput() {
print(" \(type) output: ")
- print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
+ print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3]), texturePrecision: computePrecision).strideArray())
// print("pool2d delog")
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift
index 10b5816d7b4528572cdc6b84d53b73499dde93b4..c7e049e3c1b21d9747acca8812abfff8c25d6d98 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift
@@ -51,13 +51,13 @@ class PreluOp >, Runabl
func delogOutput() {
print(" \(type) input: ")
- print(para.input.metalTexture.toTensor(dim: (n: para.input.originDim[0], c: para.input.originDim[1], h: para.input.originDim[2], w: para.input.originDim[3])).strideArray())
+ print(para.input.metalTexture.toTensor(dim: (n: para.input.originDim[0], c: para.input.originDim[1], h: para.input.originDim[2], w: para.input.originDim[3]), texturePrecision: computePrecision).strideArray())
print(" \(type) Alpha: ")
let _: Float32? = para.alpha.buffer.logDesc(header: " alpha: ", stridable: false)
print(" \(type) output: ")
- print(para.output.metalTexture.toTensor(dim: (n: para.output.originDim[0], c: para.output.originDim[1], h: para.output.originDim[2], w: para.output.originDim[3])).strideArray())
+ print(para.output.metalTexture.toTensor(dim: (n: para.output.originDim[0], c: para.output.originDim[1], h: para.output.originDim[2], w: para.output.originDim[3]), texturePrecision: computePrecision).strideArray())
}
// print("softmax delog")
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
index 7e82fdec37fb7bd66181fde3af01aedbaf87a023..6999043ab75532e6517398c5bbfe6b893b49cf57 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
@@ -39,7 +39,7 @@ class PriorBoxParam
}
func delogOutput() {
- print(" \(type) output: ")
// output
- let outputArray = para.output.metalTexture.floatArray { (o: Float32) -> Float32 in
- return o
+ print(" \(type) output: ")
+ let originDim = para.output.originDim
+ if para.output.transpose == [0, 1, 2, 3] {
+ let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]), texturePrecision: computePrecision)
+ print(outputArray.strideArray())
+ } else if para.output.transpose == [0, 2, 3, 1] {
+ print(para.output.metalTexture.toTensor(dim: (n: originDim[0], c: originDim[1], h: originDim[2], w: originDim[3]), texturePrecision: computePrecision).strideArray())
+ } else {
+ print(" not implement")
}
- print(outputArray)
-
// writeToLibrary(fileName: "box_out", array: outputArray)
// output variance
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
index c9f054c88af44ac3f5dd453b4696c7988d01fa8f..0325f860e078cf639c08e279970a105e3f562a32 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
@@ -46,7 +46,7 @@ class ReluOp >, Runable,
func delogOutput() {
print(" \(type) output: ")
- print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
+ print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3]), texturePrecision: computePrecision).strideArray())
}
}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
index 373c448b16d3a597f28884ee2e70b29c152f5526..451b064ce19e0e1cb70700d046b6ab059e6df9e3 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
@@ -76,7 +76,7 @@ class ReshapeOp