Commit 78271926 authored by Ruilong Liu, committed by GitHub

Merge pull request #841 from codeWorm2015/metal

update
@@ -342,6 +342,10 @@ public extension MTLTexture {
   // n c h w - dim
   func toTensor(dim: (n: Int, c: Int, h: Int, w: Int)) -> [Float32] {
+    print("origin dim: \(dim)")
+    print("texture: ")
+    print(self)
     let textureArray = floatArray { (i : Float32) -> Float32 in
       return i
     }
@@ -358,8 +362,42 @@
         }
       }
     }
+    print(" tensor count -- \(output.count)")
     return output
   }
+  func realNHWC(dim: (n: Int, h: Int, w: Int, c: Int)) -> [Float32] {
+    print("origin dim: \(dim)")
+    print("texture: ")
+    print(self)
+    let textureArray = floatArray { (i : Float32) -> Float32 in
+      return i
+    }
+    var output: [Float32] = []
+    let numOfASlice = dim.h * dim.w * 4
+    for h in 0..<dim.h {
+      for w in 0..<dim.w {
+        for sliceIndex in 0..<arrayLength {
+          if sliceIndex * 4 + 4 > dim.c {
+            for i in 0..<((sliceIndex * 4 + 4) - dim.c) {
+              let value = textureArray[sliceIndex * numOfASlice + h * dim.w * 4 + w * 4 + i]
+              output.append(value)
+            }
+          } else {
+            for i in 0..<4 {
+              let value = textureArray[sliceIndex * numOfASlice + h * dim.w * 4 + w * 4 + i]
+              output.append(value)
+            }
+          }
+        }
+      }
+    }
+    print(" tensor count -- \(output.count)")
+    return output
+  }
 }
......
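The realNHWC helper added above walks the texture's channel slices (four channels per slice, each slice stored as an h * w * 4 block) and reassembles the values in NHWC order for debug printing. Below is a minimal standalone sketch of the same index math on a plain Float32 array; the unpackNHWC name and the sample data are illustrative only, not part of this commit. One deliberate difference: for the last, partially filled slice the sketch copies the dim.c - sliceIndex * 4 valid channels, whereas the remainder branch in the diff iterates over (sliceIndex * 4 + 4) - dim.c, i.e. the padding count.

```swift
import Foundation

// Sketch: unpack a slice-packed array (4 channels per slice, each slice laid out
// as h * w * 4 elements) into NHWC order. `unpackNHWC` is a hypothetical helper,
// not part of the paddle-mobile sources; it mirrors realNHWC's index arithmetic.
func unpackNHWC(_ packed: [Float32], h: Int, w: Int, c: Int) -> [Float32] {
  let sliceCount = (c + 3) / 4     // number of 4-channel slices
  let numOfASlice = h * w * 4      // elements per slice
  var output: [Float32] = []
  for hi in 0..<h {
    for wi in 0..<w {
      for sliceIndex in 0..<sliceCount {
        // Copy only the valid channels of the (possibly padded) last slice.
        let validChannels = min(4, c - sliceIndex * 4)
        for i in 0..<validChannels {
          output.append(packed[sliceIndex * numOfASlice + hi * w * 4 + wi * 4 + i])
        }
      }
    }
  }
  return output
}

// Usage: 1 x 1 spatial size, 6 channels -> 2 slices, second slice half padding.
let packed: [Float32] = [0, 1, 2, 3,   // slice 0: channels 0...3
                         4, 5, 0, 0]   // slice 1: channels 4...5 plus padding
print(unpackNHWC(packed, h: 1, w: 1, c: 6))  // [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
```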
@@ -14,7 +14,7 @@
 import Foundation
-let testTo = 41
+let testTo = 48
 public class ResultHolder<P: PrecisionType> {
   public let dim: [Int]
......
@@ -47,9 +47,9 @@ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>,
   func delogOutput() {
     print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
     // print(" conv add: ")
     // print(para.input.metalTexture)
     // print(" filter array: ")
......
@@ -50,7 +50,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable
   required init(device: MTLDevice, param: ConvAddBatchNormReluParam<P>) {
-    param.output.initTexture(device: device, inTranspose: [0, 3, 1, 2])
+    param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1])
     if param.filter.width == 1 && param.filter.height == 1 {
       super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1")
......
@@ -25,7 +25,7 @@ class ConvAddKernel<P: PrecisionType>: Kernel, Computable {
       super.init(device: device, inFunctionName: "conv_add_3x3")
     }
-    param.output.initTexture(device: device, inTranspose: [0, 3, 1, 2])
+    param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1])
     let offsetX = param.filter.width/2 - Int(param.paddings[0])
     let offsetY = param.filter.height/2 - Int(param.paddings[1])
......
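Both conv-add kernels now initialize their output texture with inTranspose: [0, 2, 3, 1] instead of [0, 3, 1, 2]. Assuming the common convention that perm[i] selects the source axis placed at destination position i (the repository may use the inverse convention), [0, 2, 3, 1] reorders an NCHW layout into NHWC, which lines up with the NHWC-oriented debug readback added in this commit. A small sketch of that reordering follows; permuteAxes and the label array are illustrative, not repo code.

```swift
// Sketch: effect of an axis permutation such as the kernels' inTranspose argument,
// assuming perm[i] names the source axis that ends up at destination position i.
func permuteAxes<T>(_ axes: [T], by perm: [Int]) -> [T] {
  return perm.map { axes[$0] }
}

let nchw = ["n", "c", "h", "w"]
print(permuteAxes(nchw, by: [0, 2, 3, 1]))  // ["n", "h", "w", "c"] -> NHWC
```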
@@ -49,8 +49,12 @@ class TransposeOp<P: PrecisionType>: Operator<TransposeKernel<P>, TransposeParam
     }
   }
   func delogOutput() {
+    print(para.input.metalTexture.toTensor(dim: (n: para.input.tensorDim[0], c: para.input.tensorDim[1], h: para.input.tensorDim[2], w: para.input.tensorDim[3])).strideArray())
+    print(para.output.metalTexture.toTensor(dim: (n: 1, c: 21, h: 19, w: 19)).strideArray())
+    let originDim = para.output.tensorDim
+    let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]))
+    print(outputArray.strideArray())
     // let inputArray: [Float32] = para.input.metalTexture.floatArray { (ele: Float32) -> Float32 in
......