未验证 提交 ea5d520a 编写于 作者: R Ruilong Liu 提交者: GitHub

Merge pull request #840 from codeWorm2015/metal

update
...@@ -339,6 +339,27 @@ public extension MTLTexture { ...@@ -339,6 +339,27 @@ public extension MTLTexture {
return nil return nil
} }
// n c h w - dim
/// Converts this texture's channel-packed layout into a flat NCHW tensor.
///
/// The texture stores channels in groups of 4 per array slice (slice `s`
/// holds logical channels `s*4 ..< s*4+4`); this walks slices in order and
/// emits each valid channel as a contiguous h×w plane, skipping the padding
/// channels when `dim.c` is not a multiple of 4.
/// - Parameter dim: logical tensor dims (n, c, h, w). NOTE(review): `dim.n`
///   is only used for capacity reservation — batch > 1 looks unsupported; confirm.
/// - Returns: `Float32` values in NCHW order (`dim.c * dim.h * dim.w` elements).
func toTensor(dim: (n: Int, c: Int, h: Int, w: Int)) -> [Float32] {
  let textureArray = floatArray { (i: Float32) -> Float32 in
    return i
  }
  var output: [Float32] = []
  // Final element count is known up front; avoid repeated reallocation.
  output.reserveCapacity(dim.n * dim.c * dim.h * dim.w)
  for s in 0..<arrayLength {
    for c in 0..<4 {
      // Hoisted out of the h/w loops: whether this slice channel is real
      // payload or 4-channel padding does not depend on h or w.
      guard (s * 4 + c) < dim.c else { continue }
      for h in 0..<dim.h {
        for w in 0..<dim.w {
          // Source layout: slice-major, then row h, column w, 4 interleaved channels.
          let textureValue = textureArray[dim.w * dim.h * 4 * s + h * dim.w * 4 + w * 4 + c]
          output.append(textureValue)
        }
      }
    }
  }
  return output
}
} }
......
...@@ -45,7 +45,10 @@ class ConvAddParam<P: PrecisionType>: OpParam { ...@@ -45,7 +45,10 @@ class ConvAddParam<P: PrecisionType>: OpParam {
class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>, Runable, Creator, InferShaperable, Fusion{ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>, Runable, Creator, InferShaperable, Fusion{
func delogOutput() { func delogOutput() {
print(" conv add: ") print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
// print(" conv add: ")
// print(para.input.metalTexture) // print(para.input.metalTexture)
...@@ -53,16 +56,16 @@ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>, ...@@ -53,16 +56,16 @@ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>,
// let filterArray: [P] = para.filter.buffer.array() // let filterArray: [P] = para.filter.buffer.array()
// print(filterArray) // print(filterArray)
let input = para.input.metalTexture.floatArray { (p: P) -> P in // let input = para.input.metalTexture.floatArray { (p: P) -> P in
return p // return p
} // }
// print(input) // print(input)
let output = para.output.metalTexture.floatArray { (p: P) -> P in // let output = para.output.metalTexture.floatArray { (p: P) -> P in
return p // return p
} // }
// print(para.output.metalTexture) // print(para.output.metalTexture)
print(output) // print(output)
} }
......
...@@ -159,19 +159,23 @@ class ConvBNReluOp<P: PrecisionType>: Operator<ConvBNReluKernel<P>, ConvBNReluPa ...@@ -159,19 +159,23 @@ class ConvBNReluOp<P: PrecisionType>: Operator<ConvBNReluKernel<P>, ConvBNReluPa
// print("index: \(i) \(newBiase![i]) ") // print("index: \(i) \(newBiase![i]) ")
// } // }
print(para.output.metalTexture) // print(para.output.metalTexture)
//
let output = para.output.metalTexture.floatArray { (p: P) -> P in //
return p
}
print(output)
// //
writeToLibrary(fileName: "batch_norm_34.tmp_2", array: output) // let output = para.output.metalTexture.floatArray { (p: P) -> P in
print(" write done") // return p
// }
// print(output)
// //
//
// writeToLibrary(fileName: "batch_norm_34.tmp_2", array: output)
// print(" write done")
//
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
// let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: true) // let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: true)
} }
......
...@@ -50,7 +50,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable ...@@ -50,7 +50,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable
required init(device: MTLDevice, param: ConvAddBatchNormReluParam<P>) { required init(device: MTLDevice, param: ConvAddBatchNormReluParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1]) param.output.initTexture(device: device, inTranspose: [0, 3, 1, 2])
if param.filter.width == 1 && param.filter.height == 1 { if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1")
......
...@@ -25,7 +25,7 @@ class ConvAddKernel<P: PrecisionType>: Kernel, Computable { ...@@ -25,7 +25,7 @@ class ConvAddKernel<P: PrecisionType>: Kernel, Computable {
super.init(device: device, inFunctionName: "conv_add_3x3") super.init(device: device, inFunctionName: "conv_add_3x3")
} }
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1]) param.output.initTexture(device: device, inTranspose: [0, 3, 1, 2])
let offsetX = param.filter.width/2 - Int(param.paddings[0]) let offsetX = param.filter.width/2 - Int(param.paddings[0])
let offsetY = param.filter.height/2 - Int(param.paddings[1]) let offsetY = param.filter.height/2 - Int(param.paddings[1])
......
...@@ -49,17 +49,21 @@ class TransposeOp<P: PrecisionType>: Operator<TransposeKernel<P>, TransposeParam ...@@ -49,17 +49,21 @@ class TransposeOp<P: PrecisionType>: Operator<TransposeKernel<P>, TransposeParam
} }
} }
func delogOutput() { func delogOutput() {
let inputArray: [Float32] = para.input.metalTexture.floatArray { (ele: Float32) -> Float32 in
return ele
}
print(inputArray.strideArray()) print(para.output.metalTexture.toTensor(dim: (n: 1, c: 21, h: 19, w: 19)).strideArray())
let outputArray: [Float32] = para.output.metalTexture.floatArray { (ele: Float32) -> Float32 in
return ele // let inputArray: [Float32] = para.input.metalTexture.floatArray { (ele: Float32) -> Float32 in
} // return ele
// }
print(outputArray.strideArray()) //
// print(inputArray.strideArray())
//
// let outputArray: [Float32] = para.output.metalTexture.floatArray { (ele: Float32) -> Float32 in
// return ele
// }
//
// print(outputArray.strideArray())
// writeToLibrary(fileName: "transpose_ouput", array: outputArray) // writeToLibrary(fileName: "transpose_ouput", array: outputArray)
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册