提交 b737f62a 编写于 作者: D dolphin8 提交者: GitHub

Merge pull request #865 from dolphin8/metal

elementwise_add
......@@ -14,7 +14,7 @@
import Foundation
let testTo = 7
let testTo = 12
public class ResultHolder<P: PrecisionType> {
public let dim: [Int]
......
......@@ -68,6 +68,7 @@ class ConcatOp<P: PrecisionType>: Operator<ConcatKernel<P>, ConcatParam<P>>, Run
let originDim = para.output.originDim
let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]))
print(outputArray.strideArray())
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
}
}
......
......@@ -56,7 +56,24 @@ class ElementwiseAddOp<P: PrecisionType>: Operator<ElementwiseAddKernel<P>, Elem
// para.output.dim = para.input.dim
}
func delogOutput() {
print(" \(type) inputX: ")
print(para.inputX.metalTexture.toTensor(dim: (n: para.inputX.tensorDim[0], c: para.inputX.tensorDim[1], h: para.inputX.tensorDim[2], w: para.inputX.tensorDim[3])).strideArray())
print(" \(type) inputY: ")
print(para.inputY.metalTexture.toTensor(dim: (n: para.inputY.tensorDim[0], c: para.inputY.tensorDim[1], h: para.inputY.tensorDim[2], w: para.inputY.tensorDim[3])).strideArray())
print(" \(type) output: ")
let originDim = para.output.originDim
let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]))
print(outputArray.strideArray())
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
}
......
......@@ -55,6 +55,7 @@ class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable {
}
emp.yoff = 4 - Int32(param.inputY.tensorDim.cout())
if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) {
print("===> elementwise_add fast!!!")
emp.fast = 1
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册