提交 6ec031ff 编写于 作者: L liuruilong

add dilation

上级 ee6ef4d9
...@@ -26,7 +26,7 @@ let modelHelperMap: [SupportModel : Net] = [.mobilenet_ssd : MobileNet_ssd_hand. ...@@ -26,7 +26,7 @@ let modelHelperMap: [SupportModel : Net] = [.mobilenet_ssd : MobileNet_ssd_hand.
enum SupportModel: String{ enum SupportModel: String{
// case mobilenet = "mobilenet" // case mobilenet = "mobilenet"
case mobilenet_ssd = "mobilenetssd" case mobilenet_ssd = "mobilenetssd"
case genet = "enet" case genet = "genet"
static func supportedModels() -> [SupportModel] { static func supportedModels() -> [SupportModel] {
//.mobilenet, //.mobilenet,
return [.mobilenet_ssd ,.genet] return [.mobilenet_ssd ,.genet]
...@@ -79,7 +79,7 @@ class ViewController: UIViewController { ...@@ -79,7 +79,7 @@ class ViewController: UIViewController {
return return
} }
do { do {
let max = 1 let max = 10
let startDate = Date.init() let startDate = Date.init()
for i in 0..<max { for i in 0..<max {
try net.predict(inTexture: inTexture) { [weak self] (result) in try net.predict(inTexture: inTexture) { [weak self] (result) in
...@@ -87,6 +87,7 @@ class ViewController: UIViewController { ...@@ -87,6 +87,7 @@ class ViewController: UIViewController {
fatalError() fatalError()
} }
print(result.resultArray)
if i == max - 1 { if i == max - 1 {
let time = Date.init().timeIntervalSince(startDate) let time = Date.init().timeIntervalSince(startDate)
DispatchQueue.main.async { DispatchQueue.main.async {
......
...@@ -699,6 +699,7 @@ ...@@ -699,6 +699,7 @@
"@executable_path/Frameworks", "@executable_path/Frameworks",
"@loader_path/Frameworks", "@loader_path/Frameworks",
); );
MACH_O_TYPE = mh_dylib;
MTL_LANGUAGE_REVISION = UseDeploymentTarget; MTL_LANGUAGE_REVISION = UseDeploymentTarget;
PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile"; PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile";
PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
...@@ -727,6 +728,7 @@ ...@@ -727,6 +728,7 @@
"@executable_path/Frameworks", "@executable_path/Frameworks",
"@loader_path/Frameworks", "@loader_path/Frameworks",
); );
MACH_O_TYPE = mh_dylib;
MTL_LANGUAGE_REVISION = UseDeploymentTarget; MTL_LANGUAGE_REVISION = UseDeploymentTarget;
PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile"; PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile";
PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
......
...@@ -342,7 +342,7 @@ public extension MTLTexture { ...@@ -342,7 +342,7 @@ public extension MTLTexture {
// n c h w - dim // n c h w - dim
func toTensor(dim: (n: Int, c: Int, h: Int, w: Int)) -> [Float32] { func toTensor(dim: (n: Int, c: Int, h: Int, w: Int)) -> [Float32] {
print("origin dim: \(dim)") // print("origin dim: \(dim)")
print("texture: ") print("texture: ")
print(self) print(self)
......
...@@ -314,7 +314,7 @@ public class PaddleMobileUnitTest { ...@@ -314,7 +314,7 @@ public class PaddleMobileUnitTest {
let offsetX = filterSize.width/2 - paddings.0 let offsetX = filterSize.width/2 - paddings.0
let offsetY = filterSize.height/2 - paddings.1 let offsetY = filterSize.height/2 - paddings.1
let metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: 0, strideX: UInt16(stride.0), strideY: UInt16(stride.1), paddedZ: UInt16(paddings.0)) let metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: 0, strideX: UInt16(stride.0), strideY: UInt16(stride.1), paddedZ: UInt16(paddings.0), dilationX: UInt16(1), dilationY: UInt16(1))
let param = ConvAddBatchNormReluTestParam.init(inInputTexture: inputeTexture, inOutputTexture: outputTexture, inMetalParam: metalParam, inFilterBuffer: filterBuffer, inBiaseBuffer: biaseBuffer, inNewScaleBuffer: newScalueBuffer, inNewBiaseBuffer: newBiaseBuffer, inFilterSize: filterSize) let param = ConvAddBatchNormReluTestParam.init(inInputTexture: inputeTexture, inOutputTexture: outputTexture, inMetalParam: metalParam, inFilterBuffer: filterBuffer, inBiaseBuffer: biaseBuffer, inNewScaleBuffer: newScalueBuffer, inNewBiaseBuffer: newBiaseBuffer, inFilterSize: filterSize)
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import Foundation import Foundation
let testTo = 12 let testTo = 54
public class ResultHolder<P: PrecisionType> { public class ResultHolder<P: PrecisionType> {
public let dim: [Int] public let dim: [Int]
...@@ -62,7 +62,7 @@ public class Executor<P: PrecisionType> { ...@@ -62,7 +62,7 @@ public class Executor<P: PrecisionType> {
queue = inQueue queue = inQueue
for block in inProgram.programDesc.blocks { for block in inProgram.programDesc.blocks {
//block.ops.count //block.ops.count
for i in 0..<testTo { for i in 0..<block.ops.count {
let op = block.ops[i] let op = block.ops[i]
do { do {
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope) let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
...@@ -124,13 +124,13 @@ public class Executor<P: PrecisionType> { ...@@ -124,13 +124,13 @@ public class Executor<P: PrecisionType> {
// print(stridableInput) // print(stridableInput)
// let _: Flo? = input.logDesc(header: "input: ", stridable: true) // let _: Flo? = input.logDesc(header: "input: ", stridable: true)
for i in 0..<self.ops.count { // for i in 0..<self.ops.count {
let op = self.ops[i] // let op = self.ops[i]
print(" 第 \(i) 个 op: ") // print(" 第 \(i) 个 op: ")
op.delogOutput() // op.delogOutput()
} // }
//
return // return
let afterDate = Date.init() let afterDate = Date.init()
...@@ -146,7 +146,6 @@ public class Executor<P: PrecisionType> { ...@@ -146,7 +146,6 @@ public class Executor<P: PrecisionType> {
}), inElapsedTime: afterDate.timeIntervalSince(beforeDate)) }), inElapsedTime: afterDate.timeIntervalSince(beforeDate))
} }
completionHandle(resultHolder) completionHandle(resultHolder)
} }
buffer.commit() buffer.commit()
......
...@@ -70,8 +70,8 @@ public class Loader<P: PrecisionType> { ...@@ -70,8 +70,8 @@ public class Loader<P: PrecisionType> {
*/ */
//现在模型传入模型为 Float 类型, 这块应该根据模型来 //现在模型传入模型为 Float 类型, 这块应该根据模型来
// let tmpCapacity = MemoryLayout<Float>.size * tensor.numel() // let tmpCapacity = MemoryLayout<Float>.size * tensor.numel()
// let tmpPointer = UnsafeMutablePointer<Float>.allocate(capacity: tmpCapacity); // let tmpPointer = UnsafeMutablePointer<Float>.allocate(capacity: tmpCapacity);
let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file) let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file)
guard bytesRead == tensor.data.size else { guard bytesRead == tensor.data.size else {
...@@ -79,12 +79,12 @@ public class Loader<P: PrecisionType> { ...@@ -79,12 +79,12 @@ public class Loader<P: PrecisionType> {
} }
// TODO: use script to convert // TODO: use script to convert
// let bytesRead = fread(tmpPointer, 1, tmpCapacity, file) // let bytesRead = fread(tmpPointer, 1, tmpCapacity, file)
// for i in 0..<tensor.numel() { // for i in 0..<tensor.numel() {
// tensor.data[i] = P.init(inFloat: tmpPointer[i]) // tensor.data[i] = P.init(inFloat: tmpPointer[i])
// } // }
// tmpPointer.deinitialize(count: tmpCapacity) // tmpPointer.deinitialize(count: tmpCapacity)
// tmpPointer.deallocate() // tmpPointer.deallocate()
nowIndex += bytesRead nowIndex += bytesRead
} }
...@@ -95,6 +95,7 @@ public class Loader<P: PrecisionType> { ...@@ -95,6 +95,7 @@ public class Loader<P: PrecisionType> {
} }
public init(){} public init(){}
public func load(device: MTLDevice, modelPath: String, paraPath: String) throws -> Program{ public func load(device: MTLDevice, modelPath: String, paraPath: String) throws -> Program{
guard let modelData = try? Data.init(contentsOf: URL.init(fileURLWithPath: modelPath)) else { guard let modelData = try? Data.init(contentsOf: URL.init(fileURLWithPath: modelPath)) else {
throw PaddleMobileError.loaderError(message: "load " + modelPath + " failed !") throw PaddleMobileError.loaderError(message: "load " + modelPath + " failed !")
} }
...@@ -120,6 +121,7 @@ public class Loader<P: PrecisionType> { ...@@ -120,6 +121,7 @@ public class Loader<P: PrecisionType> {
guard let firstOp = block.ops.first, let lastOp = block.ops.last else { guard let firstOp = block.ops.first, let lastOp = block.ops.last else {
throw PaddleMobileError.loaderError(message: "at least two operator") throw PaddleMobileError.loaderError(message: "at least two operator")
} }
guard firstOp.type == gFeedType, lastOp.type == gFetchType else { guard firstOp.type == gFeedType, lastOp.type == gFetchType else {
throw PaddleMobileError.loaderError(message: "the first op is not feed or the last op is not fetch") throw PaddleMobileError.loaderError(message: "the first op is not feed or the last op is not fetch")
} }
...@@ -158,7 +160,7 @@ public class Loader<P: PrecisionType> { ...@@ -158,7 +160,7 @@ public class Loader<P: PrecisionType> {
throw error throw error
} }
tensor.convert(to: DataLayout.NHWC()) tensor.convert(to: DataLayout.NHWC())
// tensor.initBuffer(device: device) // tensor.initBuffer(device: device)
scope[varDesc.name] = tensor scope[varDesc.name] = tensor
} else { } else {
let dim = Dim.init(inDim: tensorDesc.dims) let dim = Dim.init(inDim: tensorDesc.dims)
......
...@@ -97,6 +97,13 @@ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>, ...@@ -97,6 +97,13 @@ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>,
} }
func delogOutput() { func delogOutput() {
print("stride: ")
print(para.stride)
print("dilations: ")
print(para.dilations)
print(" \(type) output: ") print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
} }
......
...@@ -43,8 +43,15 @@ class ConvTransposeOp<P: PrecisionType>: Operator<ConvTransposeKernel<P>, ConvTr ...@@ -43,8 +43,15 @@ class ConvTransposeOp<P: PrecisionType>: Operator<ConvTransposeKernel<P>, ConvTr
} }
func delogOutput() { func delogOutput() {
print("conv transpose delog") print(" \(type) output: ")
let _: P? = para.input.metalTexture.logDesc(header: "conv transpose input: ", stridable: true) let originDim = para.output.originDim
let _: P? = para.output.metalTexture.logDesc(header: "conv transpose output: ", stridable: true) if para.output.transpose == [0, 1, 2, 3] {
let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]))
print(outputArray.strideArray())
} else if para.output.transpose == [0, 2, 3, 1] {
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
} else {
print(" not implement")
}
} }
} }
...@@ -61,11 +61,18 @@ class ElementwiseAddOp<P: PrecisionType>: Operator<ElementwiseAddKernel<P>, Elem ...@@ -61,11 +61,18 @@ class ElementwiseAddOp<P: PrecisionType>: Operator<ElementwiseAddKernel<P>, Elem
print(para.inputX.metalTexture.toTensor(dim: (n: para.inputX.tensorDim[0], c: para.inputX.tensorDim[1], h: para.inputX.tensorDim[2], w: para.inputX.tensorDim[3])).strideArray()) print(para.inputX.metalTexture.toTensor(dim: (n: para.inputX.tensorDim[0], c: para.inputX.tensorDim[1], h: para.inputX.tensorDim[2], w: para.inputX.tensorDim[3])).strideArray())
print(" \(type) inputY: ") print(" \(type) inputY: ")
print(para.inputY.metalTexture.toTensor(dim: (n: para.inputY.tensorDim[0], c: para.inputY.tensorDim[1], h: para.inputY.tensorDim[2], w: para.inputY.tensorDim[3])).strideArray()) print(para.inputY.metalTexture.toTensor(dim: (n: para.inputY.tensorDim[0], c: para.inputY.tensorDim[1], h: para.inputY.tensorDim[2], w: para.inputY.tensorDim[3])).strideArray())
print(" \(type) output: ") print(" \(type) output: ")
let originDim = para.output.originDim let originDim = para.output.originDim
if para.output.transpose == [0, 1, 2, 3] {
let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3])) let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]))
print(outputArray.strideArray()) print(outputArray.strideArray())
} else if para.output.transpose == [0, 2, 3, 1] {
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
} else {
print(" not implement")
}
} }
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
......
...@@ -75,7 +75,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable ...@@ -75,7 +75,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable
print("offset y: \(offsetY)") print("offset y: \(offsetY)")
let offsetZ = 0.0 let offsetZ = 0.0
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
var invs: [P] = [] var invs: [P] = []
let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self) let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self)
......
...@@ -27,8 +27,9 @@ class ConvAddKernel<P: PrecisionType>: Kernel, Computable { ...@@ -27,8 +27,9 @@ class ConvAddKernel<P: PrecisionType>: Kernel, Computable {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1]) param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1])
let offsetX = param.filter.width/2 - Int(param.paddings[0]) let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0])
let offsetY = param.filter.height/2 - Int(param.paddings[1])
let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1])
param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
...@@ -37,7 +38,11 @@ class ConvAddKernel<P: PrecisionType>: Kernel, Computable { ...@@ -37,7 +38,11 @@ class ConvAddKernel<P: PrecisionType>: Kernel, Computable {
print("offset y: \(offsetY)") print("offset y: \(offsetY)")
let offsetZ = 0.0 let offsetZ = 0.0
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) let inMetalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
print("metal param: ")
print(inMetalParam)
metalParam = inMetalParam
} }
func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam<P>) throws { func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam<P>) throws {
......
...@@ -81,7 +81,7 @@ class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable { ...@@ -81,7 +81,7 @@ class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
let offsetZ = 0.0 let offsetZ = 0.0
print(" fuck ") print(" fuck ")
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
var invs: [P] = [] var invs: [P] = []
let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self) let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self)
......
...@@ -21,6 +21,8 @@ public struct MetalConvParam { ...@@ -21,6 +21,8 @@ public struct MetalConvParam {
let strideX: UInt16 let strideX: UInt16
let strideY: UInt16 let strideY: UInt16
let paddedZ: UInt16 let paddedZ: UInt16
let dilationX: UInt16
let dilationY: UInt16
} }
class ConvKernel<P: PrecisionType>: Kernel, Computable { class ConvKernel<P: PrecisionType>: Kernel, Computable {
...@@ -39,7 +41,7 @@ class ConvKernel<P: PrecisionType>: Kernel, Computable { ...@@ -39,7 +41,7 @@ class ConvKernel<P: PrecisionType>: Kernel, Computable {
let offsetZ = 0.0 let offsetZ = 0.0
param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
} }
func compute(commandBuffer: MTLCommandBuffer, param: ConvParam<P>) throws { func compute(commandBuffer: MTLCommandBuffer, param: ConvParam<P>) throws {
......
...@@ -45,6 +45,7 @@ class ConvTransposeKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -45,6 +45,7 @@ class ConvTransposeKernel<P: PrecisionType>: Kernel, Computable{
metalParam = MetalConvTransposeParam.init(kernelW: kernelWidth, kernelH: kernelHeight, strideX: strideX, strideY: strideY, paddingX: paddingX, paddingY: paddingY, dilationX: dilationX, dilationY: dilationY) metalParam = MetalConvTransposeParam.init(kernelW: kernelWidth, kernelH: kernelHeight, strideX: strideX, strideY: strideY, paddingX: paddingX, paddingY: paddingY, dilationX: dilationX, dilationY: dilationY)
param.output.initTexture(device: device, inTranspose: param.input.transpose) param.output.initTexture(device: device, inTranspose: param.input.transpose)
param.filter.initBuffer(device: device)
} }
func compute(commandBuffer: MTLCommandBuffer, param: ConvTransposeParam<P>) throws { func compute(commandBuffer: MTLCommandBuffer, param: ConvTransposeParam<P>) throws {
......
...@@ -55,7 +55,7 @@ class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable { ...@@ -55,7 +55,7 @@ class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable {
} }
emp.yoff = 4 - Int32(param.inputY.tensorDim.cout()) emp.yoff = 4 - Int32(param.inputY.tensorDim.cout())
if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) { if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) {
print("===> elementwise_add fast!!!") // print("===> elementwise_add fast!!!")
emp.fast = 1 emp.fast = 1
} }
......
...@@ -21,9 +21,10 @@ struct MetalConvParam { ...@@ -21,9 +21,10 @@ struct MetalConvParam {
short offsetZ; short offsetZ;
ushort strideX; ushort strideX;
ushort strideY; ushort strideY;
ushort dilationX;
ushort dilationY;
}; };
kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]], kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]], texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]], constant MetalConvParam &param [[buffer(0)]],
...@@ -39,7 +40,6 @@ kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array<half, access::samp ...@@ -39,7 +40,6 @@ kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array<half, access::samp
return; return;
} }
ushort2 stride = ushort2(param.strideX, param.strideY); ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
...@@ -556,17 +556,20 @@ kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[text ...@@ -556,17 +556,20 @@ kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[text
float4 output = float4(0.0); float4 output = float4(0.0);
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
float4 input[9]; float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) { for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i);
for (int j = 0; j < 9; ++j) { for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x); output.x += dot(input[j], weight_x);
......
...@@ -48,7 +48,9 @@ extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible { ...@@ -48,7 +48,9 @@ extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible {
var description: String { var description: String {
var str = "" var str = ""
for op in ops { for i in 0..<ops.count {
str += " op \(i): "
let op = ops[i]
str += op.description str += op.description
} }
......
...@@ -57,7 +57,7 @@ class Tensor<P: PrecisionType>: Tensorial { ...@@ -57,7 +57,7 @@ class Tensor<P: PrecisionType>: Tensorial {
pointer.deallocate() pointer.deallocate()
} }
deinit { deinit {
// release() // release()
} }
} }
...@@ -163,12 +163,13 @@ class Tensor<P: PrecisionType>: Tensorial { ...@@ -163,12 +163,13 @@ class Tensor<P: PrecisionType>: Tensorial {
} }
} }
} else if dim.cout() == 1 { } else if dim.cout() == 1 {
buffer = device.makeBuffer(length: numel() * precisionSize) let num = ((numel() + 3) / 4) * 4
buffer = device.makeBuffer(length: num * precisionSize)
switch precision { switch precision {
case .Float32: case .Float32:
buffer?.contents().copyMemory(from: data.pointer, byteCount: numel() * MemoryLayout<P>.stride) buffer?.contents().copyMemory(from: data.pointer, byteCount: num * MemoryLayout<P>.stride)
case .Float16: case .Float16:
float32ToFloat16(input: floatPointer, output: buffer.contents(), count: numel()) float32ToFloat16(input: floatPointer, output: buffer.contents(), count: num)
} }
} else { } else {
fatalError(" not support !") fatalError(" not support !")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册