提交 6ec031ff 编写于 作者: L liuruilong

add dilation

上级 ee6ef4d9
......@@ -26,7 +26,7 @@ let modelHelperMap: [SupportModel : Net] = [.mobilenet_ssd : MobileNet_ssd_hand.
enum SupportModel: String{
// case mobilenet = "mobilenet"
case mobilenet_ssd = "mobilenetssd"
case genet = "enet"
case genet = "genet"
static func supportedModels() -> [SupportModel] {
//.mobilenet,
return [.mobilenet_ssd ,.genet]
......@@ -79,7 +79,7 @@ class ViewController: UIViewController {
return
}
do {
let max = 1
let max = 10
let startDate = Date.init()
for i in 0..<max {
try net.predict(inTexture: inTexture) { [weak self] (result) in
......@@ -87,6 +87,7 @@ class ViewController: UIViewController {
fatalError()
}
print(result.resultArray)
if i == max - 1 {
let time = Date.init().timeIntervalSince(startDate)
DispatchQueue.main.async {
......
......@@ -699,6 +699,7 @@
"@executable_path/Frameworks",
"@loader_path/Frameworks",
);
MACH_O_TYPE = mh_dylib;
MTL_LANGUAGE_REVISION = UseDeploymentTarget;
PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile";
PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
......@@ -727,6 +728,7 @@
"@executable_path/Frameworks",
"@loader_path/Frameworks",
);
MACH_O_TYPE = mh_dylib;
MTL_LANGUAGE_REVISION = UseDeploymentTarget;
PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile";
PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
......
......@@ -342,7 +342,7 @@ public extension MTLTexture {
// n c h w - dim
func toTensor(dim: (n: Int, c: Int, h: Int, w: Int)) -> [Float32] {
print("origin dim: \(dim)")
// print("origin dim: \(dim)")
print("texture: ")
print(self)
......
......@@ -314,7 +314,7 @@ public class PaddleMobileUnitTest {
let offsetX = filterSize.width/2 - paddings.0
let offsetY = filterSize.height/2 - paddings.1
let metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: 0, strideX: UInt16(stride.0), strideY: UInt16(stride.1), paddedZ: UInt16(paddings.0))
let metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: 0, strideX: UInt16(stride.0), strideY: UInt16(stride.1), paddedZ: UInt16(paddings.0), dilationX: UInt16(1), dilationY: UInt16(1))
let param = ConvAddBatchNormReluTestParam.init(inInputTexture: inputeTexture, inOutputTexture: outputTexture, inMetalParam: metalParam, inFilterBuffer: filterBuffer, inBiaseBuffer: biaseBuffer, inNewScaleBuffer: newScalueBuffer, inNewBiaseBuffer: newBiaseBuffer, inFilterSize: filterSize)
......
......@@ -14,7 +14,7 @@
import Foundation
let testTo = 12
let testTo = 54
public class ResultHolder<P: PrecisionType> {
public let dim: [Int]
......@@ -62,7 +62,7 @@ public class Executor<P: PrecisionType> {
queue = inQueue
for block in inProgram.programDesc.blocks {
//block.ops.count
for i in 0..<testTo {
for i in 0..<block.ops.count {
let op = block.ops[i]
do {
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
......@@ -124,13 +124,13 @@ public class Executor<P: PrecisionType> {
// print(stridableInput)
// let _: Flo? = input.logDesc(header: "input: ", stridable: true)
for i in 0..<self.ops.count {
let op = self.ops[i]
print(" 第 \(i) 个 op: ")
op.delogOutput()
}
return
// for i in 0..<self.ops.count {
// let op = self.ops[i]
// print(" 第 \(i) 个 op: ")
// op.delogOutput()
// }
//
// return
let afterDate = Date.init()
......@@ -146,7 +146,6 @@ public class Executor<P: PrecisionType> {
}), inElapsedTime: afterDate.timeIntervalSince(beforeDate))
}
completionHandle(resultHolder)
}
buffer.commit()
......
......@@ -70,8 +70,8 @@ public class Loader<P: PrecisionType> {
*/
// The model passed in is currently Float-typed; this should instead be determined by the model.
// let tmpCapacity = MemoryLayout<Float>.size * tensor.numel()
// let tmpPointer = UnsafeMutablePointer<Float>.allocate(capacity: tmpCapacity);
// let tmpCapacity = MemoryLayout<Float>.size * tensor.numel()
// let tmpPointer = UnsafeMutablePointer<Float>.allocate(capacity: tmpCapacity);
let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file)
guard bytesRead == tensor.data.size else {
......@@ -79,12 +79,12 @@ public class Loader<P: PrecisionType> {
}
// TODO: use script to convert
// let bytesRead = fread(tmpPointer, 1, tmpCapacity, file)
// for i in 0..<tensor.numel() {
// tensor.data[i] = P.init(inFloat: tmpPointer[i])
// }
// tmpPointer.deinitialize(count: tmpCapacity)
// tmpPointer.deallocate()
// let bytesRead = fread(tmpPointer, 1, tmpCapacity, file)
// for i in 0..<tensor.numel() {
// tensor.data[i] = P.init(inFloat: tmpPointer[i])
// }
// tmpPointer.deinitialize(count: tmpCapacity)
// tmpPointer.deallocate()
nowIndex += bytesRead
}
......@@ -95,6 +95,7 @@ public class Loader<P: PrecisionType> {
}
public init(){}
public func load(device: MTLDevice, modelPath: String, paraPath: String) throws -> Program{
guard let modelData = try? Data.init(contentsOf: URL.init(fileURLWithPath: modelPath)) else {
throw PaddleMobileError.loaderError(message: "load " + modelPath + " failed !")
}
......@@ -120,6 +121,7 @@ public class Loader<P: PrecisionType> {
guard let firstOp = block.ops.first, let lastOp = block.ops.last else {
throw PaddleMobileError.loaderError(message: "at least two operator")
}
guard firstOp.type == gFeedType, lastOp.type == gFetchType else {
throw PaddleMobileError.loaderError(message: "the first op is not feed or the last op is not fetch")
}
......@@ -158,7 +160,7 @@ public class Loader<P: PrecisionType> {
throw error
}
tensor.convert(to: DataLayout.NHWC())
// tensor.initBuffer(device: device)
// tensor.initBuffer(device: device)
scope[varDesc.name] = tensor
} else {
let dim = Dim.init(inDim: tensorDesc.dims)
......
......@@ -97,6 +97,13 @@ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>,
}
/// Prints this op's stride and dilations, followed by the contents of its output texture.
func delogOutput() {
    // Convolution settings, each preceded by its own label line.
    print("stride: ")
    print(para.stride)
    print("dilations: ")
    print(para.dilations)

    // Read the output texture back, passing tensorDim[0...3] as the n/c/h/w extents.
    print(" \(type) output: ")
    let outputDim = para.output.tensorDim
    let nchw = (n: outputDim[0], c: outputDim[1], h: outputDim[2], w: outputDim[3])
    let values = para.output.metalTexture.toTensor(dim: nchw)
    print(values.strideArray())
}
......
......@@ -43,8 +43,15 @@ class ConvTransposeOp<P: PrecisionType>: Operator<ConvTransposeKernel<P>, ConvTr
}
// Prints diagnostic information about this op's input and output textures.
// NOTE(review): this listing appears to interleave pre- and post-change lines of a diff
// (both the logDesc calls and the transpose-based dump are present) — confirm which are current.
func delogOutput() {
print("conv transpose delog")
// The logDesc results are discarded; the `P?` annotation presumably just fixes the generic return type.
let _: P? = para.input.metalTexture.logDesc(header: "conv transpose input: ", stridable: true)
let _: P? = para.output.metalTexture.logDesc(header: "conv transpose output: ", stridable: true)
print(" \(type) output: ")
let originDim = para.output.originDim
// Choose how to read the texture back based on the output's transpose order.
if para.output.transpose == [0, 1, 2, 3] {
// Identity order: read via realNHWC, passing originDim[0...3] as n, h, w, c.
let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]))
print(outputArray.strideArray())
} else if para.output.transpose == [0, 2, 3, 1] {
// [0, 2, 3, 1] order: read via toTensor, passing tensorDim[0...3] as n, c, h, w.
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
} else {
// Any other transpose order has no dump path here.
print(" not implement")
}
}
}
......@@ -61,11 +61,18 @@ class ElementwiseAddOp<P: PrecisionType>: Operator<ElementwiseAddKernel<P>, Elem
print(para.inputX.metalTexture.toTensor(dim: (n: para.inputX.tensorDim[0], c: para.inputX.tensorDim[1], h: para.inputX.tensorDim[2], w: para.inputX.tensorDim[3])).strideArray())
print(" \(type) inputY: ")
print(para.inputY.metalTexture.toTensor(dim: (n: para.inputY.tensorDim[0], c: para.inputY.tensorDim[1], h: para.inputY.tensorDim[2], w: para.inputY.tensorDim[3])).strideArray())
print(" \(type) output: ")
let originDim = para.output.originDim
if para.output.transpose == [0, 1, 2, 3] {
let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3]))
print(outputArray.strideArray())
} else if para.output.transpose == [0, 2, 3, 1] {
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
} else {
print(" not implement")
}
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
......
......@@ -75,7 +75,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable
print("offset y: \(offsetY)")
let offsetZ = 0.0
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]))
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
var invs: [P] = []
let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self)
......
......@@ -27,8 +27,9 @@ class ConvAddKernel<P: PrecisionType>: Kernel, Computable {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1])
let offsetX = param.filter.width/2 - Int(param.paddings[0])
let offsetY = param.filter.height/2 - Int(param.paddings[1])
let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0])
let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1])
param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
......@@ -37,7 +38,11 @@ class ConvAddKernel<P: PrecisionType>: Kernel, Computable {
print("offset y: \(offsetY)")
let offsetZ = 0.0
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]))
let inMetalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
print("metal param: ")
print(inMetalParam)
metalParam = inMetalParam
}
func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam<P>) throws {
......
......@@ -81,7 +81,7 @@ class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
let offsetZ = 0.0
print(" fuck ")
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]))
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
var invs: [P] = []
let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self)
......
......@@ -21,6 +21,8 @@ public struct MetalConvParam {
let strideX: UInt16
let strideY: UInt16
let paddedZ: UInt16
let dilationX: UInt16
let dilationY: UInt16
}
class ConvKernel<P: PrecisionType>: Kernel, Computable {
......@@ -39,7 +41,7 @@ class ConvKernel<P: PrecisionType>: Kernel, Computable {
let offsetZ = 0.0
param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]))
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
}
func compute(commandBuffer: MTLCommandBuffer, param: ConvParam<P>) throws {
......
......@@ -45,6 +45,7 @@ class ConvTransposeKernel<P: PrecisionType>: Kernel, Computable{
metalParam = MetalConvTransposeParam.init(kernelW: kernelWidth, kernelH: kernelHeight, strideX: strideX, strideY: strideY, paddingX: paddingX, paddingY: paddingY, dilationX: dilationX, dilationY: dilationY)
param.output.initTexture(device: device, inTranspose: param.input.transpose)
param.filter.initBuffer(device: device)
}
func compute(commandBuffer: MTLCommandBuffer, param: ConvTransposeParam<P>) throws {
......
......@@ -55,7 +55,7 @@ class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable {
}
emp.yoff = 4 - Int32(param.inputY.tensorDim.cout())
if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) {
print("===> elementwise_add fast!!!")
// print("===> elementwise_add fast!!!")
emp.fast = 1
}
......
......@@ -21,9 +21,10 @@ struct MetalConvParam {
short offsetZ;
ushort strideX;
ushort strideY;
ushort dilationX;
ushort dilationY;
};
kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
......@@ -39,7 +40,6 @@ kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array<half, access::samp
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
......@@ -556,17 +556,20 @@ kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[text
float4 output = float4(0.0);
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
......
......@@ -48,7 +48,9 @@ extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible {
var description: String {
var str = ""
for op in ops {
for i in 0..<ops.count {
str += " op \(i): "
let op = ops[i]
str += op.description
}
......
......@@ -57,7 +57,7 @@ class Tensor<P: PrecisionType>: Tensorial {
pointer.deallocate()
}
// The release() call below is commented out, so this deinitializer currently does nothing.
// NOTE(review): presumably the underlying storage must then be freed by some other path — confirm.
deinit {
// release()
// release()
}
}
......@@ -163,12 +163,13 @@ class Tensor<P: PrecisionType>: Tensorial {
}
}
} else if dim.cout() == 1 {
buffer = device.makeBuffer(length: numel() * precisionSize)
let num = ((numel() + 3) / 4) * 4
buffer = device.makeBuffer(length: num * precisionSize)
switch precision {
case .Float32:
buffer?.contents().copyMemory(from: data.pointer, byteCount: numel() * MemoryLayout<P>.stride)
buffer?.contents().copyMemory(from: data.pointer, byteCount: num * MemoryLayout<P>.stride)
case .Float16:
float32ToFloat16(input: floatPointer, output: buffer.contents(), count: numel())
float32ToFloat16(input: floatPointer, output: buffer.contents(), count: num)
}
} else {
fatalError(" not support !")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册