mps and incomplete interface

e32e90c3 · liuruilong · b378e1c2 · e32e90c3 · e32e90c3 · e32e90c3
5 changed files
--- a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PaddleMobile.swift
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PaddleMobile.swift
+//
+//  PaddleMobile.swift
+//  paddle-mobile-demo
+//
+//  Created by liuRuiLong on 2018/9/5.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+import Foundation
--- a/metal/paddle-mobile/paddle-mobile/Operators/CNNMPSConvOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/CNNMPSConvOp.swift
+//
+//  CNNMPSConvOp.swift
+//  paddle-mobile
+import Foundation
+/// Test-only parameter bundle for the MPS convolution kernel.
+///
+/// The filter and bias values are copied into memory owned by this object, so
+/// `filterPointer` and `biasePointer` stay valid until the object is released.
+class CNNMPSConvTestParam: TestParam {
+    /// Receives the texture produced by the kernel's test path.
+    var outputTexture: MTLTexture?
+    var metalParam: MetalConvParam
+    /// Owned copy of the filter weights (`Float` elements), exposed as a raw pointer.
+    let filterPointer: UnsafeMutableRawPointer
+    /// Owned copy of the bias terms.
+    let biasePointer: UnsafeMutablePointer<Float>
+    let filterSize: (width: Int, height: Int, channel: Int)
+    /// - Parameters:
+    ///   - inMetalParam: Convolution parameters forwarded unchanged.
+    ///   - inFilter: Filter weights; copied.
+    ///   - inBiase: Bias terms; copied.
+    ///   - inFilterSize: Kernel width/height and channel count.
+    init(inMetalParam: MetalConvParam, inFilter: [Float], inBiase: [Float], inFilterSize: (width: Int, height: Int, channel: Int)) {
+        metalParam = inMetalParam
+        // An array passed where a pointer is expected is only guaranteed to be
+        // valid for the duration of that call, so storing the result of
+        // `UnsafeMutableRawPointer(mutating: inFilter)` keeps a dangling address.
+        // Copy the values into allocations this object owns instead.
+        let filterStorage = UnsafeMutablePointer<Float>.allocate(capacity: inFilter.count)
+        filterStorage.initialize(from: inFilter, count: inFilter.count)
+        filterPointer = UnsafeMutableRawPointer(filterStorage)
+        let biasStorage = UnsafeMutablePointer<Float>.allocate(capacity: inBiase.count)
+        biasStorage.initialize(from: inBiase, count: inBiase.count)
+        biasePointer = biasStorage
+        filterSize = inFilterSize
+    }
+    deinit {
+        // `Float` is trivial, so no deinitialize step is required before freeing.
+        filterPointer.deallocate()
+        biasePointer.deallocate()
+    }
+}
+/// Operator that runs `CNNConvKernel` and advertises itself to the fusion
+/// machinery under `gMPSCNNConvType`.
+@available(iOS 10.0, *)
+class CNNMPSConvOp<P: PrecisionType>: Operator<CNNConvKernel<P>, CNNConvParam<P>>, Runable, Creator, InferShaperable, Fusion {
+    typealias OpType = CNNMPSConvOp<P>
+    /// Not implemented: this initializer always traps.
+    required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
+        fatalError()
+    }
+    /// Hands `para` to the kernel for encoding into `buffer`.
+    /// - Throws: Whatever the kernel's `compute` throws, unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+    /// Currently a no-op.
+    func delogOutput() {
+    }
+    /// Builds a conv node linked with `-->` to an element-wise-add node and
+    /// returns the conv node.
+    static func fusionNode() -> Node {
+        let convNode = Node(inType: gConvType)
+        _ = convNode --> Node(inType: gElementwiseAdd)
+        return convNode
+    }
+    /// No renames are requested: returns an empty mapping.
+    static func change() -> [String : [(from: String, to: String)]] {
+        return [:]
+    }
+    /// Type name of the fused operator.
+    static func fusionType() -> String {
+        return gMPSCNNConvType
+    }
+    /// Derives the output dimensions from the input, filter, stride, padding
+    /// and dilation and stores them in `para.output.dim`.
+    ///
+    /// The result is `[in[0], spatial sizes..., filter[0]]`, where each spatial
+    /// size is `(in + 2 * padding - (dilation * (k - 1) + 1)) / stride + 1`.
+    func inferShape() {
+        let inputDim = para.input.dim
+        let kernelDim = para.filter.dim
+        let steps = para.stride
+        let pads = para.paddings
+        let dilationFactors = para.dilations
+        var shape = [inputDim[0]]
+        for axis in 0..<steps.count {
+            let dilation = Int(dilationFactors[axis])
+            let kernelExtent: Int = kernelDim[axis + 1]
+            let inputExtent: Int = inputDim[axis + 1]
+            let pad = Int(pads[axis])
+            let step = Int(steps[axis])
+            // Effective kernel extent once dilation gaps are included.
+            let dilatedExtent = dilation * (kernelExtent - 1) + 1
+            shape.append((inputExtent + 2 * pad - dilatedExtent) / step + 1)
+        }
+        shape.append(kernelDim[0])
+        para.output.dim = Dim(inDim: shape)
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormReluKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormReluKernel.swift
+//
+//  BatchNormReluKernel.swift
+//  paddle-mobile
+//
+//  Created by zhangxinjun on 2018/8/23.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+import Foundation
+/// Parameters for the batch-norm + ReLU kernel; adds nothing to `BatchNormParam`.
+class BatchNormReluParam<P: PrecisionType>: BatchNormParam<P> {}
+/// Compute kernel that dispatches the `batch_norm_relu_3x3` Metal function.
+///
+/// The variance, scale, bias and mean buffers are folded once, at construction
+/// time, into `newScale` and `newBias`, which are then bound to the shader.
+class BatchNormReluKernel<P: PrecisionType>: Kernel, Computable{
+    typealias ParamType = BatchNormReluParam<P>
+    /// Folded scale: `scale / sqrt(variance + epsilon)`.
+    var newScale: MTLBuffer
+    /// Folded bias: `bias - mean * scale / sqrt(variance + epsilon)`.
+    var newBias: MTLBuffer
+    /// Test initializer: uses the pre-computed buffers supplied by `testParam`.
+    required init(device: MTLDevice, testParam: BatchNormReluTestParam) {
+        newScale = testParam.newScaleBuffer
+        newBias = testParam.newBiaseBuffer
+        super.init(device: device, inFunctionName: "batch_norm_relu_3x3")
+    }
+    /// Allocates `newScale`/`newBias` and fills them from the buffers in `param`.
+    /// Traps if either buffer cannot be allocated.
+    required init(device: MTLDevice, param: BatchNormReluParam<P>) {
+        guard let newScale = device.makeBuffer(length: param.inputScale.buffer.length) else {
+            fatalError()
+        }
+        guard let newBias = device.makeBuffer(length: param.inputBias.buffer.length) else {
+            fatalError()
+        }
+        self.newScale = newScale
+        self.newBias = newBias
+        super.init(device: device, inFunctionName: "batch_norm_relu_3x3")
+        let elementStride = MemoryLayout<P>.stride
+        let varianceBuffer : MTLBuffer = param.inputVariance.buffer
+        // `MTLBuffer.length` is a byte count; the scratch array only needs one
+        // entry per element (it was previously sized by the byte count).
+        let varianceCount = varianceBuffer.length / elementStride
+        var invStd: [Float32] = Array(repeating: 0, count: varianceCount)
+        let varianceContents = varianceBuffer.contents().assumingMemoryBound(to: P.self)
+        for i in 0..<varianceCount {
+            invStd[i] = 1 / (Float32(varianceContents[i]) + param.epsilon).squareRoot()
+        }
+        let newScaleContents = newScale.contents().assumingMemoryBound(to: P.self)
+        let newBiasContents = newBias.contents().assumingMemoryBound(to: P.self)
+        let scale : MTLBuffer = param.inputScale.buffer
+        let scaleContents = scale.contents().assumingMemoryBound(to: P.self)
+        let bias : MTLBuffer = param.inputBias.buffer
+        let biasContents = bias.contents().assumingMemoryBound(to: P.self)
+        let meanContents = param.inputMean.buffer.contents().assumingMemoryBound(to: P.self)
+        // Stay inside every destination buffer and inside `invStd`.
+        // NOTE(review): the mean buffer is assumed to hold at least as many elements — confirm.
+        let foldCount = min(varianceCount, newScale.length / elementStride, newBias.length / elementStride)
+        for i in 0..<foldCount {
+            let scaledInvStd = invStd[i] * Float32(scaleContents[i])
+            newScaleContents[i] = P(scaledInvStd)
+            newBiasContents[i] = P(Float32(biasContents[i]) - Float32(meanContents[i]) * scaledInvStd)
+        }
+    }
+    /// Encodes one dispatch of the kernel over `param.output`.
+    /// Traps if no compute encoder can be created.
+    func compute(commandBuffer: MTLCommandBuffer, param: BatchNormReluParam<P>) throws {
+        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+            fatalError()
+        }
+        // NOTE(review): these casts assume `param.input`/`param.output` are
+        // MTLTexture-compatible — confirm against the param type.
+        encoder.setTexture(param.input as? MTLTexture, index: 0)
+        encoder.setTexture(param.output as? MTLTexture, index: 1)
+        // Scale goes to buffer 0 and bias to buffer 1, matching `testCompute`;
+        // both were previously bound to index 1, leaving buffer 0 unset.
+        encoder.setBuffer(newScale, offset: 0, index: 0)
+        encoder.setBuffer(newBias, offset: 0, index: 1)
+        encoder.dispatch(computePipline: pipline, outTexture: param.output as! MTLTexture)
+        encoder.endEncoding()
+    }
+    /// Same as `compute`, but reads its textures from `testParam`.
+    func testCompute(commandBuffer: MTLCommandBuffer, testParam: BatchNormReluTestParam) throws {
+        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+            fatalError()
+        }
+        encoder.setTexture(testParam.inputTexture, index: 0)
+        encoder.setTexture(testParam.outputTexture, index: 1)
+        encoder.setBuffer(newScale, offset: 0, index: 0)
+        encoder.setBuffer(newBias, offset: 0, index: 1)
+        encoder.dispatch(computePipline: pipline, outTexture: testParam.outputTexture)
+        encoder.endEncoding()
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/CNNConvKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/CNNConvKernel.swift
+//
+//  CNNConvKernel.swift
+//  paddle-mobile
+//
+import Foundation
+import Metal
+import Accelerate
+import MetalPerformanceShaders
+/// `MPSCNNConvolutionDataSource` that hands out a fixed descriptor together
+/// with caller-provided weight and bias pointers.
+///
+/// The pointers are stored as given; this class neither copies nor frees the
+/// memory behind them.
+@available(iOS 10.0, *)
+class WeightsDataSource: NSObject, MPSCNNConvolutionDataSource  {
+    let desc: MPSCNNConvolutionDescriptor
+    let weight: UnsafeMutableRawPointer
+    let bias: UnsafeMutablePointer<Float>
+    init(inDesc: MPSCNNConvolutionDescriptor, inWeight: UnsafeMutableRawPointer, inBias: UnsafeMutablePointer<Float>) {
+        self.desc = inDesc
+        self.weight = inWeight
+        self.bias = inBias
+        super.init()
+    }
+    /// Weights are reported as 32-bit floats.
+    func dataType() -> MPSDataType {
+        return MPSDataType.float32
+    }
+    func descriptor() -> MPSCNNConvolutionDescriptor {
+        return desc
+    }
+    func weights() -> UnsafeMutableRawPointer {
+        return weight
+    }
+    func biasTerms() -> UnsafeMutablePointer<Float>? {
+        return bias
+    }
+    /// Nothing to load; always reports success.
+    func load() -> Bool {
+        return true
+    }
+    /// Nothing to release.
+    func purge() {
+    }
+    func label() -> String? {
+        return "Conv"
+    }
+}
+/// Parameters of the MPS convolution operator, resolved from an `OpDesc` and a `Scope`.
+@available(iOS 10.0, *)
+class CNNConvParam<P: PrecisionType>: OpParam{
+    typealias ParamPrecisionType = P
+    var input: Texture<P>
+    let variance: Tensor<ParamPrecisionType>
+    /// Bias tensor.
+    let y: Tensor<ParamPrecisionType>
+    let filter: Tensor<ParamPrecisionType>
+    var output: Texture<P>
+    let stride: [Int32]
+    let paddings: [Int32]
+    let dilations: [Int32]
+    let groups: Int
+    /// Looks up every input, output and attribute in the order listed below.
+    /// - Throws: The first lookup error encountered, unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        filter = try CNNConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
+        input = try CNNConvParam.input(inputs: opDesc.inputs, from: inScope)
+        output = try CNNConvParam.outputOut(outputs: opDesc.outputs, from: inScope)
+        stride = try CNNConvParam.getAttr(key: "strides", attrs: opDesc.attrs)
+        paddings = try CNNConvParam.getAttr(key: "paddings", attrs: opDesc.attrs)
+        // Not needed for now.
+        dilations = try CNNConvParam.getAttr(key: "dilations", attrs: opDesc.attrs)
+        // Not needed for now.
+        groups = try CNNConvParam.getAttr(key: "groups", attrs: opDesc.attrs)
+        variance = try CNNConvParam.inputVariance(inputs: opDesc.paraInputs, from: inScope)
+        // bias
+        y = try CNNConvParam.inputY(inputs: opDesc.paraInputs, from: inScope)
+    }
+}
+/// Convolution kernel backed by `MPSCNNConvolution`.
+///
+/// The convolution object is only created on iOS 11 or later; on earlier
+/// versions `conv` stays `nil` and the compute methods encode no convolution.
+@available(iOS 10.0, *)
+class CNNConvKernel<P: PrecisionType>: Kernel, Computable {
+    typealias ParamType = CNNConvParam<P>
+    // NOTE(review): nothing in this class assigns `mpsImageCreator`; unless the
+    // owner sets it before `compute`/`testCompute` run, those methods trap.
+    var mpsImageCreator: MpsImageCreator<P>?
+    var activation:MPSCNNNeuron?
+    var conv:MPSCNNConvolution?
+    var weightDataSource:WeightsDataSource?
+    var param: CNNConvParam<P>?
+    var device: MTLDevice?
+    /// Test initializer: builds the convolution from the raw pointers and
+    /// sizes carried by `testParam`.
+    required init(device:MTLDevice, testParam:CNNMPSConvTestParam) {
+        self.device = device
+        let desc = MPSCNNConvolutionDescriptor(kernelWidth: testParam.filterSize.width, kernelHeight: testParam.filterSize.height, inputFeatureChannels: testParam.filterSize.channel, outputFeatureChannels: testParam.filterSize.channel, neuronFilter: activation)
+        // The stride must come from the stride fields; it was previously read
+        // from offsetX/offsetY.
+        // Assumes the Swift `MetalConvParam` exposes strideX/strideY like its
+        // Metal counterpart — TODO confirm.
+        desc.strideInPixelsX = Int(testParam.metalParam.strideX)
+        desc.strideInPixelsY = Int(testParam.metalParam.strideY)
+        let dataSource = WeightsDataSource(inDesc: desc, inWeight:testParam.filterPointer, inBias:testParam.biasePointer)
+        weightDataSource = dataSource
+        if #available(iOS 11.0, *) {
+            conv = MPSCNNConvolution(device: device, weights: dataSource)
+        }
+        super.init(device: device, inFunctionName: "")
+    }
+    /// Builds the convolution from the filter, bias (`y`) and stride in `param`.
+    required init(device:MTLDevice, param:CNNConvParam<P>) {
+        self.device = device
+        // NOTE(review): both channel counts are taken from the bias tensor's
+        // 4th dimension (0 when it is not 4-dimensional) — confirm this is intended.
+        let inChannels: Int
+        if param.y.dim.cout() == 4 {
+            inChannels = (param.y.dim[3])
+        } else {
+            inChannels = 0
+        }
+        let outChannels = inChannels
+        let desc = MPSCNNConvolutionDescriptor(kernelWidth: param.filter.width, kernelHeight: param.filter.height, inputFeatureChannels: inChannels, outputFeatureChannels: outChannels, neuronFilter: activation)
+        desc.strideInPixelsX = Int(param.stride[0])
+        desc.strideInPixelsY = Int(param.stride[1])
+        let dataSource = WeightsDataSource(inDesc: desc, inWeight:param.filter.data.pointer as! UnsafeMutablePointer<Float>, inBias: param.y.data.pointer as! UnsafeMutablePointer<Float>)
+        weightDataSource = dataSource
+        if #available(iOS 11.0, *) {
+            conv = MPSCNNConvolution(device: device, weights: dataSource)
+        }
+        super.init(device: device, inFunctionName: "")
+    }
+    /// Creates source and destination images and encodes the convolution.
+    /// Traps if the device or `mpsImageCreator` is missing.
+    func compute(commandBuffer: MTLCommandBuffer, param: CNNConvParam<P>) throws {
+        guard let device = device,
+            let inputImage = mpsImageCreator?.createMPSImage(device: device),
+            let outputImage = mpsImageCreator?.createMPSImage(device: device) else {
+                fatalError("CNNConvKernel.compute: device or mpsImageCreator is not configured")
+        }
+        // Performs the conv and add steps; the add uses the bias as its
+        // parameter and is carried out by the Metal API.
+        conv?.encode(commandBuffer: commandBuffer, sourceImage: inputImage, destinationImage: outputImage)
+        // NOTE(review): the result is stored into `param.input`, not
+        // `param.output`, and relies on a forced cast from the image's texture
+        // to `Texture<P>` — confirm both.
+        param.input = outputImage.texture as! Texture<P>
+    }
+    /// Test variant of `compute`: stores the destination image's texture in
+    /// `testParam.outputTexture`. Traps if the device or `mpsImageCreator` is missing.
+    func testCompute(commandBuffer: MTLCommandBuffer, testParam: CNNMPSConvTestParam) throws {
+        guard let device = device,
+            let inputImage = mpsImageCreator?.createMPSImage(device: device),
+            let outputImage = mpsImageCreator?.createMPSImage(device: device) else {
+                fatalError("CNNConvKernel.testCompute: device or mpsImageCreator is not configured")
+        }
+        // Performs the conv and add steps; the add uses the bias as its
+        // parameter and is carried out by the Metal API.
+        conv?.encode(commandBuffer: commandBuffer, sourceImage: inputImage, destinationImage: outputImage)
+        testParam.outputTexture = outputImage.texture
+    }
+}
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormRelu.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormRelu.metal
+//
+//  BatchNormRelu.metal
+//  paddle-mobile
+//
+#include <metal_stdlib>
+using namespace metal;
+// Convolution parameters: three signed offsets followed by two unsigned strides.
+struct MetalConvParam {
+    short offsetX, offsetY, offsetZ;
+    ushort strideX, strideY;
+};
+// Batch-norm + ReLU: writes max(in * new_scale[z] + new_biase[z], 0) for every
+// texel of every slice of the output texture array.
+//   inTexture  - source texture array, texture(0)
+//   outTexture - destination texture array, texture(1)
+//   new_scale  - one float4 of folded scale per array slice, buffer(0)
+//   new_biase  - one float4 of folded bias per array slice, buffer(1)
+kernel void batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                                         texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                         const device float4 *new_scale [[buffer(0)]],
+                                         const device float4 *new_biase [[buffer(1)]],
+                                         uint3 gid [[thread_position_in_grid]]) {
+    // Threads outside the output have nothing to do.
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    // texture2d_array::sample takes a float2 coordinate followed by the array
+    // index; the previous call passed x, y and z as three separate scalars.
+    const float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
+    const float4 output = fmax(input * new_scale[gid.z] + new_biase[gid.z], 0.0);
+    outTexture.write(output, gid.xy, gid.z);
+}