From e32e90c32fe43373759c7e1108806a6263495943 Mon Sep 17 00:00:00 2001 From: liuruilong Date: Wed, 5 Sep 2018 23:12:22 +0800 Subject: [PATCH] mps and uncomplete interface --- .../paddle-mobile-demo/Net/PaddleMobile.swift | 9 + .../Operators/CNNMPSConvOp.swift | 75 ++++++++ .../Kernels/BatchNormReluKernel.swift | 91 +++++++++ .../Operators/Kernels/CNNConvKernel.swift | 176 ++++++++++++++++++ .../Kernels/metal/BatchNormRelu.metal | 36 ++++ 5 files changed, 387 insertions(+) create mode 100644 metal/paddle-mobile-demo/paddle-mobile-demo/Net/PaddleMobile.swift create mode 100644 metal/paddle-mobile/paddle-mobile/Operators/CNNMPSConvOp.swift create mode 100644 metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormReluKernel.swift create mode 100644 metal/paddle-mobile/paddle-mobile/Operators/Kernels/CNNConvKernel.swift create mode 100644 metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormRelu.metal diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PaddleMobile.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PaddleMobile.swift new file mode 100644 index 0000000000..a954328aca --- /dev/null +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PaddleMobile.swift @@ -0,0 +1,9 @@ +// +// PaddleMobile.swift +// paddle-mobile-demo +// +// Created by liuRuiLong on 2018/9/5. +// Copyright © 2018年 orange. All rights reserved. +// + +import Foundation diff --git a/metal/paddle-mobile/paddle-mobile/Operators/CNNMPSConvOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/CNNMPSConvOp.swift new file mode 100644 index 0000000000..8ba74a1c31 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/CNNMPSConvOp.swift @@ -0,0 +1,75 @@ +// +// CNNConvAddBatchNormReluOp.swift +// paddle-mobile + +import Foundation + +class CNNMPSConvTestParam: TestParam { + var outputTexture: MTLTexture? + var metalParam: MetalConvParam + let filterPointer: UnsafeMutableRawPointer + let biasePointer: UnsafeMutablePointer + let filterSize: (width: Int, height: Int, channel: Int) + init(inMetalParam: MetalConvParam, inFilter: [Float], inBiase: [Float], inFilterSize: (width: Int, height: Int, channel: Int)) { + metalParam = inMetalParam + filterPointer = UnsafeMutableRawPointer.init(mutating: inFilter) + biasePointer = UnsafeMutablePointer.init(mutating: inBiase) + filterSize = inFilterSize + } +} + +@available(iOS 10.0, *) +class CNNMPSConvOp: Operator, CNNConvParam

>, Runable, Creator, InferShaperable, Fusion { + + typealias OpType = CNNMPSConvOp

+ + required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws { + fatalError() + } + + func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + + func delogOutput() { + } + + static func fusionNode() -> Node { + let beginNode = Node.init(inType: gConvType) + _ = beginNode-->Node.init(inType: gElementwiseAdd); + return beginNode + } + + static func change() -> [String : [(from: String, to: String)]] { + return [:] + } + + static func fusionType() -> String { + return gMPSCNNConvType + } + func inferShape() { + let inDims = para.input.dim + let filterDim = para.filter.dim + let strides = para.stride + let paddings = para.paddings + let dilations = para.dilations + + var outDim = [inDims[0]] + for i in 0..: BatchNormParam

{ + +} + +class BatchNormReluKernel: Kernel, Computable{ + + + typealias ParamType = BatchNormReluParam

+ var newScale: MTLBuffer + var newBias: MTLBuffer + + required init(device: MTLDevice, testParam: BatchNormReluTestParam) { + + newScale = testParam.newScaleBuffer + newBias = testParam.newBiaseBuffer + + super.init(device: device, inFunctionName: "batch_norm_relu_3x3") + } + + required init(device: MTLDevice, param: BatchNormReluParam

) { + guard let newScale = device.makeBuffer(length: param.inputScale.buffer.length) else { + fatalError() + } + guard let newBias = device.makeBuffer(length: param.inputBias.buffer.length) else { + fatalError() + } + self.newScale = newScale + self.newBias = newBias + + super.init(device: device, inFunctionName: "batch_norm_relu_3x3") + + + let varianceBuffer : MTLBuffer = param.inputVariance.buffer + + var invStd: [Float32] = Array(repeating: 0, count: varianceBuffer.length) + let varianceContents = varianceBuffer.contents().assumingMemoryBound(to: P.self) + for i in 0..<(varianceBuffer.length / MemoryLayout

.stride) { + invStd[i] = 1 / (Float32(varianceContents[i]) + param.epsilon).squareRoot() + } + + let newScaleContents = newScale.contents().assumingMemoryBound(to: P.self) + let newBiasContents = newBias.contents().assumingMemoryBound(to: P.self) + let scale : MTLBuffer = param.inputScale.buffer + let scaleContents = scale.contents().assumingMemoryBound(to: P.self) + let bias : MTLBuffer = param.inputBias.buffer + let biasContents = bias.contents().assumingMemoryBound(to: P.self) + let meanContents = param.inputMean.buffer.contents().assumingMemoryBound(to: P.self) + + for i in 0..<(newScale.length / MemoryLayout

.stride) { + newScaleContents[i] = P(invStd[i] * Float32(scaleContents[i])) + newBiasContents[i] = P(Float32(biasContents[i]) - Float32(meanContents[i]) * invStd[i] * Float32(scaleContents[i])) + } + } + + func compute(commandBuffer: MTLCommandBuffer, param: BatchNormReluParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + fatalError() + } + encoder.setTexture(param.input as? MTLTexture, index: 0) + encoder.setTexture(param.output as? MTLTexture, index: 1) + encoder.setBuffer(newScale, offset: 0, index: 1) + encoder.setBuffer(newBias, offset: 0, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.output as! MTLTexture) + encoder.endEncoding() + } + + func testCompute(commandBuffer: MTLCommandBuffer, testParam: BatchNormReluTestParam) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + fatalError() + } + encoder.setTexture(testParam.inputTexture, index: 0) + encoder.setTexture(testParam.outputTexture, index: 1) + encoder.setBuffer(newScale, offset: 0, index: 0) + encoder.setBuffer(newBias, offset: 0, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: testParam.outputTexture) + encoder.endEncoding() + } + + +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/CNNConvKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/CNNConvKernel.swift new file mode 100644 index 0000000000..14a5bd5214 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/CNNConvKernel.swift @@ -0,0 +1,176 @@ +// +// CNNConvKernel.swift +// paddle-mobile +// + +import Foundation +import Metal +import Accelerate +import MetalPerformanceShaders + +@available(iOS 10.0, *) +class WeightsDataSource: NSObject, MPSCNNConvolutionDataSource { + + let desc: MPSCNNConvolutionDescriptor + let weight:UnsafeMutableRawPointer + let bias:UnsafeMutablePointer + + + + init(inDesc: MPSCNNConvolutionDescriptor, inWeight: UnsafeMutableRawPointer, inBias: UnsafeMutablePointer) { + desc = inDesc + weight = inWeight + bias = inBias + } + + + func dataType() -> MPSDataType { + return .float32 + } + + func descriptor() -> MPSCNNConvolutionDescriptor { + return desc + } + + func weights() -> UnsafeMutableRawPointer { + return self.weight + } + + func biasTerms() -> UnsafeMutablePointer? { + return self.bias + } + + func load() -> Bool { + return true + } + + func purge() { + } + + func label() -> String? { + return "Conv" + } + + +} + +@available(iOS 10.0, *) +class CNNConvParam: OpParam{ + + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + filter = try CNNConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) + input = try CNNConvParam.input(inputs: opDesc.inputs, from: inScope) + output = try CNNConvParam.outputOut(outputs: opDesc.outputs, from: inScope) + stride = try CNNConvParam.getAttr(key: "strides", attrs: opDesc.attrs) + paddings = try CNNConvParam.getAttr(key: "paddings", attrs: opDesc.attrs) + // 暂时不用关心 + dilations = try CNNConvParam.getAttr(key: "dilations", attrs: opDesc.attrs) + // 暂时不用关心 + groups = try CNNConvParam.getAttr(key: "groups", attrs: opDesc.attrs) + + variance = try CNNConvParam.inputVariance(inputs: opDesc.paraInputs, from: inScope) + // bias + y = try CNNConvParam.inputY(inputs: opDesc.paraInputs, from: inScope) + } catch let error { + throw error + } + } + + var input: Texture

+ let variance: Tensor + let y: Tensor + let filter: Tensor + var output: Texture

+ let stride: [Int32] + let paddings: [Int32] + let dilations: [Int32] + let groups: Int +} + +@available(iOS 10.0, *) +class CNNConvKernel: Kernel, Computable { + + typealias ParamType = CNNConvParam

+ + var mpsImageCreator: MpsImageCreator

? + var activation:MPSCNNNeuron? + var conv:MPSCNNConvolution? + var weightDataSource:WeightsDataSource? + var param: CNNConvParam

? + var device: MTLDevice? + + + required init(device:MTLDevice, testParam:CNNMPSConvTestParam) { + self.device = device + + let desc = MPSCNNConvolutionDescriptor(kernelWidth: testParam.filterSize.width, kernelHeight: testParam.filterSize.height, inputFeatureChannels: testParam.filterSize.channel, outputFeatureChannels: testParam.filterSize.channel, neuronFilter: activation) + + desc.strideInPixelsX = Int(testParam.metalParam.offsetX) + desc.strideInPixelsY = Int(testParam.metalParam.offsetY) + + + weightDataSource = WeightsDataSource(inDesc: desc, inWeight:testParam.filterPointer, inBias:testParam.biasePointer) + + if #available(iOS 11.0, *) { + conv = MPSCNNConvolution(device: self.device!, weights: weightDataSource!) + } else { + // Fallback on earlier versions + } + + super.init(device: device, inFunctionName: "") + } + + required init(device:MTLDevice, param:CNNConvParam

) { + + self.device = device + + let inChannels: Int + let outChannels: Int + + if param.y.dim.cout() == 4 { + inChannels = (param.y.dim[3]) + outChannels = inChannels + } else { + inChannels = 0 + outChannels = inChannels + } + + let desc = MPSCNNConvolutionDescriptor(kernelWidth: param.filter.width, kernelHeight: param.filter.height, inputFeatureChannels: inChannels, outputFeatureChannels: outChannels, neuronFilter: activation) + + desc.strideInPixelsX = Int(param.stride[0]) + desc.strideInPixelsY = Int(param.stride[1]) + + + weightDataSource = WeightsDataSource(inDesc: desc, inWeight:param.filter.data.pointer as! UnsafeMutablePointer, inBias: param.y.data.pointer as! UnsafeMutablePointer) + + if #available(iOS 11.0, *) { + conv = MPSCNNConvolution(device: self.device!, weights: weightDataSource!) + } else { + // Fallback on earlier versions + } + + super.init(device: device, inFunctionName: "") + } + + func compute(commandBuffer: MTLCommandBuffer, param: CNNConvParam

) throws { + let inputImage:MPSImage = (mpsImageCreator?.createMPSImage(device: device!))! + var outputImage = (mpsImageCreator?.createMPSImage(device: device!))! + + // 运算conv和add两个步骤,add用了bias偏差做为参数,被Metal API进行调用 + conv?.encode(commandBuffer: commandBuffer, sourceImage: inputImage, destinationImage: outputImage) + + param.input = outputImage.texture as! Texture

+ } + + func testCompute(commandBuffer: MTLCommandBuffer, testParam: CNNMPSConvTestParam) throws { + let inputImage:MPSImage = (mpsImageCreator?.createMPSImage(device: device!))! + var outputImage = (mpsImageCreator?.createMPSImage(device: device!))! + + // 运算conv和add两个步骤,add用了bias偏差做为参数,被Metal API进行调用 + conv?.encode(commandBuffer: commandBuffer, sourceImage: inputImage, destinationImage: outputImage) + + testParam.outputTexture = outputImage.texture + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormRelu.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormRelu.metal new file mode 100644 index 0000000000..eb94408c8a --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormRelu.metal @@ -0,0 +1,36 @@ +// +// BatchNormRelu.metal +// paddle-mobile +// + +#include +using namespace metal; + +struct MetalConvParam { + short offsetX; + short offsetY; + short offsetZ; + ushort strideX; + ushort strideY; +}; + +kernel void batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + const device float4 *new_scale [[buffer(0)]], + const device float4 *new_biase [[buffer(1)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + float4 input; + float4 output; + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + input = inTexture.sample(sample, gid.x, gid.y, gid.z); + output = fmax(input * new_scale[gid.z] + new_biase[gid.z], 0.0); + outTexture.write(output, gid.xy, gid.z); + +} -- GitLab