提交 e32e90c3 编写于 作者: L liuruilong

mps and incomplete interface

上级 b378e1c2
//
// PaddleMobile.swift
// paddle-mobile-demo
//
// Created by liuRuiLong on 2018/9/5.
// Copyright © 2018年 orange. All rights reserved.
//
import Foundation
//
// CNNConvAddBatchNormReluOp.swift
// paddle-mobile
import Foundation
/// Test-only parameter bundle for the MPS-backed convolution kernel.
///
/// The filter and bias values handed to `init` are copied into heap storage
/// owned by this object, so `filterPointer` and `biasePointer` stay valid for
/// the whole lifetime of the instance and are released in `deinit`.
class CNNMPSConvTestParam: TestParam {
    /// Texture written by the kernel's test run; `nil` until one is stored.
    var outputTexture: MTLTexture?
    var metalParam: MetalConvParam
    /// Start of the owned copy of the filter weights (`Float` elements).
    let filterPointer: UnsafeMutableRawPointer
    /// Start of the owned copy of the bias terms.
    let biasePointer: UnsafeMutablePointer<Float>
    let filterSize: (width: Int, height: Int, channel: Int)

    /// - Parameters:
    ///   - inMetalParam: Stored as `metalParam`.
    ///   - inFilter: Filter weights; copied.
    ///   - inBiase: Bias terms; copied.
    ///   - inFilterSize: Stored as `filterSize`.
    init(inMetalParam: MetalConvParam, inFilter: [Float], inBiase: [Float], inFilterSize: (width: Int, height: Int, channel: Int)) {
        metalParam = inMetalParam

        // Passing a Swift array where a pointer is expected yields a pointer
        // that is only valid for the duration of that single call. Storing it
        // (as the previous code did) leaves a dangling pointer, so the data is
        // copied into memory this object owns instead.
        let filterStorage = UnsafeMutablePointer<Float>.allocate(capacity: inFilter.count)
        filterStorage.initialize(from: inFilter, count: inFilter.count)
        filterPointer = UnsafeMutableRawPointer(filterStorage)

        let biasStorage = UnsafeMutablePointer<Float>.allocate(capacity: inBiase.count)
        biasStorage.initialize(from: inBiase, count: inBiase.count)
        biasePointer = biasStorage

        filterSize = inFilterSize
    }

    deinit {
        // `Float` is trivial, so no deinitialization pass is needed before
        // releasing the storage allocated in `init`.
        filterPointer.deallocate()
        biasePointer.deallocate()
    }
}
/// Operator that runs `CNNConvKernel` and registers itself as the fusion of a
/// convolution node followed by an element-wise add node.
@available(iOS 10.0, *)
class CNNMPSConvOp<P: PrecisionType>: Operator<CNNConvKernel<P>, CNNConvParam<P>>, Runable, Creator, InferShaperable, Fusion {
    typealias OpType = CNNMPSConvOp<P>

    /// Not implemented yet: always traps.
    required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
        fatalError()
    }

    /// Encodes the kernel into `buffer`; any error from the kernel propagates
    /// to the caller unchanged.
    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
        try kernel.compute(commandBuffer: buffer, param: para)
    }

    /// Intentionally empty debugging hook.
    func delogOutput() {
    }

    // MARK: - Fusion

    /// Pattern to match: a conv node whose output feeds an element-wise add.
    static func fusionNode() -> Node {
        let head = Node(inType: gConvType)
        _ = head --> Node(inType: gElementwiseAdd)
        return head
    }

    /// No variable renames are needed for this fusion.
    static func change() -> [String : [(from: String, to: String)]] {
        return [:]
    }

    /// Type name of the fused operator.
    static func fusionType() -> String {
        return gMPSCNNConvType
    }

    // MARK: - InferShaperable

    /// Derives the output dimensions from the input, filter, stride, padding
    /// and dilation, and stores them on `para.output`.
    ///
    /// The result is `[input[0], spatial..., filter[0]]`, with one spatial
    /// entry per stride component.
    func inferShape() {
        let inputDim = para.input.dim
        let kernelDim = para.filter.dim

        var shape = [inputDim[0]]
        for axis in 0..<para.stride.count {
            let step = Int(para.stride[axis])
            let pad = Int(para.paddings[axis])
            // Extent actually covered by the filter once dilation is applied.
            let dilatedExtent = Int(para.dilations[axis]) * (kernelDim[axis + 1] - 1) + 1
            shape.append((inputDim[axis + 1] + 2 * pad - dilatedExtent) / step + 1)
        }
        shape.append(kernelDim[0])

        para.output.dim = Dim(inDim: shape)
    }
}
//
// BatchNormRelu.swift
// paddle-mobile
//
// Created by zhangxinjun on 2018/8/23.
// Copyright © 2018年 orange. All rights reserved.
//
import Foundation
/// Parameter type consumed by `BatchNormReluKernel`.
/// It adds nothing of its own; everything is inherited from `BatchNormParam`.
class BatchNormReluParam<P: PrecisionType>: BatchNormParam<P> {
}
/// Compute kernel wrapping the `batch_norm_relu_3x3` Metal function.
///
/// Batch normalisation is folded into a per-element scale and bias at
/// construction time:
///
///     newScale = scale / sqrt(variance + epsilon)
///     newBias  = bias - mean * newScale
///
/// The shader then only has to evaluate `max(input * newScale + newBias, 0)`.
class BatchNormReluKernel<P: PrecisionType>: Kernel, Computable{
    typealias ParamType = BatchNormReluParam<P>

    /// Folded scale, bound to shader buffer index 0.
    var newScale: MTLBuffer
    /// Folded bias, bound to shader buffer index 1.
    var newBias: MTLBuffer

    /// Test initializer: uses the already-folded buffers carried by `testParam`.
    required init(device: MTLDevice, testParam: BatchNormReluTestParam) {
        newScale = testParam.newScaleBuffer
        newBias = testParam.newBiaseBuffer
        super.init(device: device, inFunctionName: "batch_norm_relu_3x3")
    }

    /// Allocates the folded scale/bias buffers and fills them from the
    /// variance, scale, bias and mean buffers of `param`.
    ///
    /// Traps if either Metal buffer cannot be allocated.
    required init(device: MTLDevice, param: BatchNormReluParam<P>) {
        guard let newScale = device.makeBuffer(length: param.inputScale.buffer.length) else {
            fatalError()
        }
        guard let newBias = device.makeBuffer(length: param.inputBias.buffer.length) else {
            fatalError()
        }
        self.newScale = newScale
        self.newBias = newBias
        super.init(device: device, inFunctionName: "batch_norm_relu_3x3")

        let elementStride = MemoryLayout<P>.stride

        // 1 / sqrt(variance + epsilon) per element. The array is sized by the
        // buffer's byte length, so it always has at least one slot per element.
        let varianceBuffer: MTLBuffer = param.inputVariance.buffer
        var invStd: [Float32] = Array(repeating: 0, count: varianceBuffer.length)
        let varianceContents = varianceBuffer.contents().assumingMemoryBound(to: P.self)
        for i in 0..<(varianceBuffer.length / elementStride) {
            invStd[i] = 1 / (Float32(varianceContents[i]) + param.epsilon).squareRoot()
        }

        let newScaleContents = newScale.contents().assumingMemoryBound(to: P.self)
        let newBiasContents = newBias.contents().assumingMemoryBound(to: P.self)
        let scaleContents = param.inputScale.buffer.contents().assumingMemoryBound(to: P.self)
        let biasContents = param.inputBias.buffer.contents().assumingMemoryBound(to: P.self)
        let meanContents = param.inputMean.buffer.contents().assumingMemoryBound(to: P.self)

        for i in 0..<(newScale.length / elementStride) {
            let foldedScale = invStd[i] * Float32(scaleContents[i])
            newScaleContents[i] = P(foldedScale)
            newBiasContents[i] = P(Float32(biasContents[i]) - Float32(meanContents[i]) * foldedScale)
        }
    }

    /// Encodes one dispatch of the kernel for `param` into `commandBuffer`.
    ///
    /// Traps if a compute command encoder cannot be created.
    func compute(commandBuffer: MTLCommandBuffer, param: BatchNormReluParam<P>) throws {
        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
            fatalError()
        }
        // NOTE(review): these casts assume `param.input` / `param.output` are
        // themselves `MTLTexture` values — confirm against the param type.
        encoder.setTexture(param.input as? MTLTexture, index: 0)
        encoder.setTexture(param.output as? MTLTexture, index: 1)
        // The shader reads the scale from buffer(0) and the bias from
        // buffer(1). Both were previously bound to index 1, which left
        // buffer(0) unset and discarded the scale.
        encoder.setBuffer(newScale, offset: 0, index: 0)
        encoder.setBuffer(newBias, offset: 0, index: 1)
        encoder.dispatch(computePipline: pipline, outTexture: param.output as! MTLTexture)
        encoder.endEncoding()
    }

    /// Test counterpart of `compute`, reading and writing the textures carried
    /// by `testParam`.
    ///
    /// Traps if a compute command encoder cannot be created.
    func testCompute(commandBuffer: MTLCommandBuffer, testParam: BatchNormReluTestParam) throws {
        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
            fatalError()
        }
        encoder.setTexture(testParam.inputTexture, index: 0)
        encoder.setTexture(testParam.outputTexture, index: 1)
        encoder.setBuffer(newScale, offset: 0, index: 0)
        encoder.setBuffer(newBias, offset: 0, index: 1)
        encoder.dispatch(computePipline: pipline, outTexture: testParam.outputTexture)
        encoder.endEncoding()
    }
}
//
// CNNConvKernel.swift
// paddle-mobile
//
import Foundation
import Metal
import Accelerate
import MetalPerformanceShaders
/// `MPSCNNConvolutionDataSource` backed by caller-supplied weight and bias
/// pointers. The pointers are stored as-is; this class neither copies nor
/// frees the memory they refer to.
@available(iOS 10.0, *)
class WeightsDataSource: NSObject, MPSCNNConvolutionDataSource {
    let desc: MPSCNNConvolutionDescriptor
    let weight:UnsafeMutableRawPointer
    let bias:UnsafeMutablePointer<Float>

    /// - Parameters:
    ///   - inDesc: Descriptor returned from `descriptor()`.
    ///   - inWeight: Pointer returned from `weights()`.
    ///   - inBias: Pointer returned from `biasTerms()`.
    init(inDesc: MPSCNNConvolutionDescriptor, inWeight: UnsafeMutableRawPointer, inBias: UnsafeMutablePointer<Float>) {
        self.desc = inDesc
        self.weight = inWeight
        self.bias = inBias
    }

    // MARK: - MPSCNNConvolutionDataSource

    /// Weights are reported as 32-bit floats.
    func dataType() -> MPSDataType {
        return MPSDataType.float32
    }

    func descriptor() -> MPSCNNConvolutionDescriptor {
        return desc
    }

    func weights() -> UnsafeMutableRawPointer {
        return weight
    }

    func biasTerms() -> UnsafeMutablePointer<Float>? {
        return bias
    }

    /// Nothing to load (the pointers are supplied up front), so this always
    /// reports success.
    func load() -> Bool {
        return true
    }

    /// Nothing to release: the memory is not owned by this object.
    func purge() {
    }

    func label() -> String? {
        return "Conv"
    }
}
/// Parameters of the MPS convolution operator, resolved from an operator
/// description and the scope holding its variables.
@available(iOS 10.0, *)
class CNNConvParam<P: PrecisionType>: OpParam{
    typealias ParamPrecisionType = P

    var input: Texture<P>
    let variance: Tensor<ParamPrecisionType>
    /// Bias tensor.
    let y: Tensor<ParamPrecisionType>
    let filter: Tensor<ParamPrecisionType>
    var output: Texture<P>
    let stride: [Int32]
    let paddings: [Int32]
    let dilations: [Int32]
    let groups: Int

    /// Looks up every input, output and attribute of the operator.
    ///
    /// - Throws: Whatever error the first failing lookup raises; it is passed
    ///   on to the caller unchanged.
    required init(opDesc: OpDesc, inScope: Scope) throws {
        filter = try CNNConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
        input = try CNNConvParam.input(inputs: opDesc.inputs, from: inScope)
        output = try CNNConvParam.outputOut(outputs: opDesc.outputs, from: inScope)
        stride = try CNNConvParam.getAttr(key: "strides", attrs: opDesc.attrs)
        paddings = try CNNConvParam.getAttr(key: "paddings", attrs: opDesc.attrs)
        // Not relevant for now.
        dilations = try CNNConvParam.getAttr(key: "dilations", attrs: opDesc.attrs)
        // Not relevant for now.
        groups = try CNNConvParam.getAttr(key: "groups", attrs: opDesc.attrs)
        variance = try CNNConvParam.inputVariance(inputs: opDesc.paraInputs, from: inScope)
        // Bias.
        y = try CNNConvParam.inputY(inputs: opDesc.paraInputs, from: inScope)
    }
}
/// Convolution kernel backed by `MPSCNNConvolution`.
///
/// The convolution object is only built on iOS 11 and later; on earlier
/// systems `conv` stays `nil` and encoding is a no-op.
@available(iOS 10.0, *)
class CNNConvKernel<P: PrecisionType>: Kernel, Computable {
    typealias ParamType = CNNConvParam<P>

    /// Factory for the source/destination images. Must be assigned before
    /// `compute` or `testCompute` is called; both throw while it is `nil`.
    var mpsImageCreator: MpsImageCreator<P>?
    var activation: MPSCNNNeuron?
    var conv: MPSCNNConvolution?
    var weightDataSource: WeightsDataSource?
    var param: CNNConvParam<P>?
    var device: MTLDevice?

    /// Test initializer: builds the convolution from raw filter/bias pointers.
    required init(device: MTLDevice, testParam: CNNMPSConvTestParam) {
        self.device = device
        let desc = MPSCNNConvolutionDescriptor(kernelWidth: testParam.filterSize.width,
                                               kernelHeight: testParam.filterSize.height,
                                               inputFeatureChannels: testParam.filterSize.channel,
                                               outputFeatureChannels: testParam.filterSize.channel,
                                               neuronFilter: activation)
        // The stride comes from the stride fields of the conv param; the
        // offset fields were used here before, which configured the wrong
        // stride whenever offset and stride differ.
        desc.strideInPixelsX = Int(testParam.metalParam.strideX)
        desc.strideInPixelsY = Int(testParam.metalParam.strideY)
        let dataSource = WeightsDataSource(inDesc: desc, inWeight: testParam.filterPointer, inBias: testParam.biasePointer)
        weightDataSource = dataSource
        if #available(iOS 11.0, *) {
            conv = MPSCNNConvolution(device: device, weights: dataSource)
        }
        // No fallback on earlier versions: `conv` stays nil.
        super.init(device: device, inFunctionName: "")
    }

    /// Builds the convolution from the operator parameters.
    required init(device: MTLDevice, param: CNNConvParam<P>) {
        self.device = device
        // NOTE(review): the channel count is taken from the last dimension of
        // the bias tensor, and falls back to 0 when the bias is not 4-D —
        // confirm that a zero-channel descriptor is really intended.
        let inChannels: Int = param.y.dim.cout() == 4 ? param.y.dim[3] : 0
        let outChannels: Int = inChannels
        let desc = MPSCNNConvolutionDescriptor(kernelWidth: param.filter.width,
                                               kernelHeight: param.filter.height,
                                               inputFeatureChannels: inChannels,
                                               outputFeatureChannels: outChannels,
                                               neuronFilter: activation)
        desc.strideInPixelsX = Int(param.stride[0])
        desc.strideInPixelsY = Int(param.stride[1])
        let dataSource = WeightsDataSource(inDesc: desc,
                                           inWeight: param.filter.data.pointer as! UnsafeMutablePointer<Float>,
                                           inBias: param.y.data.pointer as! UnsafeMutablePointer<Float>)
        weightDataSource = dataSource
        if #available(iOS 11.0, *) {
            conv = MPSCNNConvolution(device: device, weights: dataSource)
        }
        // No fallback on earlier versions: `conv` stays nil.
        super.init(device: device, inFunctionName: "")
    }

    /// Encodes the convolution into `commandBuffer` and stores the resulting
    /// texture on `param`.
    ///
    /// - Throws: An `NSError` when the image creator or device is missing, or
    ///   an image cannot be created.
    func compute(commandBuffer: MTLCommandBuffer, param: CNNConvParam<P>) throws {
        let images = try makeImagePair()
        // Performs both the conv and the add step: the add uses the bias as
        // its parameter and is carried out by the Metal API.
        conv?.encode(commandBuffer: commandBuffer, sourceImage: images.source, destinationImage: images.destination)
        // NOTE(review): the result is written to `param.input` (not
        // `param.output`) via a forced cast — kept as in the original; confirm
        // this is intended.
        param.input = images.destination.texture as! Texture<P>
    }

    /// Test counterpart of `compute`; stores the resulting texture in
    /// `testParam.outputTexture`.
    ///
    /// - Throws: An `NSError` when the image creator or device is missing, or
    ///   an image cannot be created.
    func testCompute(commandBuffer: MTLCommandBuffer, testParam: CNNMPSConvTestParam) throws {
        let images = try makeImagePair()
        // Performs both the conv and the add step: the add uses the bias as
        // its parameter and is carried out by the Metal API.
        conv?.encode(commandBuffer: commandBuffer, sourceImage: images.source, destinationImage: images.destination)
        testParam.outputTexture = images.destination.texture
    }

    /// Creates the source and destination images (in that order), throwing
    /// instead of crashing on a force-unwrap when a prerequisite is missing.
    private func makeImagePair() throws -> (source: MPSImage, destination: MPSImage) {
        guard let device = device,
            let source = mpsImageCreator?.createMPSImage(device: device),
            let destination = mpsImageCreator?.createMPSImage(device: device) else {
                throw NSError(domain: "CNNConvKernel",
                              code: -1,
                              userInfo: [NSLocalizedDescriptionKey: "mpsImageCreator or device is not set, or MPSImage creation failed"])
        }
        return (source, destination)
    }
}
//
// BatchNormRelu.metal
// paddle-mobile
//
#include <metal_stdlib>
using namespace metal;
// Convolution parameters as laid out for the shaders.
// NOTE(review): presumably mirrors a host-side struct of the same name, so
// field order and widths must stay in sync with it — confirm.
// Not referenced by any kernel visible in this file.
struct MetalConvParam {
// Signed offsets, one per axis.
short offsetX;
short offsetY;
short offsetZ;
// Unsigned strides along x and y.
ushort strideX;
ushort strideY;
};
// Fused batch-norm + ReLU: out = max(in * new_scale + new_biase, 0).
//
//   inTexture  [[texture(0)]]  source texture array
//   outTexture [[texture(1)]]  destination texture array
//   new_scale  [[buffer(0)]]   one float4 of scale per array slice
//   new_biase  [[buffer(1)]]   one float4 of bias per array slice
//
// One thread handles one texel of one slice; threads outside the output
// extent exit immediately.
kernel void batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                                texture2d_array<float, access::write> outTexture [[texture(1)]],
                                const device float4 *new_scale [[buffer(0)]],
                                const device float4 *new_biase [[buffer(1)]],
                                uint3 gid [[thread_position_in_grid]]) {
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) {
        return;
    }

    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);

    // texture2d_array::sample takes (sampler, float2 coord, uint array).
    // The coordinate was previously passed as three separate scalars, which
    // does not address texel (gid.x, gid.y) of slice gid.z.
    const float4 input = inTexture.sample(sample, float2(gid.xy), gid.z);
    const float4 output = fmax(input * new_scale[gid.z] + new_biase[gid.z], 0.0);
    outTexture.write(output, gid.xy, gid.z);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册