diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj index 64e83351b8ca195b42f0a0b32b7537f8e6ba5bdb..776c7d992c0e28844375dad6210921a91fefc6af 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj +++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj @@ -14,6 +14,8 @@ FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8520E11C550081E9F8 /* Main.storyboard */; }; FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8820E11C560081E9F8 /* Assets.xcassets */; }; FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */; }; + FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCB214D27920094B8E5 /* FPSCounter.swift */; }; + FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCC214D27920094B8E5 /* VideoCapture.swift */; }; FC8CFEE62135452C0094D569 /* genet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC8CFEE42135452B0094D569 /* genet_params */; }; FC8CFEE72135452C0094D569 /* genet_model in Resources */ = {isa = PBXBuildFile; fileRef = FC8CFEE52135452B0094D569 /* genet_model */; }; FC8CFEF8213551D10094D569 /* params in Resources */ = {isa = PBXBuildFile; fileRef = FC8CFEF6213551D00094D569 /* params */; }; @@ -61,6 +63,8 @@ FC039B8D20E11C560081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; FC27991121343A39000B6BAD /* paddle-mobile-demo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "paddle-mobile-demo-Bridging-Header.h"; sourceTree = ""; }; FC4FD97B2140EE250073E130 /* libc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libc++.tbd"; path = "usr/lib/libc++.tbd"; sourceTree = SDKROOT; }; + FC803BCB214D27920094B8E5 /* FPSCounter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FPSCounter.swift; sourceTree = ""; }; + FC803BCC214D27920094B8E5 /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = ""; }; FC8CFEE42135452B0094D569 /* genet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = genet_params; sourceTree = ""; }; FC8CFEE52135452B0094D569 /* genet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = genet_model; sourceTree = ""; }; FC8CFEF6213551D00094D569 /* params */ = {isa = PBXFileReference; lastKnownFileType = file; path = params; sourceTree = ""; }; @@ -132,6 +136,7 @@ FC039B8020E11C550081E9F8 /* paddle-mobile-demo */ = { isa = PBXGroup; children = ( + FC803BCA214D27920094B8E5 /* VideoCapture */, FC8CFED2213519540094D569 /* Net */, FC0E2C2020EDC03B009C1FAC /* models */, FC0E2C1D20EDC030009C1FAC /* images */, @@ -172,6 +177,15 @@ path = ../../models; sourceTree = ""; }; + FC803BCA214D27920094B8E5 /* VideoCapture */ = { + isa = PBXGroup; + children = ( + FC803BCB214D27920094B8E5 /* FPSCounter.swift */, + FC803BCC214D27920094B8E5 /* VideoCapture.swift */, + ); + path = VideoCapture; + sourceTree = ""; + }; FC8CFED2213519540094D569 /* Net */ = { isa = PBXGroup; children = ( @@ -345,9 +359,11 @@ buildActionMask = 2147483647; files = ( FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */, + FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */, FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */, FCF437E8214B6DDB00943429 /* Multi-Predict-ViewController.swift in Sources */, FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */, + FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */, FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; @@ -499,7 +515,7 @@ DEVELOPMENT_TEAM = A798K58VVL; ENABLE_BITCODE = NO; INFOPLIST_FILE = "paddle-mobile-demo/Info.plist"; - IPHONEOS_DEPLOYMENT_TARGET = 9.0; + IPHONEOS_DEPLOYMENT_TARGET = 10.0; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/Frameworks", @@ -526,7 +542,7 @@ DEVELOPMENT_TEAM = A798K58VVL; ENABLE_BITCODE = NO; INFOPLIST_FILE = "paddle-mobile-demo/Info.plist"; - IPHONEOS_DEPLOYMENT_TARGET = 9.0; + IPHONEOS_DEPLOYMENT_TARGET = 10.0; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/Frameworks", diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard b/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard index 2a6d748bbe43327d6c4155ff42b5821a4c601210..b7cc74593fc80209efa55355c8f0ea10d1c05222 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard @@ -37,7 +37,7 @@ - + @@ -48,7 +48,7 @@ - + + @@ -187,11 +192,12 @@ - + + @@ -203,10 +209,12 @@ + + @@ -223,11 +231,12 @@ + - + diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Multi-Predict-ViewController.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/Multi-Predict-ViewController.swift index 2ffdd0f17edfdfdcbbd230493fccfab04cb6001b..6948340a1873f86bd3ac76e0097974a9454d4528 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/Multi-Predict-ViewController.swift +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Multi-Predict-ViewController.swift @@ -14,11 +14,53 @@ class Multi_Predict_ViewController: UIViewController { var runner2: Runner! override func viewDidLoad() { super.viewDidLoad() -// let net = MobileNet_ssd_hand.init(device: MetalHelper.shared.device) -// runner1 = Runner.init(inNet: <#T##Net#>, commandQueue: <#T##MTLCommandQueue?#>, inPlatform: <#T##Platform#>) + let mobileNet = MobileNet_ssd_hand.init(device: MetalHelper.shared.device) + let genet = Genet.init(device: MetalHelper.shared.device) + runner1 = Runner.init(inNet: mobileNet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU) + let queue2 = MetalHelper.shared.device.makeCommandQueue() + + runner2 = Runner.init(inNet: genet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU) + + + + + + } @IBAction func predictAct(_ sender: Any) { - + let success = self.runner2.load() +// DispatchQueue.global().async { + let image1 = UIImage.init(named: "hand.jpg") +// let success = self.runner2.load() +// if success { +// for i in 0..<10000 { +// print(i) +// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in +// print("result1: ") +//// print(res) +// }) +// } +// } else { +// print("load failed") +// } +// self.runner1.clear() +// } +// return +// DispatchQueue.global().async { +//// sleep(1) +// let image1 = UIImage.init(named: "banana.jpeg") +//// if success { +// for _ in 0..<10 { +// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in +// print("result2: ") +// print(res) +// }) +// } +//// } else { +//// print("load failed") +//// } +//// self.runner2.clear() +// } } } diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/FPSCounter.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/FPSCounter.swift new file mode 100644 index 0000000000000000000000000000000000000000..f9e841f9c2a3060e775726023b6d5cfc3eeb679d --- /dev/null +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/FPSCounter.swift @@ -0,0 +1,31 @@ + + +import Foundation +import QuartzCore + +public class FPSCounter { + private(set) public var fps: Double = 0 + + var frames = 0 + var startTime: CFTimeInterval = 0 + + public func start() { + frames = 0 + startTime = CACurrentMediaTime() + } + + public func frameCompleted() { + frames += 1 + let now = CACurrentMediaTime() + let elapsed = now - startTime + if elapsed > 0.1 { + let current = Double(frames) / elapsed + let smoothing = 0.75 + fps = smoothing*fps + (1 - smoothing)*current + if elapsed > 1 { + frames = 0 + startTime = CACurrentMediaTime() + } + } + } +} diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/VideoCapture.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/VideoCapture.swift new file mode 100644 index 0000000000000000000000000000000000000000..5bbd33927a6a76fd3dfc5fb54c6a876663ffba33 --- /dev/null +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/VideoCapture/VideoCapture.swift @@ -0,0 +1,215 @@ + +import UIKit +import Metal +import CoreVideo +import AVFoundation + +@available(iOS 10.0, *) +@objc public protocol VideoCaptureDelegate: NSObjectProtocol { + @objc optional func videoCapture(_ capture: VideoCapture, didCaptureSampleBuffer sampleBuffer: CMSampleBuffer, timestamp: CMTime) + @objc optional func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime) + @objc optional func videoCapture(_ capture: VideoCapture, didCapturePhoto previewImage: UIImage?) + @objc optional func videoCapture(_ capture: VideoCapture, didCapturePhotoTexture texture: MTLTexture?) +} + +/** + Simple interface to the iPhone's camera. +*/ +@available(iOS 10.0, *) +public class VideoCapture: NSObject { + public var previewLayer: AVCaptureVideoPreviewLayer? + public weak var delegate: VideoCaptureDelegate? + public var fps = -1 + private let device: MTLDevice? + private let videoOrientation: AVCaptureVideoOrientation + private var textureCache: CVMetalTextureCache? + private let captureSession = AVCaptureSession() + private let videoOutput = AVCaptureVideoDataOutput() + private let photoOutput = AVCapturePhotoOutput() + private let queue = DispatchQueue(label: "net.machinethink.camera-queue") + private var lastTimestamp = CMTime() + private let cameraPosition: AVCaptureDevice.Position + public init(device: MTLDevice? = nil, orientation: AVCaptureVideoOrientation = .portrait, position: AVCaptureDevice.Position = .back) { + self.device = device + self.videoOrientation = orientation + self.cameraPosition = position + super.init() + } + + public func setUp(sessionPreset: AVCaptureSession.Preset = .medium, + completion: @escaping (Bool) -> Void) { + queue.async { + let success = self.setUpCamera(sessionPreset: sessionPreset) + DispatchQueue.main.async { + completion(success) + } + } + } + + func fontCamera() -> AVCaptureDevice? { + let deveices = AVCaptureDevice.DiscoverySession.init(deviceTypes: [.builtInWideAngleCamera], mediaType: AVMediaType.video, position: .front).devices + return deveices.first + + } + + func setUpCamera(sessionPreset: AVCaptureSession.Preset) -> Bool { + if let inDevice = device{ + guard CVMetalTextureCacheCreate(kCFAllocatorDefault, nil, inDevice, nil, &textureCache) == kCVReturnSuccess else { + print("Error: could not create a texture cache") + return false + } + } + + captureSession.beginConfiguration() + captureSession.sessionPreset = sessionPreset + + var oCaptureDevice: AVCaptureDevice? + switch cameraPosition { + case .back: + oCaptureDevice = AVCaptureDevice.default(for: AVMediaType.video) + break + case .front: + oCaptureDevice = fontCamera() + break + default: + break + } + + guard let captureDevice = oCaptureDevice else { + print("Error: no video devices available") + return false + } + + guard let videoInput = try? AVCaptureDeviceInput(device: captureDevice) else { + print("Error: could not create AVCaptureDeviceInput") + return false + } + + if captureSession.canAddInput(videoInput) { + captureSession.addInput(videoInput) + } + + let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession) + previewLayer.videoGravity = AVLayerVideoGravity.resizeAspect + previewLayer.connection?.videoOrientation = self.videoOrientation + self.previewLayer = previewLayer + + let settings: [String : Any] = [ + kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA) + ] + + videoOutput.videoSettings = settings + videoOutput.alwaysDiscardsLateVideoFrames = true + videoOutput.setSampleBufferDelegate(self, queue: queue) + if captureSession.canAddOutput(videoOutput) { + captureSession.addOutput(videoOutput) + } + + // We want the buffers to be in portrait orientation otherwise they are + // rotated by 90 degrees. Need to set this _after_ addOutput()! + videoOutput.connection(with: AVMediaType.video)?.videoOrientation = self.videoOrientation + + if captureSession.canAddOutput(photoOutput) { + captureSession.addOutput(photoOutput) + } + + captureSession.commitConfiguration() + return true + } + + public func start() { + if !captureSession.isRunning { + captureSession.startRunning() + } + } + + public func stop() { + if captureSession.isRunning { + captureSession.stopRunning() + } + } + + /* Captures a single frame of the camera input. */ + public func capturePhoto() { + let settings = AVCapturePhotoSettings(format: [kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)]) + settings.previewPhotoFormat = [ + kCVPixelBufferPixelFormatTypeKey as String: settings.__availablePreviewPhotoPixelFormatTypes[0], + kCVPixelBufferWidthKey as String: 480, + kCVPixelBufferHeightKey as String: 360, + ] + photoOutput.capturePhoto(with: settings, delegate: self) + } + + func convertToMTLTexture(sampleBuffer: CMSampleBuffer?) -> MTLTexture? { + if let textureCache = textureCache, let sampleBuffer = sampleBuffer, let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { + let width = CVPixelBufferGetWidth(imageBuffer) + let height = CVPixelBufferGetHeight(imageBuffer) + var texture: CVMetalTexture? + CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault, textureCache, imageBuffer, nil, .bgra8Unorm, width, height, 0, &texture) + if let texture = texture { + return CVMetalTextureGetTexture(texture) + } + } + return nil + } + + func convertToUIImage(sampleBuffer: CMSampleBuffer?) -> UIImage? { + if let sampleBuffer = sampleBuffer, + let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) { + let width = CVPixelBufferGetWidth(imageBuffer) + let height = CVPixelBufferGetHeight(imageBuffer) + let rect = CGRect(x: 0, y: 0, width: CGFloat(width), height: CGFloat(height)) + let ciImage = CIImage(cvPixelBuffer: imageBuffer) + let ciContext = CIContext(options: nil) + if let cgImage = ciContext.createCGImage(ciImage, from: rect) { + return UIImage(cgImage: cgImage) + } + } + return nil + } +} + +extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate { + public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { + // Because lowering the capture device's FPS looks ugly in the preview, + // we capture at full speed but only call the delegate at its desired + // framerate. If `fps` is -1, we run at the full framerate. + let timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer) + let deltaTime = timestamp - lastTimestamp + if fps == -1 || deltaTime >= CMTimeMake(1, Int32(fps)) { + lastTimestamp = timestamp + self.delegate?.videoCapture?(self, didCaptureSampleBuffer: sampleBuffer, timestamp: timestamp) + if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCaptureVideoTexture:timestamp:))) ?? false{ + let texture = convertToMTLTexture(sampleBuffer: sampleBuffer) + delegate?.videoCapture?(self, didCaptureVideoTexture: texture, timestamp: timestamp) + } + } + } + + public func captureOutput(_ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { + print("dropped frame") + } +} + +extension VideoCapture: AVCapturePhotoCaptureDelegate { + public func photoOutput(_ captureOutput: AVCapturePhotoOutput, + didFinishProcessingPhoto photoSampleBuffer: CMSampleBuffer?, + previewPhoto previewPhotoSampleBuffer: CMSampleBuffer?, + resolvedSettings: AVCaptureResolvedPhotoSettings, + bracketSettings: AVCaptureBracketedStillImageSettings?, + error: Error?) { + var imageTexture: MTLTexture? + var previewImage: UIImage? + if error == nil { + if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCapturePhotoTexture:))) ?? false{ + imageTexture = convertToMTLTexture(sampleBuffer: photoSampleBuffer) + self.delegate?.videoCapture?(self, didCapturePhotoTexture: imageTexture) + } + + if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCapturePhoto:))) ?? false{ + previewImage = convertToUIImage(sampleBuffer: previewPhotoSampleBuffer) + self.delegate?.videoCapture?(self, didCapturePhoto: previewImage) + } + } + } +} diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift index 3bc9e1668750af53d53bb14fd05c363c96e924c0..14b4920899560a1e41e6a75d572431370d9af7e4 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift @@ -14,13 +14,15 @@ import UIKit import MetalKit +import CoreMedia import paddle_mobile import MetalPerformanceShaders let platform: Platform = .GPU let threadSupport = [1] -let modelHelperMap: [SupportModel : Runner] = [.mobilenet_ssd : Runner.init(inNet: MobileNet_ssd_hand.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform), +//.mobilenet_ssd : Runner.init(inNet: MobileNet_ssd_hand.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform), +let modelHelperMap: [SupportModel : Runner] = [ .genet : Runner.init(inNet: Genet.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform), .mobilenet_ssd_ar : Runner.init(inNet: MobileNet_ssd_AR.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform)] //, .genet : Genet.init() @@ -28,13 +30,14 @@ let modelHelperMap: [SupportModel : Runner] = [.mobilenet_ssd : Runner.init(inNe enum SupportModel: String{ // case mobilenet = "mobilenet" - case mobilenet_ssd = "mobilenetssd" +// case mobilenet_ssd = "mobilenetssd" case genet = "genet" case mobilenet_ssd_ar = "mobilenetssd_ar" static func supportedModels() -> [SupportModel] { - //.mobilenet, - return [.mobilenet_ssd, .genet, .mobilenet_ssd_ar] + // .mobilenet, + // .mobilenet_ssd, + return [.genet, .mobilenet_ssd_ar] } } @@ -44,14 +47,15 @@ class ViewController: UIViewController { @IBOutlet weak var elapsedTimeLabel: UILabel! @IBOutlet weak var modelPickerView: UIPickerView! @IBOutlet weak var threadPickerView: UIPickerView! - + @IBOutlet weak var videoView: UIView! + var videoCapture: VideoCapture! + var selectImage: UIImage? var inputPointer: UnsafeMutablePointer? var modelType: SupportModel = SupportModel.supportedModels()[0] var toPredictTexture: MTLTexture? var runner: Runner { - get { return modelHelperMap[modelType] ?! " has no this type " } @@ -81,7 +85,7 @@ class ViewController: UIViewController { } @IBAction func predictAct(_ sender: Any) { - let max = 1 + let max = 50 switch platform { case .GPU: guard let inTexture = toPredictTexture else { @@ -91,7 +95,7 @@ class ViewController: UIViewController { let startDate = Date.init() for i in 0.. + +@interface CPUResult: NSObject +@property (assign, nonatomic) float *output; +@property (assign, nonatomic) int outputSize; +@end + @interface NMSCompute: NSObject @property (assign, nonatomic) float scoreThredshold; @@ -34,6 +40,6 @@ @property (strong, nonatomic) NSArray *bboxDim; --(NSArray *)computeWithScore:(float *)score andBBoxs:(float *)bbox; +-(CPUResult *)computeWithScore:(float *)score andBBoxs:(float *)bbox; @end diff --git a/metal/paddle-mobile/paddle-mobile/CPUCompute.mm b/metal/paddle-mobile/paddle-mobile/CPUCompute.mm index be13105cb48ada269896f5dfde0df19b28a218d0..b97153765b46bb63d604d8845eee08d91283481d 100644 --- a/metal/paddle-mobile/paddle-mobile/CPUCompute.mm +++ b/metal/paddle-mobile/paddle-mobile/CPUCompute.mm @@ -21,6 +21,8 @@ #import + + struct NMSParam { float *score_data; @@ -282,9 +284,12 @@ void MultiClassNMSCompute(NMSParam *param) { param->output_size = output_size; } +@implementation CPUResult +@end + @implementation NMSCompute --(NSArray *)computeWithScore:(float *)score andBBoxs:(float *)bbox { +-(CPUResult *)computeWithScore:(float *)score andBBoxs:(float *)bbox { NMSParam param; param.box_data = bbox; param.score_data = score; @@ -306,12 +311,10 @@ void MultiClassNMSCompute(NMSParam *param) { } param.box_dim = box_dim; MultiClassNMSCompute(¶m); - NSMutableArray *output = [NSMutableArray arrayWithCapacity:param.output_size]; - for (int i = 0; i < param.output_size; ++i) { - [output addObject:[NSNumber numberWithFloat:param.output[i]]]; - } - delete param.output; - return output; + CPUResult *cr = [[CPUResult alloc] init]; + cr.output = param.output; + cr.outputSize = param.output_size; + return cr; } @end diff --git a/metal/paddle-mobile/paddle-mobile/Common/Types.swift b/metal/paddle-mobile/paddle-mobile/Common/Types.swift index 9e5a66dbd2f0ace1f4f727fffad6c94b9061b457..a1197ed2188a263af3c0819fec09b584af501dd3 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/Types.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/Types.swift @@ -243,7 +243,7 @@ extension Tensor: Variant { extension Texture: Variant { } -extension ResultHolder: Variant { +extension GPUResultHolder: Variant { } extension InputTexture: Variant { @@ -252,3 +252,43 @@ extension InputTexture: Variant { extension MTLTexture where Self: Variant { } + +class FetchHolder: Variant { + var resultBuffer: MTLBuffer? + var dim: [Int] + var capacity: Int + + init(inCapacity: Int, inDim: [Int]) { + capacity = inCapacity + dim = inDim + } + + func initBuffer(device: MTLDevice) { + resultBuffer = device.makeBuffer(length: capacity * 4, options: []) + } + + var result: UnsafeMutablePointer { + guard let inResultBuffer = resultBuffer else { + fatalError() + } + return inResultBuffer.contents().bindMemory(to: Float32.self, capacity: capacity) + } + +} + +extension FetchHolder: CustomStringConvertible, CustomDebugStringConvertible { + var description: String { + fatalError() +// return "\(result)" + } + + var debugDescription: String { + fatalError() +// return "\(result)" + } + + +} + + + diff --git a/metal/paddle-mobile/paddle-mobile/Genet.swift b/metal/paddle-mobile/paddle-mobile/Genet.swift index 40c190ef875f2fa559eec8c1999de98694d793e1..fba5b22ca6a5b132a9c62e2baf6d3d58dea12ced 100644 --- a/metal/paddle-mobile/paddle-mobile/Genet.swift +++ b/metal/paddle-mobile/paddle-mobile/Genet.swift @@ -34,8 +34,9 @@ public class Genet: Net { } } - override public func resultStr(res: [Float]) -> String { - return " \(Array(res.suffix(10))) ... " + override public func resultStr(res: ResultHolder) -> String { + fatalError() +// return " \(Array(res.suffix(10))) ... " } } diff --git a/metal/paddle-mobile/paddle-mobile/MobileNet.swift b/metal/paddle-mobile/paddle-mobile/MobileNet.swift index a383a75d7216b3c574d3bd881d3b63774b9e36b8..7d10a920d15e751f29fce7f9f6be71cd6a2d6b69 100644 --- a/metal/paddle-mobile/paddle-mobile/MobileNet.swift +++ b/metal/paddle-mobile/paddle-mobile/MobileNet.swift @@ -42,9 +42,12 @@ class MobileNet: Net{ let labels = PreWords.init(fileName: "synset") - override public func resultStr(res: [Float]) -> String { + override public func resultStr(res: ResultHolder) -> String { + guard let resPointer = res.result else { + fatalError() + } var s: [String] = [] - res.top(r: 5).enumerated().forEach{ + (0.. String { + override public func resultStr(res: ResultHolder) -> String { return " \(res)" } - override func fetchResult(paddleMobileRes: ResultHolder) -> [Float32] { + override func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder { - guard let interRes = paddleMobileRes.intermediateResults else { - fatalError(" need have inter result ") - } - - guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? Texture else { - fatalError(" need score ") - } - - guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? Texture else { - fatalError() - } - - var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.padToFourDim[0], h: score.padToFourDim[1], w: score.padToFourDim[2], c: score.padToFourDim[3])) -// print("score: ") -// print(scoreFormatArr.strideArray()) +// guard let interRes = paddleMobileRes.intermediateResults else { +// fatalError(" need have inter result ") +// } // - var bboxArr = bbox.metalTexture.float32Array() -// print("bbox: ") -// print(bboxArr.strideArray()) - - let nmsCompute = NMSCompute.init() - nmsCompute.scoreThredshold = 0.01 - nmsCompute.nmsTopK = 400 - nmsCompute.keepTopK = 200 - nmsCompute.nmsEta = 1.0 - nmsCompute.nmsThreshold = 0.45 - nmsCompute.background_label = 0; - - nmsCompute.scoreDim = [NSNumber.init(value: score.tensorDim[0]), NSNumber.init(value: score.tensorDim[1]), NSNumber.init(value: score.tensorDim[2])] - - nmsCompute.bboxDim = [NSNumber.init(value: bbox.tensorDim[0]), NSNumber.init(value: bbox.tensorDim[1]), NSNumber.init(value: bbox.tensorDim[2])] - guard let result = nmsCompute.compute(withScore: &scoreFormatArr, andBBoxs: &bboxArr) else { - fatalError( " result error " ) - } - - let output: [Float32] = result.map { $0.floatValue } - - - return output +// guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? Texture else { +// fatalError(" need score ") +// } +// +// guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? Texture else { +// fatalError() +// } +// +// var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.padToFourDim[0], h: score.padToFourDim[1], w: score.padToFourDim[2], c: score.padToFourDim[3])) +//// print("score: ") +//// print(scoreFormatArr.strideArray()) +//// +// var bboxArr = bbox.metalTexture.float32Array() +//// print("bbox: ") +//// print(bboxArr.strideArray()) +// +// let nmsCompute = NMSCompute.init() +// nmsCompute.scoreThredshold = 0.01 +// nmsCompute.nmsTopK = 400 +// nmsCompute.keepTopK = 200 +// nmsCompute.nmsEta = 1.0 +// nmsCompute.nmsThreshold = 0.45 +// nmsCompute.background_label = 0; +// +// nmsCompute.scoreDim = [NSNumber.init(value: score.tensorDim[0]), NSNumber.init(value: score.tensorDim[1]), NSNumber.init(value: score.tensorDim[2])] +// +// nmsCompute.bboxDim = [NSNumber.init(value: bbox.tensorDim[0]), NSNumber.init(value: bbox.tensorDim[1]), NSNumber.init(value: bbox.tensorDim[2])] +// guard let result = nmsCompute.compute(withScore: &scoreFormatArr, andBBoxs: &bboxArr) else { +// fatalError( " result error " ) +// } +// +// let output: [Float32] = result.map { $0.floatValue } +// +// +// return output + fatalError() } diff --git a/metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift b/metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift index 7debb79c737e363940a735c4f74c7c9efbe58c0b..f90ba88d158538b473938cfee57bca89eec208ae 100644 --- a/metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift +++ b/metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift @@ -34,46 +34,52 @@ public class MobileNet_ssd_AR: Net{ } } - override public func resultStr(res: [Float]) -> String { + override public func resultStr(res: ResultHolder) -> String { return " \(res)" } - override func fetchResult(paddleMobileRes: ResultHolder) -> [Float32] { - + override func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder { guard let interRes = paddleMobileRes.intermediateResults else { fatalError(" need have inter result ") } - guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? Texture else { + guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? FetchHolder else { fatalError(" need score ") } - guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? Texture else { + guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? FetchHolder else { fatalError() } - var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.padToFourDim[0], h: score.padToFourDim[1], w: score.padToFourDim[2], c: score.padToFourDim[3])) - // print("score: ") - // print(scoreFormatArr.strideArray()) - // - var bboxArr = bbox.metalTexture.float32Array() - // print("bbox: ") - // print(bboxArr.strideArray()) + let startDate = Date.init() + +// print("scoreFormatArr: ") +//print((0..? + public let capacity: Int + + init(inResult: UnsafeMutablePointer?, inCapacity: Int) { + result = inResult + capacity = inCapacity + } + + public func releasePointer() { + result?.deinitialize(count: capacity) + result?.deallocate() + } +} + +public class Net: NSObject { + var except: Int = 0 + var means: [Float] = [] + var scale: Float = 0.0 + var dim: (n: Int, h: Int, w: Int, c: Int) = (n: 0, h: 0, w: 0, c: 0) + var preprocessKernel: CusomKernel? = nil + var paramPointer: UnsafeMutableRawPointer? = nil + var paramSize: Int = 0 + var modelPointer: UnsafeMutableRawPointer? = nil + var modelSize: Int = 0 + var modelPath: String = "" + var paramPath: String = "" + var modelDir: String = "" + + public func resultStr(res: ResultHolder) -> String { + fatalError() + } + + func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder { + return ResultHolder.init(inResult: paddleMobileRes.resultPointer, inCapacity: paddleMobileRes.capacity) + } + + @objc public init(device: MTLDevice) { + super.init() + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift index 68763feef8e347cdfa3b7be5096aadc67fb93084..af7dc47df2a68068d47f2172a59222cfa41dc904 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift @@ -64,7 +64,8 @@ class OpCreator { gBilinearInterpType : BilinearInterpOp

.creat, gSplit : SplitOp

.creat, gShape : ShapeOp

.creat, - gFlatten : FlattenOp

.creat] + gFlatten : FlattenOp

.creat, + gConvAddPreluType : ConvAddPreluOp

.creat] private init(){} } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift b/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift index dded09dae7985829c062eac67f9df47cbcbd6084..01b6692c6e168c4c8636248c232f94a598152f33 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift @@ -19,6 +19,12 @@ protocol Fusion { static func fusionNode() -> Node static func change() -> [String : [(from: String, to: String)]] static func fusionType() -> String + static func needCheck() -> [(Int, String)] +} +extension Fusion { + static func needCheck() -> [(Int, String)] { + return [] + } } protocol Runable { @@ -26,6 +32,7 @@ protocol Runable { func runImpl(device: MTLDevice,buffer: MTLCommandBuffer) throws func delogOutput() func inputVariant() -> [String : [Variant]] + func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) } extension Runable where Self: OperatorProtocol{ @@ -38,11 +45,16 @@ extension Runable where Self: OperatorProtocol{ } func inputVariant() -> [String : [Variant]] { - return [:] -// fatalError(" op \(type) need implement inputVariant") +// return [:] + fatalError(" op \(type) need implement inputVariant") + } + + func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) { + fatalError(" need implement ") } func delogOutput() { + print(type + ": has no implementation" ) } } @@ -144,6 +156,7 @@ let gBilinearInterpType = "bilinear_interp" let gSplit = "split" let gShape = "shape" let gFlatten = "flatten" +let gConvAddPreluType = "conv_add_prelu" let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Output"]), gBatchNormType : (inputs: ["X"], outputs: ["Y"]), @@ -169,5 +182,7 @@ let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Out gBilinearInterpType : (inputs: ["X"], outputs: ["Out"]), gSplit : (inputs: ["X"], outputs: ["Out"]), gShape : (inputs: ["Input"], outputs: ["Out"]), - gFlatten : (inputs: ["X"], outputs: ["Out"]) + gFlatten : (inputs: ["X"], outputs: ["Out"]), + gConvAddPreluType : (inputs: ["Input"], outputs: ["Out"]) + ] diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddPreluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddPreluOp.swift new file mode 100644 index 0000000000000000000000000000000000000000..0a0fcc7d7934e1c3c7a48f6925105b02ec6d8fc9 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddPreluOp.swift @@ -0,0 +1,101 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +class ConvAddPreluParam: OpParam { + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + filter = try ConvAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) + input = try ConvAddPreluParam.input(inputs: opDesc.inputs, from: inScope) + output = try ConvAddPreluParam.outputOut(outputs: opDesc.outputs, from: inScope) + stride = try ConvAddPreluParam.getAttr(key: "strides", attrs: opDesc.attrs) + paddings = try ConvAddPreluParam.getAttr(key: "paddings", attrs: opDesc.attrs) + dilations = try ConvAddPreluParam.getAttr(key: "dilations", attrs: opDesc.attrs) + groups = try ConvAddPreluParam.getAttr(key: "groups", attrs: opDesc.attrs) + alpha = try ConvAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope) + mode = try ConvAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs) + y = try ConvAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope) + } catch let error { + throw error + } + } + + let input: Texture

+ let y: Tensor + let filter: Tensor + let mode: String + let alpha: Tensor

+ var output: Texture

+ let stride: [Int32] + let paddings: [Int32] + let dilations: [Int32] + let groups: Int +} + +class ConvAddPreluOp: Operator, ConvAddPreluParam

>, Runable, Creator, InferShaperable, Fusion{ + typealias OpType = ConvAddPreluOp

+ + static func fusionNode() -> Node { + let beginNode = Node.init(inType: gConvType) + _ = beginNode + --> Node.init(inType: gElementwiseAddType) --> Node.init(inType: gPreluType) + return beginNode + } + + static func change() -> [String : [(from: String, to: String)]] { + return [:] + } + + static func fusionType() -> String { + return gConvAddPreluType + } + + func inferShape() { + let inDims = para.input.dim + let filterDim = para.filter.dim + let strides = para.stride + let paddings = para.paddings + let dilations = para.dilations + + var outDim = [inDims[0]] + for i in 0..: OpParam{ - var output: Texture

+ var output: FetchHolder let input: Texture

let scope: Scope required init(opDesc: OpDesc, inScope: Scope) throws { scope = inScope do { input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope) - output = input + output = FetchHolder.init(inCapacity: input.numel(), inDim: input.tensorDim.dims) + scope.setOutput(output: output) } catch let error { throw error } @@ -34,14 +35,40 @@ class FetchParam: OpParam{ class FetchKernel: Kernel, Computable { func compute(commandBuffer: MTLCommandBuffer, param: FetchParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setBuffer(param.output.resultBuffer!, offset: 0, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.input.metalTexture) + encoder.endEncoding() } required init(device: MTLDevice, param: FetchParam

) { - super.init(device: device, inFunctionName: "place_holder") + param.output.initBuffer(device: device) + if computePrecision == .Float16 { + if param.input.transpose == [0, 2, 3, 1] { + super.init(device: device, inFunctionName: "fetch_half") + } else { +// fatalError(" not support ") + super.init(device: device, inFunctionName: "fetch_placeholder_half") + print(" not support ") + } + } else if computePrecision == .Float32 { + if param.input.transpose == [0, 2, 3, 1] { + super.init(device: device, inFunctionName: "fetch") + } else { + print(" not support ") + super.init(device: device, inFunctionName: "fetch_placeholder") +// fatalError(" not support ") + } + } else { + fatalError(" not support ") + } } } -class FetchOp: Operator< FetchKernel

, FetchParam

>, Runable, Creator, InferShaperable{ +class FetchOp: Operator< FetchKernel

, FetchParam

>, Runable, Creator, InferShaperable { typealias OpType = FetchOp

@@ -50,7 +77,11 @@ class FetchOp: Operator< FetchKernel

, FetchParam

>, Runab } func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - scope.setOutput(output: para.output) + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddPreluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddPreluKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..44369f22a9300bd0e5e6ac7c41b2f127bc5b5ff8 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddPreluKernel.swift @@ -0,0 +1,150 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +class ConvAddPreluKernel: Kernel, Computable { + var metalParam: MetalConvParam! + required init(device: MTLDevice, param: ConvAddPreluParam

) { + param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) + param.filter.initBuffer(device: device, precision: computePrecision) + param.y.initBuffer(device: device, precision: computePrecision) + param.alpha.initBuffer(device: device, precision: computePrecision) + + if computePrecision == .Float16 { + if param.filter.width == 1 && param.filter.height == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half") + } else { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half") + } + + } else if param.filter.channel == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half") + } else { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half") + } + } else if param.filter.width == 3 && param.filter.height == 3 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half") + } else { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half") + } + + } else if param.filter.width == 1 && param.filter.height == 5 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half") + } else { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half") + } + } else if param.filter.width == 5 && param.filter.height == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half") + } else { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half") + } + } else { + fatalError(" unsupport yet ") + } + } else if computePrecision == .Float32 { + if param.filter.width == 1 && param.filter.height == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float") + } else { + super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float") + } + } else if param.filter.channel == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float") + } else { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float") + } + } else if param.filter.width == 3 && param.filter.height == 3 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float") + } else { + super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float") + } + + } else if param.filter.width == 1 && param.filter.height == 5 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float") + } else { + super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float") + } + } else if param.filter.width == 5 && param.filter.height == 1 { + if param.mode == "channel" { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float") + } else if param.mode == "element" { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float") + } else { + super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float") + } + } else { + fatalError(" unsupport yet ") + } + } else { + fatalError() + } + + let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1]) + + let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0]) + + // print(" function: \(functionName)") + // print("offset x: \(offsetX)") + // print("offset y: \(offsetY)") + + let offsetZ = 0.0 + let inMetalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1])) + // print("metal param: ") + // print(inMetalParam) + + metalParam = inMetalParam + } + + func compute(commandBuffer: MTLCommandBuffer, param: ConvAddPreluParam

) throws { +// guard let encoder = commandBuffer.makeComputeCommandEncoder() else { +// throw PaddleMobileError.predictError(message: " encode is nil") +// } +// +// encoder.setTexture(param.input.metalTexture, index: 0) +// encoder.setTexture(param.output.metalTexture, index: 1) +// encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) +// encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) +// encoder.setBuffer(param.y.buffer, offset: 0, index: 2) +// encoder.setBuffer(param.alpha.buffer, offset: 0, index: 3) +// encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) +// encoder.endEncoding() + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/MulticlassNMSKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/MulticlassNMSKernel.swift index 5ee4a5b31c5ee22cf28bf54fd8f7df13d14f9610..3f78efb89e47197ae0af6a1bb53955bc4a937eda 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/MulticlassNMSKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/MulticlassNMSKernel.swift @@ -15,11 +15,41 @@ import Foundation class MulticlassNMSKernel: Kernel, Computable{ - + let pipline1: MTLComputePipelineState + required init(device: MTLDevice, param: MulticlassNMSParam

) { - super.init(device: device, inFunctionName: "place_holder") + + param.middleOutput.initBuffer(device: device) + param.bboxOutput.initBuffer(device: device) + if computePrecision == .Float32 { + pipline1 = device.pipeLine(funcName: "nms_fetch_bbox", inPaddleMobileLib: true) + super.init(device: device, inFunctionName: "nms_fetch_result") + } else if computePrecision == .Float16 { + pipline1 = device.pipeLine(funcName: "nms_fetch_bbox_half", inPaddleMobileLib: true) + super.init(device: device, inFunctionName: "nms_fetch_result_half") + } else { + fatalError( " unsupport precision " ) + } + } func compute(commandBuffer: MTLCommandBuffer, param: MulticlassNMSParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + encoder.setTexture(param.scores.metalTexture, index: 0) + encoder.setBuffer(param.middleOutput.resultBuffer!, offset: 0, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.scores.metalTexture) + encoder.endEncoding() + + guard let encoderBox = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + + encoderBox.setTexture(param.bboxes.metalTexture, index: 0) + encoderBox.setBuffer(param.bboxOutput.resultBuffer!, offset: 0, index: 0) + encoderBox.dispatch(computePipline: pipline1, outTexture: param.bboxes.metalTexture) + encoderBox.endEncoding() } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift index a353c535afcc73bdc0ebee10ae10b2ba93b8a93e..4114d3c3c62054235cd57fe37fe9cd83c5bb58cb 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift @@ -71,7 +71,6 @@ class ReshapeKernel: Kernel, Computable{ } func compute(commandBuffer: MTLCommandBuffer, param: ReshapeParam

) throws { - print("reshape compute") guard let encoder = commandBuffer.makeComputeCommandEncoder() else { throw PaddleMobileError.predictError(message: " encoder is nil") } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ShapeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ShapeKernel.swift index f64d71ff015f47e889728ce502470724a1d2cade..feb052a44fdc7c6134cc90f07f3fc94ad0a497df 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ShapeKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ShapeKernel.swift @@ -19,7 +19,7 @@ struct ShapeMetalParam { class ShapeKernel: Kernel, Computable{ func compute(commandBuffer: MTLCommandBuffer, param: ShapeParam

) throws { - print("shape compute") +// print("shape compute") // guard let encoder = commandBuffer.makeComputeCommandEncoder() else { // throw PaddleMobileError.predictError(message: " encode is nil") // } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift index b9917c643541a531146a55df9c455d78c08696eb..7b872283d45bca4adb5e90a531c936f2ad5534f8 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/TransposeKernel.swift @@ -74,4 +74,6 @@ class TransposeKernel: Kernel, Computable { encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) encoder.endEncoding() } + + } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddBNReluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddBNReluKernel.metal index ffa66212b16bb6c6180910cae2d0c34f8659c556..87b60a64fc48ab89af274e0b24897e0b411599e0 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddBNReluKernel.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddBNReluKernel.metal @@ -17,14 +17,15 @@ using namespace metal; -kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - const device half4 *biase [[buffer(2)]], - const device float4 *new_scale [[buffer(3)]], - const device float4 *new_biase [[buffer(4)]], - uint3 gid [[thread_position_in_grid]]) { +kernel void conv_add_batch_norm_relu_1x1_half( + texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half4 *weights [[buffer(1)]], + const device half4 *biase [[buffer(2)]], + const device half4 *new_scale [[buffer(3)]], + const device half4 *new_biase [[buffer(4)]], + uint3 gid [[thread_position_in_grid]]) { if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() || @@ -41,7 +42,7 @@ kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - const device half4 *biase [[buffer(2)]], - const device float4 *new_scale [[buffer(3)]], - const device float4 *new_biase [[buffer(4)]], - uint3 gid [[thread_position_in_grid]]) { +kernel void conv_add_batch_norm_relu_3x3_half( + texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half4 *weights [[buffer(1)]], + const device half4 *biase [[buffer(2)]], + const device half4 *new_scale [[buffer(3)]], + const device half4 *new_biase [[buffer(4)]], + uint3 gid [[thread_position_in_grid]]) { if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() || @@ -86,7 +87,7 @@ kernel void conv_add_batch_norm_relu_3x3_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvParam ¶m [[buffer(0)]], - const device half *weights [[buffer(1)]], - const device half4 *biase [[buffer(2)]], - const device float4 *new_scale [[buffer(3)]], - const device float4 *new_biase [[buffer(4)]], - uint3 gid [[thread_position_in_grid]]) { +kernel void depthwise_conv_add_batch_norm_relu_3x3_half( + texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device half *weights [[buffer(1)]], + const device half4 *biase [[buffer(2)]], + const device half4 *new_scale [[buffer(3)]], + const device half4 *new_biase [[buffer(4)]], + uint3 gid [[thread_position_in_grid]]) { if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() || @@ -138,7 +139,7 @@ kernel void depthwise_conv_add_batch_norm_relu_3x3_half(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device VECTOR(P, 4) *weights [[buffer(1)]], + const device VECTOR(P, 4) *biase [[buffer(2)]], +#ifdef PRELU_CHANNEL + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_ELEMENT + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_OTHER + const device P *alpha [[buffer(3)]], +#endif + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 1; + + uint input_arr_size = inTexture.get_array_size(); + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + VECTOR(P, 4) input; + for (uint i = 0; i < input_arr_size; ++i) { + input = inTexture.sample(sample,float2(posInInput.x, posInInput.y), i); + VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i]; + output.x += dot(input, weight_x); + + VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i]; + output.y += dot(input, weight_y); + + VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i]; + output.z += dot(input, weight_z); + + VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i]; + output.w += dot(input, weight_w); + } + + output = output + float4(biase[gid.z]); + +#ifdef PRELU_CHANNEL + VECTOR(P, 4) alpha_value = alpha[gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_ELEMENT + int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); + VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_OTHER + P alpha_value = alpha[0]; + output.x = output.x > 0 ? output.x : (alpha_value * output.x); + output.y = output.y > 0 ? output.y : (alpha_value * output.y); + output.z = output.z > 0 ? output.z : (alpha_value * output.z); + output.w = output.w > 0 ? output.w : (alpha_value * output.w); +#endif + outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z); +} + +kernel void FUNC3_(conv_add_3x3, PRELU_TYPE, P)(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device VECTOR(P, 4) *weights [[buffer(1)]], + const device VECTOR(P, 4) *biase [[buffer(2)]], +#ifdef PRELU_CHANNEL + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_ELEMENT + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_OTHER + const device P *alpha [[buffer(3)]], +#endif + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + + const uint kernelHXW = 9; + + uint input_arr_size = inTexture.get_array_size(); + + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(0.0); + + ushort dilation_x = param.dilationX; + ushort dilation_y = param.dilationY; + + VECTOR(P, 4) input[9]; + + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i); + + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); + + input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i); + + input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); + + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + + input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); + + input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i); + + input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); + + input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i); + + for (int j = 0; j < 9; ++j) { + VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } + output = output + float4(biase[gid.z]); + +#ifdef PRELU_CHANNEL + VECTOR(P, 4) alpha_value = alpha[gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_ELEMENT + int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); + VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_OTHER + P alpha_value = alpha[0]; + output.x = output.x > 0 ? output.x : (alpha_value * output.x); + output.y = output.y > 0 ? output.y : (alpha_value * output.y); + output.z = output.z > 0 ? output.z : (alpha_value * output.z); + output.w = output.w > 0 ? output.w : (alpha_value * output.w); +#endif + outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z); +} + +kernel void FUNC3_(conv_add_5x1, PRELU_TYPE, P)(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device VECTOR(P, 4) *weights [[buffer(1)]], + const device VECTOR(P, 4) *biase [[buffer(2)]], +#ifdef PRELU_CHANNEL + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_ELEMENT + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_OTHER + const device P *alpha [[buffer(3)]], +#endif + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + + const uint kernelHXW = 5; + + uint input_arr_size = inTexture.get_array_size(); + + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(biase[gid.z]);; + + ushort dilation_y = param.dilationY; + VECTOR(P, 4) input[5]; + + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i); + + input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i); + + input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + + input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i); + + input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i); + + for (int j = 0; j < 5; ++j) { + VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } + +#ifdef PRELU_CHANNEL + VECTOR(P, 4) alpha_value = alpha[gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_ELEMENT + int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); + VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_OTHER + P alpha_value = alpha[0]; + output.x = output.x > 0 ? output.x : (alpha_value * output.x); + output.y = output.y > 0 ? output.y : (alpha_value * output.y); + output.z = output.z > 0 ? output.z : (alpha_value * output.z); + output.w = output.w > 0 ? output.w : (alpha_value * output.w); +#endif + outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z); +} + + +kernel void FUNC3_(conv_add_1x5, PRELU_TYPE, P)(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device VECTOR(P, 4) *weights [[buffer(1)]], + const device VECTOR(P, 4) *biase [[buffer(2)]], +#ifdef PRELU_CHANNEL + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_ELEMENT + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_OTHER + const device P *alpha [[buffer(3)]], +#endif + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + ushort2 stride = ushort2(param.strideX, param.strideY); + const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + + const uint kernelHXW = 5; + + uint input_arr_size = inTexture.get_array_size(); + + uint weithTo = gid.z * kernelHXW * input_arr_size * 4; + + float4 output = float4(biase[gid.z]); + + ushort dilation_x = param.dilationX; + VECTOR(P, 4) input[5]; + + for (uint i = 0; i < input_arr_size; ++i) { + input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i); + + input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i); + + input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); + + input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i); + + input[4] = inTexture.sample(sample, float2(posInInput.x + 2 * dilation_x, posInInput.y), i); + + for (int j = 0; j < 5; ++j) { + VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.x += dot(input[j], weight_x); + + VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.y += dot(input[j], weight_y); + + VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.z += dot(input[j], weight_z); + + VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i]; + output.w += dot(input[j], weight_w); + } + } + +#ifdef PRELU_CHANNEL + VECTOR(P, 4) alpha_value = alpha[gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_ELEMENT + int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); + VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_OTHER + P alpha_value = alpha[0]; + output.x = output.x > 0 ? output.x : (alpha_value * output.x); + output.y = output.y > 0 ? output.y : (alpha_value * output.y); + output.z = output.z > 0 ? output.z : (alpha_value * output.z); + output.w = output.w > 0 ? output.w : (alpha_value * output.w); +#endif + outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z); +} + +kernel void FUNC3_(depthwise_conv_add_3x3, PRELU_TYPE, P)(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvParam ¶m [[buffer(0)]], + const device P *weights [[buffer(1)]], + const device VECTOR(P, 4) *biase [[buffer(2)]], +#ifdef PRELU_CHANNEL + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_ELEMENT + const device VECTOR(P, 4) *alpha [[buffer(3)]], +#endif +#ifdef PRELU_OTHER + const device P *alpha [[buffer(3)]], +#endif + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + uint output_slice = gid.z; + ushort2 stride = ushort2(param.strideX, param.strideY); + ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY); + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + const uint kernelHXW = 9; + uint weithTo = gid.z * kernelHXW * 4; + float4 output = float4(biase[gid.z]); + VECTOR(P, 4) inputs[9]; + inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice); + inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice); + inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice); + inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice); + inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice); + inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice); + inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice); + inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice); + inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice); + for (int j = 0; j < 9; ++j) { + VECTOR(P, 4) input = inputs[j]; + output.x += input.x * weights[weithTo + 0 * kernelHXW + j]; + output.y += input.y * weights[weithTo + 1 * kernelHXW + j]; + output.z += input.z * weights[weithTo + 2 * kernelHXW + j]; + output.w += input.w * weights[weithTo + 3 * kernelHXW + j]; + } + +#ifdef PRELU_CHANNEL + VECTOR(P, 4) alpha_value = alpha[gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_ELEMENT + int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size(); + VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z]; + output.x = output.x > 0 ? output.x : (alpha_value.x * output.x); + output.y = output.y > 0 ? output.y : (alpha_value.y * output.y); + output.z = output.z > 0 ? output.z : (alpha_value.z * output.z); + output.w = output.w > 0 ? output.w : (alpha_value.w * output.w); +#endif +#ifdef PRELU_OTHER + P alpha_value = alpha[0]; + output.x = output.x > 0 ? output.x : (alpha_value * output.x); + output.y = output.y > 0 ? output.y : (alpha_value * output.y); + output.z = output.z > 0 ? output.z : (alpha_value * output.z); + output.w = output.w > 0 ? output.w : (alpha_value * output.w); +#endif + outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z); +} + +#endif + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddPreluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddPreluKernel.metal new file mode 100644 index 0000000000000000000000000000000000000000..f03a1d5b625cf01f1f1bc5ac23bebf7dabd968d9 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvAddPreluKernel.metal @@ -0,0 +1,65 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include "Common.metal" +using namespace metal; + +#define P float + + #define PRELU_CHANNEL prelu_channel + #define PRELU_TYPE prelu_channel + #include "ConvAddPrelu.inc.metal" + #undef PRELU_TYPE + #undef PRELU_CHANNEL + + #define PRELU_ELEMENT prelu_element + #define PRELU_TYPE prelu_element + #include "ConvAddPrelu.inc.metal" + #undef PRELU_TYPE + #undef PRELU_ELEMENT + + #define PRELU_OTHER prelu_other + #define PRELU_TYPE prelu_other + #include "ConvAddPrelu.inc.metal" + #undef PRELU_TYPE + #undef PRELU_OTHER + +#undef P + +#define P half + + #define PRELU_CHANNEL prelu_channel + #define PRELU_TYPE prelu_channel + #include "ConvAddPrelu.inc.metal" + #undef PRELU_TYPE + #undef PRELU_CHANNEL + + #define PRELU_ELEMENT prelu_element + #define PRELU_TYPE prelu_element + #include "ConvAddPrelu.inc.metal" + #undef PRELU_TYPE + #undef PRELU_ELEMENT + + #define PRELU_OTHER prelu_other + #define PRELU_TYPE prelu_other + #include "ConvAddPrelu.inc.metal" + #undef PRELU_TYPE + #undef PRELU_OTHER + +#undef P + + + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/FetchKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/FetchKernel.metal new file mode 100644 index 0000000000000000000000000000000000000000..c9d0624817d8508a3dae174e19a705b953d06101 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/FetchKernel.metal @@ -0,0 +1,71 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +using namespace metal; + +kernel void fetch(texture2d_array inTexture [[texture(0)]], + device float *output [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= inTexture.get_width() || + gid.y >= inTexture.get_height() || + gid.z >= inTexture.get_array_size()) { + return; + } + + int input_width = inTexture.get_width(); + int input_height = inTexture.get_height(); + const float4 input = inTexture.read(gid.xy, gid.z); + int output_to = 4 * input_width * input_height; + output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x; + output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y; + output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z; + output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w; +} + + +kernel void fetch_half(texture2d_array inTexture [[texture(0)]], + device float * output [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= inTexture.get_width() || + gid.y >= inTexture.get_height() || + gid.z >= inTexture.get_array_size()) { + return; + } + + int input_width = inTexture.get_width(); + int input_height = inTexture.get_height(); + const half4 input = inTexture.read(gid.xy, gid.z); + int output_to = 4 * input_width * input_height; + output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x; + output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y; + output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z; + output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w; + +} + +kernel void fetch_placeholder(texture2d_array inTexture [[texture(0)]], + device float *output [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { + +} + +kernel void fetch_placeholder_half(texture2d_array inTexture [[texture(0)]], + device float *output [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { +} + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Macro.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Macro.metal new file mode 100644 index 0000000000000000000000000000000000000000..950d7d5f0555b841da57554ff61f2f5cdbcae7aa --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Macro.metal @@ -0,0 +1,29 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +using namespace metal; + + +#define CONCAT2(a, b) a ## b +#define CONCAT2_(a, b) a ## _ ## b +#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c +#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d +#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e + +#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p) +#define VECTOR(p, n) CONCAT2(p, n) + +#define FUNC3_(a, b, c) CONCAT3_(a, b, c) + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/NMSFetchResultKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/NMSFetchResultKernel.metal new file mode 100644 index 0000000000000000000000000000000000000000..44c57440e1ec138717ad1bc569fd772e0d7ede1a --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/NMSFetchResultKernel.metal @@ -0,0 +1,80 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +using namespace metal; + +kernel void nms_fetch_result(texture2d_array inTexture [[texture(0)]], + device float *output [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= inTexture.get_width() || + gid.y >= inTexture.get_height() || + gid.z >= inTexture.get_array_size()) { + return; + } + + int input_width = inTexture.get_width(); + const float4 input = inTexture.read(gid.xy, gid.z); + output[gid.y * input_width + gid.x] = input.x; + +} + + +kernel void nms_fetch_result_half(texture2d_array inTexture [[texture(0)]], + device float *output [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= inTexture.get_width() || + gid.y >= inTexture.get_height() || + gid.z >= inTexture.get_array_size()) { + return; + } + + int input_width = inTexture.get_width(); + const half4 input = inTexture.read(gid.xy, gid.z); + output[gid.y * input_width + gid.x] = input.x; +} + +kernel void nms_fetch_bbox(texture2d_array inTexture [[texture(0)]], + device float4 *output [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { + + if (gid.x >= inTexture.get_width() || + gid.y >= inTexture.get_height() || + gid.z >= inTexture.get_array_size()) { + return; + } + + int input_width = inTexture.get_width(); +// int input_height = inTexture.get_height(); + const float4 input = inTexture.read(gid.xy, gid.z); + output[gid.y * input_width + gid.x] = input; +} + +kernel void nms_fetch_bbox_half(texture2d_array inTexture [[texture(0)]], + device float4 *output [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= inTexture.get_width() || + gid.y >= inTexture.get_height() || + gid.z >= inTexture.get_array_size()) { + return; + } + + int input_width = inTexture.get_width(); +// int input_height = inTexture.get_height(); + const half4 input = inTexture.read(gid.xy, gid.z); + output[gid.y * input_width + gid.x] = float4(input); +} + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/MulticlassNMSOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/MulticlassNMSOp.swift index 6220a584e725042f454b02777a79caeaae2a52e1..bc7a8a64ab6a70ff8a14feb11cf7481d8f10cf7d 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/MulticlassNMSOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/MulticlassNMSOp.swift @@ -21,10 +21,16 @@ class MulticlassNMSParam: OpParam { scores = try MulticlassNMSParam.getFirstTensor(key: "Scores", map: opDesc.inputs, from: inScope) bboxes = try MulticlassNMSParam.getFirstTensor(key: "BBoxes", map: opDesc.inputs, from: inScope) output = try MulticlassNMSParam.outputOut(outputs: opDesc.outputs, from: inScope) + + middleOutput = FetchHolder.init(inCapacity: scores.tensorDim.numel(), inDim: scores.tensorDim.dims) + + bboxOutput = FetchHolder.init(inCapacity: bboxes.tensorDim.numel(), inDim: bboxes.tensorDim.dims) } catch let error { throw error } } + var bboxOutput: FetchHolder + var middleOutput: FetchHolder let scores: Texture

let bboxes: Texture

var output: Texture

@@ -33,7 +39,15 @@ class MulticlassNMSParam: OpParam { class MulticlassNMSOp: Operator, MulticlassNMSParam

>, Runable, Creator, InferShaperable{ func inputVariant() -> [String : [Variant]] { - return ["Scores" : [para.scores], "BBoxes" : [para.bboxes]] + return ["Scores" : [para.middleOutput], "BBoxes" : [para.bboxOutput]] + } + + func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let _ { + fatalError() + } } func inferShape() { @@ -42,11 +56,12 @@ class MulticlassNMSOp: Operator, Multic typealias OpType = MulticlassNMSOp

func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { - do { - try kernel.compute(commandBuffer: buffer, param: para) - } catch let error { - throw error - } + + } + + func delogOutput() { + print(" nms - output: ") + print(para.bboxes.metalTexture.float32Array().strideArray()) } } diff --git a/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift b/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift index b43ea3742bc23d18b596d7f90935da840342dbfd..3e6b9f2e3d85ff235aa7a6f1c0cb13c519dc53b9 100644 --- a/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift +++ b/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift @@ -16,31 +16,13 @@ import Foundation class ScaleKernel: CusomKernel { init(device: MTLDevice, shape: Shape) { - super.init(device: device, inFunctionName: "scale", outputDim: shape, usePaddleMobileLib: false) - } -} - -public class Net: NSObject { - var except: Int = 0 - var means: [Float] = [] - var scale: Float = 0.0 - var dim: (n: Int, h: Int, w: Int, c: Int) = (n: 0, h: 0, w: 0, c: 0) - var preprocessKernel: CusomKernel? = nil - var paramPointer: UnsafeMutableRawPointer? = nil - var paramSize: Int = 0 - var modelPointer: UnsafeMutableRawPointer? = nil - var modelSize: Int = 0 - var modelPath: String = "" - var paramPath: String = "" - var modelDir: String = "" - public func resultStr(res: [Float]) -> String { - fatalError() - } - func fetchResult(paddleMobileRes: ResultHolder) -> [Float32] { - return paddleMobileRes.resultArr - } - @objc public init(device: MTLDevice) { - super.init() + if computePrecision == .Float32 { + super.init(device: device, inFunctionName: "scale", outputDim: shape, usePaddleMobileLib: false) + } else if computePrecision == .Float16 { + super.init(device: device, inFunctionName: "scale_half", outputDim: shape, usePaddleMobileLib: false) + } else { + fatalError(" unsupport ") + } } } @@ -118,18 +100,18 @@ public class Runner: NSObject { * texture: 需要预测的 texture 需要做过预处理 * ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组 */ - @objc public func predict(texture: MTLTexture, completion: @escaping ( _ success: Bool, _ resultArray: [Float32]) -> Void) { + @objc public func predict(texture: MTLTexture, completion: @escaping ( _ success: Bool, _ result: ResultHolder?) -> Void) { do { try self.executor?.predict(input: texture, dim: [self.net.dim.n, self.net.dim.h, self.net.dim.w, self.net.dim.c], completionHandle: { [weak self] (res) in guard let SSelf = self else { fatalError( " self nil " ) } - let resultArray = SSelf.net.fetchResult(paddleMobileRes: res) - completion(true, resultArray) + let result = SSelf.net.fetchResult(paddleMobileRes: res) + completion(true, result) }, preProcessKernle: self.net.preprocessKernel, except: self.net.except) } catch let error { print(error) - completion(false, []) + completion(false, nil) return } } @@ -139,21 +121,21 @@ public class Runner: NSObject { * cgImage: 需要预测的图片 * ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组 */ - @objc public func predict(cgImage: CGImage, completion: @escaping ( _ success: Bool, _ resultArray: [Float32]) -> Void) { - if platform == .GPU { - getTexture(image: cgImage) { [weak self] (texture) in - guard let SSelf = self else { - fatalError( "" ) - } - SSelf.predict(texture: texture, completion: completion) - } - } else if platform == .CPU { - let input = preproccess(image: cgImage) - predict(inputPointer: input, completion: completion) - input.deinitialize(count: numel) - input.deallocate() - } - } +// @objc public func predict(cgImage: CGImage, completion: @escaping ( _ success: Bool, _ resultArray: [Float32]) -> Void) { +// if platform == .GPU { +// getTexture(image: cgImage) { [weak self] (texture) in +// guard let SSelf = self else { +// fatalError( "" ) +// } +// SSelf.predict(texture: texture, completion: completion) +// } +// } else if platform == .CPU { +// let input = preproccess(image: cgImage) +// predict(inputPointer: input, completion: completion) +// input.deinitialize(count: numel) +// input.deallocate() +// } +// } /* * 清理内存, 调用此函数后, 不能再使用, 需重新 load @@ -184,10 +166,10 @@ public class Runner: NSObject { */ @objc public func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) { let texture = try? textureLoader?.newTexture(cgImage: image, options: [:]) ?! " texture loader error" - scaleTexture(input: texture!, size: (net.dim.w, net.dim.h), complete: getTexture) + scaleTexture(input: texture!, complete: getTexture) } - func scaleTexture(input: MTLTexture, size:(width: Int, height: Int), complete: @escaping (MTLTexture) -> Void) { + public func scaleTexture(input: MTLTexture , complete: @escaping (MTLTexture) -> Void) { guard let inQueue = queue, let inDevice = device else { fatalError( " queue or devcie nil " ) @@ -197,7 +179,7 @@ public class Runner: NSObject { fatalError( " make buffer error" ) } - let scaleKernel = ScaleKernel.init(device: inDevice, shape: CusomKernel.Shape.init(inWidth: size.width, inHeight: size.height, inChannel: 3)) + let scaleKernel = ScaleKernel.init(device: inDevice, shape: CusomKernel.Shape.init(inWidth: net.dim.w, inHeight: net.dim.h, inChannel: 3)) do { try scaleKernel.compute(inputTexuture: input, commandBuffer: buffer) diff --git a/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift b/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift index e744901a5ce7a0ac2363336fababc751024abd61..f6320be4cebcc30bacef9e92a3f40782a13ad68b 100644 --- a/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift +++ b/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift @@ -15,209 +15,272 @@ import Foundation precedencegroup ChainNode { - associativity: left - higherThan: MultiplicationPrecedence + associativity: left + higherThan: MultiplicationPrecedence } infix operator --> : ChainNode class Node { - var inputs: [Node] = [] - var outputs: [Node] = [] - var type: String - var opDesc: OpDesc? - init(inOpDesc: OpDesc) { - type = inOpDesc.type - opDesc = inOpDesc + var inputs: [Node] = [] + var outputs: [Node] = [] + var type: String + var opDesc: OpDesc? + init(inOpDesc: OpDesc) { + type = inOpDesc.type + opDesc = inOpDesc + } + + init(inType: String) { + type = inType + } + + subscript(index: Int) -> [Node] { + var nodes: [Node] = [] + getNodesWithLocation(index: index, nowIndex: 0, nodes: &nodes) + return nodes + } + + func getNodesWithLocation(index: Int, nowIndex: Int, nodes: inout [Node]) { + if index == nowIndex { + nodes.append(self) } - init(inType: String) { - type = inType + for output in outputs { + output.getNodesWithLocation(index: index, nowIndex: nowIndex + 1, nodes: &nodes) + } + } + + static func -->(lNode: Node, rNode: Node) -> Node { + lNode.outputs.append(rNode) + rNode.inputs.append(lNode) + return rNode + } + + func depth(begin: UInt = 1) -> UInt { + var beginMax: UInt = 1 + for output in outputs { + let subDepth = output.depth(begin: begin + 1) + beginMax = max(begin, subDepth) + } + beginMax = max(begin, beginMax) + return beginMax + } + + func to(depth: UInt) -> Node { + let beginNode = Node.init(inType: type) + to(depth: depth - 1, withNode: beginNode) + return beginNode + } + + func folderWith(fusion: Fusion.Type, removedNodes: inout [Node]) { + let fusionNode = fusion.fusionNode() + let change = fusion.change() + let inOutputs = outputs + outputs.removeAll() + opDesc?.outputs.removeAll() + for i in 0..(lNode: Node, rNode: Node) -> Node { - lNode.outputs.append(rNode) - rNode.inputs.append(lNode) - return rNode + for attr in inOpdesc.attrs { + beginNode.opDesc?.attrs[attr.key] = attr.value + // print(beginNode.opDesc?.attrs) } - func depth(begin: UInt = 1) -> UInt { - var beginMax: UInt = 1 - for output in outputs { - let subDepth = output.depth(begin: begin + 1) - beginMax = max(begin, subDepth) + for paraInput in inOpdesc.paraInputs { + if let inChanges = change[type] { + for keyChange in inChanges { + if keyChange.from == paraInput.key { + beginNode.opDesc?.paraInputs[keyChange.to] = paraInput.value + } else { + beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value + } } - beginMax = max(begin, beginMax) - return beginMax + } else { + beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value + } } - func to(depth: UInt) -> Node { - let beginNode = Node.init(inType: type) - to(depth: depth - 1, withNode: beginNode) - return beginNode + if matchNode.outputs.count == 0 { + beginNode.outputs.append(contentsOf: outputs) + beginNode.opDesc?.outputs = inOpdesc.outputs + } + removedNodes.append(self) - func folderWith(fusion: Fusion.Type, removedNodes: inout [Node]) { - let fusionNode = fusion.fusionNode() - let change = fusion.change() - let inOutputs = outputs - outputs.removeAll() - opDesc?.outputs.removeAll() - for i in 0.. [String : Node]{ + var map: [String : Node] = [:] + relationship(map: &map) + return map + } + + private func relationship(map: inout [String : Node]) { + guard let inOpDesc = opDesc else { + return } + for output in inOpDesc.outputs { + for outputKey in output.value { + map[outputKey] = self + } + } + for output in outputs { + output.relationship(map: &map) + } + } + } extension Node: Equatable { - static func == (lhs: Node, rhs: Node) -> Bool { - if lhs.outputs.count != rhs.outputs.count { - return false - } - - if lhs.type != rhs.type { - return false - } - - for i in 0.. Bool { + if lhs.outputs.count != rhs.outputs.count { + return false } + if lhs.type != rhs.type { + return false + } + + for i in 0.. { - // register fusion - let fusionOps: [Fusion.Type] = [ConvAddBatchNormReluOp

.self, - ConvAddOp

.self, - ConvBNReluOp

.self, - DwConvBNReluOp

.self] - - func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc { - - guard originProgramDesc.blocks.count == 1 else { - fatalError(" not support yet") + // register fusion + let fusionOps: [Fusion.Type] = [ConvAddBatchNormReluOp

.self, + ConvAddPreluOp

.self, + ConvAddOp

.self, + ConvBNReluOp

.self, + DwConvBNReluOp

.self + ] + + func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc { + + guard originProgramDesc.blocks.count == 1 else { + fatalError(" not support yet") + } + + var mapForNodeChain: [String : Node] = [:] + var nodes: [Node] = [] + var typeMapNodes: [String : [(node: Node, output: [String : Node])]] = [:] + let block = originProgramDesc.blocks[0] + for opDesc in block.ops { + guard let opInputKeys = opInfos[opDesc.type]?.inputs, let outputKeys = opInfos[opDesc.type]?.outputs else { + fatalError() + } + + let node = Node.init(inOpDesc: opDesc) + for inputKey in opInputKeys { + if let inputs = opDesc.inputs[inputKey] { + for input in inputs { + if let inputNode = mapForNodeChain[input] { + _ = inputNode --> node + } + } } - - var mapForNodeChain: [String : Node] = [:] - var nodes: [Node] = [] - var typeMapNodes: [String : [Node]] = [:] - let block = originProgramDesc.blocks[0] - for opDesc in block.ops { - guard let opInputKeys = opInfos[opDesc.type]?.inputs, let outputKeys = opInfos[opDesc.type]?.outputs else { - fatalError() - } - - let node = Node.init(inOpDesc: opDesc) - for inputKey in opInputKeys { - if let inputs = opDesc.inputs[inputKey] { - for input in inputs { - if let inputNode = mapForNodeChain[input] { - _ = inputNode --> node - } - } - } - } - - for outputKey in outputKeys { - if let outputs = opDesc.outputs[outputKey] { - for output in outputs { - mapForNodeChain[output] = node - } + } + + for outputKey in outputKeys { + if let outputs = opDesc.outputs[outputKey] { + for output in outputs { + mapForNodeChain[output] = node + } + } + } + + nodes.append(node) + + if var inNodes = typeMapNodes[opDesc.type] { + inNodes.append((node, mapForNodeChain)) + typeMapNodes[opDesc.type] = inNodes + } else { + typeMapNodes[opDesc.type] = [(node, mapForNodeChain)] + } + } + + for fusion in fusionOps { + let fusionNode = fusion.fusionNode() + let depth = fusionNode.depth() + if let toMatchNodes = typeMapNodes[fusionNode.type] { + for node in toMatchNodes { + + let toNode = node.node.to(depth: depth) + if toNode == fusionNode { // match + var canFolder = true + let relationshipMap = toNode.relationship() + + for toCheck in fusion.needCheck() { + // let nodes = toCheck + let checkNodes = toNode[toCheck.0] + + for checkNode in checkNodes { + let inputToChecks = checkNode.opDesc?.inputs[toCheck.1] ?? [] + for inputToCheck in inputToChecks { + if node.output[inputToCheck] == nil { + if relationshipMap[inputToCheck] == nil { + canFolder = false } + } } - - nodes.append(node) - - if var inNodes = typeMapNodes[opDesc.type] { - inNodes.append(node) - typeMapNodes[opDesc.type] = inNodes - } else { - typeMapNodes[opDesc.type] = [node] - } + } } - for fusion in fusionOps { - let fusionNode = fusion.fusionNode() - let depth = fusionNode.depth() - if let toMatchNodes = typeMapNodes[fusionNode.type] { - for node in toMatchNodes { - let toNode = node.to(depth: depth) - if toNode == fusionNode { // match - var removeNodes: [Node] = [] - node.folderWith(fusion: fusion, removedNodes: &removeNodes) - for removeNode in removeNodes { - nodes.remove(element: removeNode) - } - } - } - } + if !canFolder { + continue } - - var ops: [OpDesc] = [] - for node in nodes { - ops.append(node.opDesc!) + + var removeNodes: [Node] = [] + node.node.folderWith(fusion: fusion, removedNodes: &removeNodes) + for removeNode in removeNodes { + nodes.remove(element: removeNode) + } + } } - - var newProgramDesc = ProgramDesc.init() - let newBlock = BlockDesc.init(inVars: block.vars, inOps: ops) - newProgramDesc.blocks.append(newBlock) - return newProgramDesc + } } + + var ops: [OpDesc] = [] + for node in nodes { + ops.append(node.opDesc!) + } + + var newProgramDesc = ProgramDesc.init() + let newBlock = BlockDesc.init(inVars: block.vars, inOps: ops) + newProgramDesc.blocks.append(newBlock) + return newProgramDesc + } } diff --git a/metal/paddle-mobile/paddle-mobile/framework/Executor.swift b/metal/paddle-mobile/paddle-mobile/framework/Executor.swift index 85cf80bd092c9d14c9fcb349e6ac46a6df08b162..9890e106e21d3084569f27339d5d54c1ed01b462 100644 --- a/metal/paddle-mobile/paddle-mobile/framework/Executor.swift +++ b/metal/paddle-mobile/paddle-mobile/framework/Executor.swift @@ -14,39 +14,50 @@ import Foundation -let testTo = 113 + +let testTo = 81 + var isTest = false -let computePrecision: ComputePrecision = .Float32 +let computePrecision: ComputePrecision = .Float16 -public class ResultHolder { +public class GPUResultHolder { public let dim: [Int] - public let resultArr: [Float32] + public let capacity: Int + public var resultPointer: UnsafeMutablePointer? public var intermediateResults: [String : [Variant]]? public let elapsedTime: Double - public init(inDim: [Int], inResult: [Float32], inElapsedTime: Double, inIntermediateResults: [String : [Variant]]? = nil) { + public init(inDim: [Int], inPointer: UnsafeMutablePointer?, inCapacity: Int, inElapsedTime: Double, inIntermediateResults: [String : [Variant]]? = nil) { dim = inDim - resultArr = inResult + capacity = inCapacity + + if let inInPointer = inPointer { + resultPointer = UnsafeMutablePointer.allocate(capacity: inCapacity) + resultPointer?.initialize(from: inInPointer, count: inCapacity) + } + elapsedTime = inElapsedTime intermediateResults = inIntermediateResults } + } -extension ResultHolder: CustomDebugStringConvertible, CustomStringConvertible { +extension GPUResultHolder: CustomDebugStringConvertible, CustomStringConvertible { public var debugDescription: String { - var str = "" - str += "Dim: \(dim) \n value:[ " - if resultArr.count < 20 { - for d in resultArr { - str += " \(d) " - } - } else { - for d in stride(from: 0, to: resultArr.count, by: resultArr.count/20) { - str += " \(resultArr[d]) " - } - } - str += " ]" - return str +// var str = "" +// str += "Dim: \(dim) \n value:[ " +// if resultArr.count < 20 { +// for d in resultArr { +// str += " \(d) " +// } +// } else { +// for d in stride(from: 0, to: resultArr.count, by: resultArr.count/20) { +// str += " \(resultArr[d]) " +// } +// } +// str += " ]" +// return str + fatalError() } public var description: String { @@ -67,7 +78,7 @@ public class Executor { queue = inQueue for block in inProgram.programDesc.blocks { //block.ops.count - for i in 0...shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope) @@ -79,7 +90,7 @@ public class Executor { } } - public func predict(input: MTLTexture, dim: [Int], completionHandle: @escaping (ResultHolder) -> Void, preProcessKernle: CusomKernel? = nil, except: Int = 0) throws { + public func predict(input: MTLTexture, dim: [Int], completionHandle: @escaping (GPUResultHolder) -> Void, preProcessKernle: CusomKernel? = nil, except: Int = 0) throws { guard let buffer = queue.makeCommandBuffer() else { throw PaddleMobileError.predictError(message: "CommandBuffer is nil") } @@ -101,7 +112,7 @@ public class Executor { let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: Dim.init(inDim: dim)) program.scope.setInput(input: inputTexture) //(ops.count - except) - for i in 0.. { var outputTextures: [String : [Variant]]? if except > 0 { - outputTextures = ops[testTo-1].inputVariant() + ops[ops.count - except].computeMiddleResult(device: device, buffer: buffer) + outputTextures = ops[ops.count - except].inputVariant() } buffer.addCompletedHandler { [weak self] (commandbuffer) in - - let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2])) - print(inputArr.strideArray()) - - print(dim) - writeToLibrary(fileName: "test_image_ssd_ar", array: inputArr) +// let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2])) +// print(inputArr.strideArray()) +// +//// print(dim) +// writeToLibrary(fileName: "test_image_ssd_ar", array: inputArr) +// print(" write done ") - print("write to library done") +// print("write to library done") // return // print(inputArr) // @@ -131,11 +143,11 @@ public class Executor { // print(stridableInput) // // let _: Flo? = input.logDesc(header: "input: ", stridable: true) - for i in 0.. { // return guard let SSelf = self else { +// return fatalError() } let afterDate = Date.init() - var resultHolder: ResultHolder + var resultHolder: GPUResultHolder if except > 0 { - resultHolder = ResultHolder.init(inDim: [], inResult: [], inElapsedTime: afterDate.timeIntervalSince(beforeDate), inIntermediateResults: outputTextures) + resultHolder = GPUResultHolder.init(inDim: [], inPointer: nil, inCapacity: 0, inElapsedTime: afterDate.timeIntervalSince(beforeDate), inIntermediateResults: outputTextures) } else { let outputVar: Variant = SSelf.program.scope.output()! - let output: Texture

= outputVar as! Texture

+ let output: FetchHolder = outputVar as! FetchHolder +// let beforeToTensorDate = Date.init() + + resultHolder = GPUResultHolder.init(inDim: output.dim, inPointer: output.result, inCapacity: output.capacity, inElapsedTime: afterDate.timeIntervalSince(beforeDate)) - resultHolder = ResultHolder.init(inDim: output.dim.dims, inResult: output.toTensor(), inElapsedTime: afterDate.timeIntervalSince(beforeDate)) +// let timeToTensor = Date.init().timeIntervalSince(beforeToTensorDate) +// print(timeToTensor) } completionHandle(resultHolder) diff --git a/metal/paddle-mobile/paddle-mobile/framework/Loader.swift b/metal/paddle-mobile/paddle-mobile/framework/Loader.swift index 68ce8c0691cd29a227758dc750315b45c305ed92..6b0098013e9fd371a218a3cba2154ff00bd543b9 100644 --- a/metal/paddle-mobile/paddle-mobile/framework/Loader.swift +++ b/metal/paddle-mobile/paddle-mobile/framework/Loader.swift @@ -168,7 +168,7 @@ public class Loader { } } else { if varDesc.name == fetchKey { - scope[varDesc.name] = ResultHolder.init(inDim: [], inResult: [], inElapsedTime: 0.0) +// scope[varDesc.name] = ResultHolder.init(inDim: [], inResult: [], inCapacity: <#Int#>, inElapsedTime: 0.0) } else if varDesc.name == feedKey { } }