未验证 提交 e6fed8db 编写于 作者: R Ruilong Liu 提交者: GitHub

Merge pull request #983 from codeWorm2015/metal

Metal
......@@ -14,6 +14,8 @@
FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8520E11C550081E9F8 /* Main.storyboard */; };
FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8820E11C560081E9F8 /* Assets.xcassets */; };
FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */; };
FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCB214D27920094B8E5 /* FPSCounter.swift */; };
FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCC214D27920094B8E5 /* VideoCapture.swift */; };
FC8CFEE62135452C0094D569 /* genet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC8CFEE42135452B0094D569 /* genet_params */; };
FC8CFEE72135452C0094D569 /* genet_model in Resources */ = {isa = PBXBuildFile; fileRef = FC8CFEE52135452B0094D569 /* genet_model */; };
FC8CFEF8213551D10094D569 /* params in Resources */ = {isa = PBXBuildFile; fileRef = FC8CFEF6213551D00094D569 /* params */; };
......@@ -61,6 +63,8 @@
FC039B8D20E11C560081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
FC27991121343A39000B6BAD /* paddle-mobile-demo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "paddle-mobile-demo-Bridging-Header.h"; sourceTree = "<group>"; };
FC4FD97B2140EE250073E130 /* libc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libc++.tbd"; path = "usr/lib/libc++.tbd"; sourceTree = SDKROOT; };
FC803BCB214D27920094B8E5 /* FPSCounter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FPSCounter.swift; sourceTree = "<group>"; };
FC803BCC214D27920094B8E5 /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = "<group>"; };
FC8CFEE42135452B0094D569 /* genet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = genet_params; sourceTree = "<group>"; };
FC8CFEE52135452B0094D569 /* genet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = genet_model; sourceTree = "<group>"; };
FC8CFEF6213551D00094D569 /* params */ = {isa = PBXFileReference; lastKnownFileType = file; path = params; sourceTree = "<group>"; };
......@@ -132,6 +136,7 @@
FC039B8020E11C550081E9F8 /* paddle-mobile-demo */ = {
isa = PBXGroup;
children = (
FC803BCA214D27920094B8E5 /* VideoCapture */,
FC8CFED2213519540094D569 /* Net */,
FC0E2C2020EDC03B009C1FAC /* models */,
FC0E2C1D20EDC030009C1FAC /* images */,
......@@ -172,6 +177,15 @@
path = ../../models;
sourceTree = "<group>";
};
FC803BCA214D27920094B8E5 /* VideoCapture */ = {
isa = PBXGroup;
children = (
FC803BCB214D27920094B8E5 /* FPSCounter.swift */,
FC803BCC214D27920094B8E5 /* VideoCapture.swift */,
);
path = VideoCapture;
sourceTree = "<group>";
};
FC8CFED2213519540094D569 /* Net */ = {
isa = PBXGroup;
children = (
......@@ -345,9 +359,11 @@
buildActionMask = 2147483647;
files = (
FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */,
FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */,
FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */,
FCF437E8214B6DDB00943429 /* Multi-Predict-ViewController.swift in Sources */,
FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */,
FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */,
FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
......@@ -499,7 +515,7 @@
DEVELOPMENT_TEAM = A798K58VVL;
ENABLE_BITCODE = NO;
INFOPLIST_FILE = "paddle-mobile-demo/Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
IPHONEOS_DEPLOYMENT_TARGET = 10.0;
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
......@@ -526,7 +542,7 @@
DEVELOPMENT_TEAM = A798K58VVL;
ENABLE_BITCODE = NO;
INFOPLIST_FILE = "paddle-mobile-demo/Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
IPHONEOS_DEPLOYMENT_TARGET = 10.0;
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
......
......@@ -37,7 +37,7 @@
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="68E-SG-96s" userLabel="First Responder" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="-1438" y="331"/>
<point key="canvasLocation" x="-559" y="686"/>
</scene>
<!--View Controller-->
<scene sceneID="tne-QT-ifu">
......@@ -48,7 +48,7 @@
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<subviews>
<imageView userInteractionEnabled="NO" contentMode="scaleAspectFit" horizontalHuggingPriority="251" verticalHuggingPriority="251" translatesAutoresizingMaskIntoConstraints="NO" id="ZZh-fw-LwK">
<rect key="frame" x="0.0" y="20" width="375" height="247"/>
<rect key="frame" x="0.0" y="20" width="225" height="247"/>
</imageView>
<label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="Thread:" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="2EB-m2-a3L">
<rect key="frame" x="10" y="538" width="68" height="24"/>
......@@ -170,9 +170,14 @@
<fontDescription key="fontDescription" type="system" pointSize="15"/>
<textInputTraits key="textInputTraits" autocapitalizationType="sentences"/>
</textView>
<view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="Cil-py-NiA">
<rect key="frame" x="225" y="20" width="150" height="247"/>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
</view>
</subviews>
<color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
<constraints>
<constraint firstItem="m5L-O7-P31" firstAttribute="top" secondItem="Cil-py-NiA" secondAttribute="bottom" constant="10" id="16p-IK-b5X"/>
<constraint firstItem="6Tk-OE-BBY" firstAttribute="trailing" secondItem="VQn-bS-fWp" secondAttribute="trailing" constant="10" id="1Xg-0h-9SE"/>
<constraint firstItem="avL-VK-Kha" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="10" id="2t9-hS-VXa"/>
<constraint firstItem="R90-Yf-S6g" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="76b-Ny-1Og"/>
......@@ -187,11 +192,12 @@
<constraint firstItem="XpL-9M-UOp" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="KWW-qT-Rzf"/>
<constraint firstItem="6MG-gv-hD5" firstAttribute="centerY" secondItem="avL-VK-Kha" secondAttribute="centerY" id="KZa-YZ-DEs"/>
<constraint firstItem="2EB-m2-a3L" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="10" id="Le3-TN-zOL"/>
<constraint firstItem="ZZh-fw-LwK" firstAttribute="trailing" secondItem="6Tk-OE-BBY" secondAttribute="trailing" id="MeS-HQ-voE"/>
<constraint firstItem="ZZh-fw-LwK" firstAttribute="trailing" secondItem="6Tk-OE-BBY" secondAttribute="trailing" constant="-150" id="MeS-HQ-voE"/>
<constraint firstItem="m5L-O7-P31" firstAttribute="top" secondItem="ZZh-fw-LwK" secondAttribute="bottom" constant="10" id="NUL-Ta-VI8"/>
<constraint firstItem="m5L-O7-P31" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="15" id="RFA-z1-9aB"/>
<constraint firstItem="wUL-9N-u1V" firstAttribute="width" secondItem="a3K-ri-NVs" secondAttribute="width" id="Rp6-Bh-BN3"/>
<constraint firstItem="6MG-gv-hD5" firstAttribute="trailing" secondItem="6Tk-OE-BBY" secondAttribute="trailing" id="S0W-0G-75m"/>
<constraint firstItem="Cil-py-NiA" firstAttribute="top" secondItem="6Tk-OE-BBY" secondAttribute="top" id="UNc-Et-9Yv"/>
<constraint firstItem="w7H-Sk-Rai" firstAttribute="leading" secondItem="wUL-9N-u1V" secondAttribute="trailing" id="VBM-8b-jP0"/>
<constraint firstItem="VQn-bS-fWp" firstAttribute="top" secondItem="m5L-O7-P31" secondAttribute="bottom" constant="8" id="VpS-4N-mOo"/>
<constraint firstItem="wUL-9N-u1V" firstAttribute="top" secondItem="2EB-m2-a3L" secondAttribute="bottom" constant="35" id="VpU-j2-gaE"/>
......@@ -203,10 +209,12 @@
<constraint firstItem="ZZh-fw-LwK" firstAttribute="top" secondItem="6Tk-OE-BBY" secondAttribute="top" id="eIC-fZ-OEE"/>
<constraint firstItem="976-fk-Kx2" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="fFg-pB-eyU"/>
<constraint firstItem="6Tk-OE-BBY" firstAttribute="bottom" secondItem="wUL-9N-u1V" secondAttribute="bottom" constant="40" id="fG6-0p-I0P"/>
<constraint firstItem="Cil-py-NiA" firstAttribute="trailing" secondItem="6Tk-OE-BBY" secondAttribute="trailing" id="gGK-DB-ibv"/>
<constraint firstItem="XpL-9M-UOp" firstAttribute="leading" secondItem="w7H-Sk-Rai" secondAttribute="trailing" id="guC-Db-cA9"/>
<constraint firstItem="6MG-gv-hD5" firstAttribute="leading" secondItem="avL-VK-Kha" secondAttribute="trailing" constant="10" id="jNW-iC-u7V"/>
<constraint firstItem="4ey-Xr-U4e" firstAttribute="bottom" secondItem="6Tk-OE-BBY" secondAttribute="bottom" id="o1X-q5-P7j"/>
<constraint firstItem="6MG-gv-hD5" firstAttribute="top" secondItem="VQn-bS-fWp" secondAttribute="bottom" constant="8" id="tAE-ss-jlA"/>
<constraint firstItem="Cil-py-NiA" firstAttribute="leading" secondItem="ZZh-fw-LwK" secondAttribute="trailing" id="teJ-PP-h2R"/>
<constraint firstItem="4ey-Xr-U4e" firstAttribute="top" secondItem="wUL-9N-u1V" secondAttribute="bottom" constant="10" id="udc-wT-jqd"/>
<constraint firstItem="ZZh-fw-LwK" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" id="vXI-l2-CjL"/>
<constraint firstItem="VQn-bS-fWp" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="10" id="wtI-Dl-YPq"/>
......@@ -223,11 +231,12 @@
<outlet property="resultTextView" destination="VQn-bS-fWp" id="306-c7-3vM"/>
<outlet property="selectImageView" destination="ZZh-fw-LwK" id="afR-Bv-6AW"/>
<outlet property="threadPickerView" destination="DlO-dk-RMr" id="Kk4-QV-b5o"/>
<outlet property="videoView" destination="Cil-py-NiA" id="QY2-BP-SNS"/>
</connections>
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="-719" y="-18"/>
<point key="canvasLocation" x="-1543.2" y="-147.07646176911544"/>
</scene>
</scenes>
<resources>
......
......@@ -14,11 +14,53 @@ class Multi_Predict_ViewController: UIViewController {
var runner2: Runner!
override func viewDidLoad() {
super.viewDidLoad()
// let net = MobileNet_ssd_hand.init(device: MetalHelper.shared.device)
// runner1 = Runner.init(inNet: <#T##Net#>, commandQueue: <#T##MTLCommandQueue?#>, inPlatform: <#T##Platform#>)
let mobileNet = MobileNet_ssd_hand.init(device: MetalHelper.shared.device)
let genet = Genet.init(device: MetalHelper.shared.device)
runner1 = Runner.init(inNet: mobileNet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU)
let queue2 = MetalHelper.shared.device.makeCommandQueue()
runner2 = Runner.init(inNet: genet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU)
}
@IBAction func predictAct(_ sender: Any) {
let success = self.runner2.load()
// DispatchQueue.global().async {
let image1 = UIImage.init(named: "hand.jpg")
// let success = self.runner2.load()
// if success {
// for i in 0..<10000 {
// print(i)
// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in
// print("result1: ")
//// print(res)
// })
// }
// } else {
// print("load failed")
// }
// self.runner1.clear()
// }
// return
// DispatchQueue.global().async {
//// sleep(1)
// let image1 = UIImage.init(named: "banana.jpeg")
//// if success {
// for _ in 0..<10 {
// self.runner2.predict(cgImage: image1!.cgImage!, completion: { (success, res) in
// print("result2: ")
// print(res)
// })
// }
//// } else {
//// print("load failed")
//// }
//// self.runner2.clear()
// }
}
}
import Foundation
import QuartzCore
public class FPSCounter {
private(set) public var fps: Double = 0
var frames = 0
var startTime: CFTimeInterval = 0
public func start() {
frames = 0
startTime = CACurrentMediaTime()
}
public func frameCompleted() {
frames += 1
let now = CACurrentMediaTime()
let elapsed = now - startTime
if elapsed > 0.1 {
let current = Double(frames) / elapsed
let smoothing = 0.75
fps = smoothing*fps + (1 - smoothing)*current
if elapsed > 1 {
frames = 0
startTime = CACurrentMediaTime()
}
}
}
}
import UIKit
import Metal
import CoreVideo
import AVFoundation
@available(iOS 10.0, *)
@objc public protocol VideoCaptureDelegate: NSObjectProtocol {
@objc optional func videoCapture(_ capture: VideoCapture, didCaptureSampleBuffer sampleBuffer: CMSampleBuffer, timestamp: CMTime)
@objc optional func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime)
@objc optional func videoCapture(_ capture: VideoCapture, didCapturePhoto previewImage: UIImage?)
@objc optional func videoCapture(_ capture: VideoCapture, didCapturePhotoTexture texture: MTLTexture?)
}
/**
Simple interface to the iPhone's camera.
*/
@available(iOS 10.0, *)
public class VideoCapture: NSObject {
public var previewLayer: AVCaptureVideoPreviewLayer?
public weak var delegate: VideoCaptureDelegate?
public var fps = -1
private let device: MTLDevice?
private let videoOrientation: AVCaptureVideoOrientation
private var textureCache: CVMetalTextureCache?
private let captureSession = AVCaptureSession()
private let videoOutput = AVCaptureVideoDataOutput()
private let photoOutput = AVCapturePhotoOutput()
private let queue = DispatchQueue(label: "net.machinethink.camera-queue")
private var lastTimestamp = CMTime()
private let cameraPosition: AVCaptureDevice.Position
public init(device: MTLDevice? = nil, orientation: AVCaptureVideoOrientation = .portrait, position: AVCaptureDevice.Position = .back) {
self.device = device
self.videoOrientation = orientation
self.cameraPosition = position
super.init()
}
public func setUp(sessionPreset: AVCaptureSession.Preset = .medium,
completion: @escaping (Bool) -> Void) {
queue.async {
let success = self.setUpCamera(sessionPreset: sessionPreset)
DispatchQueue.main.async {
completion(success)
}
}
}
func fontCamera() -> AVCaptureDevice? {
let deveices = AVCaptureDevice.DiscoverySession.init(deviceTypes: [.builtInWideAngleCamera], mediaType: AVMediaType.video, position: .front).devices
return deveices.first
}
func setUpCamera(sessionPreset: AVCaptureSession.Preset) -> Bool {
if let inDevice = device{
guard CVMetalTextureCacheCreate(kCFAllocatorDefault, nil, inDevice, nil, &textureCache) == kCVReturnSuccess else {
print("Error: could not create a texture cache")
return false
}
}
captureSession.beginConfiguration()
captureSession.sessionPreset = sessionPreset
var oCaptureDevice: AVCaptureDevice?
switch cameraPosition {
case .back:
oCaptureDevice = AVCaptureDevice.default(for: AVMediaType.video)
break
case .front:
oCaptureDevice = fontCamera()
break
default:
break
}
guard let captureDevice = oCaptureDevice else {
print("Error: no video devices available")
return false
}
guard let videoInput = try? AVCaptureDeviceInput(device: captureDevice) else {
print("Error: could not create AVCaptureDeviceInput")
return false
}
if captureSession.canAddInput(videoInput) {
captureSession.addInput(videoInput)
}
let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
previewLayer.videoGravity = AVLayerVideoGravity.resizeAspect
previewLayer.connection?.videoOrientation = self.videoOrientation
self.previewLayer = previewLayer
let settings: [String : Any] = [
kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)
]
videoOutput.videoSettings = settings
videoOutput.alwaysDiscardsLateVideoFrames = true
videoOutput.setSampleBufferDelegate(self, queue: queue)
if captureSession.canAddOutput(videoOutput) {
captureSession.addOutput(videoOutput)
}
// We want the buffers to be in portrait orientation otherwise they are
// rotated by 90 degrees. Need to set this _after_ addOutput()!
videoOutput.connection(with: AVMediaType.video)?.videoOrientation = self.videoOrientation
if captureSession.canAddOutput(photoOutput) {
captureSession.addOutput(photoOutput)
}
captureSession.commitConfiguration()
return true
}
public func start() {
if !captureSession.isRunning {
captureSession.startRunning()
}
}
public func stop() {
if captureSession.isRunning {
captureSession.stopRunning()
}
}
/* Captures a single frame of the camera input. */
public func capturePhoto() {
let settings = AVCapturePhotoSettings(format: [kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)])
settings.previewPhotoFormat = [
kCVPixelBufferPixelFormatTypeKey as String: settings.__availablePreviewPhotoPixelFormatTypes[0],
kCVPixelBufferWidthKey as String: 480,
kCVPixelBufferHeightKey as String: 360,
]
photoOutput.capturePhoto(with: settings, delegate: self)
}
func convertToMTLTexture(sampleBuffer: CMSampleBuffer?) -> MTLTexture? {
if let textureCache = textureCache, let sampleBuffer = sampleBuffer, let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) {
let width = CVPixelBufferGetWidth(imageBuffer)
let height = CVPixelBufferGetHeight(imageBuffer)
var texture: CVMetalTexture?
CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault, textureCache, imageBuffer, nil, .bgra8Unorm, width, height, 0, &texture)
if let texture = texture {
return CVMetalTextureGetTexture(texture)
}
}
return nil
}
func convertToUIImage(sampleBuffer: CMSampleBuffer?) -> UIImage? {
if let sampleBuffer = sampleBuffer,
let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) {
let width = CVPixelBufferGetWidth(imageBuffer)
let height = CVPixelBufferGetHeight(imageBuffer)
let rect = CGRect(x: 0, y: 0, width: CGFloat(width), height: CGFloat(height))
let ciImage = CIImage(cvPixelBuffer: imageBuffer)
let ciContext = CIContext(options: nil)
if let cgImage = ciContext.createCGImage(ciImage, from: rect) {
return UIImage(cgImage: cgImage)
}
}
return nil
}
}
extension VideoCapture: AVCaptureVideoDataOutputSampleBufferDelegate {
public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
// Because lowering the capture device's FPS looks ugly in the preview,
// we capture at full speed but only call the delegate at its desired
// framerate. If `fps` is -1, we run at the full framerate.
let timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
let deltaTime = timestamp - lastTimestamp
if fps == -1 || deltaTime >= CMTimeMake(1, Int32(fps)) {
lastTimestamp = timestamp
self.delegate?.videoCapture?(self, didCaptureSampleBuffer: sampleBuffer, timestamp: timestamp)
if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCaptureVideoTexture:timestamp:))) ?? false{
let texture = convertToMTLTexture(sampleBuffer: sampleBuffer)
delegate?.videoCapture?(self, didCaptureVideoTexture: texture, timestamp: timestamp)
}
}
}
public func captureOutput(_ output: AVCaptureOutput, didDrop sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
print("dropped frame")
}
}
extension VideoCapture: AVCapturePhotoCaptureDelegate {
public func photoOutput(_ captureOutput: AVCapturePhotoOutput,
didFinishProcessingPhoto photoSampleBuffer: CMSampleBuffer?,
previewPhoto previewPhotoSampleBuffer: CMSampleBuffer?,
resolvedSettings: AVCaptureResolvedPhotoSettings,
bracketSettings: AVCaptureBracketedStillImageSettings?,
error: Error?) {
var imageTexture: MTLTexture?
var previewImage: UIImage?
if error == nil {
if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCapturePhotoTexture:))) ?? false{
imageTexture = convertToMTLTexture(sampleBuffer: photoSampleBuffer)
self.delegate?.videoCapture?(self, didCapturePhotoTexture: imageTexture)
}
if self.delegate?.responds(to: #selector(VideoCaptureDelegate.videoCapture(_:didCapturePhoto:))) ?? false{
previewImage = convertToUIImage(sampleBuffer: previewPhotoSampleBuffer)
self.delegate?.videoCapture?(self, didCapturePhoto: previewImage)
}
}
}
}
......@@ -14,13 +14,15 @@
import UIKit
import MetalKit
import CoreMedia
import paddle_mobile
import MetalPerformanceShaders
let platform: Platform = .GPU
let threadSupport = [1]
let modelHelperMap: [SupportModel : Runner] = [.mobilenet_ssd : Runner.init(inNet: MobileNet_ssd_hand.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform),
//.mobilenet_ssd : Runner.init(inNet: MobileNet_ssd_hand.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform),
let modelHelperMap: [SupportModel : Runner] = [
.genet : Runner.init(inNet: Genet.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform),
.mobilenet_ssd_ar : Runner.init(inNet: MobileNet_ssd_AR.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform)]
//, .genet : Genet.init()
......@@ -28,13 +30,14 @@ let modelHelperMap: [SupportModel : Runner] = [.mobilenet_ssd : Runner.init(inNe
enum SupportModel: String{
// case mobilenet = "mobilenet"
case mobilenet_ssd = "mobilenetssd"
// case mobilenet_ssd = "mobilenetssd"
case genet = "genet"
case mobilenet_ssd_ar = "mobilenetssd_ar"
static func supportedModels() -> [SupportModel] {
//.mobilenet,
return [.mobilenet_ssd, .genet, .mobilenet_ssd_ar]
// .mobilenet,
// .mobilenet_ssd,
return [.genet, .mobilenet_ssd_ar]
}
}
......@@ -44,14 +47,15 @@ class ViewController: UIViewController {
@IBOutlet weak var elapsedTimeLabel: UILabel!
@IBOutlet weak var modelPickerView: UIPickerView!
@IBOutlet weak var threadPickerView: UIPickerView!
@IBOutlet weak var videoView: UIView!
var videoCapture: VideoCapture!
var selectImage: UIImage?
var inputPointer: UnsafeMutablePointer<Float32>?
var modelType: SupportModel = SupportModel.supportedModels()[0]
var toPredictTexture: MTLTexture?
var runner: Runner {
get {
return modelHelperMap[modelType] ?! " has no this type "
}
......@@ -81,7 +85,7 @@ class ViewController: UIViewController {
}
@IBAction func predictAct(_ sender: Any) {
let max = 1
let max = 50
switch platform {
case .GPU:
guard let inTexture = toPredictTexture else {
......@@ -91,7 +95,7 @@ class ViewController: UIViewController {
let startDate = Date.init()
for i in 0..<max {
runner.predict(texture: inTexture) { [weak self] (success, res) in
runner.predict(texture: inTexture) { [weak self] (success, resultHolder) in
guard let sSelf = self else {
fatalError()
}
......@@ -99,11 +103,18 @@ class ViewController: UIViewController {
if i == max - 1 {
let time = Date.init().timeIntervalSince(startDate)
DispatchQueue.main.async {
sSelf.resultTextView.text = sSelf.runner.net.resultStr(res: res)
// print(resultHolder!.result![0])
// sSelf.resultTextView.text = sSelf.runner.net.resultStr(res: res)
sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max) * 1000.0) ms"
}
}
}
DispatchQueue.main.async {
resultHolder?.releasePointer()
}
// print("释放")
}
// print("sleep before ")
// usleep(33000)
......@@ -129,7 +140,7 @@ class ViewController: UIViewController {
if i == max - 1 {
let time = Date.init().timeIntervalSince(startDate)
DispatchQueue.main.async {
sSelf.resultTextView.text = sSelf.runner.net.resultStr(res: res)
// sSelf.resultTextView.text = sSelf.runner.net.resultStr(res: res)
sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max) * 1000.0) ms"
}
}
......@@ -141,6 +152,13 @@ class ViewController: UIViewController {
override func viewDidLoad() {
super.viewDidLoad()
// if runner.load() {
// print(" load success ! ")
// } else {
// print(" load error ! ")
// }
//
modelPickerView.delegate = self
modelPickerView.dataSource = self
threadPickerView.delegate = self
......@@ -158,6 +176,20 @@ class ViewController: UIViewController {
} else {
fatalError( " unsupport " )
}
// videoCapture = VideoCapture.init(device: MetalHelper.shared.device, orientation: .portrait, position: .back)
// videoCapture.fps = 30
// videoCapture.delegate = self
// videoCapture.setUp { (success) in
// DispatchQueue.main.async {
// if let preViewLayer = self.videoCapture.previewLayer {
// self.videoView.layer.addSublayer(preViewLayer)
// self.videoCapture.previewLayer?.frame = self.videoView.bounds
// }
// self.videoCapture.start()
// }
// }
}
}
......@@ -218,4 +250,32 @@ extension ViewController: UIImagePickerControllerDelegate, UINavigationControll
}
}
var bool1 = false
extension ViewController: VideoCaptureDelegate{
func predictTexture(texture: MTLTexture){
runner.scaleTexture(input: texture) { (scaledTexture) in
self.runner.predict(texture: scaledTexture, completion: { (success, resultHolder) in
// print(resultHolder!.result![0])
resultHolder?.releasePointer()
})
}
}
func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime) {
// if !bool1 {
// DispatchQueue.main.asyncAfter(deadline: DispatchTime.init(uptimeNanoseconds: 500000000)) {
self.predictTexture(texture: texture!)
// }
// bool1 = true
// }
}
}
......@@ -75,6 +75,12 @@
FC4FD97E2140F2C30073E130 /* libstdc++.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD97D2140F2C30073E130 /* libstdc++.tbd */; };
FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */; };
FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC60DB8820E9AAA500FF203F /* MetalExtension.swift */; };
FC803BBF214CB65A0094B8E5 /* ConvAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */; };
FC803BC1214CB77A0094B8E5 /* ConvAddPreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */; };
FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */; };
FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */; };
FC803BC7214CBA820094B8E5 /* Macro.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC6214CBA820094B8E5 /* Macro.metal */; };
FC803BC9214CFC8D0094B8E5 /* FetchKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */; };
FC82735920E3C04200BE430A /* OpCreator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC82735820E3C04200BE430A /* OpCreator.swift */; };
FC9A19E32148C31300CD9CBF /* MobilenetSSD_AR.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */; };
FC9D037920E229E4000F735A /* OpParam.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037820E229E4000F735A /* OpParam.swift */; };
......@@ -116,6 +122,8 @@
FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */; };
FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */; };
FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */; };
FCE9D7B7214F869000B520C3 /* Net.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B6214F869000B520C3 /* Net.swift */; };
FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */; };
FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCEB6849212F00DB00D2448E /* PreluKernel.metal */; };
FCEB684C212F093800D2448E /* PreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEB684B212F093800D2448E /* PreluOp.swift */; };
FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */; };
......@@ -196,6 +204,12 @@
FC4FD97D2140F2C30073E130 /* libstdc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libstdc++.tbd"; path = "usr/lib/libstdc++.tbd"; sourceTree = SDKROOT; };
FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture2DTo2DArrayKernel.swift; sourceTree = "<group>"; };
FC60DB8820E9AAA500FF203F /* MetalExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MetalExtension.swift; sourceTree = "<group>"; };
FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddPreluOp.swift; sourceTree = "<group>"; };
FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddPreluKernel.swift; sourceTree = "<group>"; };
FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddPreluKernel.metal; sourceTree = "<group>"; };
FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddPrelu.inc.metal; sourceTree = "<group>"; };
FC803BC6214CBA820094B8E5 /* Macro.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Macro.metal; sourceTree = "<group>"; };
FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = FetchKernel.metal; sourceTree = "<group>"; };
FC82735820E3C04200BE430A /* OpCreator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpCreator.swift; sourceTree = "<group>"; };
FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MobilenetSSD_AR.swift; sourceTree = "<group>"; };
FC9D037820E229E4000F735A /* OpParam.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpParam.swift; sourceTree = "<group>"; };
......@@ -237,6 +251,8 @@
FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ReluKernel.metal; sourceTree = "<group>"; };
FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PriorBoxKernel.metal; sourceTree = "<group>"; };
FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvTransposeOp.swift; sourceTree = "<group>"; };
FCE9D7B6214F869000B520C3 /* Net.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Net.swift; sourceTree = "<group>"; };
FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = NMSFetchResultKernel.metal; sourceTree = "<group>"; };
FCEB6849212F00DB00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = "<group>"; };
FCEB684B212F093800D2448E /* PreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluOp.swift; sourceTree = "<group>"; };
FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = ConvAddBatchNormReluOp.swift; path = "paddle-mobile/Operators/ConvAddBatchNormReluOp.swift"; sourceTree = SOURCE_ROOT; };
......@@ -297,6 +313,7 @@
FC039B6C20E11C3C0081E9F8 /* paddle-mobile */ = {
isa = PBXGroup;
children = (
FCE9D7B6214F869000B520C3 /* Net.swift */,
FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */,
FC33B0EF2147659000714A93 /* MobileNet.swift */,
FC292C862142624800CF622F /* Genet.swift */,
......@@ -372,6 +389,7 @@
FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */,
FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */,
FCEB684B212F093800D2448E /* PreluOp.swift */,
FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */,
);
path = Operators;
sourceTree = "<group>";
......@@ -420,6 +438,7 @@
4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */,
FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */,
FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */,
FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */,
);
path = Kernels;
sourceTree = "<group>";
......@@ -482,6 +501,11 @@
FC0226552138F33800F395E2 /* TransposeKernel.metal */,
4AA1EAAD214F5FD900D0F791 /* TransposeKernel.inc.metal */,
FC0226572138F38D00F395E2 /* PoolKernel.metal */,
FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */,
FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */,
FC803BC6214CBA820094B8E5 /* Macro.metal */,
FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */,
FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */,
);
path = metal;
sourceTree = "<group>";
......@@ -594,6 +618,7 @@
FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */,
4AA1EAAA214F53D800D0F791 /* BoxCoder.inc.metal in Sources */,
FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */,
FC803BC9214CFC8D0094B8E5 /* FetchKernel.metal in Sources */,
FCA67CD7213827AC00BD58AA /* ConvAddBNReluKernel.metal in Sources */,
4AF9287921341661005B6C3A /* Softmax.metal in Sources */,
4AA1EA942146661500D0F791 /* ShapeKernel.swift in Sources */,
......@@ -603,6 +628,7 @@
FCBCCC6B2123071700D94F7E /* BoxcoderOp.swift in Sources */,
4AA1EAAE214F5FD900D0F791 /* TransposeKernel.inc.metal in Sources */,
4AA1EAA4214A295C00D0F791 /* Split.inc.metal in Sources */,
FC803BC7214CBA820094B8E5 /* Macro.metal in Sources */,
FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */,
4AF9288421357BE3005B6C3A /* Elementwise.metal in Sources */,
FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */,
......@@ -622,6 +648,7 @@
4AA1EAAC214F55C800D0F791 /* Softmax.inc.metal in Sources */,
FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */,
4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */,
FC803BBF214CB65A0094B8E5 /* ConvAddPreluOp.swift in Sources */,
FC33B0F02147659000714A93 /* MobileNet.swift in Sources */,
FCEB684C212F093800D2448E /* PreluOp.swift in Sources */,
4AA1EAA8214B7AFB00D0F791 /* BilinearInterp.inc.metal in Sources */,
......@@ -653,10 +680,12 @@
FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */,
FC292C5621421B4600CF622F /* PaddleMobileGPU.m in Sources */,
FCD04E6620F314C50007374F /* PoolOp.swift in Sources */,
FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */,
FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */,
FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */,
FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */,
FC292C872142624800CF622F /* Genet.swift in Sources */,
FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */,
4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */,
FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */,
FCBCCC71212309A700D94F7E /* MulticlassNMSKernel.swift in Sources */,
......@@ -669,7 +698,9 @@
FC82735920E3C04200BE430A /* OpCreator.swift in Sources */,
FCA3A1652132A5EB00084FE5 /* Common.metal in Sources */,
4AA1EA92214665D700D0F791 /* ShapeOp.swift in Sources */,
FC803BC1214CB77A0094B8E5 /* ConvAddPreluKernel.swift in Sources */,
FCBCCC5D2122F8A100D94F7E /* DepthwiseConvOp.swift in Sources */,
FCE9D7B7214F869000B520C3 /* Net.swift in Sources */,
FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */,
FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */,
FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */,
......@@ -689,6 +720,7 @@
FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */,
FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */,
FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */,
FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */,
4AA1EA90214664CD00D0F791 /* Split.metal in Sources */,
FCD04E6820F315020007374F /* PoolKernel.swift in Sources */,
FC0226582138F38D00F395E2 /* PoolKernel.metal in Sources */,
......
......@@ -16,6 +16,12 @@
#import <Foundation/Foundation.h>
@interface CPUResult: NSObject
@property (assign, nonatomic) float *output;
@property (assign, nonatomic) int outputSize;
@end
@interface NMSCompute: NSObject
@property (assign, nonatomic) float scoreThredshold;
......@@ -34,6 +40,6 @@
@property (strong, nonatomic) NSArray<NSNumber *> *bboxDim;
-(NSArray<NSNumber *> *)computeWithScore:(float *)score andBBoxs:(float *)bbox;
-(CPUResult *)computeWithScore:(float *)score andBBoxs:(float *)bbox;
@end
......@@ -21,6 +21,8 @@
#import <algorithm>
struct NMSParam {
float *score_data;
......@@ -282,9 +284,12 @@ void MultiClassNMSCompute(NMSParam *param) {
param->output_size = output_size;
}
@implementation CPUResult
@end
@implementation NMSCompute
-(NSArray<NSNumber *> *)computeWithScore:(float *)score andBBoxs:(float *)bbox {
-(CPUResult *)computeWithScore:(float *)score andBBoxs:(float *)bbox {
NMSParam param;
param.box_data = bbox;
param.score_data = score;
......@@ -306,12 +311,10 @@ void MultiClassNMSCompute(NMSParam *param) {
}
param.box_dim = box_dim;
MultiClassNMSCompute(&param);
NSMutableArray<NSNumber *> *output = [NSMutableArray arrayWithCapacity:param.output_size];
for (int i = 0; i < param.output_size; ++i) {
[output addObject:[NSNumber numberWithFloat:param.output[i]]];
}
delete param.output;
return output;
CPUResult *cr = [[CPUResult alloc] init];
cr.output = param.output;
cr.outputSize = param.output_size;
return cr;
}
@end
......
......@@ -243,7 +243,7 @@ extension Tensor: Variant {
extension Texture: Variant {
}
extension ResultHolder: Variant {
extension GPUResultHolder: Variant {
}
extension InputTexture: Variant {
......@@ -252,3 +252,43 @@ extension InputTexture: Variant {
extension MTLTexture where Self: Variant {
}
class FetchHolder: Variant {
var resultBuffer: MTLBuffer?
var dim: [Int]
var capacity: Int
init(inCapacity: Int, inDim: [Int]) {
capacity = inCapacity
dim = inDim
}
func initBuffer(device: MTLDevice) {
resultBuffer = device.makeBuffer(length: capacity * 4, options: [])
}
var result: UnsafeMutablePointer<Float32> {
guard let inResultBuffer = resultBuffer else {
fatalError()
}
return inResultBuffer.contents().bindMemory(to: Float32.self, capacity: capacity)
}
}
extension FetchHolder: CustomStringConvertible, CustomDebugStringConvertible {
var description: String {
fatalError()
// return "\(result)"
}
var debugDescription: String {
fatalError()
// return "\(result)"
}
}
......@@ -34,8 +34,9 @@ public class Genet: Net {
}
}
override public func resultStr(res: [Float]) -> String {
return " \(Array<Float>(res.suffix(10))) ... "
override public func resultStr(res: ResultHolder) -> String {
fatalError()
// return " \(Array<Float>(res.suffix(10))) ... "
}
}
......@@ -42,9 +42,12 @@ class MobileNet: Net{
let labels = PreWords.init(fileName: "synset")
override public func resultStr(res: [Float]) -> String {
override public func resultStr(res: ResultHolder) -> String {
guard let resPointer = res.result else {
fatalError()
}
var s: [String] = []
res.top(r: 5).enumerated().forEach{
(0..<res.capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach{
s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100))
}
return s.joined(separator: "\n")
......
......@@ -34,51 +34,52 @@ public class MobileNet_ssd_hand: Net{
}
}
override public func resultStr(res: [Float]) -> String {
override public func resultStr(res: ResultHolder) -> String {
return " \(res)"
}
override func fetchResult(paddleMobileRes: ResultHolder) -> [Float32] {
override func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
guard let interRes = paddleMobileRes.intermediateResults else {
fatalError(" need have inter result ")
}
guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? Texture<Float32> else {
fatalError(" need score ")
}
guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? Texture<Float32> else {
fatalError()
}
var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.padToFourDim[0], h: score.padToFourDim[1], w: score.padToFourDim[2], c: score.padToFourDim[3]))
// print("score: ")
// print(scoreFormatArr.strideArray())
// guard let interRes = paddleMobileRes.intermediateResults else {
// fatalError(" need have inter result ")
// }
//
var bboxArr = bbox.metalTexture.float32Array()
// print("bbox: ")
// print(bboxArr.strideArray())
let nmsCompute = NMSCompute.init()
nmsCompute.scoreThredshold = 0.01
nmsCompute.nmsTopK = 400
nmsCompute.keepTopK = 200
nmsCompute.nmsEta = 1.0
nmsCompute.nmsThreshold = 0.45
nmsCompute.background_label = 0;
nmsCompute.scoreDim = [NSNumber.init(value: score.tensorDim[0]), NSNumber.init(value: score.tensorDim[1]), NSNumber.init(value: score.tensorDim[2])]
nmsCompute.bboxDim = [NSNumber.init(value: bbox.tensorDim[0]), NSNumber.init(value: bbox.tensorDim[1]), NSNumber.init(value: bbox.tensorDim[2])]
guard let result = nmsCompute.compute(withScore: &scoreFormatArr, andBBoxs: &bboxArr) else {
fatalError( " result error " )
}
let output: [Float32] = result.map { $0.floatValue }
return output
// guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? Texture<Float32> else {
// fatalError(" need score ")
// }
//
// guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? Texture<Float32> else {
// fatalError()
// }
//
// var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.padToFourDim[0], h: score.padToFourDim[1], w: score.padToFourDim[2], c: score.padToFourDim[3]))
//// print("score: ")
//// print(scoreFormatArr.strideArray())
////
// var bboxArr = bbox.metalTexture.float32Array()
//// print("bbox: ")
//// print(bboxArr.strideArray())
//
// let nmsCompute = NMSCompute.init()
// nmsCompute.scoreThredshold = 0.01
// nmsCompute.nmsTopK = 400
// nmsCompute.keepTopK = 200
// nmsCompute.nmsEta = 1.0
// nmsCompute.nmsThreshold = 0.45
// nmsCompute.background_label = 0;
//
// nmsCompute.scoreDim = [NSNumber.init(value: score.tensorDim[0]), NSNumber.init(value: score.tensorDim[1]), NSNumber.init(value: score.tensorDim[2])]
//
// nmsCompute.bboxDim = [NSNumber.init(value: bbox.tensorDim[0]), NSNumber.init(value: bbox.tensorDim[1]), NSNumber.init(value: bbox.tensorDim[2])]
// guard let result = nmsCompute.compute(withScore: &scoreFormatArr, andBBoxs: &bboxArr) else {
// fatalError( " result error " )
// }
//
// let output: [Float32] = result.map { $0.floatValue }
//
//
// return output
fatalError()
}
......
......@@ -34,46 +34,52 @@ public class MobileNet_ssd_AR: Net{
}
}
override public func resultStr(res: [Float]) -> String {
override public func resultStr(res: ResultHolder) -> String {
return " \(res)"
}
override func fetchResult(paddleMobileRes: ResultHolder) -> [Float32] {
override func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
guard let interRes = paddleMobileRes.intermediateResults else {
fatalError(" need have inter result ")
}
guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? Texture<Float32> else {
guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? FetchHolder else {
fatalError(" need score ")
}
guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? Texture<Float32> else {
guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? FetchHolder else {
fatalError()
}
var scoreFormatArr: [Float32] = score.metalTexture.realNHWC(dim: (n: score.padToFourDim[0], h: score.padToFourDim[1], w: score.padToFourDim[2], c: score.padToFourDim[3]))
// print("score: ")
// print(scoreFormatArr.strideArray())
//
var bboxArr = bbox.metalTexture.float32Array()
// print("bbox: ")
// print(bboxArr.strideArray())
let startDate = Date.init()
// print("scoreFormatArr: ")
//print((0..<score.capacity).map{ score.result[$0] }.strideArray())
//
// print("bbox arr: ")
//
// print((0..<bbox.capacity).map{ bbox.result[$0] }.strideArray())
let nmsCompute = NMSCompute.init()
nmsCompute.scoreThredshold = 0.01
nmsCompute.nmsTopK = 400
nmsCompute.keepTopK = 200
nmsCompute.scoreThredshold = 0.25
nmsCompute.nmsTopK = 100
nmsCompute.keepTopK = 100
nmsCompute.nmsEta = 1.0
nmsCompute.nmsThreshold = 0.45
nmsCompute.nmsThreshold = 0.449999988
nmsCompute.background_label = 0;
nmsCompute.scoreDim = [NSNumber.init(value: score.tensorDim[0]), NSNumber.init(value: score.tensorDim[1]), NSNumber.init(value: score.tensorDim[2])]
nmsCompute.bboxDim = [NSNumber.init(value: bbox.tensorDim[0]), NSNumber.init(value: bbox.tensorDim[1]), NSNumber.init(value: bbox.tensorDim[2])]
guard let result = nmsCompute.compute(withScore: &scoreFormatArr, andBBoxs: &bboxArr) else {
nmsCompute.scoreDim = [NSNumber.init(value: score.dim[0]), NSNumber.init(value: score.dim[1]), NSNumber.init(value: score.dim[2])]
nmsCompute.bboxDim = [NSNumber.init(value: bbox.dim[0]), NSNumber.init(value: bbox.dim[1]), NSNumber.init(value: bbox.dim[2])]
guard let result = nmsCompute.compute(withScore: score.result, andBBoxs: bbox.result) else {
fatalError( " result error " )
}
let output: [Float32] = result.map { $0.floatValue }
return output
let resultHolder = ResultHolder.init(inResult: result.output, inCapacity: Int(result.outputSize))
// for i in 0..<Int(result.outputSize) {
//
// print("i \(i) : \(result.output[i])")
// }
// print(Date.init().timeIntervalSince(startDate))
// print(resultHolder.result![0])
return resultHolder
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
public class ResultHolder: NSObject {
public let result: UnsafeMutablePointer<Float32>?
public let capacity: Int
init(inResult: UnsafeMutablePointer<Float32>?, inCapacity: Int) {
result = inResult
capacity = inCapacity
}
public func releasePointer() {
result?.deinitialize(count: capacity)
result?.deallocate()
}
}
public class Net: NSObject {
var except: Int = 0
var means: [Float] = []
var scale: Float = 0.0
var dim: (n: Int, h: Int, w: Int, c: Int) = (n: 0, h: 0, w: 0, c: 0)
var preprocessKernel: CusomKernel? = nil
var paramPointer: UnsafeMutableRawPointer? = nil
var paramSize: Int = 0
var modelPointer: UnsafeMutableRawPointer? = nil
var modelSize: Int = 0
var modelPath: String = ""
var paramPath: String = ""
var modelDir: String = ""
public func resultStr(res: ResultHolder) -> String {
fatalError()
}
func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
return ResultHolder.init(inResult: paddleMobileRes.resultPointer, inCapacity: paddleMobileRes.capacity)
}
@objc public init(device: MTLDevice) {
super.init()
}
}
......@@ -64,7 +64,8 @@ class OpCreator<P: PrecisionType> {
gBilinearInterpType : BilinearInterpOp<P>.creat,
gSplit : SplitOp<P>.creat,
gShape : ShapeOp<P>.creat,
gFlatten : FlattenOp<P>.creat]
gFlatten : FlattenOp<P>.creat,
gConvAddPreluType : ConvAddPreluOp<P>.creat]
private init(){}
}
......@@ -19,6 +19,12 @@ protocol Fusion {
static func fusionNode() -> Node
static func change() -> [String : [(from: String, to: String)]]
static func fusionType() -> String
static func needCheck() -> [(Int, String)]
}
extension Fusion {
static func needCheck() -> [(Int, String)] {
return []
}
}
protocol Runable {
......@@ -26,6 +32,7 @@ protocol Runable {
func runImpl(device: MTLDevice,buffer: MTLCommandBuffer) throws
func delogOutput()
func inputVariant() -> [String : [Variant]]
func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer)
}
extension Runable where Self: OperatorProtocol{
......@@ -38,11 +45,16 @@ extension Runable where Self: OperatorProtocol{
}
func inputVariant() -> [String : [Variant]] {
return [:]
// fatalError(" op \(type) need implement inputVariant")
// return [:]
fatalError(" op \(type) need implement inputVariant")
}
func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) {
fatalError(" need implement ")
}
func delogOutput() {
print(type + ": has no implementation" )
}
}
......@@ -144,6 +156,7 @@ let gBilinearInterpType = "bilinear_interp"
let gSplit = "split"
let gShape = "shape"
let gFlatten = "flatten"
let gConvAddPreluType = "conv_add_prelu"
let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Output"]),
gBatchNormType : (inputs: ["X"], outputs: ["Y"]),
......@@ -169,5 +182,7 @@ let opInfos = [gConvType : (inputs: ["Input"], outputs: ["Out
gBilinearInterpType : (inputs: ["X"], outputs: ["Out"]),
gSplit : (inputs: ["X"], outputs: ["Out"]),
gShape : (inputs: ["Input"], outputs: ["Out"]),
gFlatten : (inputs: ["X"], outputs: ["Out"])
gFlatten : (inputs: ["X"], outputs: ["Out"]),
gConvAddPreluType : (inputs: ["Input"], outputs: ["Out"])
]
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ConvAddPreluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
filter = try ConvAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddPreluParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvAddPreluParam.outputOut(outputs: opDesc.outputs, from: inScope)
stride = try ConvAddPreluParam.getAttr(key: "strides", attrs: opDesc.attrs)
paddings = try ConvAddPreluParam.getAttr(key: "paddings", attrs: opDesc.attrs)
dilations = try ConvAddPreluParam.getAttr(key: "dilations", attrs: opDesc.attrs)
groups = try ConvAddPreluParam.getAttr(key: "groups", attrs: opDesc.attrs)
alpha = try ConvAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope)
mode = try ConvAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs)
y = try ConvAddPreluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
} catch let error {
throw error
}
}
let input: Texture<P>
let y: Tensor<ParamPrecisionType>
let filter: Tensor<ParamPrecisionType>
let mode: String
let alpha: Tensor<P>
var output: Texture<P>
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
let groups: Int
}
class ConvAddPreluOp<P: PrecisionType>: Operator<ConvAddPreluKernel<P>, ConvAddPreluParam<P>>, Runable, Creator, InferShaperable, Fusion{
typealias OpType = ConvAddPreluOp<P>
static func fusionNode() -> Node {
let beginNode = Node.init(inType: gConvType)
_ = beginNode
--> Node.init(inType: gElementwiseAddType) --> Node.init(inType: gPreluType)
return beginNode
}
static func change() -> [String : [(from: String, to: String)]] {
return [:]
}
static func fusionType() -> String {
return gConvAddPreluType
}
func inferShape() {
let inDims = para.input.dim
let filterDim = para.filter.dim
let strides = para.stride
let paddings = para.paddings
let dilations = para.dilations
var outDim = [inDims[0]]
for i in 0..<strides.count {
let dilation: Int = Int(dilations[i])
let filterSize: Int = filterDim[i + 1]
let inputSize: Int = inDims[i + 1]
let padding: Int = Int(paddings[i])
let stride: Int = Int(strides[i])
let dKernel = dilation * (filterSize - 1) + 1
let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
outDim.append(outputSize)
}
outDim.append(filterDim[0])
para.output.dim = Dim.init(inDim: outDim)
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
}
}
......@@ -15,14 +15,15 @@
import Foundation
class FetchParam<P: PrecisionType>: OpParam{
var output: Texture<P>
var output: FetchHolder
let input: Texture<P>
let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws {
scope = inScope
do {
input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope)
output = input
output = FetchHolder.init(inCapacity: input.numel(), inDim: input.tensorDim.dims)
scope.setOutput(output: output)
} catch let error {
throw error
}
......@@ -34,14 +35,40 @@ class FetchParam<P: PrecisionType>: OpParam{
class FetchKernel<P: PrecisionType>: Kernel, Computable {
func compute(commandBuffer: MTLCommandBuffer, param: FetchParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setBuffer(param.output.resultBuffer!, offset: 0, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.input.metalTexture)
encoder.endEncoding()
}
required init(device: MTLDevice, param: FetchParam<P>) {
super.init(device: device, inFunctionName: "place_holder")
param.output.initBuffer(device: device)
if computePrecision == .Float16 {
if param.input.transpose == [0, 2, 3, 1] {
super.init(device: device, inFunctionName: "fetch_half")
} else {
// fatalError(" not support ")
super.init(device: device, inFunctionName: "fetch_placeholder_half")
print(" not support ")
}
} else if computePrecision == .Float32 {
if param.input.transpose == [0, 2, 3, 1] {
super.init(device: device, inFunctionName: "fetch")
} else {
print(" not support ")
super.init(device: device, inFunctionName: "fetch_placeholder")
// fatalError(" not support ")
}
} else {
fatalError(" not support ")
}
}
}
class FetchOp<P: PrecisionType>: Operator< FetchKernel<P>, FetchParam<P>>, Runable, Creator, InferShaperable{
class FetchOp<P: PrecisionType>: Operator< FetchKernel<P>, FetchParam<P>>, Runable, Creator, InferShaperable {
typealias OpType = FetchOp<P>
......@@ -50,7 +77,11 @@ class FetchOp<P: PrecisionType>: Operator< FetchKernel<P>, FetchParam<P>>, Runab
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
scope.setOutput(output: para.output)
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ConvAddPreluKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddPreluParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision)
param.y.initBuffer(device: device, precision: computePrecision)
param.alpha.initBuffer(device: device, precision: computePrecision)
if computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half")
}
} else if param.filter.channel == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half")
}
} else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half")
}
} else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half")
}
} else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half")
} else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half")
}
} else {
fatalError(" unsupport yet ")
}
} else if computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float")
}
} else if param.filter.channel == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float")
}
} else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float")
}
} else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float")
}
} else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float")
} else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float")
}
} else {
fatalError(" unsupport yet ")
}
} else {
fatalError()
}
let offsetY = (Int(param.dilations[1]) * (param.filter.height - 1) + 1)/2 - Int(param.paddings[1])
let offsetX = (Int(param.dilations[0]) * (param.filter.width - 1) + 1)/2 - Int(param.paddings[0])
// print(" function: \(functionName)")
// print("offset x: \(offsetX)")
// print("offset y: \(offsetY)")
let offsetZ = 0.0
let inMetalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), dilationX: UInt16(param.dilations[0]), dilationY: UInt16(param.dilations[1]))
// print("metal param: ")
// print(inMetalParam)
metalParam = inMetalParam
}
func compute(commandBuffer: MTLCommandBuffer, param: ConvAddPreluParam<P>) throws {
// guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
// throw PaddleMobileError.predictError(message: " encode is nil")
// }
//
// encoder.setTexture(param.input.metalTexture, index: 0)
// encoder.setTexture(param.output.metalTexture, index: 1)
// encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
// encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
// encoder.setBuffer(param.y.buffer, offset: 0, index: 2)
// encoder.setBuffer(param.alpha.buffer, offset: 0, index: 3)
// encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
// encoder.endEncoding()
}
}
......@@ -15,11 +15,41 @@
import Foundation
class MulticlassNMSKernel<P: PrecisionType>: Kernel, Computable{
let pipline1: MTLComputePipelineState
required init(device: MTLDevice, param: MulticlassNMSParam<P>) {
super.init(device: device, inFunctionName: "place_holder")
param.middleOutput.initBuffer(device: device)
param.bboxOutput.initBuffer(device: device)
if computePrecision == .Float32 {
pipline1 = device.pipeLine(funcName: "nms_fetch_bbox", inPaddleMobileLib: true)
super.init(device: device, inFunctionName: "nms_fetch_result")
} else if computePrecision == .Float16 {
pipline1 = device.pipeLine(funcName: "nms_fetch_bbox_half", inPaddleMobileLib: true)
super.init(device: device, inFunctionName: "nms_fetch_result_half")
} else {
fatalError( " unsupport precision " )
}
}
func compute(commandBuffer: MTLCommandBuffer, param: MulticlassNMSParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.scores.metalTexture, index: 0)
encoder.setBuffer(param.middleOutput.resultBuffer!, offset: 0, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.scores.metalTexture)
encoder.endEncoding()
guard let encoderBox = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoderBox.setTexture(param.bboxes.metalTexture, index: 0)
encoderBox.setBuffer(param.bboxOutput.resultBuffer!, offset: 0, index: 0)
encoderBox.dispatch(computePipline: pipline1, outTexture: param.bboxes.metalTexture)
encoderBox.endEncoding()
}
}
......@@ -71,7 +71,6 @@ class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
}
func compute(commandBuffer: MTLCommandBuffer, param: ReshapeParam<P>) throws {
print("reshape compute")
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encoder is nil")
}
......
......@@ -19,7 +19,7 @@ struct ShapeMetalParam {
class ShapeKernel<P: PrecisionType>: Kernel, Computable{
func compute(commandBuffer: MTLCommandBuffer, param: ShapeParam<P>) throws {
print("shape compute")
// print("shape compute")
// guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
// throw PaddleMobileError.predictError(message: " encode is nil")
// }
......
......@@ -74,4 +74,6 @@ class TransposeKernel<P: PrecisionType>: Kernel, Computable {
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
......@@ -17,14 +17,15 @@
using namespace metal;
kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
kernel void conv_add_batch_norm_relu_1x1_half(
texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device half4 *new_scale [[buffer(3)]],
const device half4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
......@@ -41,7 +42,7 @@ kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array<half, access::samp
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = half4(0.0);
float4 output = float4(0.0);
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
......@@ -58,19 +59,19 @@ kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array<half, access::samp
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0));
outTexture.write(output, gid.xy, gid.z);
output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void conv_add_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
kernel void conv_add_batch_norm_relu_3x3_half(
texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device half4 *new_scale [[buffer(3)]],
const device half4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
......@@ -86,7 +87,7 @@ kernel void conv_add_batch_norm_relu_3x3_half(texture2d_array<half, access::samp
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = half4(0.0);
float4 output = float4(0.0);
half4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
......@@ -113,19 +114,19 @@ kernel void conv_add_batch_norm_relu_3x3_half(texture2d_array<half, access::samp
output.w += dot(input[j], weight_w);
}
}
output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0));
outTexture.write(output, gid.xy, gid.z);
output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void depthwise_conv_add_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
kernel void depthwise_conv_add_batch_norm_relu_3x3_half(
texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device half4 *new_scale [[buffer(3)]],
const device half4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
......@@ -138,7 +139,7 @@ kernel void depthwise_conv_add_batch_norm_relu_3x3_half(texture2d_array<half, ac
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
half4 output = half4(0.0);
float4 output = float4(0.0);
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
......@@ -156,11 +157,12 @@ kernel void depthwise_conv_add_batch_norm_relu_3x3_half(texture2d_array<half, ac
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0));
outTexture.write(output, gid.xy, gid.z);
output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
/*---------------------------------------------*/
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#include "Macro.metal"
#pragma mark - convAdd
kernel void FUNC3_(conv_add_1x1, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
VECTOR(P, 4) input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample,float2(posInInput.x, posInInput.y), i);
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = output + float4(biase[gid.z]);
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
VECTOR(P, 4) input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i);
for (int j = 0; j < 9; ++j) {
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
output = output + float4(biase[gid.z]);
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(conv_add_5x1, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(biase[gid.z]);;
ushort dilation_y = param.dilationY;
VECTOR(P, 4) input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i);
for (int j = 0; j < 5; ++j) {
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(conv_add_1x5, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(biase[gid.z]);
ushort dilation_x = param.dilationX;
VECTOR(P, 4) input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x + 2 * dilation_x, posInInput.y), i);
for (int j = 0; j < 5; ++j) {
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(depthwise_conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device P *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(biase[gid.z]);
VECTOR(P, 4) inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
VECTOR(P, 4) input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
#define P float
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
#define P half
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void fetch(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
int input_height = inTexture.get_height();
const float4 input = inTexture.read(gid.xy, gid.z);
int output_to = 4 * input_width * input_height;
output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x;
output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y;
output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
}
kernel void fetch_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float * output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
int input_height = inTexture.get_height();
const half4 input = inTexture.read(gid.xy, gid.z);
int output_to = 4 * input_width * input_height;
output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x;
output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y;
output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
}
kernel void fetch_placeholder(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
}
kernel void fetch_placeholder_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e
#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
#define VECTOR(p, n) CONCAT2(p, n)
#define FUNC3_(a, b, c) CONCAT3_(a, b, c)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void nms_fetch_result(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
const float4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = input.x;
}
kernel void nms_fetch_result_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
const half4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = input.x;
}
kernel void nms_fetch_bbox(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float4 *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
// int input_height = inTexture.get_height();
const float4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = input;
}
kernel void nms_fetch_bbox_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float4 *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
// int input_height = inTexture.get_height();
const half4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = float4(input);
}
......@@ -21,10 +21,16 @@ class MulticlassNMSParam<P: PrecisionType>: OpParam {
scores = try MulticlassNMSParam.getFirstTensor(key: "Scores", map: opDesc.inputs, from: inScope)
bboxes = try MulticlassNMSParam.getFirstTensor(key: "BBoxes", map: opDesc.inputs, from: inScope)
output = try MulticlassNMSParam.outputOut(outputs: opDesc.outputs, from: inScope)
middleOutput = FetchHolder.init(inCapacity: scores.tensorDim.numel(), inDim: scores.tensorDim.dims)
bboxOutput = FetchHolder.init(inCapacity: bboxes.tensorDim.numel(), inDim: bboxes.tensorDim.dims)
} catch let error {
throw error
}
}
var bboxOutput: FetchHolder
var middleOutput: FetchHolder
let scores: Texture<P>
let bboxes: Texture<P>
var output: Texture<P>
......@@ -33,7 +39,15 @@ class MulticlassNMSParam<P: PrecisionType>: OpParam {
class MulticlassNMSOp<P: PrecisionType>: Operator<MulticlassNMSKernel<P>, MulticlassNMSParam<P>>, Runable, Creator, InferShaperable{
func inputVariant() -> [String : [Variant]] {
return ["Scores" : [para.scores], "BBoxes" : [para.bboxes]]
return ["Scores" : [para.middleOutput], "BBoxes" : [para.bboxOutput]]
}
func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let _ {
fatalError()
}
}
func inferShape() {
......@@ -42,11 +56,12 @@ class MulticlassNMSOp<P: PrecisionType>: Operator<MulticlassNMSKernel<P>, Multic
typealias OpType = MulticlassNMSOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print(" nms - output: ")
print(para.bboxes.metalTexture.float32Array().strideArray())
}
}
......
......@@ -16,31 +16,13 @@ import Foundation
class ScaleKernel: CusomKernel {
init(device: MTLDevice, shape: Shape) {
super.init(device: device, inFunctionName: "scale", outputDim: shape, usePaddleMobileLib: false)
}
}
public class Net: NSObject {
var except: Int = 0
var means: [Float] = []
var scale: Float = 0.0
var dim: (n: Int, h: Int, w: Int, c: Int) = (n: 0, h: 0, w: 0, c: 0)
var preprocessKernel: CusomKernel? = nil
var paramPointer: UnsafeMutableRawPointer? = nil
var paramSize: Int = 0
var modelPointer: UnsafeMutableRawPointer? = nil
var modelSize: Int = 0
var modelPath: String = ""
var paramPath: String = ""
var modelDir: String = ""
public func resultStr(res: [Float]) -> String {
fatalError()
}
func fetchResult(paddleMobileRes: ResultHolder) -> [Float32] {
return paddleMobileRes.resultArr
}
@objc public init(device: MTLDevice) {
super.init()
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "scale", outputDim: shape, usePaddleMobileLib: false)
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "scale_half", outputDim: shape, usePaddleMobileLib: false)
} else {
fatalError(" unsupport ")
}
}
}
......@@ -118,18 +100,18 @@ public class Runner: NSObject {
* texture: 需要预测的 texture 需要做过预处理
* ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组
*/
@objc public func predict(texture: MTLTexture, completion: @escaping ( _ success: Bool, _ resultArray: [Float32]) -> Void) {
@objc public func predict(texture: MTLTexture, completion: @escaping ( _ success: Bool, _ result: ResultHolder?) -> Void) {
do {
try self.executor?.predict(input: texture, dim: [self.net.dim.n, self.net.dim.h, self.net.dim.w, self.net.dim.c], completionHandle: { [weak self] (res) in
guard let SSelf = self else {
fatalError( " self nil " )
}
let resultArray = SSelf.net.fetchResult(paddleMobileRes: res)
completion(true, resultArray)
let result = SSelf.net.fetchResult(paddleMobileRes: res)
completion(true, result)
}, preProcessKernle: self.net.preprocessKernel, except: self.net.except)
} catch let error {
print(error)
completion(false, [])
completion(false, nil)
return
}
}
......@@ -139,21 +121,21 @@ public class Runner: NSObject {
* cgImage: 需要预测的图片
* ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组
*/
@objc public func predict(cgImage: CGImage, completion: @escaping ( _ success: Bool, _ resultArray: [Float32]) -> Void) {
if platform == .GPU {
getTexture(image: cgImage) { [weak self] (texture) in
guard let SSelf = self else {
fatalError( "" )
}
SSelf.predict(texture: texture, completion: completion)
}
} else if platform == .CPU {
let input = preproccess(image: cgImage)
predict(inputPointer: input, completion: completion)
input.deinitialize(count: numel)
input.deallocate()
}
}
// @objc public func predict(cgImage: CGImage, completion: @escaping ( _ success: Bool, _ resultArray: [Float32]) -> Void) {
// if platform == .GPU {
// getTexture(image: cgImage) { [weak self] (texture) in
// guard let SSelf = self else {
// fatalError( "" )
// }
// SSelf.predict(texture: texture, completion: completion)
// }
// } else if platform == .CPU {
// let input = preproccess(image: cgImage)
// predict(inputPointer: input, completion: completion)
// input.deinitialize(count: numel)
// input.deallocate()
// }
// }
/*
* 清理内存, 调用此函数后, 不能再使用, 需重新 load
......@@ -184,10 +166,10 @@ public class Runner: NSObject {
*/
@objc public func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) {
let texture = try? textureLoader?.newTexture(cgImage: image, options: [:]) ?! " texture loader error"
scaleTexture(input: texture!, size: (net.dim.w, net.dim.h), complete: getTexture)
scaleTexture(input: texture!, complete: getTexture)
}
func scaleTexture(input: MTLTexture, size:(width: Int, height: Int), complete: @escaping (MTLTexture) -> Void) {
public func scaleTexture(input: MTLTexture , complete: @escaping (MTLTexture) -> Void) {
guard let inQueue = queue, let inDevice = device else {
fatalError( " queue or devcie nil " )
......@@ -197,7 +179,7 @@ public class Runner: NSObject {
fatalError( " make buffer error" )
}
let scaleKernel = ScaleKernel.init(device: inDevice, shape: CusomKernel.Shape.init(inWidth: size.width, inHeight: size.height, inChannel: 3))
let scaleKernel = ScaleKernel.init(device: inDevice, shape: CusomKernel.Shape.init(inWidth: net.dim.w, inHeight: net.dim.h, inChannel: 3))
do {
try scaleKernel.compute(inputTexuture: input, commandBuffer: buffer)
......
......@@ -15,209 +15,272 @@
import Foundation
precedencegroup ChainNode {
associativity: left
higherThan: MultiplicationPrecedence
associativity: left
higherThan: MultiplicationPrecedence
}
infix operator --> : ChainNode
class Node {
var inputs: [Node] = []
var outputs: [Node] = []
var type: String
var opDesc: OpDesc?
init(inOpDesc: OpDesc) {
type = inOpDesc.type
opDesc = inOpDesc
var inputs: [Node] = []
var outputs: [Node] = []
var type: String
var opDesc: OpDesc?
init(inOpDesc: OpDesc) {
type = inOpDesc.type
opDesc = inOpDesc
}
init(inType: String) {
type = inType
}
subscript(index: Int) -> [Node] {
var nodes: [Node] = []
getNodesWithLocation(index: index, nowIndex: 0, nodes: &nodes)
return nodes
}
func getNodesWithLocation(index: Int, nowIndex: Int, nodes: inout [Node]) {
if index == nowIndex {
nodes.append(self)
}
init(inType: String) {
type = inType
for output in outputs {
output.getNodesWithLocation(index: index, nowIndex: nowIndex + 1, nodes: &nodes)
}
}
static func -->(lNode: Node, rNode: Node) -> Node {
lNode.outputs.append(rNode)
rNode.inputs.append(lNode)
return rNode
}
func depth(begin: UInt = 1) -> UInt {
var beginMax: UInt = 1
for output in outputs {
let subDepth = output.depth(begin: begin + 1)
beginMax = max(begin, subDepth)
}
beginMax = max(begin, beginMax)
return beginMax
}
func to(depth: UInt) -> Node {
let beginNode = Node.init(inType: type)
to(depth: depth - 1, withNode: beginNode)
return beginNode
}
func folderWith(fusion: Fusion.Type, removedNodes: inout [Node]) {
let fusionNode = fusion.fusionNode()
let change = fusion.change()
let inOutputs = outputs
outputs.removeAll()
opDesc?.outputs.removeAll()
for i in 0..<inOutputs.count {
inOutputs[i].folderWith(beginNode: self, matchNode: fusionNode.outputs[i], change: change, removedNodes: &removedNodes)
}
opDesc?.type = fusion.fusionType()
type = fusion.fusionType()
}
private func folderWith(beginNode: Node, matchNode: Node, change: [String : [(from: String, to: String)]], removedNodes: inout [Node]) {
guard let inOpdesc = opDesc else {
fatalError()
}
static func -->(lNode: Node, rNode: Node) -> Node {
lNode.outputs.append(rNode)
rNode.inputs.append(lNode)
return rNode
for attr in inOpdesc.attrs {
beginNode.opDesc?.attrs[attr.key] = attr.value
// print(beginNode.opDesc?.attrs)
}
func depth(begin: UInt = 1) -> UInt {
var beginMax: UInt = 1
for output in outputs {
let subDepth = output.depth(begin: begin + 1)
beginMax = max(begin, subDepth)
for paraInput in inOpdesc.paraInputs {
if let inChanges = change[type] {
for keyChange in inChanges {
if keyChange.from == paraInput.key {
beginNode.opDesc?.paraInputs[keyChange.to] = paraInput.value
} else {
beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value
}
}
beginMax = max(begin, beginMax)
return beginMax
} else {
beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value
}
}
func to(depth: UInt) -> Node {
let beginNode = Node.init(inType: type)
to(depth: depth - 1, withNode: beginNode)
return beginNode
if matchNode.outputs.count == 0 {
beginNode.outputs.append(contentsOf: outputs)
beginNode.opDesc?.outputs = inOpdesc.outputs
}
removedNodes.append(self)
func folderWith(fusion: Fusion.Type, removedNodes: inout [Node]) {
let fusionNode = fusion.fusionNode()
let change = fusion.change()
let inOutputs = outputs
outputs.removeAll()
opDesc?.outputs.removeAll()
for i in 0..<inOutputs.count {
inOutputs[i].folderWith(beginNode: self, matchNode: fusionNode.outputs[i], change: change, removedNodes: &removedNodes)
}
opDesc?.type = fusion.fusionType()
type = fusion.fusionType()
for i in 0..<matchNode.outputs.count {
outputs[i].folderWith(beginNode: beginNode, matchNode: matchNode.outputs[i], change: change, removedNodes: &removedNodes)
}
private func folderWith(beginNode: Node, matchNode: Node, change: [String : [(from: String, to: String)]], removedNodes: inout [Node]) {
guard let inOpdesc = opDesc else {
fatalError()
}
for attr in inOpdesc.attrs {
beginNode.opDesc?.attrs[attr.key] = attr.value
// print(beginNode.opDesc?.attrs)
}
for paraInput in inOpdesc.paraInputs {
if let inChanges = change[type] {
for keyChange in inChanges {
if keyChange.from == paraInput.key {
beginNode.opDesc?.paraInputs[keyChange.to] = paraInput.value
} else {
beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value
}
}
} else {
beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value
}
}
if matchNode.outputs.count == 0 {
beginNode.outputs.append(contentsOf: outputs)
beginNode.opDesc?.outputs = inOpdesc.outputs
}
removedNodes.append(self)
for i in 0..<matchNode.outputs.count {
outputs[i].folderWith(beginNode: beginNode, matchNode: matchNode.outputs[i], change: change, removedNodes: &removedNodes)
}
}
private func to(depth: UInt, withNode: Node) {
if depth < 1 {
return
}
private func to(depth: UInt, withNode: Node) {
if depth < 1 {
return
}
for output in outputs {
let node = Node.init(inType: output.type)
withNode.outputs.append(node)
output.to(depth: depth - 1, withNode: node)
}
for output in outputs {
let node = Node.init(inType: output.type)
withNode.outputs.append(node)
output.to(depth: depth - 1, withNode: node)
}
}
func relationship() -> [String : Node]{
var map: [String : Node] = [:]
relationship(map: &map)
return map
}
private func relationship(map: inout [String : Node]) {
guard let inOpDesc = opDesc else {
return
}
for output in inOpDesc.outputs {
for outputKey in output.value {
map[outputKey] = self
}
}
for output in outputs {
output.relationship(map: &map)
}
}
}
extension Node: Equatable {
static func == (lhs: Node, rhs: Node) -> Bool {
if lhs.outputs.count != rhs.outputs.count {
return false
}
if lhs.type != rhs.type {
return false
}
for i in 0..<lhs.outputs.count {
if lhs.outputs[i] != rhs.outputs[i] {
return false
}
}
return true
static func == (lhs: Node, rhs: Node) -> Bool {
if lhs.outputs.count != rhs.outputs.count {
return false
}
if lhs.type != rhs.type {
return false
}
for i in 0..<lhs.outputs.count {
if lhs.outputs[i] != rhs.outputs[i] {
return false
}
}
return true
}
}
class ProgramOptimize<P: PrecisionType> {
// register fusion
let fusionOps: [Fusion.Type] = [ConvAddBatchNormReluOp<P>.self,
ConvAddOp<P>.self,
ConvBNReluOp<P>.self,
DwConvBNReluOp<P>.self]
func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc {
guard originProgramDesc.blocks.count == 1 else {
fatalError(" not support yet")
// register fusion
let fusionOps: [Fusion.Type] = [ConvAddBatchNormReluOp<P>.self,
ConvAddPreluOp<P>.self,
ConvAddOp<P>.self,
ConvBNReluOp<P>.self,
DwConvBNReluOp<P>.self
]
func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc {
guard originProgramDesc.blocks.count == 1 else {
fatalError(" not support yet")
}
var mapForNodeChain: [String : Node] = [:]
var nodes: [Node] = []
var typeMapNodes: [String : [(node: Node, output: [String : Node])]] = [:]
let block = originProgramDesc.blocks[0]
for opDesc in block.ops {
guard let opInputKeys = opInfos[opDesc.type]?.inputs, let outputKeys = opInfos[opDesc.type]?.outputs else {
fatalError()
}
let node = Node.init(inOpDesc: opDesc)
for inputKey in opInputKeys {
if let inputs = opDesc.inputs[inputKey] {
for input in inputs {
if let inputNode = mapForNodeChain[input] {
_ = inputNode --> node
}
}
}
var mapForNodeChain: [String : Node] = [:]
var nodes: [Node] = []
var typeMapNodes: [String : [Node]] = [:]
let block = originProgramDesc.blocks[0]
for opDesc in block.ops {
guard let opInputKeys = opInfos[opDesc.type]?.inputs, let outputKeys = opInfos[opDesc.type]?.outputs else {
fatalError()
}
let node = Node.init(inOpDesc: opDesc)
for inputKey in opInputKeys {
if let inputs = opDesc.inputs[inputKey] {
for input in inputs {
if let inputNode = mapForNodeChain[input] {
_ = inputNode --> node
}
}
}
}
for outputKey in outputKeys {
if let outputs = opDesc.outputs[outputKey] {
for output in outputs {
mapForNodeChain[output] = node
}
}
for outputKey in outputKeys {
if let outputs = opDesc.outputs[outputKey] {
for output in outputs {
mapForNodeChain[output] = node
}
}
}
nodes.append(node)
if var inNodes = typeMapNodes[opDesc.type] {
inNodes.append((node, mapForNodeChain))
typeMapNodes[opDesc.type] = inNodes
} else {
typeMapNodes[opDesc.type] = [(node, mapForNodeChain)]
}
}
for fusion in fusionOps {
let fusionNode = fusion.fusionNode()
let depth = fusionNode.depth()
if let toMatchNodes = typeMapNodes[fusionNode.type] {
for node in toMatchNodes {
let toNode = node.node.to(depth: depth)
if toNode == fusionNode { // match
var canFolder = true
let relationshipMap = toNode.relationship()
for toCheck in fusion.needCheck() {
// let nodes = toCheck
let checkNodes = toNode[toCheck.0]
for checkNode in checkNodes {
let inputToChecks = checkNode.opDesc?.inputs[toCheck.1] ?? []
for inputToCheck in inputToChecks {
if node.output[inputToCheck] == nil {
if relationshipMap[inputToCheck] == nil {
canFolder = false
}
}
}
nodes.append(node)
if var inNodes = typeMapNodes[opDesc.type] {
inNodes.append(node)
typeMapNodes[opDesc.type] = inNodes
} else {
typeMapNodes[opDesc.type] = [node]
}
}
}
for fusion in fusionOps {
let fusionNode = fusion.fusionNode()
let depth = fusionNode.depth()
if let toMatchNodes = typeMapNodes[fusionNode.type] {
for node in toMatchNodes {
let toNode = node.to(depth: depth)
if toNode == fusionNode { // match
var removeNodes: [Node] = []
node.folderWith(fusion: fusion, removedNodes: &removeNodes)
for removeNode in removeNodes {
nodes.remove(element: removeNode)
}
}
}
}
if !canFolder {
continue
}
var ops: [OpDesc] = []
for node in nodes {
ops.append(node.opDesc!)
var removeNodes: [Node] = []
node.node.folderWith(fusion: fusion, removedNodes: &removeNodes)
for removeNode in removeNodes {
nodes.remove(element: removeNode)
}
}
}
var newProgramDesc = ProgramDesc.init()
let newBlock = BlockDesc.init(inVars: block.vars, inOps: ops)
newProgramDesc.blocks.append(newBlock)
return newProgramDesc
}
}
var ops: [OpDesc] = []
for node in nodes {
ops.append(node.opDesc!)
}
var newProgramDesc = ProgramDesc.init()
let newBlock = BlockDesc.init(inVars: block.vars, inOps: ops)
newProgramDesc.blocks.append(newBlock)
return newProgramDesc
}
}
......@@ -14,39 +14,50 @@
import Foundation
let testTo = 113
let testTo = 81
var isTest = false
let computePrecision: ComputePrecision = .Float32
let computePrecision: ComputePrecision = .Float16
public class ResultHolder {
public class GPUResultHolder {
public let dim: [Int]
public let resultArr: [Float32]
public let capacity: Int
public var resultPointer: UnsafeMutablePointer<Float32>?
public var intermediateResults: [String : [Variant]]?
public let elapsedTime: Double
public init(inDim: [Int], inResult: [Float32], inElapsedTime: Double, inIntermediateResults: [String : [Variant]]? = nil) {
public init(inDim: [Int], inPointer: UnsafeMutablePointer<Float32>?, inCapacity: Int, inElapsedTime: Double, inIntermediateResults: [String : [Variant]]? = nil) {
dim = inDim
resultArr = inResult
capacity = inCapacity
if let inInPointer = inPointer {
resultPointer = UnsafeMutablePointer<Float32>.allocate(capacity: inCapacity)
resultPointer?.initialize(from: inInPointer, count: inCapacity)
}
elapsedTime = inElapsedTime
intermediateResults = inIntermediateResults
}
}
extension ResultHolder: CustomDebugStringConvertible, CustomStringConvertible {
extension GPUResultHolder: CustomDebugStringConvertible, CustomStringConvertible {
public var debugDescription: String {
var str = ""
str += "Dim: \(dim) \n value:[ "
if resultArr.count < 20 {
for d in resultArr {
str += " \(d) "
}
} else {
for d in stride(from: 0, to: resultArr.count, by: resultArr.count/20) {
str += " \(resultArr[d]) "
}
}
str += " ]"
return str
// var str = ""
// str += "Dim: \(dim) \n value:[ "
// if resultArr.count < 20 {
// for d in resultArr {
// str += " \(d) "
// }
// } else {
// for d in stride(from: 0, to: resultArr.count, by: resultArr.count/20) {
// str += " \(resultArr[d]) "
// }
// }
// str += " ]"
// return str
fatalError()
}
public var description: String {
......@@ -67,7 +78,7 @@ public class Executor<P: PrecisionType> {
queue = inQueue
for block in inProgram.programDesc.blocks {
//block.ops.count
for i in 0..<testTo {
for i in 0..<block.ops.count {
let op = block.ops[i]
do {
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
......@@ -79,7 +90,7 @@ public class Executor<P: PrecisionType> {
}
}
public func predict(input: MTLTexture, dim: [Int], completionHandle: @escaping (ResultHolder) -> Void, preProcessKernle: CusomKernel? = nil, except: Int = 0) throws {
public func predict(input: MTLTexture, dim: [Int], completionHandle: @escaping (GPUResultHolder) -> Void, preProcessKernle: CusomKernel? = nil, except: Int = 0) throws {
guard let buffer = queue.makeCommandBuffer() else {
throw PaddleMobileError.predictError(message: "CommandBuffer is nil")
}
......@@ -101,7 +112,7 @@ public class Executor<P: PrecisionType> {
let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: Dim.init(inDim: dim))
program.scope.setInput(input: inputTexture)
//(ops.count - except)
for i in 0..<testTo {
for i in 0..<(ops.count - except) {
let op = ops[i]
do {
try op.run(device: device, buffer: buffer)
......@@ -112,18 +123,19 @@ public class Executor<P: PrecisionType> {
var outputTextures: [String : [Variant]]?
if except > 0 {
outputTextures = ops[testTo-1].inputVariant()
ops[ops.count - except].computeMiddleResult(device: device, buffer: buffer)
outputTextures = ops[ops.count - except].inputVariant()
}
buffer.addCompletedHandler { [weak self] (commandbuffer) in
let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2]))
print(inputArr.strideArray())
print(dim)
writeToLibrary(fileName: "test_image_ssd_ar", array: inputArr)
// let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2]))
// print(inputArr.strideArray())
//
//// print(dim)
// writeToLibrary(fileName: "test_image_ssd_ar", array: inputArr)
// print(" write done ")
print("write to library done")
// print("write to library done")
// return
// print(inputArr)
//
......@@ -131,11 +143,11 @@ public class Executor<P: PrecisionType> {
// print(stridableInput)
//
// let _: Flo? = input.logDesc(header: "input: ", stridable: true)
for i in 0..<testTo {
let op = self!.ops[i]
print(" 第 \(i) 个 op: ")
op.delogOutput()
}
// for i in 0..<self!.ops.count {
// let op = self!.ops[i]
// print(" 第 \(i) 个 op: ")
// op.delogOutput()
// }
// return;
// self!.ops[testTo - 2].delogOutput()
......@@ -145,18 +157,23 @@ public class Executor<P: PrecisionType> {
// return
guard let SSelf = self else {
// return
fatalError()
}
let afterDate = Date.init()
var resultHolder: ResultHolder
var resultHolder: GPUResultHolder
if except > 0 {
resultHolder = ResultHolder.init(inDim: [], inResult: [], inElapsedTime: afterDate.timeIntervalSince(beforeDate), inIntermediateResults: outputTextures)
resultHolder = GPUResultHolder.init(inDim: [], inPointer: nil, inCapacity: 0, inElapsedTime: afterDate.timeIntervalSince(beforeDate), inIntermediateResults: outputTextures)
} else {
let outputVar: Variant = SSelf.program.scope.output()!
let output: Texture<P> = outputVar as! Texture<P>
let output: FetchHolder = outputVar as! FetchHolder
// let beforeToTensorDate = Date.init()
resultHolder = GPUResultHolder.init(inDim: output.dim, inPointer: output.result, inCapacity: output.capacity, inElapsedTime: afterDate.timeIntervalSince(beforeDate))
resultHolder = ResultHolder.init(inDim: output.dim.dims, inResult: output.toTensor(), inElapsedTime: afterDate.timeIntervalSince(beforeDate))
// let timeToTensor = Date.init().timeIntervalSince(beforeToTensorDate)
// print(timeToTensor)
}
completionHandle(resultHolder)
......
......@@ -168,7 +168,7 @@ public class Loader<P: PrecisionType> {
}
} else {
if varDesc.name == fetchKey {
scope[varDesc.name] = ResultHolder.init(inDim: [], inResult: [], inElapsedTime: 0.0)
// scope[varDesc.name] = ResultHolder.init(inDim: [], inResult: [], inCapacity: <#Int#>, inElapsedTime: 0.0)
} else if varDesc.name == feedKey {
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册