diff --git a/metal/PreluKernel/PreluKernel.xcodeproj/project.pbxproj b/metal/PreluKernel/PreluKernel.xcodeproj/project.pbxproj new file mode 100644 index 0000000000000000000000000000000000000000..76f0abb36b0129ce1553816def0548fb514f2876 --- /dev/null +++ b/metal/PreluKernel/PreluKernel.xcodeproj/project.pbxproj @@ -0,0 +1,164 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 50; + objects = { + +/* Begin PBXBuildFile section */ + FCEB6843212F00CC00D2448E /* PreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCEB6842212F00CC00D2448E /* PreluKernel.metal */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + FCEB683F212F00CC00D2448E /* PreluKernel.metallib */ = {isa = PBXFileReference; explicitFileType = "archive.metal-library"; includeInIndex = 0; path = PreluKernel.metallib; sourceTree = BUILT_PRODUCTS_DIR; }; + FCEB6842212F00CC00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXGroup section */ + FCEB6838212F00CC00D2448E = { + isa = PBXGroup; + children = ( + FCEB6841212F00CC00D2448E /* PreluKernel */, + FCEB6840212F00CC00D2448E /* Products */, + ); + sourceTree = ""; + }; + FCEB6840212F00CC00D2448E /* Products */ = { + isa = PBXGroup; + children = ( + FCEB683F212F00CC00D2448E /* PreluKernel.metallib */, + ); + name = Products; + sourceTree = ""; + }; + FCEB6841212F00CC00D2448E /* PreluKernel */ = { + isa = PBXGroup; + children = ( + FCEB6842212F00CC00D2448E /* PreluKernel.metal */, + ); + path = PreluKernel; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + FCEB683E212F00CC00D2448E /* PreluKernel */ = { + isa = PBXNativeTarget; + buildConfigurationList = FCEB6846212F00CC00D2448E /* Build configuration list for PBXNativeTarget "PreluKernel" */; + buildPhases = ( + FCEB683D212F00CC00D2448E /* Sources */, + ); + 
buildRules = ( + ); + dependencies = ( + ); + name = PreluKernel; + productName = PreluKernel; + productReference = FCEB683F212F00CC00D2448E /* PreluKernel.metallib */; + productType = "com.apple.product-type.metal-library"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + FCEB6839212F00CC00D2448E /* Project object */ = { + isa = PBXProject; + attributes = { + LastUpgradeCheck = 0940; + ORGANIZATIONNAME = orange; + TargetAttributes = { + FCEB683E212F00CC00D2448E = { + CreatedOnToolsVersion = 9.4.1; + }; + }; + }; + buildConfigurationList = FCEB683C212F00CC00D2448E /* Build configuration list for PBXProject "PreluKernel" */; + compatibilityVersion = "Xcode 9.3"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + ); + mainGroup = FCEB6838212F00CC00D2448E; + productRefGroup = FCEB6840212F00CC00D2448E /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + FCEB683E212F00CC00D2448E /* PreluKernel */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + FCEB683D212F00CC00D2448E /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + FCEB6843212F00CC00D2448E /* PreluKernel.metal in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + FCEB6844212F00CC00D2448E /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + IPHONEOS_DEPLOYMENT_TARGET = 11.4; + MTL_ENABLE_DEBUG_INFO = YES; + SDKROOT = iphoneos; + }; + name = Debug; + }; + FCEB6845212F00CC00D2448E /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + IPHONEOS_DEPLOYMENT_TARGET = 11.4; + MTL_ENABLE_DEBUG_INFO = NO; + SDKROOT = iphoneos; + }; + name = Release; + }; + FCEB6847212F00CC00D2448E /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_TEAM = Z5M2UUN5YV; + PRODUCT_NAME = 
"$(TARGET_NAME)"; + }; + name = Debug; + }; + FCEB6848212F00CC00D2448E /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_TEAM = Z5M2UUN5YV; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + FCEB683C212F00CC00D2448E /* Build configuration list for PBXProject "PreluKernel" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + FCEB6844212F00CC00D2448E /* Debug */, + FCEB6845212F00CC00D2448E /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + FCEB6846212F00CC00D2448E /* Build configuration list for PBXNativeTarget "PreluKernel" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + FCEB6847212F00CC00D2448E /* Debug */, + FCEB6848212F00CC00D2448E /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = FCEB6839212F00CC00D2448E /* Project object */; +} diff --git a/metal/PreluKernel/PreluKernel.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/metal/PreluKernel/PreluKernel.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 0000000000000000000000000000000000000000..bb481babbf9c954edbc2d32efb8eb41c70beb21c --- /dev/null +++ b/metal/PreluKernel/PreluKernel.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git a/metal/PreluKernel/PreluKernel.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/metal/PreluKernel/PreluKernel.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 0000000000000000000000000000000000000000..18d981003d68d0546c4804ac2ff47dd97c6e7921 --- /dev/null +++ b/metal/PreluKernel/PreluKernel.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ + + + + + 
IDEDidComputeMac32BitWarning + + + diff --git a/metal/PreluKernel/PreluKernel.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist b/metal/PreluKernel/PreluKernel.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist new file mode 100644 index 0000000000000000000000000000000000000000..803d380b4d56c2270f2370cc5a1670b428d73793 --- /dev/null +++ b/metal/PreluKernel/PreluKernel.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist @@ -0,0 +1,14 @@ + + + + + SchemeUserState + + PreluKernel.xcscheme + + orderHint + 0 + + + + diff --git a/metal/PreluKernel/PreluKernel/PreluKernel.metal b/metal/PreluKernel/PreluKernel/PreluKernel.metal new file mode 100644 index 0000000000000000000000000000000000000000..7aa5e72ba9249ffe19152a9583ff103837d83f0b --- /dev/null +++ b/metal/PreluKernel/PreluKernel/PreluKernel.metal @@ -0,0 +1,12 @@ +// +// PreluKernel.metal +// PreluKernel +// +// Created by liuRuiLong on 2018/8/23. +// Copyright © 2018年 orange. All rights reserved. 
+// + +#include +using namespace metal; + + diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/ModelHelper.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/ModelHelper.swift index 53a15685fe3a875bc24fda91fad6f5d8cf6e7405..86dda2c5902da1473ded2af1c443a07335ffafe1 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/ModelHelper.swift +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/ModelHelper.swift @@ -15,104 +15,265 @@ import MetalPerformanceShaders let modelHelperMap: [SupportModel : Net] = [.mobilenet : MobileNet.init(), .mobilenet_ssd : MobileNet_ssd_hand.init()] enum SupportModel: String{ - case mobilenet = "mobilenet" - case mobilenet_ssd = "mobilenetssd" - static func supportedModels() -> [SupportModel] { - return [.mobilenet, .mobilenet_ssd] - } + case mobilenet = "mobilenet" + case mobilenet_ssd = "mobilenetssd" + static func supportedModels() -> [SupportModel] { + return [.mobilenet, .mobilenet_ssd] + } } protocol Net { - var dim: [Int] { get } - var modelPath: String { get } - var paramPath: String { get } - var modelDir: String { get } - var preprocessKernel: CusomKernel { get } - func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) - func resultStr(res: [Float]) -> String + var dim: [Int] { get } + var modelPath: String { get } + var paramPath: String { get } + var modelDir: String { get } + var preprocessKernel: CusomKernel { get } + func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) + func resultStr(res: [Float]) -> String } extension Net { - func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) { - let texture = try? MetalHelper.shared.textureLoader.newTexture(cgImage: image, options: [:]) ?! " texture loader error" - MetalHelper.scaleTexture(queue: MetalHelper.shared.queue, input: texture!, size: (224, 224)) { (resTexture) in - getTexture(resTexture) - } + func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) { + let texture = try? 
MetalHelper.shared.textureLoader.newTexture(cgImage: image, options: [:]) ?! " texture loader error" + MetalHelper.scaleTexture(queue: MetalHelper.shared.queue, input: texture!, size: (224, 224)) { (resTexture) in + getTexture(resTexture) } + } } struct MobileNet: Net{ - - class MobilenetPreProccess: CusomKernel { - init(device: MTLDevice) { - let s = CusomKernel.Shape.init(inWidth: 224, inHeight: 224, inChannel: 3) - super.init(device: device, inFunctionName: "preprocess", outputDim: s, usePaddleMobileLib: false) + + class MobilenetPreProccess: CusomKernel { + init(device: MTLDevice) { + let s = CusomKernel.Shape.init(inWidth: 224, inHeight: 224, inChannel: 3) + super.init(device: device, inFunctionName: "preprocess", outputDim: s, usePaddleMobileLib: false) + } + } + + class PreWords { + var contents: [String] = [] + init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) { + if let filePath = inBundle.path(forResource: fileName, ofType: type) { + let string = try! String.init(contentsOfFile: filePath) + contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{ + String($0[$0.index($0.startIndex, offsetBy: 10)...]) } + }else{ + fatalError("no file call \(fileName)") + } } + subscript(index: Int) -> String { + return contents[index] + } + } + + let labels = PreWords.init(fileName: "synset") + + func resultStr(res: [Float]) -> String { + var s: [String] = [] + res.top(r: 5).enumerated().forEach{ + s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100)) + } + return s.joined(separator: "\n") + } + + var preprocessKernel: CusomKernel + let dim = [1, 224, 224, 3] + let modelPath: String + let paramPath: String + let modelDir: String + + init() { + modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null" + paramPath = Bundle.main.path(forResource: "params", ofType: nil) ?! 
"para null" + modelDir = "" + preprocessKernel = MobilenetPreProccess.init(device: MetalHelper.shared.device) + } +} + +struct MobileNet_ssd_hand: Net{ + class MobilenetssdPreProccess: CusomKernel { + init(device: MTLDevice) { + let s = CusomKernel.Shape.init(inWidth: 300, inHeight: 300, inChannel: 3) + super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess", outputDim: s, usePaddleMobileLib: false) + } + } + + func resultStr(res: [Float]) -> String { + fatalError() + } + + func bboxArea(box: [Float32], normalized: Bool) -> Float32 { + if box[2] < box[0] || box[3] < box[1] { + return 0.0 + } else { + let w = box[2] - box[0] + let h = box[3] - box[1] + if normalized { + return w * h + } else { + return (w + 1) * (h + 1) + } + } + } + + + func jaccardOverLap(box1: [Float32], box2: [Float32], normalized: Bool) -> Float32 { + if box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || + box2[3] < box1[1] { + return 0.0 + } else { + let interXmin = max(box1[0], box2[0]) + let interYmin = max(box1[1], box2[1]) + let interXmax = min(box1[2], box2[2]) + let interYmax = min(box1[3], box2[3]) + let interW = interXmax - interXmin + let interH = interYmax - interYmin + let interArea = interW * interH + let bbox1Area = bboxArea(box: box1, normalized: normalized) + let bbox2Area = bboxArea(box: box2, normalized: normalized) + return interArea / (bbox1Area + bbox2Area - interArea) + } + } + + func fetchResult(paddleMobileRes: [String : Texture]) -> [Float32]{ + let bbox = paddleMobileRes["box_coder_0.tmp_0"] ?! " no bbox " + let scores = paddleMobileRes["transpose_12.tmp_0"] ?! " no scores " + let score_thredshold: Float32 = 0.01 + let nms_top_k = 400 + let keep_top_k = 200 + let nms_eta: Float32 = 1.0 + var nms_threshold: Float32 = 0.45 - class PreWords { - var contents: [String] = [] - init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) { - if let filePath = inBundle.path(forResource: fileName, ofType: type) { - let string = try! 
String.init(contentsOfFile: filePath) - contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{ - String($0[$0.index($0.startIndex, offsetBy: 10)...]) - } - }else{ - fatalError("no file call \(fileName)") - } - } - subscript(index: Int) -> String{ - return contents[index] - } + let bboxArr = bbox.metalTexture.floatArray { (f) -> Float32 in + return f } - let labels = PreWords.init(fileName: "synset") - func resultStr(res: [Float]) -> String { - var s: [String] = [] - res.top(r: 5).enumerated().forEach{ - s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100)) - } - return s.joined(separator: "\n") + let scoresArr = scores.metalTexture.floatArray { (f) -> Float32 in + return f } - var preprocessKernel: CusomKernel - let dim = [1, 224, 224, 3] - let modelPath: String - let paramPath: String - let modelDir: String + var scoreFormatArr: [Float32] = [] + var outputArr: [Float32] = [] - init() { - modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null" - paramPath = Bundle.main.path(forResource: "params", ofType: nil) ?! "para null" - modelDir = "" - preprocessKernel = MobilenetPreProccess.init(device: MetalHelper.shared.device) + let numOfOneC = (scores.originDim[2] + 3) / 4 // 480 + let cNumOfOneClass = numOfOneC * 4 // 1920 + + let boxSize = bbox.originDim[2] // 4 + let classNum = scores.originDim[1] // 7 + let classNumOneTexture = classNum * 4 // 28 + + for c in 0.. 
score_thredshold { + scoreThresholdArr.append((sliceScore[i], i)) } + } + + scoreThresholdArr.sort { $0 > $1 } + + if scoreThresholdArr.count > nms_top_k { + scoreThresholdArr.removeLast(scoreThresholdArr.count - nms_top_k) + } + + var selectedIndex: [(Int, Float32)] = [] + + while scoreThresholdArr.count > 0 { + let idx = scoreThresholdArr[0].1 + let score = scoreThresholdArr[0].0 + var keep = true + for j in 0..(bboxArr[(idx * boxSize)..<(idx * boxSize + 4)]) + let box2 = Array(bboxArr[(keptIdx * boxSize)..<(keptIdx * boxSize + 4)]) // both bounds use keptIdx so the slice is a valid 4-float box + + let overlap = jaccardOverLap(box1: box1, box2: box2, normalized: true) + keep = (overlap <= nms_threshold) + } else { + break + } + } + + if keep { + selectedIndex.append((idx, score)) + } + + scoreThresholdArr.removeFirst() + if keep && nms_eta < 1.0 && nms_threshold > 0.5 { + nms_threshold *= nms_eta + } + } + selectedIndexs[i] = selectedIndex + numDet += selectedIndex.count } - func resultStr(res: [Float]) -> String { - fatalError() + var scoreIndexPairs: [(Float32, (Int, Int))] = [] + for selected in selectedIndexs { + for scoreIndex in selected.value { + scoreIndexPairs.append((scoreIndex.1, (selected.key, scoreIndex.0))) + } } - var preprocessKernel: CusomKernel - let dim = [1, 300, 300, 3] - let modelPath: String - let paramPath: String - let modelDir: String + scoreIndexPairs.sort { $0.0 > $1.0 } + + if scoreIndexPairs.count > keep_top_k { + scoreIndexPairs.removeLast(scoreIndexPairs.count - keep_top_k) + } - init() { - modelPath = Bundle.main.path(forResource: "ssd_hand_model", ofType: nil) ?! "model null" - paramPath = Bundle.main.path(forResource: "ssd_hand_params", ofType: nil) ?! 
"para null" - modelDir = "" - preprocessKernel = MobilenetssdPreProccess.init(device: MetalHelper.shared.device) + var newIndices: [Int : [(Int, Float32)]] = [:] + for scoreIndexPair in scoreIndexPairs { + // label: scoreIndexPair.1.0, box index: scoreIndexPair.1.1, score: scoreIndexPair.0 + let label = scoreIndexPair.1.0 + if newIndices[label] != nil { + newIndices[label]?.append((scoreIndexPair.1.1, scoreIndexPair.0)) + } else { + newIndices[label] = [(scoreIndexPair.1.1, scoreIndexPair.0)] + } } + + for indice in newIndices { + let selectedIndexAndScore = indice.value + for indexAndScore in selectedIndexAndScore { + outputArr.append(Float32(indice.key)) // label + outputArr.append(indexAndScore.1) // score + let subBox = bboxArr[(indexAndScore.0 * boxSize)..<(indexAndScore.0 * boxSize + 4)] + outputArr.append(contentsOf: subBox) + } + } + + return outputArr + } + + var preprocessKernel: CusomKernel + let dim = [1, 300, 300, 3] + let modelPath: String + let paramPath: String + let modelDir: String + + init() { + modelPath = Bundle.main.path(forResource: "ssd_hand_model", ofType: nil) ?! "model null" + paramPath = Bundle.main.path(forResource: "ssd_hand_params", ofType: nil) ?! "para null" + modelDir = "" + preprocessKernel = MobilenetssdPreProccess.init(device: MetalHelper.shared.device) + } } diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift index f6d8d6c7c555f4c01deec5302b3b32f1555187e2..fcf6a6cd5fa2c6552f173cf2e83edbe1a20c1fb6 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift @@ -20,158 +20,157 @@ import MetalPerformanceShaders let threadSupport = [1] class ViewController: UIViewController { - @IBOutlet weak var resultTextView: UITextView! - @IBOutlet weak var selectImageView: UIImageView! - @IBOutlet weak var elapsedTimeLabel: UILabel! - @IBOutlet weak var modelPickerView: UIPickerView! 
- @IBOutlet weak var threadPickerView: UIPickerView! - var selectImage: UIImage? - var program: Program? - var executor: Executor? - var modelType: SupportModel = SupportModel.supportedModels()[0] - var toPredictTexture: MTLTexture? - var modelHelper: Net { - return modelHelperMap[modelType] ?! " has no this type " + @IBOutlet weak var resultTextView: UITextView! + @IBOutlet weak var selectImageView: UIImageView! + @IBOutlet weak var elapsedTimeLabel: UILabel! + @IBOutlet weak var modelPickerView: UIPickerView! + @IBOutlet weak var threadPickerView: UIPickerView! + var selectImage: UIImage? + var program: Program? + var executor: Executor? + var modelType: SupportModel = SupportModel.supportedModels()[0] + var toPredictTexture: MTLTexture? + var modelHelper: Net { + return modelHelperMap[modelType] ?! " has no this type " + } + var threadNum = 1 + + @IBAction func loadAct(_ sender: Any) { + let inModelHelper = modelHelper + let queue = MetalHelper.shared.queue + let loader = Loader.init() + do { + let modelPath = inModelHelper.modelPath + let paraPath = inModelHelper.paramPath + + program = try loader.load(device: MetalHelper.shared.device, modelPath: modelPath, paraPath: paraPath) + executor = try Executor.init(inDevice: MetalHelper.shared.device, inQueue: queue, inProgram: program!) + } catch let error { + print(error) } - var threadNum = 1 - - @IBAction func loadAct(_ sender: Any) { - let inModelHelper = modelHelper - let queue = MetalHelper.shared.queue - let loader = Loader.init() - do { - let modelPath = inModelHelper.modelPath - let paraPath = inModelHelper.paramPath - - program = try loader.load(device: MetalHelper.shared.device, modelPath: modelPath, paraPath: paraPath) - executor = try Executor.init(inDevice: MetalHelper.shared.device, inQueue: queue, inProgram: program!) 
- } catch let error { - print(error) - } - } - - @IBAction func selectImageAct(_ sender: Any) { - let imagePicker = UIImagePickerController() - imagePicker.sourceType = .camera - imagePicker.delegate = self - self.present(imagePicker, animated: true, completion: nil) + } + + @IBAction func selectImageAct(_ sender: Any) { + let imagePicker = UIImagePickerController() + imagePicker.sourceType = .camera + imagePicker.delegate = self + self.present(imagePicker, animated: true, completion: nil) + } + + @IBAction func clearAct(_ sender: Any) { + executor?.clear() + program = nil + executor = nil + } + + @IBAction func predictAct(_ sender: Any) { + guard let inTexture = toPredictTexture else { + resultTextView.text = "请选择图片 ! " + return } - @IBAction func clearAct(_ sender: Any) { - executor?.clear() - program = nil - executor = nil - + guard let inExecutor = executor else { + resultTextView.text = "请先 load ! " + return } - @IBAction func predictAct(_ sender: Any) { - guard let inTexture = toPredictTexture else { - resultTextView.text = "请选择图片 ! " - return - } - - guard let inExecutor = executor else { - resultTextView.text = "请先 load ! " - return - } - - do { - let max = 10 - var startDate = Date.init() - for i in 0.. 
Int { - if pickerView == modelPickerView { - return 1 - } else if pickerView == threadPickerView { - return 1 - } else { - fatalError() - } + func numberOfComponents(in pickerView: UIPickerView) -> Int { + if pickerView == modelPickerView { + return 1 + } else if pickerView == threadPickerView { + return 1 + } else { + fatalError() } - - func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int { - if pickerView == modelPickerView { - return SupportModel.supportedModels().count - } else if pickerView == threadPickerView { - return threadSupport.count - } else { - fatalError() - } + } + + func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int { + if pickerView == modelPickerView { + return SupportModel.supportedModels().count + } else if pickerView == threadPickerView { + return threadSupport.count + } else { + fatalError() } - - public func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? { - if pickerView == modelPickerView { - return SupportModel.supportedModels()[row].rawValue - } else if pickerView == threadPickerView { - return "\(threadSupport[row])" - } else { - fatalError() - } + } + + public func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? 
{ + if pickerView == modelPickerView { + return SupportModel.supportedModels()[row].rawValue + } else if pickerView == threadPickerView { + return "\(threadSupport[row])" + } else { + fatalError() } - - public func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) { - if pickerView == modelPickerView { - self.modelType = SupportModel.supportedModels()[row] - } else if pickerView == threadPickerView { - self.threadNum = threadSupport[row] - } else { - fatalError() - } + } + + public func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) { + if pickerView == modelPickerView { + self.modelType = SupportModel.supportedModels()[row] + } else if pickerView == threadPickerView { + self.threadNum = threadSupport[row] + } else { + fatalError() } + } } extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate { - func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) { - picker.dismiss(animated: true){[weak self] in - guard let sSelf = self, let image = info["UIImagePickerControllerOriginalImage"] as? UIImage else{ - fatalError("no image") - } - sSelf.selectImage = image - sSelf.selectImageView.image = image - sSelf.modelHelper.getTexture(image: image.cgImage!, getTexture: { (texture) in - sSelf.toPredictTexture = texture - }) - } + func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) { + picker.dismiss(animated: true){[weak self] in + guard let sSelf = self, let image = info["UIImagePickerControllerOriginalImage"] as? 
UIImage else{ + fatalError("no image") + } + sSelf.selectImage = image + sSelf.selectImageView.image = image + sSelf.modelHelper.getTexture(image: image.cgImage!, getTexture: { (texture) in + sSelf.toPredictTexture = texture + }) } + } } diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift index 6ab6f7c05e30049e850170409efcd6f049c73abe..1cca4f92088da285985af69f915fdb067ff7ff62 100644 --- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift +++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift @@ -1,10 +1,16 @@ -// -// AppDelegate.swift -// paddle-mobile-unit-test -// -// Created by liuRuiLong on 2018/8/10. -// Copyright © 2018年 orange. All rights reserved. -// +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ import UIKit diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift index 80d818a0b54f4159aa4b5c8b61a6c8781782ad96..5c67b666195b5e2795bd31065aaf31dafebf1acb 100644 --- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift +++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift @@ -1,10 +1,16 @@ -// -// ViewController.swift -// paddle-mobile-unit-test -// -// Created by liuRuiLong on 2018/8/10. 
-// Copyright © 2018年 orange. All rights reserved. -// +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ import UIKit import Metal diff --git a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj index 1b8b51e362a7dbba562a871e80b15263c3ea0506..a7816c4120a3e1820b0bea2747d44efbf3ca8424 100644 --- a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj +++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj @@ -69,6 +69,14 @@ FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7120F343420007374F /* ConvAddOp.swift */; }; FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7320F3437E0007374F /* ConvAddKernel.swift */; }; FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDC0FEA21099A1D00DC9EFB /* Tools.swift */; }; + FCDDC6C6212F9FB800E5EF74 /* PreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */; }; + FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */; }; + FCDDC6CA212FDF6800E5EF74 /* BatchNormKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */; }; + FCDDC6CC212FDFDB00E5EF74 /* 
ReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */; }; + FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */; }; + FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */; }; + FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCEB6849212F00DB00D2448E /* PreluKernel.metal */; }; + FCEB684C212F093800D2448E /* PreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEB684B212F093800D2448E /* PreluOp.swift */; }; FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */; }; FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */; }; FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2D73720E64E70007AC5F5 /* Kernel.swift */; }; @@ -141,9 +149,17 @@ FCD04E7120F343420007374F /* ConvAddOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddOp.swift; sourceTree = ""; }; FCD04E7320F3437E0007374F /* ConvAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddKernel.swift; sourceTree = ""; }; FCDC0FEA21099A1D00DC9EFB /* Tools.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Tools.swift; sourceTree = ""; }; + FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluKernel.swift; sourceTree = ""; }; + FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = 
ConvTransposeKernel.swift; sourceTree = ""; }; + FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = BatchNormKernel.metal; sourceTree = ""; }; + FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ReluKernel.metal; sourceTree = ""; }; + FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PriorBoxKernel.metal; sourceTree = ""; }; + FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvTransposeOp.swift; sourceTree = ""; }; + FCEB6849212F00DB00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = ""; }; + FCEB684B212F093800D2448E /* PreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluOp.swift; sourceTree = ""; }; FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = ConvAddBatchNormReluOp.swift; path = "paddle-mobile/Operators/ConvAddBatchNormReluOp.swift"; sourceTree = SOURCE_ROOT; }; FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluKernel.swift; sourceTree = ""; }; - FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Kernel.swift"; sourceTree = SOURCE_ROOT; }; + FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Base/Kernel.swift"; sourceTree = SOURCE_ROOT; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ 
-255,6 +271,8 @@ FCBCCC66212306B000D94F7E /* ConcatOp.swift */, FCBCCC6A2123071700D94F7E /* BoxcoderOp.swift */, FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */, + FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */, + FCEB684B212F093800D2448E /* PreluOp.swift */, ); path = Operators; sourceTree = ""; @@ -279,15 +297,15 @@ FC086BA520E67E8500D85EF7 /* Kernels */ = { isa = PBXGroup; children = ( + FCDDC6CD212FE02100E5EF74 /* Base */, + FCEB6837212F00B100D2448E /* metal */, + FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */, FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */, - FCF2D73720E64E70007AC5F5 /* Kernel.swift */, - FC1B16B220EC9A4F00678B91 /* Kernels.metal */, FC1B186520ECF1C600678B91 /* ResizeKernel.swift */, FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */, FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */, FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */, FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */, - FC4CB74820F0B954007C0C6D /* ConvKernel.metal */, FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */, FCD04E6720F315020007374F /* PoolKernel.swift */, FCD04E6B20F31A280007374F /* SoftmaxKernel.swift */, @@ -299,6 +317,7 @@ FCBCCC68212306D300D94F7E /* ConcatKernel.swift */, FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */, FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */, + FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */, ); path = Kernels; sourceTree = ""; @@ -313,6 +332,27 @@ path = Base; sourceTree = ""; }; + FCDDC6CD212FE02100E5EF74 /* Base */ = { + isa = PBXGroup; + children = ( + FCF2D73720E64E70007AC5F5 /* Kernel.swift */, + ); + path = Base; + sourceTree = ""; + }; + FCEB6837212F00B100D2448E /* metal */ = { + isa = PBXGroup; + children = ( + FC1B16B220EC9A4F00678B91 /* Kernels.metal */, + FC4CB74820F0B954007C0C6D /* ConvKernel.metal */, + FCEB6849212F00DB00D2448E /* PreluKernel.metal */, + FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */, + FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal 
*/, + FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */, + ); + path = metal; + sourceTree = ""; + }; /* End PBXGroup section */ /* Begin PBXHeadersBuildPhase section */ @@ -417,6 +457,7 @@ FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */, FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */, FC039BAA20E11CBC0081E9F8 /* ElementwiseAddOp.swift in Sources */, + FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */, FCBCCC6B2123071700D94F7E /* BoxcoderOp.swift in Sources */, FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */, FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */, @@ -426,11 +467,15 @@ FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */, FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */, FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */, + FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */, + FCDDC6C6212F9FB800E5EF74 /* PreluKernel.swift in Sources */, FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */, FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */, FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */, + FCEB684C212F093800D2448E /* PreluOp.swift in Sources */, FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */, FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */, + FCDDC6CA212FDF6800E5EF74 /* BatchNormKernel.metal in Sources */, FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */, FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */, FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */, @@ -456,6 +501,7 @@ FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */, FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */, FCBCCC69212306D300D94F7E /* ConcatKernel.swift in Sources */, + FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */, FC82735920E3C04200BE430A /* OpCreator.swift in Sources */, FCBCCC5D2122F8A100D94F7E /* DepthwiseConvOp.swift in Sources 
*/, FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */, @@ -468,6 +514,8 @@ FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */, FCBCCC67212306B000D94F7E /* ConcatOp.swift in Sources */, FCD04E6C20F31A280007374F /* SoftmaxKernel.swift in Sources */, + FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */, + FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */, FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */, FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */, FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */, diff --git a/metal/paddle-mobile/paddle-mobile/Executor.swift b/metal/paddle-mobile/paddle-mobile/Executor.swift index fc19f32ebc588fcb2f9dab7b0f92ad0d28f2efa4..2c2db196ad3be7909b127091e0a4a18e8485b309 100644 --- a/metal/paddle-mobile/paddle-mobile/Executor.swift +++ b/metal/paddle-mobile/paddle-mobile/Executor.swift @@ -17,11 +17,13 @@ import Foundation public class ResultHolder { public let dim: [Int] public let resultArr: [P] + public var intermediateResults: [Texture

]? public let elapsedTime: Double - public init(inDim: [Int], inResult: [P], inElapsedTime: Double) { + public init(inDim: [Int], inResult: [P], inElapsedTime: Double, inIntermediateResults: [Texture

]? = nil) { dim = inDim resultArr = inResult elapsedTime = inElapsedTime + intermediateResults = inIntermediateResults } } @@ -69,7 +71,7 @@ public class Executor { } } } - } + } public func predict(input: MTLTexture, expect: [Int], completionHandle: @escaping (ResultHolder

) -> Void, preProcessKernle: CusomKernel? = nil) throws { guard let buffer = queue.makeCommandBuffer() else { @@ -116,7 +118,6 @@ public class Executor { // self.ops[2].delogOutput() - let afterDate = Date.init() guard let outputVar = self.program.scope.output() else { diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift index 43f095d7008ad14ac71d610728e19ac6f6817800..b972838a53006a7b3a5d9f33e7105dc3aa95e31a 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift @@ -22,147 +22,194 @@ import Foundation */ protocol OpParam { - associatedtype OutputType: Variant - var output: OutputType { get set } - func outputDesc() -> String - - associatedtype ParamPrecisionType: PrecisionType - init(opDesc: OpDesc, inScope: Scope) throws - static func getFirstTensor(key: String, map: [String : [String]], from: Scope) throws -> VarType - static func inputX(inputs: [String : [String]], from: Scope) throws -> VarType - static func inputBiase(inputs: [String : [String]], from: Scope) throws -> VarType - static func inputMean(inputs: [String : [String]], from: Scope) throws -> VarType - static func inputScale(inputs: [String : [String]], from: Scope) throws -> VarType - static func inputVariance(inputs: [String : [String]], from: Scope) throws -> VarType - static func inputFilter(paraInputs: [String : [String]], from: Scope) throws -> VarType - static func input(inputs: [String : [String]], from: Scope) throws -> VarType - static func output(outputs: [String : [String]], from: Scope) throws -> VarType - static func outputY(outputs: [String : [String]], from: Scope) throws -> VarType - static func inputY(inputs: [String : [String]], from: Scope) throws -> VarType - static func outputOut(outputs: [String : [String]], from: Scope) throws -> VarType - static func getAttr(key: String, attrs: [String : Attr]) throws 
-> T + associatedtype OutputType: Variant + var output: OutputType { get set } + func outputDesc() -> String + + associatedtype ParamPrecisionType: PrecisionType + init(opDesc: OpDesc, inScope: Scope) throws + static func getFirstTensor(key: String, map: [String : [String]], from: Scope) throws -> VarType + static func inputX(inputs: [String : [String]], from: Scope) throws -> VarType + static func inputBiase(inputs: [String : [String]], from: Scope) throws -> VarType + static func inputMean(inputs: [String : [String]], from: Scope) throws -> VarType + static func inputScale(inputs: [String : [String]], from: Scope) throws -> VarType + static func inputVariance(inputs: [String : [String]], from: Scope) throws -> VarType + static func inputFilter(paraInputs: [String : [String]], from: Scope) throws -> VarType + static func input(inputs: [String : [String]], from: Scope) throws -> VarType + static func output(outputs: [String : [String]], from: Scope) throws -> VarType + static func outputY(outputs: [String : [String]], from: Scope) throws -> VarType + static func inputY(inputs: [String : [String]], from: Scope) throws -> VarType + + static func inputImage(inputs: [String : [String]], from: Scope) throws -> VarType + + static func outputBoxes(outputs: [String : [String]], from: Scope) throws -> VarType + + static func outputOut(outputs: [String : [String]], from: Scope) throws -> VarType + + static func outputVariances(outputs: [String : [String]], from: Scope) throws -> VarType + + static func getAttr(key: String, attrs: [String : Attr]) throws -> T + + static func inputAlpha(inputs: [String : [String]], from: Scope) throws -> VarType + } extension OpParam { - func outputDesc() -> String { - return output.debugDescription + func outputDesc() -> String { + return output.debugDescription + } + + static func getFirstTensor(key: String, map: [String : [String]], from: Scope) throws -> VarType { + guard let mapKeys = map[key], mapKeys.count > 0 else { + throw 
PaddleMobileError.paramError(message: key + " not found in \(map) or maped values is empty") } - - static func getFirstTensor(key: String, map: [String : [String]], from: Scope) throws -> VarType { - guard let mapKeys = map[key], mapKeys.count > 0 else { - throw PaddleMobileError.paramError(message: key + " not found in \(map) or maped values is empty") - } - guard let variant = from[mapKeys[0]], let v = variant as? VarType else { - throw PaddleMobileError.paramError(message: mapKeys[0] + " not found in scope") - } - return v + guard let variant = from[mapKeys[0]], let v = variant as? VarType else { + throw PaddleMobileError.paramError(message: mapKeys[0] + " not found in scope") } - - static func inputX(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorX: VarType = try getFirstTensor(key: "X", map: inputs, from: from) - - return tensorX - } catch let error { - throw error - } + return v + } + + static func outputVariances(outputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorVariances: VarType = try getFirstTensor(key: "Variances", map: outputs, from: from) + return tensorVariances + } catch let error { + throw error } - - static func input(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorInput: VarType = try getFirstTensor(key: "Input", map: inputs, from: from) - return tensorInput - } catch let error { - throw error - } + } + + static func inputAlpha(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let alphaTensor: VarType = try getFirstTensor(key: "Alpha", map: inputs, from: from) + return alphaTensor + } catch let error { + throw error } - - static func output(outputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorOutput: VarType = try getFirstTensor(key: "Output", map: outputs, from: from) - return tensorOutput - } catch let error { - throw error - } - } - static func outputY(outputs: [String : [String]], from: Scope) 
throws -> VarType { - do { - let tensorOutputY: VarType = try getFirstTensor(key: "Y", map: outputs, from: from) - return tensorOutputY - } catch let error { - throw error - } - } - static func inputY(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorY: VarType = try getFirstTensor(key: "Y", map: inputs, from: from) - return tensorY - } catch let error { - throw error - } + } + + + static func inputImage(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorImage: VarType = try getFirstTensor(key: "Image", map: inputs, from: from) + return tensorImage + } catch let error { + throw error } - - static func outputOut(outputs: [String : [String]], from: Scope) throws -> VarType { - do { - let out: VarType = try getFirstTensor(key: "Out", map: outputs, from: from) - return out - } catch let error { - throw error - } - } - static func inputFilter(paraInputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorFilter: VarType = try getFirstTensor(key: "Filter", map: paraInputs, from: from) - return tensorFilter - } catch let error { - throw error - } + } + + static func inputX(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorX: VarType = try getFirstTensor(key: "X", map: inputs, from: from) + return tensorX + } catch let error { + throw error } - - static func inputBiase(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorBias: VarType = try getFirstTensor(key: "Bias", map: inputs, from: from) - return tensorBias - } catch let error { - throw error - } + } + + static func outputBoxes(outputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorBox: VarType = try getFirstTensor(key: "Boxes", map: outputs, from: from) + return tensorBox + } catch let error { + throw error } - - static func inputMean(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorMean: VarType = try 
getFirstTensor(key: "Mean", map: inputs, from: from) - return tensorMean - } catch let error { - throw error - } + } + + static func input(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorInput: VarType = try getFirstTensor(key: "Input", map: inputs, from: from) + return tensorInput + } catch let error { + throw error } - - static func inputScale(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorScale: VarType = try getFirstTensor(key: "Scale", map: inputs, from: from) - return tensorScale - } catch let error { - throw error - } + } + + static func output(outputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorOutput: VarType = try getFirstTensor(key: "Output", map: outputs, from: from) + return tensorOutput + } catch let error { + throw error } - - static func inputVariance(inputs: [String : [String]], from: Scope) throws -> VarType { - do { - let tensorVariance: VarType = try getFirstTensor(key: "Variance", map: inputs, from: from) - return tensorVariance - } catch let error { - throw error - } + } + static func outputY(outputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorOutputY: VarType = try getFirstTensor(key: "Y", map: outputs, from: from) + return tensorOutputY + } catch let error { + throw error + } + } + static func inputY(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorY: VarType = try getFirstTensor(key: "Y", map: inputs, from: from) + return tensorY + } catch let error { + throw error + } + } + + static func outputOut(outputs: [String : [String]], from: Scope) throws -> VarType { + do { + let out: VarType = try getFirstTensor(key: "Out", map: outputs, from: from) + return out + } catch let error { + throw error + } + } + static func inputFilter(paraInputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorFilter: VarType = try getFirstTensor(key: "Filter", map: paraInputs, from: 
from) + return tensorFilter + } catch let error { + throw error + } + } + + static func inputBiase(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorBias: VarType = try getFirstTensor(key: "Bias", map: inputs, from: from) + return tensorBias + } catch let error { + throw error + } + } + + static func inputMean(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorMean: VarType = try getFirstTensor(key: "Mean", map: inputs, from: from) + return tensorMean + } catch let error { + throw error + } + } + + static func inputScale(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorScale: VarType = try getFirstTensor(key: "Scale", map: inputs, from: from) + return tensorScale + } catch let error { + throw error + } + } + + static func inputVariance(inputs: [String : [String]], from: Scope) throws -> VarType { + do { + let tensorVariance: VarType = try getFirstTensor(key: "Variance", map: inputs, from: from) + return tensorVariance + } catch let error { + throw error + } + } + + static func getAttr(key: String, attrs: [String : Attr]) throws -> T{ + guard let attr = attrs[key] else { + throw PaddleMobileError.paramError(message: "attr \(key) can't found in: \(attrs)" ) } - static func getAttr(key: String, attrs: [String : Attr]) throws -> T{ - guard let attr = attrs[key] else { - throw PaddleMobileError.paramError(message: "attr \(key) can't found in: \(attrs)" ) - } - - guard let tAttr = attr as? T else { - throw PaddleMobileError.paramError(message: "key: \(key) attr: \(attr) type error" ) - } - return tAttr + guard let tAttr = attr as? 
T else { + throw PaddleMobileError.paramError(message: "key: \(key) attr: \(attr) type error" ) } + return tAttr + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift index 4814a63f3675fb4f246950dd43b82387cf42cc60..ce47ac434237b463f52e7f84a335a549553082c2 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift @@ -14,10 +14,12 @@ import Foundation + class ConvAddBatchNormReluParam: OpParam { typealias ParamPrecisionType = P required init(opDesc: OpDesc, inScope: Scope) throws { do { + filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) input = try ConvAddBatchNormReluParam.input(inputs: opDesc.inputs, from: inScope) output = try ConvAddBatchNormReluParam.outputOut(outputs: opDesc.outputs, from: inScope) @@ -29,6 +31,7 @@ class ConvAddBatchNormReluParam: OpParam { groups = try ConvAddBatchNormReluParam.getAttr(key: "groups", attrs: opDesc.attrs) variance = try ConvAddBatchNormReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope) bias = try ConvAddBatchNormReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope) + scale = try ConvAddBatchNormReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope) mean = try ConvAddBatchNormReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope) y = try ConvAddBatchNormReluParam.inputY(inputs: opDesc.paraInputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift index 126e8bc9545b4bc6b15364096ac18502b0641a70..f1ab27bc0de2e08c409794e813c86caa0f18ceb6 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift @@ -61,6 +61,7 @@ class ConvAddOp: Operator, ConvAddParam

>, typealias OpType = ConvAddOp

func inferShape() { + let inDims = para.input.dim let filterDim = para.filter.dim let strides = para.stride diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvTransposeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvTransposeOp.swift new file mode 100644 index 0000000000000000000000000000000000000000..10dbecb7a46f150528df19a9ae1c3069d95ddc6a --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvTransposeOp.swift @@ -0,0 +1,48 @@ +///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. */ + + +import Foundation + +class ConvTransposeParam: ConvParam

{ + typealias ParamPrecisionType = P + required init(opDesc: OpDesc, inScope: Scope) throws { + do { + try super.init(opDesc: opDesc, inScope: inScope) + } catch let error { + throw error + } + } +} + +class ConvTransposeOp: Operator, ConvTransposeParam

>, Runable, Creator, InferShaperable{ + + func inferShape() { + // para.output.dim = para.input.dim + } + + typealias OpType = ConvTransposeOp

+ func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { + do { + try kernel.compute(commandBuffer: buffer, param: para) + } catch let error { + throw error + } + } + func delogOutput() { + print("conv transpose delog") + let _: P? = para.input.metalTexture.logDesc(header: "conv transpose input: ", stridable: true) + let _: P? = para.output.metalTexture.logDesc(header: "conv transpose output: ", stridable: true) + } +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Base/Kernel.swift similarity index 100% rename from metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernel.swift rename to metal/paddle-mobile/paddle-mobile/Operators/Kernels/Base/Kernel.swift diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift index c7178e2f6f96ae2b4857dfbceabb0dbc4a3e5729..88e5f015684221bd7286e0ee4e608d85acaa325d 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift @@ -109,6 +109,7 @@ class ConvAddBatchNormReluKernel: Kernel, Computable, Testable guard let encoder = commandBuffer.makeComputeCommandEncoder() else { throw PaddleMobileError.predictError(message: " encode is nil") } + encoder.setTexture(param.input.metalTexture, index: 0) encoder.setTexture(param.output.metalTexture, index: 1) diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift index 1f49fd14c724e7e9348a134761ceafda8adfcf0f..d37100b27486adf1ccff6904e355ec3a66abdddb 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift @@ -13,38 +13,44 @@ 
limitations under the License. */ import Foundation -import MetalPerformanceShaders class ConvAddKernel: Kernel, Computable { - var metalParam: MetalConvParam! - required init(device: MTLDevice, param: ConvAddParam

) { - super.init(device: device, inFunctionName: "conv_add_1x1") - let offsetX = param.filter.width/2 - Int(param.paddings[0]) - let offsetY = param.filter.height/2 - Int(param.paddings[1]) - - param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) - param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) - - print("offset x: \(offsetX)") - print("offset y: \(offsetY)") - - let offsetZ = 0.0 - metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) + var metalParam: MetalConvParam! + required init(device: MTLDevice, param: ConvAddParam

) { + if param.filter.width == 1 && param.filter.height == 1 { + super.init(device: device, inFunctionName: "conv_add_1x1") + } else if param.filter.channel == 1 { + super.init(device: device, inFunctionName: "depthwise_conv_add_3x3") + } else { + super.init(device: device, inFunctionName: "conv_add_3x3") } - func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam

) throws { - - - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) - encoder.setBuffer(param.y.buffer, offset: 0, index: 2) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() + param.output.initTexture(device: device, transpose: [0, 3, 1, 2]) + + let offsetX = param.filter.width/2 - Int(param.paddings[0]) + let offsetY = param.filter.height/2 - Int(param.paddings[1]) + + param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) + param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) + + print("offset x: \(offsetX)") + print("offset y: \(offsetY)") + + let offsetZ = 0.0 + metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) + } + + func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) + encoder.setBuffer(param.y.buffer, offset: 0, index: 2) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift index 5b8726f5596fdf19d4ae1186110d182c78ac0900..721f231dbb5522aaa496481621d5966391825d83 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvBNReluKernel.swift @@ -13,6 +13,7 @@ limitations under the License. 
*/ import Foundation +import MetalPerformanceShaders struct ConvBNReluTestParam: TestParam { let inputTexture: MTLTexture @@ -24,6 +25,7 @@ struct ConvBNReluTestParam: TestParam { let newBiaseBuffer: MTLBuffer let filterSize: (width: Int, height: Int, channel: Int) init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) { + inputTexture = inInputTexture outputTexture = inOutputTexture metalParam = inMetalParam diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift index 92c43fe3218aa0c3ecfabd9a8d85c8107ecad273..60ce245a785cbd970e37d7cae6ac318a086c945c 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift @@ -14,38 +14,46 @@ import Foundation - public struct MetalConvParam { - let offsetX: Int16 - let offsetY: Int16 - let offsetZ: Int16 - let strideX: UInt16 - let strideY: UInt16 - let paddedZ: UInt16 + let offsetX: Int16 + let offsetY: Int16 + let offsetZ: Int16 + let strideX: UInt16 + let strideY: UInt16 + let paddedZ: UInt16 } class ConvKernel: Kernel, Computable { - var metalParam: MetalConvParam! - required init(device: MTLDevice, param: ConvParam

) { - super.init(device: device, inFunctionName: "conv_add_1x1") - let offsetX = param.filter.dim[2]/2 - Int(param.paddings[0]) - let offsetY = param.filter.dim[1]/2 - Int(param.paddings[1]) - let offsetZ = 0.0 - param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) - - metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) + var metalParam: MetalConvParam! + required init(device: MTLDevice, param: ConvParam

) { + if param.filter.width == 1 && param.filter.height == 1 { + super.init(device: device, inFunctionName: "conv_1x1") + } else if param.filter.channel == 1 { + super.init(device: device, inFunctionName: "depthwise_conv_3x3") + } else { + super.init(device: device, inFunctionName: "conv_3x3") } - func compute(commandBuffer: MTLCommandBuffer, param: ConvParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) - encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() + let offsetX = param.filter.dim[2]/2 - Int(param.paddings[0]) + let offsetY = param.filter.dim[1]/2 - Int(param.paddings[1]) + let offsetZ = 0.0 + param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) + + metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3])) + } + + func compute(commandBuffer: MTLCommandBuffer, param: ConvParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") } + + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) + encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) + encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } } + + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvTransposeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvTransposeKernel.swift new file mode 100644 index 0000000000000000000000000000000000000000..0b11e717693a5c51a58b2c16ebd1ccf4ee135ec9 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvTransposeKernel.swift @@ -0,0 +1,63 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +struct MetalConvTransposeParam { + let kernelW: UInt16; + let kernelH: UInt16; + + let strideX: UInt16; + let strideY: UInt16; + + let paddingX: UInt16; + let paddingY: UInt16; + + let dilationX: UInt16; + let dilationY: UInt16; +} + +class ConvTransposeKernel: Kernel, Computable{ + var metalParam: MetalConvTransposeParam! + required init(device: MTLDevice, param: ConvTransposeParam

) {
+        super.init(device: device, inFunctionName: "conv_transpose")
+        let kernelWidth = UInt16(param.filter.width)
+        let kernelHeight = UInt16(param.filter.height)
+
+        let strideX = UInt16(param.stride[0])
+        let strideY = UInt16(param.stride[1])
+        let paddingX = UInt16(param.paddings[0])
+        let paddingY = UInt16(param.paddings[1])
+        let dilationX = UInt16(param.dilations[0])
+        let dilationY = UInt16(param.dilations[1])
+
+        metalParam = MetalConvTransposeParam.init(kernelW: kernelWidth, kernelH: kernelHeight, strideX: strideX, strideY: strideY, paddingX: paddingX, paddingY: paddingY, dilationX: dilationX, dilationY: dilationY)
+
+    }
+
+    /// Encodes one transposed-convolution pass.
+    /// Bindings: texture 0 = input, texture 1 = output, buffer 0 = `metalParam`, buffer 1 = filter weights.
+    /// - Throws: `PaddleMobileError.predictError` when no compute encoder can be created.
+    func compute(commandBuffer: MTLCommandBuffer, param: ConvTransposeParam<P>) throws {
+        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encoder is nil")
+        }
+
+        encoder.setTexture(param.input.metalTexture, index: 0)
+        encoder.setTexture(param.output.metalTexture, index: 1)
+        encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvTransposeParam>.size, index: 0)
+        encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
+        encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        encoder.endEncoding()
+    }
+}
+
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PreluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PreluKernel.swift
new file mode 100644
index 0000000000000000000000000000000000000000..1b54fdde38cb6e98b8edeb454bf40f2f141e560c
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PreluKernel.swift
@@ -0,0 +1,33 @@
+//
+//  PreluKernel.swift
+//  paddle-mobile
+//
+//  Created by liuRuiLong on 2018/8/24.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+import Foundation
+
+/// Compute kernel for the PReLU operator; the Metal entry point depends on `param.mode`.
+// NOTE(review): the generic clause was lost in extraction; `PrecisionType` is assumed — confirm.
+class PreluKernel<P: PrecisionType>: Kernel, Computable{
+    /// Picks `prelu_channel`, `prelu_element`, or `prelu_other` for any other mode.
+    required init(device: MTLDevice, param: PreluParam<P>) {
+        let functionName: String
+        if param.mode == "channel" {
+            functionName = "prelu_channel"
+        } else if param.mode == "element" {
+            functionName = "prelu_element"
+        } else {
+            functionName = "prelu_other"
+        }
+        super.init(device: device, inFunctionName: functionName)
+    }
+
+    /// Encodes one PReLU pass.
+    /// Bindings: texture 0 = input, texture 1 = output, buffer 0 = alpha.
+    /// - Throws: `PaddleMobileError.predictError` when no compute encoder can be created.
+    func compute(commandBuffer: MTLCommandBuffer, param: PreluParam<P>) throws {
+        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encoder is nil")
+        }
+
+        encoder.setTexture(param.input.metalTexture, index: 0)
+        encoder.setTexture(param.output.metalTexture, index: 1)
+        encoder.setBuffer(param.alpha.buffer, offset: 0, index: 0)
+        encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        encoder.endEncoding()
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift
index 8e33c19f77675b446b7b08d70475831cfea7d989..6228741ef7c0694dae1c8abf7bdbfb1b7f7b8343 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PriorBoxKernel.swift
@@ -14,18 +14,89 @@
 
 import Foundation
 
+/// CPU-side copy of the shader's `PriorBoxMetalParam`.
+/// Field order and types must stay in sync with the struct in PriorBoxKernel.metal.
+struct PriorBoxMetalParam {
+    let offset: Float32
+    let stepWidth: Float32
+    let stepHeight: Float32
+    let minSize: Float32
+    let maxSize: Float32
+    let imageWidth: Float32
+    let imageHeight: Float32
+    let clip: Bool
+    let numPriors: uint
+    let aspecRatiosSize: uint
+    let minSizeSize: uint
+    let maxSizeSize: uint
+}
+
 class PriorBoxKernel<P: PrecisionType>: Kernel, Computable{
+    var metalParam: PriorBoxMetalParam!
+    /// Expanded aspect-ratio list (leading 1.0, de-duplicated, optional reciprocals).
+    /// The shader indexes this list up to `aspecRatiosSize`, so this is what must be uploaded.
+    var expandedAspectRatios: [Float32] = []
+
+    /// Prepares output textures, expands the aspect ratios and packs the shader parameters.
+    required init(device: MTLDevice, param: PriorBoxParam<P>) {
+        // The Metal entry point is declared as `prior_box`.
+        super.init(device: device, inFunctionName: "prior_box")
+        param.output.initTexture(device: device, transpose: [2, 0, 1, 3])
+        param.outputVariances.initTexture(device: device, transpose: [2, 0, 1, 3])
+
+        let imageWidth = Float32(param.inputImage.originDim[3])
+        let imageHeight = Float32(param.inputImage.originDim[2])
+
+        // NOTE(review): the feature size is read from `inputImage`, so the default step below
+        // is always 1. It presumably should come from `param.input` — confirm.
+        let featureWidth = param.inputImage.originDim[3]
+        let featureHeight = param.inputImage.originDim[2]
+
+        if param.stepW == 0 || param.stepH == 0 {
+            param.stepW = Float32(imageWidth) / Float32(featureWidth)
+            param.stepH = Float32(imageHeight) / Float32(featureHeight)
+        }
+
+        var outputAspectRatior: [Float32] = [1.0]
+
+        let epsilon = 1e-6
+        for ar in param.aspectRatios {
+            let alreadyExist = outputAspectRatior.contains { outputAr in
+                fabs(Double(ar) - Double(outputAr)) < Double(epsilon)
+            }
+            // A duplicate contributes nothing, so its reciprocal is skipped as well.
+            if alreadyExist {
+                continue
+            }
+            outputAspectRatior.append(ar)
+            if param.flip {
+                outputAspectRatior.append(1.0 / ar)
+            }
+        }
+
+        param.newAspectRatios = outputAspectRatior
+        expandedAspectRatios = outputAspectRatior
+        let aspectRatiosSize = uint(outputAspectRatior.count)
+
+        let maxSizeSize: uint = uint(param.maxSizes.count)
+        let minSizeSize: uint = uint(param.minSizes.count)
+
+        let numPriors = aspectRatiosSize * minSizeSize + maxSizeSize
+
+        // Only the last min/max size is forwarded to the shader.
+        let minSize = param.minSizes.last ?? 0.0
+        let maxSize = param.maxSizes.last ?? 0.0
+
+        metalParam = PriorBoxMetalParam.init(offset: param.offset, stepWidth: param.stepW, stepHeight: param.stepH, minSize: minSize, maxSize: maxSize, imageWidth: imageWidth, imageHeight: imageHeight, clip: param.clip, numPriors: numPriors, aspecRatiosSize: aspectRatiosSize, minSizeSize: minSizeSize, maxSizeSize: maxSizeSize)
+
+    }
+
     func compute(commandBuffer: MTLCommandBuffer, param: PriorBoxParam<P>) throws {
         guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
             throw PaddleMobileError.predictError(message: " encode is nil")
         }
 
         encoder.setTexture(param.input.metalTexture, index: 0)
         encoder.setTexture(param.output.metalTexture, index: 1)
+        encoder.setTexture(param.outputVariances.metalTexture, index: 2)
+        encoder.setBytes(&metalParam, length: MemoryLayout<PriorBoxMetalParam>.size, index: 0)
+        // Upload the expanded list: the shader reads aspect_ratios[0 ..< aspecRatiosSize].
+        encoder.setBytes(expandedAspectRatios, length: MemoryLayout<Float32>.size * expandedAspectRatios.count, index: 1)
+        // NOTE(review): assumes `variances` holds Float32 values (the shader reads one float4) — confirm.
+        encoder.setBytes(param.variances, length: MemoryLayout<Float32>.size * param.variances.count, index: 2)
         encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
         encoder.endEncoding()
     }
 
-    required init(device: MTLDevice, param: PriorBoxParam<P>) {
-        super.init(device: device, inFunctionName: "priorbox")
-    }
+
 }
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..2311836eef03ebf13e1793d812f9f28a37a8402b
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal
@@ -0,0 +1,42 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/
+
+#include <metal_stdlib>
+using namespace metal;
+
+// Per-slice affine transform on half textures: out = in * newScale[z] + newBias[z].
+kernel void batchnorm_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                           texture2d_array<half, access::write> outTexture [[texture(1)]],
+                           const device half4 * newScale [[buffer(0)]],
+                           const device half4 * newBias [[buffer(1)]],
+                           uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) return;
+    const half4 input = inTexture.read(gid.xy, gid.z);
+    half4 output = input * newScale[gid.z] + newBias[gid.z];
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// Float variant of batchnorm_half.
+kernel void batchnorm(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                      texture2d_array<float, access::write> outTexture [[texture(1)]],
+                      const device float4 * newScale [[buffer(0)]],
+                      const device float4 * newBias [[buffer(1)]],
+                      uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) return;
+    const float4 input = inTexture.read(gid.xy, gid.z);
+    float4 output = input * newScale[gid.z] + newBias[gid.z];
+    outTexture.write(output, gid.xy, gid.z);
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal
similarity index 55%
rename from metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal
rename to metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal
index 9d0c6de35ed23b14a05a9c3e6398931556d535a0..273c21c1a27862f6dbdddd0fb45bcb2a6cb1488a 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal
@@ -314,49 +314,6 @@ kernel void conv_add_batch_norm_relu_3x3(texture2d_array
     outTexture.write(output, gid.xy, gid.z);
 }
 
-kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
-                         texture2d_array<float, access::write> outTexture [[texture(1)]],
-                         constant MetalConvParam &param [[buffer(0)]],
-                         const device float4 *weights [[buffer(1)]],
-                         const device float4 *biase [[buffer(2)]],
-                         uint3 gid [[thread_position_in_grid]]) {
-
-    if (gid.x >= outTexture.get_width() ||
-        gid.y >= outTexture.get_height() ||
-        gid.z >= outTexture.get_array_size()) {
-        return;
-    }
-
-    ushort2 stride = ushort2(param.strideX, param.strideY);
-    ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
-
-    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
-    const uint kernelHXW = 1;
-
-    uint input_arr_size = inTexture.get_array_size();
-    uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
-
-    float4 output = float4(0.0);
-
-    float4 input;
-    for (uint i = 0; i < input_arr_size; ++i) {
-        input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
-        float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
-        output.x += dot(input, weight_x);
-
-        float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
-        output.y += dot(input, weight_y);
-
-        float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
-        output.z += dot(input, weight_z);
-
-        float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
-        output.w += dot(input, weight_w);
-    }
-    output = output + biase[gid.z];
-    outTexture.write(output, gid.xy, gid.z);
-}
-
 kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
                                                    texture2d_array<float, access::write> outTexture [[texture(1)]],
                                                    constant MetalConvParam &param [[buffer(0)]],
@@ -398,3 +355,347 @@ kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array
+// NOTE(review): the lines between the hunk header and this signature were lost in extraction.
+// `MetalConvTransposeParam` must be declared for Metal and mirror the Swift struct
+// (kernelW, kernelH, strideX, strideY, paddingX, paddingY, dilationX, dilationY).
+//
+// Transposed convolution. Each thread produces one float4 of output and gathers every
+// kernel tap (w, h) whose source pixel lies inside the input.
+// Weight layout, as indexed below: [outSlice * 4 + component][w][h][inSlice] in float4 units.
+kernel void conv_transpose(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                           texture2d_array<float, access::write> outTexture [[texture(1)]],
+                           constant MetalConvTransposeParam &param [[buffer(0)]],
+                           const device float4 *weights [[buffer(1)]],
+                           uint3 gid [[thread_position_in_grid]]){
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    int input_array_size = inTexture.get_array_size();
+
+    uint kernel_one_output_slice = input_array_size * param.kernelW * param.kernelH;
+
+    uint kernel_stride_z = gid.z * 4 * (kernel_one_output_slice);
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+
+    // The accumulator must start from zero; it is only ever added to below.
+    float4 output = float4(0.0);
+
+    for (int w = 0; w < param.kernelW; ++w) {
+        // Signed arithmetic so that a tap left of the input is detected directly.
+        // NOTE(review): for stride > 1 a tap should presumably contribute only when
+        // top_x is a multiple of strideX — confirm.
+        int top_x = int(gid.x) + int(param.paddingX) - w * int(param.dilationX);
+        if (top_x < 0) {
+            continue;
+        }
+        int input_x = top_x / int(param.strideX);
+        if (input_x >= int(inTexture.get_width())) {
+            continue;
+        }
+
+        for (int h = 0; h < param.kernelH; ++h) {
+            int top_y = int(gid.y) + int(param.paddingY) - h * int(param.dilationY);
+            if (top_y < 0) {
+                continue;
+            }
+            int input_y = top_y / int(param.strideY);
+            if (input_y >= int(inTexture.get_height())) {
+                continue;
+            }
+
+            uint kernel_index = (w * param.kernelH + h) * inTexture.get_array_size();
+
+            for (int slice = 0; slice < input_array_size; ++slice) {
+                float4 kernel_slice = weights[kernel_stride_z + 0 * kernel_one_output_slice + kernel_index + slice];
+                float4 kernel_slice1 = weights[kernel_stride_z + 1 * kernel_one_output_slice + kernel_index + slice];
+                float4 kernel_slice2 = weights[kernel_stride_z + 2 * kernel_one_output_slice + kernel_index + slice];
+                float4 kernel_slice3 = weights[kernel_stride_z + 3 * kernel_one_output_slice + kernel_index + slice];
+
+                // Sample at (x, y); each of the four weight rows feeds its own output component.
+                float4 input = inTexture.sample(sample, float2(input_x, input_y), slice);
+                output.x += dot(input, kernel_slice);
+                output.y += dot(input, kernel_slice1);
+                output.z += dot(input, kernel_slice2);
+                output.w += dot(input, kernel_slice3);
+            }
+        }
+    }
+
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+
+// conv
+#pragma mark -- conv
+// 3x3 convolution without bias. Weights are indexed as [outSlice * 4 + component][tap][inSlice].
+kernel void conv_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                     texture2d_array<float, access::write> outTexture [[texture(1)]],
+                     constant MetalConvParam &param [[buffer(0)]],
+                     const device float4 *weights [[buffer(1)]],
+                     uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    ushort2 stride = ushort2(param.strideX, param.strideY);
+    const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 9;
+    uint input_arr_size = inTexture.get_array_size();
+    uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
+
+    float4 output = float4(0.0);
+
+    float4 input[9];
+    for (uint i = 0; i < input_arr_size; ++i) {
+        input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
+        input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
+        input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
+        input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
+        input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
+        input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
+        input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
+        input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
+        input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
+        for (int j = 0; j < 9; ++j) {
+            float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.x += dot(input[j], weight_x);
+
+            float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.y += dot(input[j], weight_y);
+
+            float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.z += dot(input[j], weight_z);
+
+            float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.w += dot(input[j], weight_w);
+        }
+    }
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// Depthwise 3x3 convolution without bias: each channel is filtered by its own nine scalar weights.
+kernel void depthwise_conv_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                               texture2d_array<float, access::write> outTexture [[texture(1)]],
+                               constant MetalConvParam &param [[buffer(0)]],
+                               const device float *weights [[buffer(1)]],
+                               uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+    uint output_slice = gid.z;
+    ushort2 stride = ushort2(param.strideX, param.strideY);
+    ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 9;
+    uint weithTo = gid.z * kernelHXW * 4;
+    float4 output = float4(0.0);
+    float4 inputs[9];
+    inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
+    inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
+    inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
+    inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
+    inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
+    inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
+    inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
+    inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
+    inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
+    for (int j = 0; j < 9; ++j) {
+        float4 input = inputs[j];
+        output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
+        output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
+        output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
+        output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
+    }
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// 1x1 convolution without bias.
+kernel void conv_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                     texture2d_array<float, access::write> outTexture [[texture(1)]],
+                     constant MetalConvParam &param [[buffer(0)]],
+                     const device float4 *weights [[buffer(1)]],
+                     uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    ushort2 stride = ushort2(param.strideX, param.strideY);
+    ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 1;
+
+    uint input_arr_size = inTexture.get_array_size();
+    uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
+
+    float4 output = float4(0.0);
+
+    float4 input;
+    for (uint i = 0; i < input_arr_size; ++i) {
+        input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
+        float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
+        output.x += dot(input, weight_x);
+
+        float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
+        output.y += dot(input, weight_y);
+
+        float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
+        output.z += dot(input, weight_z);
+
+        float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
+        output.w += dot(input, weight_w);
+    }
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+#pragma mark - convAdd
+// 1x1 convolution followed by a per-slice bias add.
+kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                         texture2d_array<float, access::write> outTexture [[texture(1)]],
+                         constant MetalConvParam &param [[buffer(0)]],
+                         const device float4 *weights [[buffer(1)]],
+                         const device float4 *biase [[buffer(2)]],
+                         uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    ushort2 stride = ushort2(param.strideX, param.strideY);
+    ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 1;
+
+    uint input_arr_size = inTexture.get_array_size();
+    uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
+
+    float4 output = float4(0.0);
+
+    float4 input;
+    for (uint i = 0; i < input_arr_size; ++i) {
+        input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
+        float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
+        output.x += dot(input, weight_x);
+
+        float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
+        output.y += dot(input, weight_y);
+
+        float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
+        output.z += dot(input, weight_z);
+
+        float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
+        output.w += dot(input, weight_w);
+    }
+    output = output + biase[gid.z];
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// 3x3 convolution followed by a per-slice bias add.
+// `new_scale` and `new_biase` are bound but not read by this kernel.
+kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                         texture2d_array<float, access::write> outTexture [[texture(1)]],
+                         constant MetalConvParam &param [[buffer(0)]],
+                         const device float4 *weights [[buffer(1)]],
+                         const device float4 *biase [[buffer(2)]],
+                         const device float4 *new_scale [[buffer(3)]],
+                         const device float4 *new_biase [[buffer(4)]],
+                         uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    ushort2 stride = ushort2(param.strideX, param.strideY);
+    const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 9;
+    uint input_arr_size = inTexture.get_array_size();
+    uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
+
+    float4 output = float4(0.0);
+
+    float4 input[9];
+    for (uint i = 0; i < input_arr_size; ++i) {
+        input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
+        input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
+        input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
+        input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
+        input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
+        input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
+        input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
+        input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
+        input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
+        for (int j = 0; j < 9; ++j) {
+            float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.x += dot(input[j], weight_x);
+
+            float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.y += dot(input[j], weight_y);
+
+            float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.z += dot(input[j], weight_z);
+
+            float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.w += dot(input[j], weight_w);
+        }
+    }
+    output = output + biase[gid.z];
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// Depthwise 3x3 convolution followed by a per-slice bias add.
+// `new_scale` and `new_biase` are bound but not read by this kernel.
+kernel void depthwise_conv_add_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                                   texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                   constant MetalConvParam &param [[buffer(0)]],
+                                   const device float *weights [[buffer(1)]],
+                                   const device float4 *biase [[buffer(2)]],
+                                   const device float4 *new_scale [[buffer(3)]],
+                                   const device float4 *new_biase [[buffer(4)]],
+                                   uint3 gid [[thread_position_in_grid]]) {
+
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+    uint output_slice = gid.z;
+    ushort2 stride = ushort2(param.strideX, param.strideY);
+    ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 9;
+    uint weithTo = gid.z * kernelHXW * 4;
+    float4 output = float4(0.0);
+    float4 inputs[9];
+    inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
+    inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
+    inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
+    inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
+    inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
+    inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
+    inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
+    inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
+    inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
+    for (int j = 0; j < 9; ++j) {
+        float4 input = inputs[j];
+        output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
+        output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
+        output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
+        output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
+    }
+    output = output + biase[gid.z];
+    outTexture.write(output, gid.xy, gid.z);
+}
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal
similarity index 81%
rename from metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal
rename to metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal
index a00c7a71a466b6754d0aa52f94bf99bb03531373..8c2566d71d9c654b1ddf55ff2769994c370949d8 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal
@@
-36,18 +36,6 @@ kernel void resize(texture2d inTexture [[texture(0)]], outTexture.write(half4(input.x, input.y, input.z, input.w), gid.xy, gid.z); } -kernel void relu(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero); - const half4 input = inTexture.read(gid.xy, gid.z); - const float4 relu = fmax((float4)input, 0.0); - outTexture.write(half4(relu), gid.xy, gid.z); -} - kernel void elementwise_add(texture2d_array inTexture [[texture(0)]], texture2d_array outTexture [[texture(1)]], const device half4 *biasTerms [[buffer(0)]], @@ -60,18 +48,6 @@ kernel void elementwise_add(texture2d_array inTexture [[text outTexture.write(input, gid.xy, gid.z); } -kernel void batchnorm(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - const device half4 * newScale [[buffer(0)]], - const device half4 * newBias [[buffer(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - const half4 input = inTexture.read(gid.xy, gid.z); - half4 output = input * newScale[gid.z] + newBias[gid.z]; - outTexture.write(output, gid.xy, gid.z); -} //kernel void texture2d_to_2d_array(texture2d inTexture [[texture(0)]], // texture2d_array outTexture [[texture(1)]], @@ -230,76 +206,6 @@ kernel void softmax_half(texture2d_array inTexture [[texture outTexture.write(rr, gid.xy, gid.z); } -kernel void prior_box(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { - - int max_sizes_size; - float max_sizes[2]; - - bool clip; - - float img_width; - float img_height; - - float step_width; - float 
step_height; - float offset; - - float aspect_ratios[2]; - int aspect_ratios_size; - - float center_x = (gid.x + offset) * step_width; - float center_y = (gid.y + offset) * step_width; - - float box_width, box_height; - - int min_sizes_size; - float min_sizes[2]; - - float min_size; - float max_size; - - if (gid.z < aspect_ratios_size) { - float ar = aspect_ratios[gid.z]; - box_width = min_size * sqrt(ar) / 2; - box_height = min_size / sqrt(ar) / 2; - float4 box; - box.x = (center_x - box_width) / img_width; - box.y = (center_y - box_height) / img_height; - box.z = (center_x + box_width) / img_width; - box.w = (center_y + box_height) / img_height; - - float4 res; - if (clip) { - res = min(max(box, 0.0), 1.0); - } else { - res = box; - } - - outTexture.write(res, gid.xy, gid.z); - } else if (gid.z >= aspect_ratios_size) { - int max_index = gid.z - aspect_ratios_size; - if (max_sizes_size > 0 && min_sizes_size > 0) { - box_width = box_height = sqrt(min_size * max_size) / 2; - float4 max_box; - max_box.x = (center_x - box_width) / img_width; - max_box.y = (center_y - box_height) / img_height; - max_box.z = (center_x + box_width) / img_width; - max_box.w = (center_y + box_height) / img_height; - - float4 res; - if (clip) { - res = min(max(max_box, 0.0), 1.0); - } else { - res = max_box; - } - - outTexture.write(max_box, gid.xy, gid.z); - } - } -} - inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) { abcd[2] = xyzn[0]; abcd[1] = xyzn[1]; diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PreluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PreluKernel.metal new file mode 100644 index 0000000000000000000000000000000000000000..f07b636052f823bd11e0aaa3f6be6ae17d8ca804 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PreluKernel.metal @@ -0,0 +1,84 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include <metal_stdlib>
+using namespace metal;
+
+
+
+// PReLU with one alpha per channel: alpha[gid.z] holds the four slopes of output slice gid.z.
+// out = in            where in > 0
+//       alpha * in    otherwise
+kernel void prelu_channel(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                          texture2d_array<float, access::write> outTexture [[texture(1)]],
+                          const device float4 *alpha [[buffer(0)]],
+                          uint3 gid [[thread_position_in_grid]]){
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
+    float4 slope = alpha[gid.z];
+    float4 output;
+    // Each component is written to its own lane.
+    output.x = input.x > 0 ? input.x : slope.x * input.x;
+    output.y = input.y > 0 ? input.y : slope.y * input.y;
+    output.z = input.z > 0 ? input.z : slope.z * input.z;
+    output.w = input.w > 0 ? input.w : slope.w * input.w;
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+
+// PReLU with one alpha per element: the alpha float4 for a texel is found at
+// (y * width + x) * array_size + z.
+kernel void prelu_element(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                          texture2d_array<float, access::write> outTexture [[texture(1)]],
+                          const device float4 *alpha [[buffer(0)]],
+                          uint3 gid [[thread_position_in_grid]]){
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
+
+    int alpha_to = (gid.y * inTexture.get_width() + gid.x) * inTexture.get_array_size();
+    float4 slope = alpha[alpha_to + gid.z];
+
+    float4 output;
+    output.x = input.x > 0 ? input.x : slope.x * input.x;
+    output.y = input.y > 0 ? input.y : slope.y * input.y;
+    output.z = input.z > 0 ? input.z : slope.z * input.z;
+    output.w = input.w > 0 ? input.w : slope.w * input.w;
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+
+// PReLU with a single shared alpha (alpha[0]) for every element.
+kernel void prelu_other(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                        texture2d_array<float, access::write> outTexture [[texture(1)]],
+                        const device float *alpha [[buffer(0)]],
+                        uint3 gid [[thread_position_in_grid]]){
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
+    float slope = alpha[0];
+
+    float4 output;
+    output.x = input.x > 0 ? input.x : slope * input.x;
+    output.y = input.y > 0 ? input.y : slope * input.y;
+    output.z = input.z > 0 ? input.z : slope * input.z;
+    output.w = input.w > 0 ? input.w : slope * input.w;
+    outTexture.write(output, gid.xy, gid.z);
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..7c6ab6dd03a512b99f391f7afebe3a8f7999a9d5
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/PriorBoxKernel.metal
@@ -0,0 +1,97 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include <metal_stdlib>
+using namespace metal;
+
+// Must match the Swift-side PriorBoxMetalParam field for field.
+struct PriorBoxMetalParam {
+    float offset;
+    float stepWidth;
+    float stepHeight;
+    float minSize;
+    float maxSize;
+    float imageWidth;
+    float imageHeight;
+
+    bool clip;
+
+    uint numPriors;
+    uint aspecRatiosSize;
+    uint minSizeSize;
+    uint maxSizeSize;
+};
+
+// Generates prior boxes. For each texel (x, y) and prior index z:
+//   z <  aspecRatiosSize : box built from minSize and aspect_ratios[z]
+//   z >= aspecRatiosSize : square box of side sqrt(minSize * maxSize), only if maxSizeSize > 0
+// Box corners are normalised by the image size and clamped to [0, 1] when `clip` is set.
+// The first float4 of `variances` is copied to varianceTexture for every z < numPriors.
+// NOTE(review): inTexture is never read here; its access qualifier was lost in extraction
+// and `access::read` is assumed — confirm.
+kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                      texture2d_array<float, access::write> outBoxTexture [[texture(1)]],
+                      texture2d_array<float, access::write> varianceTexture [[texture(2)]],
+                      constant PriorBoxMetalParam &param [[buffer(0)]],
+                      const device float *aspect_ratios [[buffer(1)]],
+                      const device float4 *variances [[buffer(2)]],
+                      uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outBoxTexture.get_width() ||
+        gid.y >= outBoxTexture.get_height() ||
+        gid.z >= outBoxTexture.get_array_size()) return;
+
+    float center_x = (gid.x + param.offset) * param.stepWidth;
+    float center_y = (gid.y + param.offset) * param.stepHeight;
+
+    // Half extents of the box for this prior; has_box stays false when nothing is to be written.
+    float box_width = 0.0;
+    float box_height = 0.0;
+    bool has_box = false;
+
+    if (gid.z < param.aspecRatiosSize) {
+        float ar = aspect_ratios[gid.z];
+        box_width = param.minSize * sqrt(ar) / 2;
+        box_height = param.minSize / sqrt(ar) / 2;
+        has_box = true;
+    } else if (param.maxSizeSize > 0) {
+        box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
+        has_box = true;
+    }
+
+    if (has_box) {
+        float4 box;
+        box.x = (center_x - box_width) / param.imageWidth;
+        box.y = (center_y - box_height) / param.imageHeight;
+        box.z = (center_x + box_width) / param.imageWidth;
+        box.w = (center_y + box_height) / param.imageHeight;
+
+        // Write the (optionally clipped) result in both branches.
+        float4 res = param.clip ? min(max(box, 0.0), 1.0) : box;
+        outBoxTexture.write(res, gid.xy, gid.z);
+    }
+
+    if (gid.z < param.numPriors) {
+        // NOTE(review): assumes varianceTexture has the same extent as outBoxTexture,
+        // which is the only texture bounds-checked above — confirm.
+        float4 variances_output = variances[0];
+        varianceTexture.write(variances_output, gid.xy, gid.z);
+    }
+}
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReluKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReluKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..e725440bbe997d571f1860bce323516144a94da8
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReluKernel.metal
@@ -0,0 +1,41 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/
+
+#include <metal_stdlib>
+using namespace metal;
+
+
+// Element-wise ReLU on a half-precision texture array: each thread reads one
+// texel, clamps its four channels at zero (computed in float) and writes the
+// result to the same coordinate of `outTexture`.
+// NOTE(review): the texture template arguments were lost in extraction and are
+// reconstructed from the half4 read/write below -- confirm.
+kernel void relu_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                      texture2d_array<half, access::write> outTexture [[texture(1)]],
+                      uint3 gid [[thread_position_in_grid]]) {
+  if (gid.x >= outTexture.get_width() ||
+      gid.y >= outTexture.get_height() ||
+      gid.z >= outTexture.get_array_size()) return;
+  // Texels are fetched with read(), so no sampler is needed here.
+  const half4 input = inTexture.read(gid.xy, gid.z);
+  const float4 relu = fmax((float4)input, 0.0);
+  outTexture.write(half4(relu), gid.xy, gid.z);
+}
+
+// Element-wise ReLU on a float texture array: each thread reads one texel,
+// clamps its four channels at zero and writes the result to the same
+// coordinate of `outTexture`.
+// NOTE(review): the texture template arguments were lost in extraction and are
+// reconstructed from the float4 read/write below -- confirm.
+kernel void relu(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                 texture2d_array<float, access::write> outTexture [[texture(1)]],
+                 uint3 gid [[thread_position_in_grid]]) {
+  if (gid.x >= outTexture.get_width() ||
+      gid.y >= outTexture.get_height() ||
+      gid.z >= outTexture.get_array_size()) return;
+  // Texels are fetched with read(), so no sampler is needed here.
+  const float4 input = inTexture.read(gid.xy, gid.z);
+  const float4 relu = fmax(input, 0.0);
+  outTexture.write(relu, gid.xy, gid.z);
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift
new file mode 100644
index 0000000000000000000000000000000000000000..26d19db2867037441832e028a578cca24769eeb2
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift
@@ -0,0 +1,55 @@
+///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. */
+
+
+import Foundation
+
+/// Inputs, output and attributes of the PReLU operator, resolved from an
+/// operator description and a scope.
+///
+/// NOTE(review): the generic clause was lost in extraction; `P` is required by
+/// `ParamPrecisionType = P`, but the `PrecisionType` constraint is assumed --
+/// confirm against the other operator params.
+class PreluParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    /// Looks up the `X`/`Alpha` inputs, the `Out` output and the `mode`
+    /// attribute.
+    /// - Throws: whatever the lookup helpers throw; errors propagate unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        input = try PreluParam.inputX(inputs: opDesc.inputs, from: inScope)
+        output = try PreluParam.outputOut(outputs: opDesc.outputs, from: inScope)
+        alpha = try PreluParam.inputAlpha(inputs: opDesc.inputs, from: inScope)
+        mode = try PreluParam.getAttr(key: "mode", attrs: opDesc.attrs)
+    }
+    let mode: String
+    let alpha: Tensor<P>
+    let input: Texture<P>
+    var output: Texture<P>
+}
+
+/// PReLU operator: forwards its parameters to the kernel on `runImpl`.
+///
+/// NOTE(review): the generic arguments were lost in extraction; the kernel type
+/// `PreluKernel<P>` is assumed from the operator name -- confirm.
+class PreluOp<P: PrecisionType>: Operator<PreluKernel<P>, PreluParam<P>>, Runable, Creator, InferShaperable{
+
+    /// Shape inference is currently disabled for this operator.
+    func inferShape() {
+        // para.output.dim = para.input.dim
+    }
+
+    typealias OpType = PreluOp<P>
+
+    /// Encodes the kernel into `buffer`.
+    /// - Throws: any error raised by `kernel.compute`, propagated unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+
+    /// Prints the input and output textures for debugging.
+    func delogOutput() {
+        // Headers name this operator; they previously said "softmax".
+        print("prelu delog")
+        let _: P? = para.input.metalTexture.logDesc(header: "prelu input: ", stridable: false)
+        let _: P? = para.output.metalTexture.logDesc(header: "prelu output: ", stridable: false)
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
index da7a60ba5b64ba8b07f3eecfde3ce99f9a31f861..37215dba591ed84c8df1036fdaee94828f0b5534 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
@@ -19,27 +19,49 @@ class PriorBoxParam: OpParam {
     required init(opDesc: OpDesc, inScope: Scope) throws {
         do {
             input = try PriorBoxParam.input(inputs: opDesc.inputs, from: inScope)
-            output = try PriorBoxParam.getFirstTensor(key: "Boxes", map: opDesc.outputs, from: inScope)
-            variances = try PriorBoxParam.getFirstTensor(key: "Variances", map: opDesc.outputs, from: inScope)
+            output = try PriorBoxParam.outputBoxes(outputs: opDesc.outputs, from: inScope)
+            inputImage = try PriorBoxParam.inputImage(inputs: opDesc.inputs, from: inScope)
+            outputVariances = try PriorBoxParam.outputVariances(outputs: opDesc.outputs, from: inScope)
+            minSizes = try PriorBoxParam.getAttr(key: "min_sizes", attrs: opDesc.attrs)
+            maxSizes = try PriorBoxParam.getAttr(key: "max_sizes", attrs: opDesc.attrs)
+            aspectRatios = try PriorBoxParam.getAttr(key: "aspect_ratios", attrs: opDesc.attrs)
+            variances = try PriorBoxParam.getAttr(key: "variances", attrs: opDesc.attrs)
+            flip = try PriorBoxParam.getAttr(key: "flip", attrs: opDesc.attrs)
+            clip = try PriorBoxParam.getAttr(key: "clip", attrs: opDesc.attrs)
+            stepW = try PriorBoxParam.getAttr(key: "step_w", attrs: opDesc.attrs)
+            stepH = try PriorBoxParam.getAttr(key: "step_h",
attrs: opDesc.attrs) + offset = try PriorBoxParam.getAttr(key: "offset", attrs: opDesc.attrs) } catch let error { throw error } } + + let minSizes: [Float32] + let maxSizes: [Float32] + let aspectRatios: [Float32] + var newAspectRatios: [Float32]? + let variances: [Float32] + let flip: Bool + let clip: Bool + var stepW: Float32 + var stepH: Float32 + let offset: Float32 + let input: Texture

+ let inputImage: Texture

var output: Texture

- let variances: Texture

+ let outputVariances: Texture

} class PriorBoxOp: Operator, PriorBoxParam

>, Runable, Creator, InferShaperable{ func inferShape() { - para.output.dim = para.input.dim } typealias OpType = PriorBoxOp

func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { do { - // try kernel.compute(commandBuffer: buffer, param: para) + try kernel.compute(commandBuffer: buffer, param: para) } catch let error { throw error } diff --git a/metal/paddle-mobile/paddle-mobile/framework/Dim.swift b/metal/paddle-mobile/paddle-mobile/framework/Dim.swift index d584744aae4b3d135cfb83ab51c7b158d77db45a..7e4a05a8dcfc17be10f183de36575342383bb560 100644 --- a/metal/paddle-mobile/paddle-mobile/framework/Dim.swift +++ b/metal/paddle-mobile/paddle-mobile/framework/Dim.swift @@ -31,11 +31,11 @@ public struct Dim { return dims.reduce(1) { $0 * $1 } } - static func ==(left: Dim, right: Dim) -> Bool { + public static func ==(left: Dim, right: Dim) -> Bool { return left.dims == right.dims; } - subscript(index: Int) -> Int { + public subscript(index: Int) -> Int { return dims[index]; } diff --git a/metal/paddle-mobile/paddle-mobile/framework/Texture.swift b/metal/paddle-mobile/paddle-mobile/framework/Texture.swift index 04e48c7b08ab3165175bb37aeb6e9a2568794a47..fb95ee65f066c4f454781e2767106cc2f06d65fc 100644 --- a/metal/paddle-mobile/paddle-mobile/framework/Texture.swift +++ b/metal/paddle-mobile/paddle-mobile/framework/Texture.swift @@ -41,15 +41,15 @@ extension InputTexture { public class Texture: Tensorial { var dim: Dim var tensorDim: Dim - private(set) var originDim: Dim + private(set) public var originDim: Dim private var textureDesc: MTLTextureDescriptor! - var metalTexture: MTLTexture! + public var metalTexture: MTLTexture! var transpose: [Int] = [0, 1, 2, 3] func initTexture(device: MTLDevice, transpose: [Int] = [0, 1, 2, 3]) { let newDim = transpose.map { originDim[$0] } - let newLayout = transpose.map {layout.layoutWithDim[$0] } + let newLayout = transpose.map { layout.layoutWithDim[$0] } layout = DataLayout.init(newLayout) dim = Dim.init(inDim: newDim)