未验证 提交 ad24d0c7 编写于 作者: R Ruilong Liu 提交者: GitHub

Merge pull request #830 from codeWorm2015/metal

add pribox nms convTranspose prelu op
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 50;
objects = {
/* Begin PBXBuildFile section */
FCEB6843212F00CC00D2448E /* PreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCEB6842212F00CC00D2448E /* PreluKernel.metal */; };
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
FCEB683F212F00CC00D2448E /* PreluKernel.metallib */ = {isa = PBXFileReference; explicitFileType = "archive.metal-library"; includeInIndex = 0; path = PreluKernel.metallib; sourceTree = BUILT_PRODUCTS_DIR; };
FCEB6842212F00CC00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXGroup section */
FCEB6838212F00CC00D2448E = {
isa = PBXGroup;
children = (
FCEB6841212F00CC00D2448E /* PreluKernel */,
FCEB6840212F00CC00D2448E /* Products */,
);
sourceTree = "<group>";
};
FCEB6840212F00CC00D2448E /* Products */ = {
isa = PBXGroup;
children = (
FCEB683F212F00CC00D2448E /* PreluKernel.metallib */,
);
name = Products;
sourceTree = "<group>";
};
FCEB6841212F00CC00D2448E /* PreluKernel */ = {
isa = PBXGroup;
children = (
FCEB6842212F00CC00D2448E /* PreluKernel.metal */,
);
path = PreluKernel;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
FCEB683E212F00CC00D2448E /* PreluKernel */ = {
isa = PBXNativeTarget;
buildConfigurationList = FCEB6846212F00CC00D2448E /* Build configuration list for PBXNativeTarget "PreluKernel" */;
buildPhases = (
FCEB683D212F00CC00D2448E /* Sources */,
);
buildRules = (
);
dependencies = (
);
name = PreluKernel;
productName = PreluKernel;
productReference = FCEB683F212F00CC00D2448E /* PreluKernel.metallib */;
productType = "com.apple.product-type.metal-library";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
FCEB6839212F00CC00D2448E /* Project object */ = {
isa = PBXProject;
attributes = {
LastUpgradeCheck = 0940;
ORGANIZATIONNAME = orange;
TargetAttributes = {
FCEB683E212F00CC00D2448E = {
CreatedOnToolsVersion = 9.4.1;
};
};
};
buildConfigurationList = FCEB683C212F00CC00D2448E /* Build configuration list for PBXProject "PreluKernel" */;
compatibilityVersion = "Xcode 9.3";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
);
mainGroup = FCEB6838212F00CC00D2448E;
productRefGroup = FCEB6840212F00CC00D2448E /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
FCEB683E212F00CC00D2448E /* PreluKernel */,
);
};
/* End PBXProject section */
/* Begin PBXSourcesBuildPhase section */
FCEB683D212F00CC00D2448E /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
FCEB6843212F00CC00D2448E /* PreluKernel.metal in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
FCEB6844212F00CC00D2448E /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
IPHONEOS_DEPLOYMENT_TARGET = 11.4;
MTL_ENABLE_DEBUG_INFO = YES;
SDKROOT = iphoneos;
};
name = Debug;
};
FCEB6845212F00CC00D2448E /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
IPHONEOS_DEPLOYMENT_TARGET = 11.4;
MTL_ENABLE_DEBUG_INFO = NO;
SDKROOT = iphoneos;
};
name = Release;
};
FCEB6847212F00CC00D2448E /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
CODE_SIGN_STYLE = Automatic;
DEVELOPMENT_TEAM = Z5M2UUN5YV;
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Debug;
};
FCEB6848212F00CC00D2448E /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
CODE_SIGN_STYLE = Automatic;
DEVELOPMENT_TEAM = Z5M2UUN5YV;
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
FCEB683C212F00CC00D2448E /* Build configuration list for PBXProject "PreluKernel" */ = {
isa = XCConfigurationList;
buildConfigurations = (
FCEB6844212F00CC00D2448E /* Debug */,
FCEB6845212F00CC00D2448E /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
FCEB6846212F00CC00D2448E /* Build configuration list for PBXNativeTarget "PreluKernel" */ = {
isa = XCConfigurationList;
buildConfigurations = (
FCEB6847212F00CC00D2448E /* Debug */,
FCEB6848212F00CC00D2448E /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = FCEB6839212F00CC00D2448E /* Project object */;
}
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
version = "1.0">
<FileRef
location = "self:PreluKernel.xcodeproj">
</FileRef>
</Workspace>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>IDEDidComputeMac32BitWarning</key>
<true/>
</dict>
</plist>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>SchemeUserState</key>
<dict>
<key>PreluKernel.xcscheme</key>
<dict>
<key>orderHint</key>
<integer>0</integer>
</dict>
</dict>
</dict>
</plist>
//
// PreluKernel.metal
// PreluKernel
//
// Created by liuRuiLong on 2018/8/23.
// Copyright © 2018年 orange. All rights reserved.
//
#include <metal_stdlib>
using namespace metal;
...@@ -15,104 +15,265 @@ import MetalPerformanceShaders ...@@ -15,104 +15,265 @@ import MetalPerformanceShaders
let modelHelperMap: [SupportModel : Net] = [.mobilenet : MobileNet.init(), .mobilenet_ssd : MobileNet_ssd_hand.init()] let modelHelperMap: [SupportModel : Net] = [.mobilenet : MobileNet.init(), .mobilenet_ssd : MobileNet_ssd_hand.init()]
enum SupportModel: String{ enum SupportModel: String{
case mobilenet = "mobilenet" case mobilenet = "mobilenet"
case mobilenet_ssd = "mobilenetssd" case mobilenet_ssd = "mobilenetssd"
static func supportedModels() -> [SupportModel] { static func supportedModels() -> [SupportModel] {
return [.mobilenet, .mobilenet_ssd] return [.mobilenet, .mobilenet_ssd]
} }
} }
protocol Net { protocol Net {
var dim: [Int] { get } var dim: [Int] { get }
var modelPath: String { get } var modelPath: String { get }
var paramPath: String { get } var paramPath: String { get }
var modelDir: String { get } var modelDir: String { get }
var preprocessKernel: CusomKernel { get } var preprocessKernel: CusomKernel { get }
func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void)
func resultStr(res: [Float]) -> String func resultStr(res: [Float]) -> String
} }
extension Net { extension Net {
func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) { func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) {
let texture = try? MetalHelper.shared.textureLoader.newTexture(cgImage: image, options: [:]) ?! " texture loader error" let texture = try? MetalHelper.shared.textureLoader.newTexture(cgImage: image, options: [:]) ?! " texture loader error"
MetalHelper.scaleTexture(queue: MetalHelper.shared.queue, input: texture!, size: (224, 224)) { (resTexture) in MetalHelper.scaleTexture(queue: MetalHelper.shared.queue, input: texture!, size: (224, 224)) { (resTexture) in
getTexture(resTexture) getTexture(resTexture)
}
} }
}
} }
struct MobileNet: Net{ struct MobileNet: Net{
class MobilenetPreProccess: CusomKernel { class MobilenetPreProccess: CusomKernel {
init(device: MTLDevice) { init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 224, inHeight: 224, inChannel: 3) let s = CusomKernel.Shape.init(inWidth: 224, inHeight: 224, inChannel: 3)
super.init(device: device, inFunctionName: "preprocess", outputDim: s, usePaddleMobileLib: false) super.init(device: device, inFunctionName: "preprocess", outputDim: s, usePaddleMobileLib: false)
}
}
class PreWords {
var contents: [String] = []
init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
if let filePath = inBundle.path(forResource: fileName, ofType: type) {
let string = try! String.init(contentsOfFile: filePath)
contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{
String($0[$0.index($0.startIndex, offsetBy: 10)...])
} }
}else{
fatalError("no file call \(fileName)")
}
} }
subscript(index: Int) -> String {
return contents[index]
}
}
let labels = PreWords.init(fileName: "synset")
func resultStr(res: [Float]) -> String {
var s: [String] = []
res.top(r: 5).enumerated().forEach{
s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100))
}
return s.joined(separator: "\n")
}
var preprocessKernel: CusomKernel
let dim = [1, 224, 224, 3]
let modelPath: String
let paramPath: String
let modelDir: String
init() {
modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "params", ofType: nil) ?! "para null"
modelDir = ""
preprocessKernel = MobilenetPreProccess.init(device: MetalHelper.shared.device)
}
}
struct MobileNet_ssd_hand: Net{
class MobilenetssdPreProccess: CusomKernel {
init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 300, inHeight: 300, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess", outputDim: s, usePaddleMobileLib: false)
}
}
func resultStr(res: [Float]) -> String {
fatalError()
}
func bboxArea(box: [Float32], normalized: Bool) -> Float32 {
if box[2] < box[0] || box[3] < box[1] {
return 0.0
} else {
let w = box[2] - box[0]
let h = box[3] - box[1]
if normalized {
return w * h
} else {
return (w + 1) * (h + 1)
}
}
}
func jaccardOverLap(box1: [Float32], box2: [Float32], normalized: Bool) -> Float32 {
if box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
box2[3] < box1[1] {
return 0.0
} else {
let interXmin = max(box1[0], box2[0])
let interYmin = max(box1[1], box2[1])
let interXmax = min(box1[2], box2[2])
let interYmax = min(box1[3], box2[3])
let interW = interXmax - interXmin
let interH = interYmax - interYmin
let interArea = interW * interH
let bbox1Area = bboxArea(box: box1, normalized: normalized)
let bbox2Area = bboxArea(box: box2, normalized: normalized)
return interArea / (bbox1Area + bbox2Area - interArea)
}
}
func fetchResult(paddleMobileRes: [String : Texture<Float32>]) -> [Float32]{
let bbox = paddleMobileRes["box_coder_0.tmp_0"] ?! " no bbox "
let scores = paddleMobileRes["transpose_12.tmp_0"] ?! " no scores "
let score_thredshold: Float32 = 0.01
let nms_top_k = 400
let keep_top_k = 200
let nms_eta: Float32 = 1.0
var nms_threshold: Float32 = 0.45
class PreWords { let bboxArr = bbox.metalTexture.floatArray { (f) -> Float32 in
var contents: [String] = [] return f
init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
if let filePath = inBundle.path(forResource: fileName, ofType: type) {
let string = try! String.init(contentsOfFile: filePath)
contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{
String($0[$0.index($0.startIndex, offsetBy: 10)...])
}
}else{
fatalError("no file call \(fileName)")
}
}
subscript(index: Int) -> String{
return contents[index]
}
} }
let labels = PreWords.init(fileName: "synset")
func resultStr(res: [Float]) -> String { let scoresArr = scores.metalTexture.floatArray { (f) -> Float32 in
var s: [String] = [] return f
res.top(r: 5).enumerated().forEach{
s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100))
}
return s.joined(separator: "\n")
} }
var preprocessKernel: CusomKernel var scoreFormatArr: [Float32] = []
let dim = [1, 224, 224, 3] var outputArr: [Float32] = []
let modelPath: String
let paramPath: String
let modelDir: String
init() { let numOfOneC = (scores.originDim[2] + 3) / 4 // 480
modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null" let cNumOfOneClass = numOfOneC * 4 // 1920
paramPath = Bundle.main.path(forResource: "params", ofType: nil) ?! "para null"
modelDir = "" let boxSize = bbox.originDim[2] // 4
preprocessKernel = MobilenetPreProccess.init(device: MetalHelper.shared.device) let classNum = scores.originDim[1] // 7
let classNumOneTexture = classNum * 4 // 28
for c in 0..<classNum {
for n in 0..<numOfOneC {
let to = n * classNumOneTexture + c * 4
scoreFormatArr.append(scoresArr[to])
scoreFormatArr.append(scoresArr[to + 1])
scoreFormatArr.append(scoresArr[to + 2])
scoreFormatArr.append(scoresArr[to + 3])
}
} }
}
var selectedIndexs: [Int : [(Int, Float32)]] = [:]
struct MobileNet_ssd_hand: Net{
class MobilenetssdPreProccess: CusomKernel { var numDet: Int = 0
init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 300, inHeight: 300, inChannel: 3) for i in 0..<classNum {
super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess", outputDim: s, usePaddleMobileLib: false) var sliceScore = scoreFormatArr[(i * cNumOfOneClass)..<((i + 1) * cNumOfOneClass)]
var scoreThresholdArr: [(Float32, Int)] = []
for i in 0..<cNumOfOneClass {
if sliceScore[i] > score_thredshold {
scoreThresholdArr.append((sliceScore[i], i))
} }
}
scoreThresholdArr.sort { $0 > $1 }
if scoreThresholdArr.count > nms_top_k {
scoreThresholdArr.removeLast(scoreThresholdArr.count - nms_top_k)
}
var selectedIndex: [(Int, Float32)] = []
while scoreThresholdArr.count > 0 {
let idx = scoreThresholdArr[0].1
let score = scoreThresholdArr[0].0
var keep = true
for j in 0..<selectedIndex.count {
if keep {
let keptIdx = selectedIndex[j].0
let box1 = Array<Float32>(bboxArr[(idx * boxSize)..<(idx * boxSize + 4)])
let box2 = Array<Float32>(bboxArr[(idx * boxSize)..<(keptIdx * boxSize + 4)])
let overlap = jaccardOverLap(box1: box1, box2: box2, normalized: true)
keep = (overlap <= nms_threshold)
} else {
break
}
}
if keep {
selectedIndex.append((idx, score))
}
scoreThresholdArr.removeFirst()
if keep && nms_eta < 1.0 && nms_threshold > 0.5 {
nms_threshold *= nms_eta
}
}
selectedIndexs[i] = selectedIndex
numDet += selectedIndex.count
} }
func resultStr(res: [Float]) -> String { var scoreIndexPairs: [(Float32, (Int, Int))] = []
fatalError() for selected in selectedIndexs {
for scoreIndex in selected.value {
scoreIndexPairs.append((scoreIndex.1, (selected.key, scoreIndex.0)))
}
} }
var preprocessKernel: CusomKernel scoreIndexPairs.sort { $0.0 > $1.0 }
let dim = [1, 300, 300, 3]
let modelPath: String if scoreIndexPairs.count > keep_top_k {
let paramPath: String scoreIndexPairs.removeLast(scoreIndexPairs.count - keep_top_k)
let modelDir: String }
init() { var newIndices: [Int : [(Int, Float32)]] = [:]
modelPath = Bundle.main.path(forResource: "ssd_hand_model", ofType: nil) ?! "model null" for scoreIndexPair in scoreIndexPairs {
paramPath = Bundle.main.path(forResource: "ssd_hand_params", ofType: nil) ?! "para null" // label: scoreIndexPair.1.0
modelDir = "" let label = scoreIndexPair.1.0
preprocessKernel = MobilenetssdPreProccess.init(device: MetalHelper.shared.device) if newIndices[label] != nil {
newIndices[label]?.append((scoreIndexPair.1.0, scoreIndexPair.0))
} else {
newIndices[label] = [(scoreIndexPair.1.0, scoreIndexPair.0)]
}
} }
for indice in newIndices {
let selectedIndexAndScore = indice.value
for indexAndScore in selectedIndexAndScore {
outputArr.append(Float32(indice.key)) // label
outputArr.append(indexAndScore.1) // score
let subBox = bboxArr[(indexAndScore.0 * boxSize)..<(indexAndScore.0 * boxSize + 4)]
outputArr.append(contentsOf: subBox)
}
}
return outputArr
}
var preprocessKernel: CusomKernel
let dim = [1, 300, 300, 3]
let modelPath: String
let paramPath: String
let modelDir: String
init() {
modelPath = Bundle.main.path(forResource: "ssd_hand_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "ssd_hand_params", ofType: nil) ?! "para null"
modelDir = ""
preprocessKernel = MobilenetssdPreProccess.init(device: MetalHelper.shared.device)
}
} }
...@@ -20,158 +20,157 @@ import MetalPerformanceShaders ...@@ -20,158 +20,157 @@ import MetalPerformanceShaders
let threadSupport = [1] let threadSupport = [1]
class ViewController: UIViewController { class ViewController: UIViewController {
@IBOutlet weak var resultTextView: UITextView! @IBOutlet weak var resultTextView: UITextView!
@IBOutlet weak var selectImageView: UIImageView! @IBOutlet weak var selectImageView: UIImageView!
@IBOutlet weak var elapsedTimeLabel: UILabel! @IBOutlet weak var elapsedTimeLabel: UILabel!
@IBOutlet weak var modelPickerView: UIPickerView! @IBOutlet weak var modelPickerView: UIPickerView!
@IBOutlet weak var threadPickerView: UIPickerView! @IBOutlet weak var threadPickerView: UIPickerView!
var selectImage: UIImage? var selectImage: UIImage?
var program: Program? var program: Program?
var executor: Executor<Float32>? var executor: Executor<Float32>?
var modelType: SupportModel = SupportModel.supportedModels()[0] var modelType: SupportModel = SupportModel.supportedModels()[0]
var toPredictTexture: MTLTexture? var toPredictTexture: MTLTexture?
var modelHelper: Net { var modelHelper: Net {
return modelHelperMap[modelType] ?! " has no this type " return modelHelperMap[modelType] ?! " has no this type "
}
var threadNum = 1
@IBAction func loadAct(_ sender: Any) {
let inModelHelper = modelHelper
let queue = MetalHelper.shared.queue
let loader = Loader<Float32>.init()
do {
let modelPath = inModelHelper.modelPath
let paraPath = inModelHelper.paramPath
program = try loader.load(device: MetalHelper.shared.device, modelPath: modelPath, paraPath: paraPath)
executor = try Executor<Float32>.init(inDevice: MetalHelper.shared.device, inQueue: queue, inProgram: program!)
} catch let error {
print(error)
} }
var threadNum = 1 }
@IBAction func loadAct(_ sender: Any) { @IBAction func selectImageAct(_ sender: Any) {
let inModelHelper = modelHelper let imagePicker = UIImagePickerController()
let queue = MetalHelper.shared.queue imagePicker.sourceType = .camera
let loader = Loader<Float32>.init() imagePicker.delegate = self
do { self.present(imagePicker, animated: true, completion: nil)
let modelPath = inModelHelper.modelPath }
let paraPath = inModelHelper.paramPath
@IBAction func clearAct(_ sender: Any) {
program = try loader.load(device: MetalHelper.shared.device, modelPath: modelPath, paraPath: paraPath) executor?.clear()
executor = try Executor<Float32>.init(inDevice: MetalHelper.shared.device, inQueue: queue, inProgram: program!) program = nil
} catch let error { executor = nil
print(error) }
}
} @IBAction func predictAct(_ sender: Any) {
guard let inTexture = toPredictTexture else {
@IBAction func selectImageAct(_ sender: Any) { resultTextView.text = "请选择图片 ! "
let imagePicker = UIImagePickerController() return
imagePicker.sourceType = .camera
imagePicker.delegate = self
self.present(imagePicker, animated: true, completion: nil)
} }
@IBAction func clearAct(_ sender: Any) { guard let inExecutor = executor else {
executor?.clear() resultTextView.text = "请先 load ! "
program = nil return
executor = nil
} }
@IBAction func predictAct(_ sender: Any) { do {
guard let inTexture = toPredictTexture else { let max = 10
resultTextView.text = "请选择图片 ! " var startDate = Date.init()
return for i in 0..<max {
} try inExecutor.predict(input: inTexture, expect: modelHelper.dim, completionHandle: { [weak self] (result) in
guard let sSelf = self else {
guard let inExecutor = executor else { fatalError()
resultTextView.text = "请先 load ! " }
return
} if i == (max / 2 - 1) {
startDate = Date.init()
do { }
let max = 10
var startDate = Date.init() if i == max - 1 {
for i in 0..<max { let time = Date.init().timeIntervalSince(startDate)
try inExecutor.predict(input: inTexture, expect: modelHelper.dim, completionHandle: { [weak self] (result) in DispatchQueue.main.async {
guard let sSelf = self else { sSelf.resultTextView.text = sSelf.modelHelper.resultStr(res: result.resultArr)
fatalError() sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max/2) * 1000.0) ms"
}
if i == (max / 2 - 1) {
startDate = Date.init()
}
if i == max - 1 {
let time = Date.init().timeIntervalSince(startDate)
DispatchQueue.main.async {
sSelf.resultTextView.text = sSelf.modelHelper.resultStr(res: result.resultArr)
sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max/2) * 1000.0) ms"
}
}
}, preProcessKernle: self.modelHelper.preprocessKernel)
} }
} catch let error { }
print(error) }, preProcessKernle: self.modelHelper.preprocessKernel)
} }
} catch let error {
print(error)
} }
}
override func viewDidLoad() {
super.viewDidLoad() override func viewDidLoad() {
modelPickerView.delegate = self super.viewDidLoad()
modelPickerView.dataSource = self modelPickerView.delegate = self
threadPickerView.delegate = self modelPickerView.dataSource = self
threadPickerView.dataSource = self threadPickerView.delegate = self
threadPickerView.dataSource = self
selectImage = UIImage.init(named: "banana.jpeg")
selectImageView.image = selectImage selectImage = UIImage.init(named: "banana.jpeg")
modelHelper.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in selectImageView.image = selectImage
self?.toPredictTexture = texture modelHelper.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in
} self?.toPredictTexture = texture
} }
}
} }
extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate{ extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate{
func numberOfComponents(in pickerView: UIPickerView) -> Int { func numberOfComponents(in pickerView: UIPickerView) -> Int {
if pickerView == modelPickerView { if pickerView == modelPickerView {
return 1 return 1
} else if pickerView == threadPickerView { } else if pickerView == threadPickerView {
return 1 return 1
} else { } else {
fatalError() fatalError()
}
} }
}
func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int {
if pickerView == modelPickerView { func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int {
return SupportModel.supportedModels().count if pickerView == modelPickerView {
} else if pickerView == threadPickerView { return SupportModel.supportedModels().count
return threadSupport.count } else if pickerView == threadPickerView {
} else { return threadSupport.count
fatalError() } else {
} fatalError()
} }
}
public func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? {
if pickerView == modelPickerView { public func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? {
return SupportModel.supportedModels()[row].rawValue if pickerView == modelPickerView {
} else if pickerView == threadPickerView { return SupportModel.supportedModels()[row].rawValue
return "\(threadSupport[row])" } else if pickerView == threadPickerView {
} else { return "\(threadSupport[row])"
fatalError() } else {
} fatalError()
} }
}
public func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) {
if pickerView == modelPickerView { public func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) {
self.modelType = SupportModel.supportedModels()[row] if pickerView == modelPickerView {
} else if pickerView == threadPickerView { self.modelType = SupportModel.supportedModels()[row]
self.threadNum = threadSupport[row] } else if pickerView == threadPickerView {
} else { self.threadNum = threadSupport[row]
fatalError() } else {
} fatalError()
} }
}
} }
extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate { extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate {
func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) { func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) {
picker.dismiss(animated: true){[weak self] in picker.dismiss(animated: true){[weak self] in
guard let sSelf = self, let image = info["UIImagePickerControllerOriginalImage"] as? UIImage else{ guard let sSelf = self, let image = info["UIImagePickerControllerOriginalImage"] as? UIImage else{
fatalError("no image") fatalError("no image")
} }
sSelf.selectImage = image sSelf.selectImage = image
sSelf.selectImageView.image = image sSelf.selectImageView.image = image
sSelf.modelHelper.getTexture(image: image.cgImage!, getTexture: { (texture) in sSelf.modelHelper.getTexture(image: image.cgImage!, getTexture: { (texture) in
sSelf.toPredictTexture = texture sSelf.toPredictTexture = texture
}) })
}
} }
}
} }
// /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// AppDelegate.swift
// paddle-mobile-unit-test Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// Created by liuRuiLong on 2018/8/10. You may obtain a copy of the License at
// Copyright © 2018年 orange. All rights reserved.
// http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import UIKit import UIKit
......
// /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// ViewController.swift
// paddle-mobile-unit-test Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// Created by liuRuiLong on 2018/8/10. You may obtain a copy of the License at
// Copyright © 2018年 orange. All rights reserved.
// http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import UIKit import UIKit
import Metal import Metal
......
...@@ -69,6 +69,14 @@ ...@@ -69,6 +69,14 @@
FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7120F343420007374F /* ConvAddOp.swift */; }; FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7120F343420007374F /* ConvAddOp.swift */; };
FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7320F3437E0007374F /* ConvAddKernel.swift */; }; FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7320F3437E0007374F /* ConvAddKernel.swift */; };
FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDC0FEA21099A1D00DC9EFB /* Tools.swift */; }; FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDC0FEA21099A1D00DC9EFB /* Tools.swift */; };
FCDDC6C6212F9FB800E5EF74 /* PreluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */; };
FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */; };
FCDDC6CA212FDF6800E5EF74 /* BatchNormKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */; };
FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */; };
FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */; };
FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */; };
FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCEB6849212F00DB00D2448E /* PreluKernel.metal */; };
FCEB684C212F093800D2448E /* PreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEB684B212F093800D2448E /* PreluOp.swift */; };
FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */; }; FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */; };
FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */; }; FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */; };
FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2D73720E64E70007AC5F5 /* Kernel.swift */; }; FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2D73720E64E70007AC5F5 /* Kernel.swift */; };
...@@ -141,9 +149,17 @@ ...@@ -141,9 +149,17 @@
FCD04E7120F343420007374F /* ConvAddOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddOp.swift; sourceTree = "<group>"; }; FCD04E7120F343420007374F /* ConvAddOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddOp.swift; sourceTree = "<group>"; };
FCD04E7320F3437E0007374F /* ConvAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddKernel.swift; sourceTree = "<group>"; }; FCD04E7320F3437E0007374F /* ConvAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddKernel.swift; sourceTree = "<group>"; };
FCDC0FEA21099A1D00DC9EFB /* Tools.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Tools.swift; sourceTree = "<group>"; }; FCDC0FEA21099A1D00DC9EFB /* Tools.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Tools.swift; sourceTree = "<group>"; };
FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluKernel.swift; sourceTree = "<group>"; };
FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvTransposeKernel.swift; sourceTree = "<group>"; };
FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = BatchNormKernel.metal; sourceTree = "<group>"; };
FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ReluKernel.metal; sourceTree = "<group>"; };
FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PriorBoxKernel.metal; sourceTree = "<group>"; };
FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvTransposeOp.swift; sourceTree = "<group>"; };
FCEB6849212F00DB00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = "<group>"; };
FCEB684B212F093800D2448E /* PreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluOp.swift; sourceTree = "<group>"; };
FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = ConvAddBatchNormReluOp.swift; path = "paddle-mobile/Operators/ConvAddBatchNormReluOp.swift"; sourceTree = SOURCE_ROOT; }; FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = ConvAddBatchNormReluOp.swift; path = "paddle-mobile/Operators/ConvAddBatchNormReluOp.swift"; sourceTree = SOURCE_ROOT; };
FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluKernel.swift; sourceTree = "<group>"; }; FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluKernel.swift; sourceTree = "<group>"; };
FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Kernel.swift"; sourceTree = SOURCE_ROOT; }; FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Base/Kernel.swift"; sourceTree = SOURCE_ROOT; };
/* End PBXFileReference section */ /* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */ /* Begin PBXFrameworksBuildPhase section */
...@@ -255,6 +271,8 @@ ...@@ -255,6 +271,8 @@
FCBCCC66212306B000D94F7E /* ConcatOp.swift */, FCBCCC66212306B000D94F7E /* ConcatOp.swift */,
FCBCCC6A2123071700D94F7E /* BoxcoderOp.swift */, FCBCCC6A2123071700D94F7E /* BoxcoderOp.swift */,
FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */, FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */,
FCDE8A32212A917900F4A8F6 /* ConvTransposeOp.swift */,
FCEB684B212F093800D2448E /* PreluOp.swift */,
); );
path = Operators; path = Operators;
sourceTree = "<group>"; sourceTree = "<group>";
...@@ -279,15 +297,15 @@ ...@@ -279,15 +297,15 @@
FC086BA520E67E8500D85EF7 /* Kernels */ = { FC086BA520E67E8500D85EF7 /* Kernels */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FCDDC6CD212FE02100E5EF74 /* Base */,
FCEB6837212F00B100D2448E /* metal */,
FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */,
FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */, FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */,
FCF2D73720E64E70007AC5F5 /* Kernel.swift */,
FC1B16B220EC9A4F00678B91 /* Kernels.metal */,
FC1B186520ECF1C600678B91 /* ResizeKernel.swift */, FC1B186520ECF1C600678B91 /* ResizeKernel.swift */,
FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */, FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */,
FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */, FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */,
FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */, FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */,
FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */, FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */,
FC4CB74820F0B954007C0C6D /* ConvKernel.metal */,
FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */, FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */,
FCD04E6720F315020007374F /* PoolKernel.swift */, FCD04E6720F315020007374F /* PoolKernel.swift */,
FCD04E6B20F31A280007374F /* SoftmaxKernel.swift */, FCD04E6B20F31A280007374F /* SoftmaxKernel.swift */,
...@@ -299,6 +317,7 @@ ...@@ -299,6 +317,7 @@
FCBCCC68212306D300D94F7E /* ConcatKernel.swift */, FCBCCC68212306D300D94F7E /* ConcatKernel.swift */,
FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */, FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */,
FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */, FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */,
FCDDC6C5212F9FB800E5EF74 /* PreluKernel.swift */,
); );
path = Kernels; path = Kernels;
sourceTree = "<group>"; sourceTree = "<group>";
...@@ -313,6 +332,27 @@ ...@@ -313,6 +332,27 @@
path = Base; path = Base;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FCDDC6CD212FE02100E5EF74 /* Base */ = {
isa = PBXGroup;
children = (
FCF2D73720E64E70007AC5F5 /* Kernel.swift */,
);
path = Base;
sourceTree = "<group>";
};
FCEB6837212F00B100D2448E /* metal */ = {
isa = PBXGroup;
children = (
FC1B16B220EC9A4F00678B91 /* Kernels.metal */,
FC4CB74820F0B954007C0C6D /* ConvKernel.metal */,
FCEB6849212F00DB00D2448E /* PreluKernel.metal */,
FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */,
FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */,
FCDDC6CE212FE14700E5EF74 /* PriorBoxKernel.metal */,
);
path = metal;
sourceTree = "<group>";
};
/* End PBXGroup section */ /* End PBXGroup section */
/* Begin PBXHeadersBuildPhase section */ /* Begin PBXHeadersBuildPhase section */
...@@ -417,6 +457,7 @@ ...@@ -417,6 +457,7 @@
FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */, FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */,
FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */, FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */,
FC039BAA20E11CBC0081E9F8 /* ElementwiseAddOp.swift in Sources */, FC039BAA20E11CBC0081E9F8 /* ElementwiseAddOp.swift in Sources */,
FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */,
FCBCCC6B2123071700D94F7E /* BoxcoderOp.swift in Sources */, FCBCCC6B2123071700D94F7E /* BoxcoderOp.swift in Sources */,
FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */, FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */,
FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */, FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */,
...@@ -426,11 +467,15 @@ ...@@ -426,11 +467,15 @@
FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */, FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */,
FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */, FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */,
FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */, FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */,
FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */,
FCDDC6C6212F9FB800E5EF74 /* PreluKernel.swift in Sources */,
FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */, FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */,
FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */, FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */,
FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */, FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */,
FCEB684C212F093800D2448E /* PreluOp.swift in Sources */,
FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */, FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */,
FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */, FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */,
FCDDC6CA212FDF6800E5EF74 /* BatchNormKernel.metal in Sources */,
FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */, FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */,
FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */, FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */,
FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */, FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */,
...@@ -456,6 +501,7 @@ ...@@ -456,6 +501,7 @@
FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */, FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */,
FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */, FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */,
FCBCCC69212306D300D94F7E /* ConcatKernel.swift in Sources */, FCBCCC69212306D300D94F7E /* ConcatKernel.swift in Sources */,
FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */,
FC82735920E3C04200BE430A /* OpCreator.swift in Sources */, FC82735920E3C04200BE430A /* OpCreator.swift in Sources */,
FCBCCC5D2122F8A100D94F7E /* DepthwiseConvOp.swift in Sources */, FCBCCC5D2122F8A100D94F7E /* DepthwiseConvOp.swift in Sources */,
FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */, FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */,
...@@ -468,6 +514,8 @@ ...@@ -468,6 +514,8 @@
FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */, FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */,
FCBCCC67212306B000D94F7E /* ConcatOp.swift in Sources */, FCBCCC67212306B000D94F7E /* ConcatOp.swift in Sources */,
FCD04E6C20F31A280007374F /* SoftmaxKernel.swift in Sources */, FCD04E6C20F31A280007374F /* SoftmaxKernel.swift in Sources */,
FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */,
FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */,
FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */, FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */,
FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */, FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */,
FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */, FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */,
......
...@@ -17,11 +17,13 @@ import Foundation ...@@ -17,11 +17,13 @@ import Foundation
public class ResultHolder<P: PrecisionType> { public class ResultHolder<P: PrecisionType> {
public let dim: [Int] public let dim: [Int]
public let resultArr: [P] public let resultArr: [P]
public var intermediateResults: [Texture<P>]?
public let elapsedTime: Double public let elapsedTime: Double
public init(inDim: [Int], inResult: [P], inElapsedTime: Double) { public init(inDim: [Int], inResult: [P], inElapsedTime: Double, inIntermediateResults: [Texture<P>]? = nil) {
dim = inDim dim = inDim
resultArr = inResult resultArr = inResult
elapsedTime = inElapsedTime elapsedTime = inElapsedTime
intermediateResults = inIntermediateResults
} }
} }
...@@ -69,7 +71,7 @@ public class Executor<P: PrecisionType> { ...@@ -69,7 +71,7 @@ public class Executor<P: PrecisionType> {
} }
} }
} }
} }
public func predict(input: MTLTexture, expect: [Int], completionHandle: @escaping (ResultHolder<P>) -> Void, preProcessKernle: CusomKernel? = nil) throws { public func predict(input: MTLTexture, expect: [Int], completionHandle: @escaping (ResultHolder<P>) -> Void, preProcessKernle: CusomKernel? = nil) throws {
guard let buffer = queue.makeCommandBuffer() else { guard let buffer = queue.makeCommandBuffer() else {
...@@ -116,7 +118,6 @@ public class Executor<P: PrecisionType> { ...@@ -116,7 +118,6 @@ public class Executor<P: PrecisionType> {
// self.ops[2].delogOutput() // self.ops[2].delogOutput()
let afterDate = Date.init() let afterDate = Date.init()
guard let outputVar = self.program.scope.output() else { guard let outputVar = self.program.scope.output() else {
......
...@@ -22,147 +22,194 @@ import Foundation ...@@ -22,147 +22,194 @@ import Foundation
*/ */
protocol OpParam { protocol OpParam {
associatedtype OutputType: Variant associatedtype OutputType: Variant
var output: OutputType { get set } var output: OutputType { get set }
func outputDesc() -> String func outputDesc() -> String
associatedtype ParamPrecisionType: PrecisionType associatedtype ParamPrecisionType: PrecisionType
init(opDesc: OpDesc, inScope: Scope) throws init(opDesc: OpDesc, inScope: Scope) throws
static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType
static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func inputMean<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func inputMean<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func inputScale<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func inputScale<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func inputVariance<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func inputVariance<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func inputFilter<VarType: Variant>(paraInputs: [String : [String]], from: Scope) throws -> VarType static func inputFilter<VarType: Variant>(paraInputs: [String : [String]], from: Scope) throws -> VarType
static func input<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func input<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func output<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType static func output<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func outputY<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType static func outputY<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func inputY<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func inputY<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func outputOut<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func getAttr<T>(key: String, attrs: [String : Attr]) throws -> T static func inputImage<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func outputBoxes<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func outputOut<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func outputVariances<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType
static func getAttr<T>(key: String, attrs: [String : Attr]) throws -> T
static func inputAlpha<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
} }
extension OpParam { extension OpParam {
func outputDesc() -> String { func outputDesc() -> String {
return output.debugDescription return output.debugDescription
}
static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType {
guard let mapKeys = map[key], mapKeys.count > 0 else {
throw PaddleMobileError.paramError(message: key + " not found in \(map) or maped values is empty")
} }
guard let variant = from[mapKeys[0]], let v = variant as? VarType else {
static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType { throw PaddleMobileError.paramError(message: mapKeys[0] + " not found in scope")
guard let mapKeys = map[key], mapKeys.count > 0 else {
throw PaddleMobileError.paramError(message: key + " not found in \(map) or maped values is empty")
}
guard let variant = from[mapKeys[0]], let v = variant as? VarType else {
throw PaddleMobileError.paramError(message: mapKeys[0] + " not found in scope")
}
return v
} }
return v
static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType { }
do {
let tensorX: VarType = try getFirstTensor(key: "X", map: inputs, from: from) static func outputVariances<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
do {
return tensorX let tensorVariances: VarType = try getFirstTensor(key: "Variances", map: outputs, from: from)
} catch let error { return tensorVariances
throw error } catch let error {
} throw error
} }
}
static func input<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do { static func inputAlpha<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
let tensorInput: VarType = try getFirstTensor(key: "Input", map: inputs, from: from) do {
return tensorInput let alphaTensor: VarType = try getFirstTensor(key: "Alpha", map: inputs, from: from)
} catch let error { return alphaTensor
throw error } catch let error {
} throw error
} }
}
static func output<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorOutput: VarType = try getFirstTensor(key: "Output", map: outputs, from: from) static func inputImage<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
return tensorOutput do {
} catch let error { let tensorImage: VarType = try getFirstTensor(key: "Image", map: inputs, from: from)
throw error return tensorImage
} } catch let error {
} throw error
static func outputY<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorOutputY: VarType = try getFirstTensor(key: "Y", map: outputs, from: from)
return tensorOutputY
} catch let error {
throw error
}
}
static func inputY<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorY: VarType = try getFirstTensor(key: "Y", map: inputs, from: from)
return tensorY
} catch let error {
throw error
}
} }
}
static func outputOut<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
do { static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
let out: VarType = try getFirstTensor(key: "Out", map: outputs, from: from) do {
return out let tensorX: VarType = try getFirstTensor(key: "X", map: inputs, from: from)
} catch let error { return tensorX
throw error } catch let error {
} throw error
}
static func inputFilter<VarType: Variant>(paraInputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorFilter: VarType = try getFirstTensor(key: "Filter", map: paraInputs, from: from)
return tensorFilter
} catch let error {
throw error
}
} }
}
static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do { static func outputBoxes<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
let tensorBias: VarType = try getFirstTensor(key: "Bias", map: inputs, from: from) do {
return tensorBias let tensorBox: VarType = try getFirstTensor(key: "Boxes", map: outputs, from: from)
} catch let error { return tensorBox
throw error } catch let error {
} throw error
} }
}
static func inputMean<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do { static func input<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
let tensorMean: VarType = try getFirstTensor(key: "Mean", map: inputs, from: from) do {
return tensorMean let tensorInput: VarType = try getFirstTensor(key: "Input", map: inputs, from: from)
} catch let error { return tensorInput
throw error } catch let error {
} throw error
} }
}
static func inputScale<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do { static func output<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
let tensorScale: VarType = try getFirstTensor(key: "Scale", map: inputs, from: from) do {
return tensorScale let tensorOutput: VarType = try getFirstTensor(key: "Output", map: outputs, from: from)
} catch let error { return tensorOutput
throw error } catch let error {
} throw error
} }
}
static func inputVariance<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType { static func outputY<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
do { do {
let tensorVariance: VarType = try getFirstTensor(key: "Variance", map: inputs, from: from) let tensorOutputY: VarType = try getFirstTensor(key: "Y", map: outputs, from: from)
return tensorVariance return tensorOutputY
} catch let error { } catch let error {
throw error throw error
} }
}
static func inputY<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorY: VarType = try getFirstTensor(key: "Y", map: inputs, from: from)
return tensorY
} catch let error {
throw error
}
}
static func outputOut<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
do {
let out: VarType = try getFirstTensor(key: "Out", map: outputs, from: from)
return out
} catch let error {
throw error
}
}
static func inputFilter<VarType: Variant>(paraInputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorFilter: VarType = try getFirstTensor(key: "Filter", map: paraInputs, from: from)
return tensorFilter
} catch let error {
throw error
}
}
static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorBias: VarType = try getFirstTensor(key: "Bias", map: inputs, from: from)
return tensorBias
} catch let error {
throw error
}
}
static func inputMean<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorMean: VarType = try getFirstTensor(key: "Mean", map: inputs, from: from)
return tensorMean
} catch let error {
throw error
}
}
static func inputScale<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorScale: VarType = try getFirstTensor(key: "Scale", map: inputs, from: from)
return tensorScale
} catch let error {
throw error
}
}
static func inputVariance<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
do {
let tensorVariance: VarType = try getFirstTensor(key: "Variance", map: inputs, from: from)
return tensorVariance
} catch let error {
throw error
}
}
static func getAttr<T>(key: String, attrs: [String : Attr]) throws -> T{
guard let attr = attrs[key] else {
throw PaddleMobileError.paramError(message: "attr \(key) can't found in: \(attrs)" )
} }
static func getAttr<T>(key: String, attrs: [String : Attr]) throws -> T{ guard let tAttr = attr as? T else {
guard let attr = attrs[key] else { throw PaddleMobileError.paramError(message: "key: \(key) attr: \(attr) type error" )
throw PaddleMobileError.paramError(message: "attr \(key) can't found in: \(attrs)" )
}
guard let tAttr = attr as? T else {
throw PaddleMobileError.paramError(message: "key: \(key) attr: \(attr) type error" )
}
return tAttr
} }
return tAttr
}
} }
...@@ -14,10 +14,12 @@ ...@@ -14,10 +14,12 @@
import Foundation import Foundation
class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam { class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddBatchNormReluParam.input(inputs: opDesc.inputs, from: inScope) input = try ConvAddBatchNormReluParam.input(inputs: opDesc.inputs, from: inScope)
output = try ConvAddBatchNormReluParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try ConvAddBatchNormReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
...@@ -29,6 +31,7 @@ class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam { ...@@ -29,6 +31,7 @@ class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam {
groups = try ConvAddBatchNormReluParam.getAttr(key: "groups", attrs: opDesc.attrs) groups = try ConvAddBatchNormReluParam.getAttr(key: "groups", attrs: opDesc.attrs)
variance = try ConvAddBatchNormReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope) variance = try ConvAddBatchNormReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope)
bias = try ConvAddBatchNormReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope) bias = try ConvAddBatchNormReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope)
scale = try ConvAddBatchNormReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope) scale = try ConvAddBatchNormReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope)
mean = try ConvAddBatchNormReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope) mean = try ConvAddBatchNormReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope)
y = try ConvAddBatchNormReluParam.inputY(inputs: opDesc.paraInputs, from: inScope) y = try ConvAddBatchNormReluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
......
...@@ -61,6 +61,7 @@ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>, ...@@ -61,6 +61,7 @@ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>,
typealias OpType = ConvAddOp<P> typealias OpType = ConvAddOp<P>
func inferShape() { func inferShape() {
let inDims = para.input.dim let inDims = para.input.dim
let filterDim = para.filter.dim let filterDim = para.filter.dim
let strides = para.stride let strides = para.stride
......
///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License. */
import Foundation
class ConvTransposeParam<P: PrecisionType>: ConvParam<P> {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
try super.init(opDesc: opDesc, inScope: inScope)
} catch let error {
throw error
}
}
}
class ConvTransposeOp<P: PrecisionType>: Operator<ConvTransposeKernel<P>, ConvTransposeParam<P>>, Runable, Creator, InferShaperable{
func inferShape() {
// para.output.dim = para.input.dim
}
typealias OpType = ConvTransposeOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print("conv transpose delog")
let _: P? = para.input.metalTexture.logDesc(header: "conv transpose input: ", stridable: true)
let _: P? = para.output.metalTexture.logDesc(header: "conv transpose output: ", stridable: true)
}
}
...@@ -109,6 +109,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable ...@@ -109,6 +109,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil") throw PaddleMobileError.predictError(message: " encode is nil")
} }
encoder.setTexture(param.input.metalTexture, index: 0) encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1) encoder.setTexture(param.output.metalTexture, index: 1)
......
...@@ -13,38 +13,44 @@ ...@@ -13,38 +13,44 @@
limitations under the License. */ limitations under the License. */
import Foundation import Foundation
import MetalPerformanceShaders
class ConvAddKernel<P: PrecisionType>: Kernel, Computable { class ConvAddKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam! var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddParam<P>) { required init(device: MTLDevice, param: ConvAddParam<P>) {
super.init(device: device, inFunctionName: "conv_add_1x1") if param.filter.width == 1 && param.filter.height == 1 {
let offsetX = param.filter.width/2 - Int(param.paddings[0]) super.init(device: device, inFunctionName: "conv_add_1x1")
let offsetY = param.filter.height/2 - Int(param.paddings[1]) } else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3")
param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) } else {
param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) super.init(device: device, inFunctionName: "conv_add_3x3")
print("offset x: \(offsetX)")
print("offset y: \(offsetY)")
let offsetZ = 0.0
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]))
} }
func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam<P>) throws { param.output.initTexture(device: device, transpose: [0, 3, 1, 2])
let offsetX = param.filter.width/2 - Int(param.paddings[0])
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { let offsetY = param.filter.height/2 - Int(param.paddings[1])
throw PaddleMobileError.predictError(message: " encode is nil")
} param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1) print("offset x: \(offsetX)")
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0) print("offset y: \(offsetY)")
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.setBuffer(param.y.buffer, offset: 0, index: 2) let offsetZ = 0.0
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]))
encoder.endEncoding() }
func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
} }
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.setBuffer(param.y.buffer, offset: 0, index: 2)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
} }
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
limitations under the License. */ limitations under the License. */
import Foundation import Foundation
import MetalPerformanceShaders
struct ConvBNReluTestParam: TestParam { struct ConvBNReluTestParam: TestParam {
let inputTexture: MTLTexture let inputTexture: MTLTexture
...@@ -24,6 +25,7 @@ struct ConvBNReluTestParam: TestParam { ...@@ -24,6 +25,7 @@ struct ConvBNReluTestParam: TestParam {
let newBiaseBuffer: MTLBuffer let newBiaseBuffer: MTLBuffer
let filterSize: (width: Int, height: Int, channel: Int) let filterSize: (width: Int, height: Int, channel: Int)
init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) { init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) {
inputTexture = inInputTexture inputTexture = inInputTexture
outputTexture = inOutputTexture outputTexture = inOutputTexture
metalParam = inMetalParam metalParam = inMetalParam
......
...@@ -14,38 +14,46 @@ ...@@ -14,38 +14,46 @@
import Foundation import Foundation
public struct MetalConvParam { public struct MetalConvParam {
let offsetX: Int16 let offsetX: Int16
let offsetY: Int16 let offsetY: Int16
let offsetZ: Int16 let offsetZ: Int16
let strideX: UInt16 let strideX: UInt16
let strideY: UInt16 let strideY: UInt16
let paddedZ: UInt16 let paddedZ: UInt16
} }
class ConvKernel<P: PrecisionType>: Kernel, Computable { class ConvKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam! var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvParam<P>) { required init(device: MTLDevice, param: ConvParam<P>) {
super.init(device: device, inFunctionName: "conv_add_1x1") if param.filter.width == 1 && param.filter.height == 1 {
let offsetX = param.filter.dim[2]/2 - Int(param.paddings[0]) super.init(device: device, inFunctionName: "conv_1x1")
let offsetY = param.filter.dim[1]/2 - Int(param.paddings[1]) } else if param.filter.channel == 1 {
let offsetZ = 0.0 super.init(device: device, inFunctionName: "depthwise_conv_3x3")
param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32) } else {
super.init(device: device, inFunctionName: "conv_3x3")
metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]))
} }
func compute(commandBuffer: MTLCommandBuffer, param: ConvParam<P>) throws { let offsetX = param.filter.dim[2]/2 - Int(param.paddings[0])
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { let offsetY = param.filter.dim[1]/2 - Int(param.paddings[1])
throw PaddleMobileError.predictError(message: " encode is nil") let offsetZ = 0.0
} param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
encoder.setTexture(param.input.metalTexture, index: 0) metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]))
encoder.setTexture(param.output.metalTexture, index: 1) }
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1) func compute(commandBuffer: MTLCommandBuffer, param: ConvParam<P>) throws {
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
encoder.endEncoding() throw PaddleMobileError.predictError(message: " encode is nil")
} }
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
struct MetalConvTransposeParam {
let kernelW: UInt16;
let kernelH: UInt16;
let strideX: UInt16;
let strideY: UInt16;
let paddingX: UInt16;
let paddingY: UInt16;
let dilationX: UInt16;
let dilationY: UInt16;
}
class ConvTransposeKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: MetalConvTransposeParam!
required init(device: MTLDevice, param: ConvTransposeParam<P>) {
super.init(device: device, inFunctionName: "conv_transpose")
let kernelWidth = UInt16(param.filter.width)
let kernelHeight = UInt16(param.filter.height)
let strideX = UInt16(param.stride[0])
let strideY = UInt16(param.stride[1])
let paddingX = UInt16(param.paddings[0])
let paddingY = UInt16(param.paddings[1])
let dilationX = UInt16(param.dilations[0])
let dilationY = UInt16(param.dilations[1])
metalParam = MetalConvTransposeParam.init(kernelW: kernelWidth, kernelH: kernelHeight, strideX: strideX, strideY: strideY, paddingX: paddingX, paddingY: paddingY, dilationX: dilationX, dilationY: dilationY)
}
func compute(commandBuffer: MTLCommandBuffer, param: ConvTransposeParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encoder is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvTransposeParam>.size, index: 0)
encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
//
// PreluKernel.swift
// paddle-mobile
//
// Created by liuRuiLong on 2018/8/24.
// Copyright © 2018年 orange. All rights reserved.
//
import Foundation
class PreluKernel<P: PrecisionType>: Kernel, Computable{
required init(device: MTLDevice, param: PreluParam<P>) {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "prelu_channel")
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "prelu_element")
} else {
super.init(device: device, inFunctionName: "prelu_other")
}
}
func compute(commandBuffer: MTLCommandBuffer, param: PreluParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encoder is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBuffer(param.alpha.buffer, offset: 0, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
...@@ -14,18 +14,89 @@ ...@@ -14,18 +14,89 @@
import Foundation import Foundation
struct PriorBoxMetalParam {
let offset: Float32
let stepWidth: Float32
let stepHeight: Float32
let minSize: Float32
let maxSize: Float32
let imageWidth: Float32
let imageHeight: Float32
let clip: Bool
let numPriors: uint
let aspecRatiosSize: uint
let minSizeSize: uint
let maxSizeSize: uint
}
class PriorBoxKernel<P: PrecisionType>: Kernel, Computable{ class PriorBoxKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: PriorBoxMetalParam!
required init(device: MTLDevice, param: PriorBoxParam<P>) {
super.init(device: device, inFunctionName: "priorbox")
param.output.initTexture(device: device, transpose: [2, 0, 1, 3])
param.outputVariances.initTexture(device: device, transpose: [2, 0, 1, 3])
let imageWidth = Float32(param.inputImage.originDim[3])
let imageHeight = Float32(param.inputImage.originDim[2])
let featureWidth = param.inputImage.originDim[3]
let featureHeight = param.inputImage.originDim[2]
if param.stepW == 0 || param.stepH == 0 {
param.stepW = Float32(imageWidth) / Float32(featureWidth)
param.stepH = Float32(imageHeight) / Float32(featureHeight)
}
var outputAspectRatior: [Float32] = []
outputAspectRatior.append(1.0)
let epsilon = 1e-6
for ar in param.aspectRatios {
var alreadyExist = false
for outputAr in outputAspectRatior {
if fabs(Double(ar) - Double(outputAr)) < Double(epsilon) {
alreadyExist = true
break
}
}
if !alreadyExist {
outputAspectRatior.append(ar)
}
if param.flip {
outputAspectRatior.append(1.0 / ar)
}
}
param.newAspectRatios = outputAspectRatior
let aspectRatiosSize = uint(outputAspectRatior.count)
let maxSizeSize: uint = uint(param.maxSizes.count)
let minSizeSize: uint = uint(param.minSizes.count)
let numPriors = aspectRatiosSize * minSizeSize + maxSizeSize
let minSize = param.minSizes.last ?? 0.0
let maxSize = param.maxSizes.last ?? 0.0
metalParam = PriorBoxMetalParam.init(offset: param.offset, stepWidth: param.stepW, stepHeight: param.stepH, minSize: minSize, maxSize: maxSize, imageWidth: imageWidth, imageHeight: imageHeight, clip: param.clip, numPriors: numPriors, aspecRatiosSize: aspectRatiosSize, minSizeSize: minSizeSize, maxSizeSize: maxSizeSize)
}
func compute(commandBuffer: MTLCommandBuffer, param: PriorBoxParam<P>) throws { func compute(commandBuffer: MTLCommandBuffer, param: PriorBoxParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil") throw PaddleMobileError.predictError(message: " encode is nil")
} }
encoder.setTexture(param.input.metalTexture, index: 0) encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1) encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setTexture(param.outputVariances.metalTexture, index: 2)
encoder.setBytes(&metalParam, length: MemoryLayout<PriorBoxMetalParam>.size, index: 0)
encoder.setBytes(param.aspectRatios, length: MemoryLayout<Float32>.size * param.aspectRatios.count, index: 1)
encoder.setBytes(param.variances, length: MemoryLayout<Float32>.size * param.variances.count, index: 2)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding() encoder.endEncoding()
} }
required init(device: MTLDevice, param: PriorBoxParam<P>) {
super.init(device: device, inFunctionName: "priorbox")
}
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void batchnorm_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half4 * newScale [[buffer(0)]],
const device half4 * newBias [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
const half4 input = inTexture.read(gid.xy, gid.z);
half4 output = input * newScale[gid.z] + newBias[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void batchnorm(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 * newScale [[buffer(0)]],
const device float4 * newBias [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
const float4 input = inTexture.read(gid.xy, gid.z);
float4 output = input * newScale[gid.z] + newBias[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
...@@ -314,49 +314,6 @@ kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> ...@@ -314,49 +314,6 @@ kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample>
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]], kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]], texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]], constant MetalConvParam &param [[buffer(0)]],
...@@ -398,3 +355,347 @@ kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access ...@@ -398,3 +355,347 @@ kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access
output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0); output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
struct MetalConvTransposeParam{
ushort kernelW;
ushort kernelH;
ushort strideX;
ushort strideY;
ushort paddingX;
ushort paddingY;
ushort dilationX;
ushort dilationY;
};
kernel void conv_transpose(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvTransposeParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
int input_array_size = inTexture.get_array_size();
uint kernel_one_output_slice = input_array_size * param.kernelW * param.kernelH;
uint kernel_stride_z = gid.z * 4 * (kernel_one_output_slice);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 output;
for (int w = 0; w < param.kernelW; ++w) {
int input_x = (gid.x - w * param.dilationX + param.paddingX) / param.strideX;
if (input_x < 0 || input_x >= int(inTexture.get_width())) {
continue;
}
for (int h = 0; h < param.kernelH; ++h) {
int input_y = (gid.y - h * param.dilationY + param.paddingY) / param.strideY;
if (input_y < 0 || input_y >= int(inTexture.get_height())) {
continue;
}
uint kernel_index = (w * param.kernelH + h) * inTexture.get_array_size();
for (int slice = 0; slice < input_array_size; ++slice) {
float4 input;
float4 kernel_slice = weights[kernel_stride_z + 0 * kernel_one_output_slice + kernel_index + slice];
float4 kernel_slice1 = weights[kernel_stride_z + 1 * kernel_one_output_slice + kernel_index + slice];
float4 kernel_slice2 = weights[kernel_stride_z + 2 * kernel_one_output_slice + kernel_index + slice];
float4 kernel_slice3 = weights[kernel_stride_z + 3 * kernel_one_output_slice + kernel_index + slice];
input = inTexture.sample(sample, float2(input_x, input_x), slice);
output.x += dot(input, kernel_slice);
output.x += dot(input, kernel_slice1);
output.x += dot(input, kernel_slice2);
output.x += dot(input, kernel_slice3);
}
}
}
outTexture.write(output, gid.xy, gid.z);
}
// conv
#pragma mark -- conv
kernel void conv_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
outTexture.write(output, gid.xy, gid.z);
}
#pragma mark - convAdd
kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_add_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
...@@ -36,18 +36,6 @@ kernel void resize(texture2d<half, access::read> inTexture [[texture(0)]], ...@@ -36,18 +36,6 @@ kernel void resize(texture2d<half, access::read> inTexture [[texture(0)]],
outTexture.write(half4(input.x, input.y, input.z, input.w), gid.xy, gid.z); outTexture.write(half4(input.x, input.y, input.z, input.w), gid.xy, gid.z);
} }
kernel void relu(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);
const half4 input = inTexture.read(gid.xy, gid.z);
const float4 relu = fmax((float4)input, 0.0);
outTexture.write(half4(relu), gid.xy, gid.z);
}
kernel void elementwise_add(texture2d_array<half, access::read> inTexture [[texture(0)]], kernel void elementwise_add(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]], texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half4 *biasTerms [[buffer(0)]], const device half4 *biasTerms [[buffer(0)]],
...@@ -60,18 +48,6 @@ kernel void elementwise_add(texture2d_array<half, access::read> inTexture [[text ...@@ -60,18 +48,6 @@ kernel void elementwise_add(texture2d_array<half, access::read> inTexture [[text
outTexture.write(input, gid.xy, gid.z); outTexture.write(input, gid.xy, gid.z);
} }
kernel void batchnorm(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half4 * newScale [[buffer(0)]],
const device half4 * newBias [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
const half4 input = inTexture.read(gid.xy, gid.z);
half4 output = input * newScale[gid.z] + newBias[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
//kernel void texture2d_to_2d_array(texture2d<half, access::read> inTexture [[texture(0)]], //kernel void texture2d_to_2d_array(texture2d<half, access::read> inTexture [[texture(0)]],
// texture2d_array<half, access::write> outTexture [[texture(1)]], // texture2d_array<half, access::write> outTexture [[texture(1)]],
...@@ -230,76 +206,6 @@ kernel void softmax_half(texture2d_array<half, access::read> inTexture [[texture ...@@ -230,76 +206,6 @@ kernel void softmax_half(texture2d_array<half, access::read> inTexture [[texture
outTexture.write(rr, gid.xy, gid.z); outTexture.write(rr, gid.xy, gid.z);
} }
kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
int max_sizes_size;
float max_sizes[2];
bool clip;
float img_width;
float img_height;
float step_width;
float step_height;
float offset;
float aspect_ratios[2];
int aspect_ratios_size;
float center_x = (gid.x + offset) * step_width;
float center_y = (gid.y + offset) * step_width;
float box_width, box_height;
int min_sizes_size;
float min_sizes[2];
float min_size;
float max_size;
if (gid.z < aspect_ratios_size) {
float ar = aspect_ratios[gid.z];
box_width = min_size * sqrt(ar) / 2;
box_height = min_size / sqrt(ar) / 2;
float4 box;
box.x = (center_x - box_width) / img_width;
box.y = (center_y - box_height) / img_height;
box.z = (center_x + box_width) / img_width;
box.w = (center_y + box_height) / img_height;
float4 res;
if (clip) {
res = min(max(box, 0.0), 1.0);
} else {
res = box;
}
outTexture.write(res, gid.xy, gid.z);
} else if (gid.z >= aspect_ratios_size) {
int max_index = gid.z - aspect_ratios_size;
if (max_sizes_size > 0 && min_sizes_size > 0) {
box_width = box_height = sqrt(min_size * max_size) / 2;
float4 max_box;
max_box.x = (center_x - box_width) / img_width;
max_box.y = (center_y - box_height) / img_height;
max_box.z = (center_x + box_width) / img_width;
max_box.w = (center_y + box_height) / img_height;
float4 res;
if (clip) {
res = min(max(max_box, 0.0), 1.0);
} else {
res = max_box;
}
outTexture.write(max_box, gid.xy, gid.z);
}
}
}
inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) { inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) {
abcd[2] = xyzn[0]; abcd[2] = xyzn[0];
abcd[1] = xyzn[1]; abcd[1] = xyzn[1];
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void prelu_channel(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, gid.x, gid.y, gid.z);
float4 output;
output.x = input.x > 0 ? input.x : alpha[gid.z].x;
output.x = input.y > 0 ? input.y : alpha[gid.z].y;
output.x = input.z > 0 ? input.z : alpha[gid.z].z;
output.x = input.w > 0 ? input.w : alpha[gid.z].w;
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_element(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, gid.x, gid.y, gid.z);
int alpha_to = (gid.y * inTexture.get_width() + gid.x) * inTexture.get_array_size();
float4 output;
output.x = input.x > 0 ? input.x : alpha[alpha_to + gid.z].x;
output.x = input.y > 0 ? input.y : alpha[alpha_to + gid.z].y;
output.x = input.z > 0 ? input.z : alpha[alpha_to + gid.z].z;
output.x = input.w > 0 ? input.w : alpha[alpha_to + gid.z].w;
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_other(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, gid.x, gid.y, gid.z);
float4 output;
output.x = input.x > 0 ? input.x : alpha[0];
output.x = input.y > 0 ? input.y : alpha[0];
output.x = input.z > 0 ? input.z : alpha[0];
output.x = input.w > 0 ? input.w : alpha[0];
outTexture.write(output, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct PriorBoxMetalParam {
float offset;
float stepWidth;
float stepHeight;
float minSize;
float maxSize;
float imageWidth;
float imageHeight;
bool clip;
uint numPriors;
uint aspecRatiosSize;
uint minSizeSize;
uint maxSizeSize;
};
kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outBoxTexture [[texture(1)]],
texture2d_array<float, access::write> varianceTexture [[texture(2)]],
constant PriorBoxMetalParam &param [[buffer(0)]],
const device float *aspect_ratios [[buffer(1)]],
const device float4 *variances [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outBoxTexture.get_width() ||
gid.y >= outBoxTexture.get_height() ||
gid.z >= outBoxTexture.get_array_size()) return;
float center_x = (gid.x + param.offset) * param.stepWidth;
float center_y = (gid.y + param.offset) * param.stepHeight;
float box_width, box_height;
if (gid.z < param.aspecRatiosSize) {
float ar = aspect_ratios[gid.z];
box_width = param.minSize * sqrt(ar) / 2;
box_height = param.minSize / sqrt(ar) / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = min(max(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(res, gid.xy, gid.z);
} else if (gid.z >= param.aspecRatiosSize) {
if (param.maxSizeSize > 0) {
box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
float4 max_box;
max_box.x = (center_x - box_width) / param.imageWidth;
max_box.y = (center_y - box_height) / param.imageHeight;
max_box.z = (center_x + box_width) / param.imageWidth;
max_box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = min(max(max_box, 0.0), 1.0);
} else {
res = max_box;
}
outBoxTexture.write(max_box, gid.xy, gid.z);
}
}
float4 variance = variances[0];
if (gid.z < param.numPriors) {
float4 variances_output;
variances_output.x = variance.x;
variances_output.y = variance.y;
variances_output.z = variance.z;
variances_output.w = variance.w;
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void relu_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);
const half4 input = inTexture.read(gid.xy, gid.z);
const float4 relu = fmax((float4)input, 0.0);
outTexture.write(half4(relu), gid.xy, gid.z);
}
kernel void relu(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);
const float4 input = inTexture.read(gid.xy, gid.z);
const float4 relu = fmax((float4)input, 0.0);
outTexture.write(float4(relu), gid.xy, gid.z);
}
///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License. */
import Foundation
class PreluParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try PreluParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try PreluParam.outputOut(outputs: opDesc.outputs, from: inScope)
alpha = try PreluParam.inputAlpha(inputs: opDesc.inputs, from: inScope)
mode = try PreluParam.getAttr(key: "mode", attrs: opDesc.attrs)
} catch let error {
throw error
}
}
let mode: String
let alpha: Tensor<P>
let input: Texture<P>
var output: Texture<P>
}
class PreluOp<P: PrecisionType>: Operator<PreluKernel<P>, PreluParam<P>>, Runable, Creator, InferShaperable{
func inferShape() {
// para.output.dim = para.input.dim
}
typealias OpType = PreluOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print("softmax delog")
let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false)
let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false)
}
}
...@@ -19,27 +19,49 @@ class PriorBoxParam<P: PrecisionType>: OpParam { ...@@ -19,27 +19,49 @@ class PriorBoxParam<P: PrecisionType>: OpParam {
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
input = try PriorBoxParam.input(inputs: opDesc.inputs, from: inScope) input = try PriorBoxParam.input(inputs: opDesc.inputs, from: inScope)
output = try PriorBoxParam.getFirstTensor(key: "Boxes", map: opDesc.outputs, from: inScope) output = try PriorBoxParam.outputBoxes(outputs: opDesc.outputs, from: inScope)
variances = try PriorBoxParam.getFirstTensor(key: "Variances", map: opDesc.outputs, from: inScope) inputImage = try PriorBoxParam.inputImage(inputs: opDesc.inputs, from: inScope)
outputVariances = try PriorBoxParam.outputVariances(outputs: opDesc.outputs, from: inScope)
minSizes = try PriorBoxParam.getAttr(key: "min_sizes", attrs: opDesc.attrs)
maxSizes = try PriorBoxParam.getAttr(key: "max_sizes", attrs: opDesc.attrs)
aspectRatios = try PriorBoxParam.getAttr(key: "aspect_ratios", attrs: opDesc.attrs)
variances = try PriorBoxParam.getAttr(key: "variances", attrs: opDesc.attrs)
flip = try PriorBoxParam.getAttr(key: "flip", attrs: opDesc.attrs)
clip = try PriorBoxParam.getAttr(key: "clop", attrs: opDesc.attrs)
stepW = try PriorBoxParam.getAttr(key: "step_w", attrs: opDesc.attrs)
stepH = try PriorBoxParam.getAttr(key: "step_h", attrs: opDesc.attrs)
offset = try PriorBoxParam.getAttr(key: "offset", attrs: opDesc.attrs)
} catch let error { } catch let error {
throw error throw error
} }
} }
let minSizes: [Float32]
let maxSizes: [Float32]
let aspectRatios: [Float32]
var newAspectRatios: [Float32]?
let variances: [Float32]
let flip: Bool
let clip: Bool
var stepW: Float32
var stepH: Float32
let offset: Float32
let input: Texture<P> let input: Texture<P>
let inputImage: Texture<P>
var output: Texture<P> var output: Texture<P>
let variances: Texture<P> let outputVariances: Texture<P>
} }
class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>>, Runable, Creator, InferShaperable{ class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>>, Runable, Creator, InferShaperable{
func inferShape() { func inferShape() {
para.output.dim = para.input.dim
} }
typealias OpType = PriorBoxOp<P> typealias OpType = PriorBoxOp<P>
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do { do {
// try kernel.compute(commandBuffer: buffer, param: para) try kernel.compute(commandBuffer: buffer, param: para)
} catch let error { } catch let error {
throw error throw error
} }
......
...@@ -31,11 +31,11 @@ public struct Dim { ...@@ -31,11 +31,11 @@ public struct Dim {
return dims.reduce(1) { $0 * $1 } return dims.reduce(1) { $0 * $1 }
} }
static func ==(left: Dim, right: Dim) -> Bool { public static func ==(left: Dim, right: Dim) -> Bool {
return left.dims == right.dims; return left.dims == right.dims;
} }
subscript(index: Int) -> Int { public subscript(index: Int) -> Int {
return dims[index]; return dims[index];
} }
......
...@@ -41,15 +41,15 @@ extension InputTexture { ...@@ -41,15 +41,15 @@ extension InputTexture {
public class Texture<P: PrecisionType>: Tensorial { public class Texture<P: PrecisionType>: Tensorial {
var dim: Dim var dim: Dim
var tensorDim: Dim var tensorDim: Dim
private(set) var originDim: Dim private(set) public var originDim: Dim
private var textureDesc: MTLTextureDescriptor! private var textureDesc: MTLTextureDescriptor!
var metalTexture: MTLTexture! public var metalTexture: MTLTexture!
var transpose: [Int] = [0, 1, 2, 3] var transpose: [Int] = [0, 1, 2, 3]
func initTexture(device: MTLDevice, transpose: [Int] = [0, 1, 2, 3]) { func initTexture(device: MTLDevice, transpose: [Int] = [0, 1, 2, 3]) {
let newDim = transpose.map { originDim[$0] } let newDim = transpose.map { originDim[$0] }
let newLayout = transpose.map {layout.layoutWithDim[$0] } let newLayout = transpose.map { layout.layoutWithDim[$0] }
layout = DataLayout.init(newLayout) layout = DataLayout.init(newLayout)
dim = Dim.init(inDim: newDim) dim = Dim.init(inDim: newDim)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册